diff --git "a/checkpoint-19170/trainer_state.json" "b/checkpoint-19170/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-19170/trainer_state.json" @@ -0,0 +1,268401 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9813374805598754, + "eval_steps": 500, + "global_step": 19170, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.4140000000000001e-06, + "logits/chosen": -2.5086417198181152, + "logits/rejected": -3.203915596008301, + "logps/chosen": -269.4033508300781, + "logps/rejected": -367.62701416015625, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 2.8280000000000003e-06, + "logits/chosen": -3.1561923027038574, + "logits/rejected": -2.894665479660034, + "logps/chosen": -262.482177734375, + "logps/rejected": -399.16632080078125, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 4.242e-06, + "logits/chosen": -2.1246187686920166, + "logits/rejected": -3.3757476806640625, + "logps/chosen": -126.50579071044922, + "logps/rejected": -295.8104553222656, + "loss": 0.6548, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.03143272548913956, + "rewards/margins": 0.08754082769155502, + "rewards/rejected": -0.05610809847712517, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 5.6560000000000006e-06, + "logits/chosen": -2.6667706966400146, + "logits/rejected": -3.327197551727295, + "logps/chosen": -75.5797119140625, + "logps/rejected": -231.74932861328125, + "loss": 0.7548, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.011453056707978249, + "rewards/margins": -0.11505832523107529, + "rewards/rejected": 0.1265113800764084, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 7.07e-06, + "logits/chosen": -2.287858724594116, + "logits/rejected": -3.250361204147339, + "logps/chosen": -127.48991394042969, + "logps/rejected": -297.7454833984375, + "loss": 0.6063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00843658298254013, + "rewards/margins": 0.1822967529296875, + "rewards/rejected": -0.19073334336280823, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 8.484e-06, + "logits/chosen": -1.3831285238265991, + "logits/rejected": -3.2593634128570557, + "logps/chosen": -26.63646697998047, + "logps/rejected": -309.2322998046875, + "loss": 0.6619, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.03124532662332058, + "rewards/margins": 0.08337698876857758, + "rewards/rejected": -0.05213166028261185, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 9.897999999999999e-06, + "logits/chosen": -3.174571990966797, + "logits/rejected": -3.393775463104248, + "logps/chosen": -458.83203125, + "logps/rejected": -445.83074951171875, + "loss": 0.6141, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009772110730409622, + "rewards/margins": 0.1659069061279297, + "rewards/rejected": -0.15613479912281036, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 1.1312000000000001e-05, + "logits/chosen": -1.7797986268997192, + "logits/rejected": -3.1419765949249268, + "logps/chosen": -73.52362060546875, + "logps/rejected": -282.697509765625, + "loss": 0.6524, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.028323745355010033, + "rewards/margins": 0.0866062194108963, + "rewards/rejected": -0.11492996662855148, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 1.2726e-05, + "logits/chosen": -2.2057223320007324, + "logits/rejected": -3.106630325317383, + "logps/chosen": -449.2705078125, + "logps/rejected": -323.7679443359375, + "loss": 0.4808, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09592133015394211, + "rewards/margins": 0.4825485348701477, + "rewards/rejected": -0.386627197265625, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 1.414e-05, + "logits/chosen": -2.5301289558410645, + "logits/rejected": -3.225210428237915, + "logps/chosen": -556.2100830078125, + "logps/rejected": -372.2738037109375, + "loss": 0.4348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2774917781352997, + "rewards/margins": 0.6076034903526306, + "rewards/rejected": -0.33011168241500854, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.4139266559468852e-05, + "logits/chosen": -3.0371036529541016, + "logits/rejected": -3.4004364013671875, + "logps/chosen": -133.5618896484375, + "logps/rejected": -128.92803955078125, + "loss": 0.5454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011985018849372864, + "rewards/margins": 0.3236961364746094, + "rewards/rejected": -0.3117111325263977, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 1.4138533118937704e-05, + "logits/chosen": -2.3650686740875244, + "logits/rejected": -3.174999475479126, + "logps/chosen": -244.70138549804688, + "logps/rejected": -268.13934326171875, + "loss": 0.6843, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13391418755054474, + "rewards/margins": 0.02821960300207138, + "rewards/rejected": -0.16213379800319672, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 1.4137799678406556e-05, + "logits/chosen": -3.058119058609009, + "logits/rejected": -2.821755886077881, + "logps/chosen": -195.75218200683594, + "logps/rejected": -163.65643310546875, + "loss": 0.4584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4121711850166321, + "rewards/margins": 0.5678001642227173, + "rewards/rejected": -0.9799713492393494, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 1.4137066237875408e-05, + "logits/chosen": -2.434926986694336, + "logits/rejected": -3.309842586517334, + "logps/chosen": -486.09210205078125, + "logps/rejected": -501.29052734375, + "loss": 0.3028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22082214057445526, + "rewards/margins": 1.077412486076355, + "rewards/rejected": -1.2982345819473267, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 1.4136332797344261e-05, + "logits/chosen": -3.2895941734313965, + "logits/rejected": -3.2228989601135254, + "logps/chosen": -380.43310546875, + "logps/rejected": -186.6892852783203, + "loss": 0.3605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2637947201728821, + "rewards/margins": 0.8599517941474915, + "rewards/rejected": -0.5961570739746094, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 1.4135599356813113e-05, + "logits/chosen": -3.2979166507720947, + "logits/rejected": -3.1061203479766846, + "logps/chosen": -142.44114685058594, + "logps/rejected": -346.1613464355469, + "loss": 0.2632, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20711135864257812, + "rewards/margins": 1.576393961906433, + "rewards/rejected": -1.369282603263855, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 1.4134865916281965e-05, + "logits/chosen": -3.3012208938598633, + "logits/rejected": -2.5676136016845703, + "logps/chosen": -199.82125854492188, + "logps/rejected": -256.73162841796875, + "loss": 0.5426, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.28947752714157104, + "rewards/margins": 0.5156166553497314, + "rewards/rejected": -0.8050941228866577, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 1.4134132475750817e-05, + "logits/chosen": -2.259432792663574, + "logits/rejected": -2.842252016067505, + "logps/chosen": -141.25851440429688, + "logps/rejected": -240.53997802734375, + "loss": 0.2337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18080464005470276, + "rewards/margins": 1.403419017791748, + "rewards/rejected": -1.2226142883300781, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 1.4133399035219669e-05, + "logits/chosen": -3.0986716747283936, + "logits/rejected": -2.7734227180480957, + "logps/chosen": -189.27720642089844, + "logps/rejected": -246.6731719970703, + "loss": 0.1499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12278899550437927, + "rewards/margins": 1.8253586292266846, + "rewards/rejected": -1.9481475353240967, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 1.413266559468852e-05, + "logits/chosen": -3.338831901550293, + "logits/rejected": -3.1463205814361572, + "logps/chosen": -52.455162048339844, + "logps/rejected": -47.041255950927734, + "loss": 0.5807, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21637341380119324, + "rewards/margins": 0.24284303188323975, + "rewards/rejected": -0.026469610631465912, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.4131932154157372e-05, + "logits/chosen": -3.1297948360443115, + "logits/rejected": -3.289628028869629, + "logps/chosen": -27.345109939575195, + "logps/rejected": -156.64242553710938, + "loss": 0.2715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08488836139440536, + "rewards/margins": 1.2791380882263184, + "rewards/rejected": -1.1942497491836548, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 1.4131198713626224e-05, + "logits/chosen": -2.2943403720855713, + "logits/rejected": -3.014017105102539, + "logps/chosen": -259.5403747558594, + "logps/rejected": -317.24481201171875, + "loss": 0.3358, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07488556206226349, + "rewards/margins": 1.3045883178710938, + "rewards/rejected": -1.3794738054275513, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 1.4130465273095076e-05, + "logits/chosen": -2.5674049854278564, + "logits/rejected": -3.313891649246216, + "logps/chosen": -295.3573913574219, + "logps/rejected": -334.8388671875, + "loss": 0.2006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13698109984397888, + "rewards/margins": 1.511479139328003, + "rewards/rejected": -1.3744980096817017, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 1.412973183256393e-05, + "logits/chosen": -3.4474947452545166, + "logits/rejected": -3.1368541717529297, + "logps/chosen": -135.3192596435547, + "logps/rejected": -113.61327362060547, + "loss": 0.8012, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3773350119590759, + "rewards/margins": 0.05142664909362793, + "rewards/rejected": -0.42876166105270386, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 1.4128998392032782e-05, + "logits/chosen": -1.8953357934951782, + "logits/rejected": -3.2400851249694824, + "logps/chosen": -121.24170684814453, + "logps/rejected": -379.90924072265625, + "loss": 0.3751, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.3013401925563812, + "rewards/margins": 1.7896745204925537, + "rewards/rejected": -1.48833429813385, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 1.4128264951501634e-05, + "logits/chosen": -3.2839300632476807, + "logits/rejected": -3.331352949142456, + "logps/chosen": -392.43658447265625, + "logps/rejected": -321.1914367675781, + "loss": 1.2372, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.2185386419296265, + "rewards/margins": -0.21571874618530273, + "rewards/rejected": -1.0028198957443237, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 1.4127531510970485e-05, + "logits/chosen": -3.148965358734131, + "logits/rejected": -2.372163772583008, + "logps/chosen": -461.156982421875, + "logps/rejected": -255.55641174316406, + "loss": 1.0023, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.7082420587539673, + "rewards/margins": -0.17505431175231934, + "rewards/rejected": -0.533187747001648, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 1.4126798070439339e-05, + "logits/chosen": -3.154017210006714, + "logits/rejected": -1.6296652555465698, + "logps/chosen": -158.13185119628906, + "logps/rejected": -89.58174133300781, + "loss": 1.4068, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.2216283082962036, + "rewards/margins": -0.6168500781059265, + "rewards/rejected": -0.6047782897949219, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 1.4126064629908191e-05, + "logits/chosen": -2.2476418018341064, + "logits/rejected": -3.2716259956359863, + "logps/chosen": -154.4903564453125, + "logps/rejected": -233.02072143554688, + "loss": 0.2767, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012425616383552551, + "rewards/margins": 2.288769006729126, + "rewards/rejected": -2.301194667816162, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 1.4125331189377043e-05, + "logits/chosen": -2.65697979927063, + "logits/rejected": -3.064199447631836, + "logps/chosen": -211.4549560546875, + "logps/rejected": -133.25994873046875, + "loss": 0.6546, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.8226875066757202, + "rewards/margins": 0.5049644708633423, + "rewards/rejected": -1.3276519775390625, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.4124597748845895e-05, + "logits/chosen": -3.239142894744873, + "logits/rejected": -2.861971139907837, + "logps/chosen": -383.2229919433594, + "logps/rejected": -477.25787353515625, + "loss": 0.0664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4815448820590973, + "rewards/margins": 2.8938283920288086, + "rewards/rejected": -3.375373125076294, + "step": 31 + }, + { + "epoch": 0.0, + "learning_rate": 1.4123864308314747e-05, + "logits/chosen": -2.9143593311309814, + "logits/rejected": -3.126284599304199, + "logps/chosen": -199.43202209472656, + "logps/rejected": -249.26019287109375, + "loss": 0.1218, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.244944766163826, + "rewards/margins": 2.0441932678222656, + "rewards/rejected": -2.289138078689575, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 1.41231308677836e-05, + "logits/chosen": -2.8893258571624756, + "logits/rejected": -3.016972541809082, + "logps/chosen": -379.676025390625, + "logps/rejected": -349.13946533203125, + "loss": 0.1582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25186461210250854, + "rewards/margins": 2.065934658050537, + "rewards/rejected": -2.3177995681762695, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 1.4122397427252452e-05, + "logits/chosen": -1.3720592260360718, + "logits/rejected": -2.971277952194214, + "logps/chosen": -108.38858032226562, + "logps/rejected": -251.36083984375, + "loss": 0.0925, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5055263638496399, + "rewards/margins": 2.385104179382324, + "rewards/rejected": -1.87957763671875, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 1.4121663986721304e-05, + "logits/chosen": -3.239633321762085, + "logits/rejected": -1.7536165714263916, + "logps/chosen": -888.7161254882812, + "logps/rejected": -474.2915954589844, + "loss": 0.0384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04518737643957138, + "rewards/margins": 3.9924960136413574, + "rewards/rejected": -3.9473085403442383, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 1.4120930546190156e-05, + "logits/chosen": -3.289085865020752, + "logits/rejected": -2.5526106357574463, + "logps/chosen": -359.944091796875, + "logps/rejected": -432.78900146484375, + "loss": 0.0933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02560730278491974, + "rewards/margins": 2.835693359375, + "rewards/rejected": -2.8613007068634033, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 1.4120197105659008e-05, + "logits/chosen": -2.6263740062713623, + "logits/rejected": -3.19953989982605, + "logps/chosen": -19.30487823486328, + "logps/rejected": -135.13307189941406, + "loss": 0.3335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22573308646678925, + "rewards/margins": 1.1389992237091064, + "rewards/rejected": -0.913266122341156, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 1.411946366512786e-05, + "logits/chosen": -3.3266303539276123, + "logits/rejected": -3.242114305496216, + "logps/chosen": -184.8331298828125, + "logps/rejected": -401.67425537109375, + "loss": 0.1173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05235329270362854, + "rewards/margins": 3.52862811088562, + "rewards/rejected": -3.580981492996216, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 1.4118730224596711e-05, + "logits/chosen": -2.89294171333313, + "logits/rejected": -3.050645589828491, + "logps/chosen": -28.56173324584961, + "logps/rejected": -214.82565307617188, + "loss": 0.1017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07908592373132706, + "rewards/margins": 2.5985684394836426, + "rewards/rejected": -2.677654266357422, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 1.4117996784065563e-05, + "logits/chosen": -2.811469793319702, + "logits/rejected": -3.235835552215576, + "logps/chosen": -293.6539611816406, + "logps/rejected": -395.3082275390625, + "loss": 0.0635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.554071843624115, + "rewards/margins": 2.93430233001709, + "rewards/rejected": -3.4883742332458496, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 1.4117263343534415e-05, + "logits/chosen": -2.3765769004821777, + "logits/rejected": -3.250920057296753, + "logps/chosen": -122.56066131591797, + "logps/rejected": -273.47760009765625, + "loss": 0.0593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4677238464355469, + "rewards/margins": 4.679996013641357, + "rewards/rejected": -4.2122721672058105, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 1.4116529903003269e-05, + "logits/chosen": -2.4142613410949707, + "logits/rejected": -3.1852529048919678, + "logps/chosen": -147.04425048828125, + "logps/rejected": -469.5093994140625, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10973912477493286, + "rewards/margins": 5.503190040588379, + "rewards/rejected": -5.393450736999512, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 1.411579646247212e-05, + "logits/chosen": -2.6715641021728516, + "logits/rejected": -2.4797372817993164, + "logps/chosen": -709.9578857421875, + "logps/rejected": -378.41265869140625, + "loss": 0.0811, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37777405977249146, + "rewards/margins": 3.1494760513305664, + "rewards/rejected": -3.527250051498413, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 1.4115063021940972e-05, + "logits/chosen": -2.865339994430542, + "logits/rejected": -3.248429775238037, + "logps/chosen": -205.4887237548828, + "logps/rejected": -420.59375, + "loss": 0.0341, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7158309817314148, + "rewards/margins": 3.4666824340820312, + "rewards/rejected": -4.182513236999512, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 1.4114329581409824e-05, + "logits/chosen": -2.654479742050171, + "logits/rejected": -3.164245128631592, + "logps/chosen": -132.29518127441406, + "logps/rejected": -219.65077209472656, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1695118099451065, + "rewards/margins": 3.9429075717926025, + "rewards/rejected": -4.112419128417969, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 1.4113596140878676e-05, + "logits/chosen": -2.2106897830963135, + "logits/rejected": -3.2540743350982666, + "logps/chosen": -223.14463806152344, + "logps/rejected": -571.0731811523438, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1131912469863892, + "rewards/margins": 3.8669309616088867, + "rewards/rejected": -4.9801225662231445, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 1.4112862700347528e-05, + "logits/chosen": -3.2090466022491455, + "logits/rejected": -3.2723896503448486, + "logps/chosen": -219.63699340820312, + "logps/rejected": -202.2224884033203, + "loss": 0.2433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.734113335609436, + "rewards/margins": 1.4282569885253906, + "rewards/rejected": -2.162370204925537, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 1.411212925981638e-05, + "logits/chosen": -2.9305551052093506, + "logits/rejected": -3.090211868286133, + "logps/chosen": -143.3037567138672, + "logps/rejected": -242.89358520507812, + "loss": 0.0878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03542671725153923, + "rewards/margins": 2.3892810344696045, + "rewards/rejected": -2.3538544178009033, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 1.4111395819285232e-05, + "logits/chosen": -2.636638641357422, + "logits/rejected": -3.1256260871887207, + "logps/chosen": -563.8216552734375, + "logps/rejected": -608.9473266601562, + "loss": 0.1081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9135517477989197, + "rewards/margins": 2.9238991737365723, + "rewards/rejected": -3.8374509811401367, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 1.4110662378754084e-05, + "logits/chosen": -3.155632257461548, + "logits/rejected": -2.1130244731903076, + "logps/chosen": -189.9978485107422, + "logps/rejected": -118.34403991699219, + "loss": 2.6634, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3415961265563965, + "rewards/margins": 0.5503494739532471, + "rewards/rejected": -2.8919456005096436, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 1.4109928938222937e-05, + "logits/chosen": -2.328481912612915, + "logits/rejected": -3.1478891372680664, + "logps/chosen": -295.6512756347656, + "logps/rejected": -417.5212707519531, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7795395255088806, + "rewards/margins": 4.902569770812988, + "rewards/rejected": -5.6821088790893555, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 1.410919549769179e-05, + "logits/chosen": -2.592844009399414, + "logits/rejected": -3.1233012676239014, + "logps/chosen": -300.1712951660156, + "logps/rejected": -277.4001159667969, + "loss": 1.3125, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.3385730981826782, + "rewards/margins": 2.409625768661499, + "rewards/rejected": -3.7481987476348877, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 1.4108462057160641e-05, + "logits/chosen": -2.8006911277770996, + "logits/rejected": -3.327759027481079, + "logps/chosen": -161.68019104003906, + "logps/rejected": -317.7685546875, + "loss": 0.0501, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7204326391220093, + "rewards/margins": 4.148226737976074, + "rewards/rejected": -5.868659019470215, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 1.4107728616629493e-05, + "logits/chosen": -1.9462476968765259, + "logits/rejected": -2.767289638519287, + "logps/chosen": -232.19509887695312, + "logps/rejected": -345.6363220214844, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32929229736328125, + "rewards/margins": 6.257097244262695, + "rewards/rejected": -6.586389541625977, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 1.4106995176098345e-05, + "logits/chosen": -1.4855555295944214, + "logits/rejected": -2.784919500350952, + "logps/chosen": -360.93719482421875, + "logps/rejected": -608.4627075195312, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9639912247657776, + "rewards/margins": 6.926948547363281, + "rewards/rejected": -7.890939712524414, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 1.4106261735567197e-05, + "logits/chosen": -3.138418436050415, + "logits/rejected": -2.669344902038574, + "logps/chosen": -567.259765625, + "logps/rejected": -317.53851318359375, + "loss": 0.3077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2513000965118408, + "rewards/margins": 3.8051199913024902, + "rewards/rejected": -5.056419849395752, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 1.4105528295036049e-05, + "logits/chosen": -1.1724745035171509, + "logits/rejected": -3.035754919052124, + "logps/chosen": -24.494060516357422, + "logps/rejected": -225.23402404785156, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19154730439186096, + "rewards/margins": 6.542359828948975, + "rewards/rejected": -6.3508124351501465, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 1.41047948545049e-05, + "logits/chosen": -2.74204421043396, + "logits/rejected": -2.8871958255767822, + "logps/chosen": -559.2006225585938, + "logps/rejected": -611.2367553710938, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6184124946594238, + "rewards/margins": 6.983996391296387, + "rewards/rejected": -8.602409362792969, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 1.4104061413973754e-05, + "logits/chosen": -2.012796401977539, + "logits/rejected": -3.177419662475586, + "logps/chosen": -214.34249877929688, + "logps/rejected": -287.50177001953125, + "loss": 0.2832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4181738793849945, + "rewards/margins": 1.9556629657745361, + "rewards/rejected": -2.3738367557525635, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 1.4103327973442606e-05, + "logits/chosen": -2.4921419620513916, + "logits/rejected": -2.7941842079162598, + "logps/chosen": -225.37979125976562, + "logps/rejected": -311.69219970703125, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1316711902618408, + "rewards/margins": 4.420755386352539, + "rewards/rejected": -5.552426815032959, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 1.4102594532911458e-05, + "logits/chosen": -2.716118812561035, + "logits/rejected": -3.1972899436950684, + "logps/chosen": -139.38931274414062, + "logps/rejected": -397.4482421875, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4219324588775635, + "rewards/margins": 6.504172325134277, + "rewards/rejected": -7.92610502243042, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 1.4101861092380311e-05, + "logits/chosen": -1.7047643661499023, + "logits/rejected": -2.9832139015197754, + "logps/chosen": -164.7735137939453, + "logps/rejected": -193.9362030029297, + "loss": 3.0438, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0000967979431152, + "rewards/margins": 1.2232871055603027, + "rewards/rejected": -4.223384380340576, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 1.4101127651849163e-05, + "logits/chosen": -3.049916982650757, + "logits/rejected": -2.9663047790527344, + "logps/chosen": -97.95878601074219, + "logps/rejected": -290.0465087890625, + "loss": 0.1061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0238819122314453, + "rewards/margins": 4.52006721496582, + "rewards/rejected": -5.543949127197266, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 1.4100394211318015e-05, + "logits/chosen": -3.167811155319214, + "logits/rejected": -2.9585390090942383, + "logps/chosen": -187.05926513671875, + "logps/rejected": -240.50572204589844, + "loss": 0.0528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2773185968399048, + "rewards/margins": 4.9026288986206055, + "rewards/rejected": -5.179947853088379, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.4099660770786867e-05, + "logits/chosen": -1.7142236232757568, + "logits/rejected": -3.1432106494903564, + "logps/chosen": -133.66578674316406, + "logps/rejected": -543.159912109375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08204080164432526, + "rewards/margins": 7.703408718109131, + "rewards/rejected": -7.785449028015137, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 1.4098927330255719e-05, + "logits/chosen": -3.2010879516601562, + "logits/rejected": -2.390185832977295, + "logps/chosen": -269.18658447265625, + "logps/rejected": -230.22105407714844, + "loss": 3.1224, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.144019603729248, + "rewards/margins": -0.6794967651367188, + "rewards/rejected": -3.46452260017395, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 1.409819388972457e-05, + "logits/chosen": -3.170147657394409, + "logits/rejected": -3.3049988746643066, + "logps/chosen": -36.01892852783203, + "logps/rejected": -129.99632263183594, + "loss": 0.1022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4393461346626282, + "rewards/margins": 4.160342693328857, + "rewards/rejected": -3.720996618270874, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 1.4097460449193424e-05, + "logits/chosen": -1.9985405206680298, + "logits/rejected": -3.111098527908325, + "logps/chosen": -84.87568664550781, + "logps/rejected": -403.38238525390625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4605675935745239, + "rewards/margins": 7.374953269958496, + "rewards/rejected": -7.8355207443237305, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 1.4096727008662276e-05, + "logits/chosen": -2.787776470184326, + "logits/rejected": -2.9037892818450928, + "logps/chosen": -186.72157287597656, + "logps/rejected": -299.1474609375, + "loss": 0.2603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5438945293426514, + "rewards/margins": 5.006444454193115, + "rewards/rejected": -5.5503387451171875, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 1.4095993568131128e-05, + "logits/chosen": -3.20245623588562, + "logits/rejected": -1.8926314115524292, + "logps/chosen": -458.6402587890625, + "logps/rejected": -107.56018829345703, + "loss": 3.4985, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.4525985717773438, + "rewards/margins": -3.26259708404541, + "rewards/rejected": -0.19000130891799927, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 1.409526012759998e-05, + "logits/chosen": -2.9066805839538574, + "logits/rejected": -2.8711607456207275, + "logps/chosen": -162.5769500732422, + "logps/rejected": -378.83551025390625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8935917615890503, + "rewards/margins": 6.860810279846191, + "rewards/rejected": -8.754402160644531, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 1.4094526687068832e-05, + "logits/chosen": -3.015829086303711, + "logits/rejected": -3.1844711303710938, + "logps/chosen": -28.33389663696289, + "logps/rejected": -187.33551025390625, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08596497029066086, + "rewards/margins": 5.581160545349121, + "rewards/rejected": -5.495195388793945, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 1.4093793246537684e-05, + "logits/chosen": -2.9902729988098145, + "logits/rejected": -3.155439853668213, + "logps/chosen": -165.72056579589844, + "logps/rejected": -402.98828125, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7172593474388123, + "rewards/margins": 5.085604667663574, + "rewards/rejected": -5.802864074707031, + "step": 73 + }, + { + "epoch": 0.01, + "learning_rate": 1.4093059806006536e-05, + "logits/chosen": -3.1577019691467285, + "logits/rejected": -1.544668436050415, + "logps/chosen": -652.17822265625, + "logps/rejected": -229.62303161621094, + "loss": 5.0821, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.295568943023682, + "rewards/margins": -3.933912754058838, + "rewards/rejected": -1.3616561889648438, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 1.4092326365475387e-05, + "logits/chosen": -3.141834259033203, + "logits/rejected": -3.1538596153259277, + "logps/chosen": -246.31732177734375, + "logps/rejected": -330.97271728515625, + "loss": 0.071, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7481445670127869, + "rewards/margins": 4.228979587554932, + "rewards/rejected": -4.977124214172363, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 1.409159292494424e-05, + "logits/chosen": -2.9411988258361816, + "logits/rejected": -1.8432508707046509, + "logps/chosen": -561.7764892578125, + "logps/rejected": -346.9847412109375, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7403500080108643, + "rewards/margins": 5.062065124511719, + "rewards/rejected": -6.802415370941162, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 1.4090859484413093e-05, + "logits/chosen": -3.1362669467926025, + "logits/rejected": -3.140937566757202, + "logps/chosen": -192.16090393066406, + "logps/rejected": -144.77584838867188, + "loss": 0.5496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36675509810447693, + "rewards/margins": 0.3184751272201538, + "rewards/rejected": -0.6852302551269531, + "step": 77 + }, + { + "epoch": 0.01, + "learning_rate": 1.4090126043881945e-05, + "logits/chosen": -0.9966374039649963, + "logits/rejected": -2.853728771209717, + "logps/chosen": -9.659101486206055, + "logps/rejected": -453.4775085449219, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10880632698535919, + "rewards/margins": 11.764625549316406, + "rewards/rejected": -11.6558198928833, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 1.4089392603350797e-05, + "logits/chosen": -2.460829734802246, + "logits/rejected": -3.0917153358459473, + "logps/chosen": -51.14653778076172, + "logps/rejected": -151.83018493652344, + "loss": 0.0735, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2845606803894043, + "rewards/margins": 3.815502166748047, + "rewards/rejected": -4.100062847137451, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 1.4088659162819649e-05, + "logits/chosen": -2.777294397354126, + "logits/rejected": -2.7373132705688477, + "logps/chosen": -264.8917236328125, + "logps/rejected": -376.13421630859375, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43589478731155396, + "rewards/margins": 4.977968215942383, + "rewards/rejected": -4.5420732498168945, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 1.40879257222885e-05, + "logits/chosen": -1.9690183401107788, + "logits/rejected": -3.1244380474090576, + "logps/chosen": -55.26750183105469, + "logps/rejected": -246.25270080566406, + "loss": 0.1432, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011519715189933777, + "rewards/margins": 4.764491558074951, + "rewards/rejected": -4.75297212600708, + "step": 81 + }, + { + "epoch": 0.01, + "learning_rate": 1.4087192281757352e-05, + "logits/chosen": -3.124129056930542, + "logits/rejected": -2.8204216957092285, + "logps/chosen": -827.9840087890625, + "logps/rejected": -516.7529907226562, + "loss": 0.0701, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02456054836511612, + "rewards/margins": 5.101218223571777, + "rewards/rejected": -5.1257781982421875, + "step": 82 + }, + { + "epoch": 0.01, + "learning_rate": 1.4086458841226204e-05, + "logits/chosen": -2.9070253372192383, + "logits/rejected": -3.1985230445861816, + "logps/chosen": -309.88427734375, + "logps/rejected": -349.03662109375, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.036777496337890625, + "rewards/margins": 7.554184913635254, + "rewards/rejected": -7.5909624099731445, + "step": 83 + }, + { + "epoch": 0.01, + "learning_rate": 1.4085725400695056e-05, + "logits/chosen": -2.3516762256622314, + "logits/rejected": -3.115957498550415, + "logps/chosen": -176.7771453857422, + "logps/rejected": -455.1728210449219, + "loss": 2.0006, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7282673120498657, + "rewards/margins": 1.640203833580017, + "rewards/rejected": -3.368471145629883, + "step": 84 + }, + { + "epoch": 0.01, + "learning_rate": 1.4084991960163908e-05, + "logits/chosen": -2.5035336017608643, + "logits/rejected": -3.161635637283325, + "logps/chosen": -236.27694702148438, + "logps/rejected": -311.4019775390625, + "loss": 0.1945, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.128682017326355, + "rewards/margins": 3.735896587371826, + "rewards/rejected": -4.8645782470703125, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 1.4084258519632762e-05, + "logits/chosen": -2.5791778564453125, + "logits/rejected": -3.208611488342285, + "logps/chosen": -82.57880401611328, + "logps/rejected": -242.08444213867188, + "loss": 0.2739, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5016483068466187, + "rewards/margins": 5.384100914001465, + "rewards/rejected": -6.885749340057373, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 1.4083525079101613e-05, + "logits/chosen": -2.9731059074401855, + "logits/rejected": -1.7196627855300903, + "logps/chosen": -632.6104736328125, + "logps/rejected": -193.69483947753906, + "loss": 5.6596, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.660763740539551, + "rewards/margins": -3.846034288406372, + "rewards/rejected": -1.8147293329238892, + "step": 87 + }, + { + "epoch": 0.01, + "learning_rate": 1.4082791638570465e-05, + "logits/chosen": -3.1676766872406006, + "logits/rejected": -1.961521863937378, + "logps/chosen": -561.1615600585938, + "logps/rejected": -383.1304931640625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.299957275390625, + "rewards/margins": 7.879086494445801, + "rewards/rejected": -8.179043769836426, + "step": 88 + }, + { + "epoch": 0.01, + "learning_rate": 1.4082058198039317e-05, + "logits/chosen": -2.9433672428131104, + "logits/rejected": -3.264108419418335, + "logps/chosen": -193.05416870117188, + "logps/rejected": -346.28424072265625, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5426574945449829, + "rewards/margins": 6.5635271072387695, + "rewards/rejected": -7.106184482574463, + "step": 89 + }, + { + "epoch": 0.01, + "learning_rate": 1.4081324757508169e-05, + "logits/chosen": -2.9720005989074707, + "logits/rejected": -3.3555727005004883, + "logps/chosen": -11.764184951782227, + "logps/rejected": -189.725341796875, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.025509685277938843, + "rewards/margins": 4.768563270568848, + "rewards/rejected": -4.743053436279297, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 1.4080591316977021e-05, + "logits/chosen": -2.910702705383301, + "logits/rejected": -2.010645627975464, + "logps/chosen": -267.0122985839844, + "logps/rejected": -376.8834228515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7800067663192749, + "rewards/margins": 8.005531311035156, + "rewards/rejected": -8.785537719726562, + "step": 91 + }, + { + "epoch": 0.01, + "learning_rate": 1.4079857876445873e-05, + "logits/chosen": -3.128509044647217, + "logits/rejected": -2.0317466259002686, + "logps/chosen": -299.989501953125, + "logps/rejected": -231.77992248535156, + "loss": 0.0864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9572189450263977, + "rewards/margins": 2.406130790710449, + "rewards/rejected": -3.3633499145507812, + "step": 92 + }, + { + "epoch": 0.01, + "learning_rate": 1.4079124435914725e-05, + "logits/chosen": -2.9654877185821533, + "logits/rejected": -2.351299285888672, + "logps/chosen": -104.8874282836914, + "logps/rejected": -243.82150268554688, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1060212850570679, + "rewards/margins": 5.005117416381836, + "rewards/rejected": -6.111139297485352, + "step": 93 + }, + { + "epoch": 0.01, + "learning_rate": 1.4078390995383578e-05, + "logits/chosen": -3.098271131515503, + "logits/rejected": -2.5507304668426514, + "logps/chosen": -230.50050354003906, + "logps/rejected": -262.92901611328125, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4497499465942383, + "rewards/margins": 6.193783760070801, + "rewards/rejected": -5.7440338134765625, + "step": 94 + }, + { + "epoch": 0.01, + "learning_rate": 1.407765755485243e-05, + "logits/chosen": -2.5193445682525635, + "logits/rejected": -3.0373809337615967, + "logps/chosen": -201.92315673828125, + "logps/rejected": -335.52191162109375, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12115098536014557, + "rewards/margins": 4.780361175537109, + "rewards/rejected": -4.901512145996094, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 1.4076924114321284e-05, + "logits/chosen": -2.7040843963623047, + "logits/rejected": -3.1072871685028076, + "logps/chosen": -130.72256469726562, + "logps/rejected": -143.1368408203125, + "loss": 1.9704, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6809452772140503, + "rewards/margins": 0.43530333042144775, + "rewards/rejected": -2.116248607635498, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 1.4076190673790136e-05, + "logits/chosen": -3.143909215927124, + "logits/rejected": -2.1254079341888428, + "logps/chosen": -351.46600341796875, + "logps/rejected": -147.99644470214844, + "loss": 8.6752, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.271784782409668, + "rewards/margins": -8.674738883972168, + "rewards/rejected": 0.4029541015625, + "step": 97 + }, + { + "epoch": 0.02, + "learning_rate": 1.4075457233258987e-05, + "logits/chosen": -3.1177303791046143, + "logits/rejected": -3.3527262210845947, + "logps/chosen": -140.43426513671875, + "logps/rejected": -120.49134826660156, + "loss": 2.4945, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2747509479522705, + "rewards/margins": 1.4805595874786377, + "rewards/rejected": -3.755310535430908, + "step": 98 + }, + { + "epoch": 0.02, + "learning_rate": 1.407472379272784e-05, + "logits/chosen": -3.176112174987793, + "logits/rejected": -3.07924485206604, + "logps/chosen": -565.258056640625, + "logps/rejected": -496.4239501953125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9338394403457642, + "rewards/margins": 7.284897327423096, + "rewards/rejected": -8.21873664855957, + "step": 99 + }, + { + "epoch": 0.02, + "learning_rate": 1.4073990352196691e-05, + "logits/chosen": -3.270714282989502, + "logits/rejected": -3.2275683879852295, + "logps/chosen": -251.5394287109375, + "logps/rejected": -262.38079833984375, + "loss": 0.0673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24075394868850708, + "rewards/margins": 2.827162742614746, + "rewards/rejected": -3.0679168701171875, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 1.4073256911665543e-05, + "logits/chosen": -3.024155616760254, + "logits/rejected": -2.607529640197754, + "logps/chosen": -127.84226989746094, + "logps/rejected": -172.0180206298828, + "loss": 2.8721, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.382777690887451, + "rewards/margins": 1.3801672458648682, + "rewards/rejected": -4.76294469833374, + "step": 101 + }, + { + "epoch": 0.02, + "learning_rate": 1.4072523471134395e-05, + "logits/chosen": -1.2158269882202148, + "logits/rejected": -3.0131607055664062, + "logps/chosen": -42.686500549316406, + "logps/rejected": -257.9658203125, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0735815092921257, + "rewards/margins": 3.783698797225952, + "rewards/rejected": -3.8572802543640137, + "step": 102 + }, + { + "epoch": 0.02, + "learning_rate": 1.4071790030603247e-05, + "logits/chosen": -2.847900867462158, + "logits/rejected": -2.478466749191284, + "logps/chosen": -229.7882080078125, + "logps/rejected": -223.6863555908203, + "loss": 0.0523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1962675154209137, + "rewards/margins": 3.9896254539489746, + "rewards/rejected": -4.1858930587768555, + "step": 103 + }, + { + "epoch": 0.02, + "learning_rate": 1.40710565900721e-05, + "logits/chosen": -1.486276626586914, + "logits/rejected": -2.9029643535614014, + "logps/chosen": -125.77071380615234, + "logps/rejected": -365.1062316894531, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8040760159492493, + "rewards/margins": 6.777219295501709, + "rewards/rejected": -7.581295013427734, + "step": 104 + }, + { + "epoch": 0.02, + "learning_rate": 1.4070323149540952e-05, + "logits/chosen": -2.1254289150238037, + "logits/rejected": -3.03289794921875, + "logps/chosen": -60.82428741455078, + "logps/rejected": -254.8223419189453, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28048574924468994, + "rewards/margins": 6.691647529602051, + "rewards/rejected": -6.41116189956665, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 1.4069589709009804e-05, + "logits/chosen": -3.107715606689453, + "logits/rejected": -3.037278413772583, + "logps/chosen": -178.19808959960938, + "logps/rejected": -234.01617431640625, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14045867323875427, + "rewards/margins": 6.054421901702881, + "rewards/rejected": -5.913963317871094, + "step": 106 + }, + { + "epoch": 0.02, + "learning_rate": 1.4068856268478656e-05, + "logits/chosen": -2.813054084777832, + "logits/rejected": -3.224017858505249, + "logps/chosen": -318.6181335449219, + "logps/rejected": -568.7596435546875, + "loss": 0.823, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.3415467739105225, + "rewards/margins": 0.503042459487915, + "rewards/rejected": -1.8445892333984375, + "step": 107 + }, + { + "epoch": 0.02, + "learning_rate": 1.4068122827947508e-05, + "logits/chosen": -2.5339033603668213, + "logits/rejected": -3.213797092437744, + "logps/chosen": -95.97489166259766, + "logps/rejected": -287.7912902832031, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.173699975013733, + "rewards/margins": 5.858774185180664, + "rewards/rejected": -7.032474040985107, + "step": 108 + }, + { + "epoch": 0.02, + "learning_rate": 1.406738938741636e-05, + "logits/chosen": -3.0311291217803955, + "logits/rejected": -1.498777151107788, + "logps/chosen": -423.12298583984375, + "logps/rejected": -135.71426391601562, + "loss": 3.5981, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.040032386779785, + "rewards/margins": -0.30222511291503906, + "rewards/rejected": -3.737807273864746, + "step": 109 + }, + { + "epoch": 0.02, + "learning_rate": 1.4066655946885212e-05, + "logits/chosen": -3.2125508785247803, + "logits/rejected": -2.1686909198760986, + "logps/chosen": -313.74383544921875, + "logps/rejected": -116.99127197265625, + "loss": 1.5857, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.0584831237792969, + "rewards/margins": 0.020174741744995117, + "rewards/rejected": -1.078657865524292, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 1.4065922506354064e-05, + "logits/chosen": -2.078157663345337, + "logits/rejected": -3.239241600036621, + "logps/chosen": -153.5667724609375, + "logps/rejected": -378.9678039550781, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13970261812210083, + "rewards/margins": 6.814199447631836, + "rewards/rejected": -6.674496650695801, + "step": 111 + }, + { + "epoch": 0.02, + "learning_rate": 1.4065189065822915e-05, + "logits/chosen": -3.0685713291168213, + "logits/rejected": -2.991624593734741, + "logps/chosen": -168.77392578125, + "logps/rejected": -148.8946990966797, + "loss": 1.999, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.3662277460098267, + "rewards/margins": -1.0460482835769653, + "rewards/rejected": -0.32017937302589417, + "step": 112 + }, + { + "epoch": 0.02, + "learning_rate": 1.4064455625291769e-05, + "logits/chosen": -3.0488312244415283, + "logits/rejected": -1.9660900831222534, + "logps/chosen": -410.4831237792969, + "logps/rejected": -250.65386962890625, + "loss": 2.1829, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.175157308578491, + "rewards/margins": 0.49102282524108887, + "rewards/rejected": -2.666179895401001, + "step": 113 + }, + { + "epoch": 0.02, + "learning_rate": 1.4063722184760621e-05, + "logits/chosen": -3.0795323848724365, + "logits/rejected": -2.3542728424072266, + "logps/chosen": -532.76416015625, + "logps/rejected": -451.930419921875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9599502682685852, + "rewards/margins": 8.277000427246094, + "rewards/rejected": -9.236950874328613, + "step": 114 + }, + { + "epoch": 0.02, + "learning_rate": 1.4062988744229473e-05, + "logits/chosen": -1.509883999824524, + "logits/rejected": -3.0490713119506836, + "logps/chosen": -42.94734573364258, + "logps/rejected": -298.12677001953125, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3569048047065735, + "rewards/margins": 7.57316780090332, + "rewards/rejected": -7.930072784423828, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 1.4062255303698325e-05, + "logits/chosen": -2.8234407901763916, + "logits/rejected": -3.081902265548706, + "logps/chosen": -298.3720703125, + "logps/rejected": -350.02099609375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7373882532119751, + "rewards/margins": 6.187140464782715, + "rewards/rejected": -5.449751853942871, + "step": 116 + }, + { + "epoch": 0.02, + "learning_rate": 1.4061521863167177e-05, + "logits/chosen": -2.5702428817749023, + "logits/rejected": -2.2656266689300537, + "logps/chosen": -564.6000366210938, + "logps/rejected": -525.3060913085938, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8273880481719971, + "rewards/margins": 6.39691162109375, + "rewards/rejected": -7.224299430847168, + "step": 117 + }, + { + "epoch": 0.02, + "learning_rate": 1.4060788422636028e-05, + "logits/chosen": -2.7506086826324463, + "logits/rejected": -3.2771730422973633, + "logps/chosen": -51.76686477661133, + "logps/rejected": -162.08348083496094, + "loss": 0.145, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4146925210952759, + "rewards/margins": 3.942627429962158, + "rewards/rejected": -5.3573198318481445, + "step": 118 + }, + { + "epoch": 0.02, + "learning_rate": 1.406005498210488e-05, + "logits/chosen": -3.118535041809082, + "logits/rejected": -2.5582759380340576, + "logps/chosen": -229.5237579345703, + "logps/rejected": -304.7815856933594, + "loss": 6.2195, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.419735908508301, + "rewards/margins": -3.8127477169036865, + "rewards/rejected": -2.6069881916046143, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 1.4059321541573732e-05, + "logits/chosen": -1.9653520584106445, + "logits/rejected": -3.198803186416626, + "logps/chosen": -166.76907348632812, + "logps/rejected": -414.16357421875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03910371661186218, + "rewards/margins": 7.909660816192627, + "rewards/rejected": -7.870556831359863, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 1.4058588101042584e-05, + "logits/chosen": -2.3533754348754883, + "logits/rejected": -3.2021944522857666, + "logps/chosen": -50.164039611816406, + "logps/rejected": -119.99439239501953, + "loss": 0.1684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18960732221603394, + "rewards/margins": 2.4167237281799316, + "rewards/rejected": -2.6063308715820312, + "step": 121 + }, + { + "epoch": 0.02, + "learning_rate": 1.4057854660511438e-05, + "logits/chosen": -1.623924970626831, + "logits/rejected": -3.088088035583496, + "logps/chosen": -66.2769775390625, + "logps/rejected": -304.386474609375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21918335556983948, + "rewards/margins": 7.0517683029174805, + "rewards/rejected": -7.270951747894287, + "step": 122 + }, + { + "epoch": 0.02, + "learning_rate": 1.405712121998029e-05, + "logits/chosen": -2.9226832389831543, + "logits/rejected": -3.057607889175415, + "logps/chosen": -299.0283203125, + "logps/rejected": -429.28131103515625, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8855453729629517, + "rewards/margins": 7.151938438415527, + "rewards/rejected": -6.266392707824707, + "step": 123 + }, + { + "epoch": 0.02, + "learning_rate": 1.4056387779449141e-05, + "logits/chosen": -3.0799477100372314, + "logits/rejected": -3.29148006439209, + "logps/chosen": -810.645263671875, + "logps/rejected": -658.369873046875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8842483758926392, + "rewards/margins": 5.747746467590332, + "rewards/rejected": -6.631994724273682, + "step": 124 + }, + { + "epoch": 0.02, + "learning_rate": 1.4055654338917993e-05, + "logits/chosen": -2.8642311096191406, + "logits/rejected": -3.0844838619232178, + "logps/chosen": -463.97015380859375, + "logps/rejected": -273.33892822265625, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2062179446220398, + "rewards/margins": 4.880588054656982, + "rewards/rejected": -4.674369812011719, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 1.4054920898386845e-05, + "logits/chosen": -1.926129698753357, + "logits/rejected": -3.20182728767395, + "logps/chosen": -139.52554321289062, + "logps/rejected": -406.45440673828125, + "loss": 1.6133, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5022897720336914, + "rewards/margins": 2.3603694438934326, + "rewards/rejected": -4.862658977508545, + "step": 126 + }, + { + "epoch": 0.02, + "learning_rate": 1.4054187457855697e-05, + "logits/chosen": -1.5586082935333252, + "logits/rejected": -2.5530381202697754, + "logps/chosen": -48.246559143066406, + "logps/rejected": -209.8062286376953, + "loss": 0.0604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038666725158691406, + "rewards/margins": 2.7776503562927246, + "rewards/rejected": -2.7815170288085938, + "step": 127 + }, + { + "epoch": 0.02, + "learning_rate": 1.405345401732455e-05, + "logits/chosen": -1.369247317314148, + "logits/rejected": -2.8779184818267822, + "logps/chosen": -99.06793975830078, + "logps/rejected": -400.24267578125, + "loss": 0.1305, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2709430754184723, + "rewards/margins": 3.522075891494751, + "rewards/rejected": -3.7930190563201904, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 1.4052720576793402e-05, + "logits/chosen": -2.8032896518707275, + "logits/rejected": -3.2602946758270264, + "logps/chosen": -20.731311798095703, + "logps/rejected": -164.76116943359375, + "loss": 0.1211, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4871983528137207, + "rewards/margins": 3.319852828979492, + "rewards/rejected": -3.807051181793213, + "step": 129 + }, + { + "epoch": 0.02, + "learning_rate": 1.4051987136262254e-05, + "logits/chosen": -3.0301430225372314, + "logits/rejected": -1.8463631868362427, + "logps/chosen": -305.605712890625, + "logps/rejected": -303.8360595703125, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16704484820365906, + "rewards/margins": 5.840075969696045, + "rewards/rejected": -6.0071210861206055, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 1.4051253695731108e-05, + "logits/chosen": -0.6512682437896729, + "logits/rejected": -3.0789849758148193, + "logps/chosen": -64.77520751953125, + "logps/rejected": -303.4212951660156, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.04751718044281, + "rewards/margins": 5.496860027313232, + "rewards/rejected": -6.544377326965332, + "step": 131 + }, + { + "epoch": 0.02, + "learning_rate": 1.405052025519996e-05, + "logits/chosen": -2.6168651580810547, + "logits/rejected": -3.1220500469207764, + "logps/chosen": -81.12246704101562, + "logps/rejected": -131.12841796875, + "loss": 1.385, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.838469386100769, + "rewards/margins": 1.4287701845169067, + "rewards/rejected": -3.2672393321990967, + "step": 132 + }, + { + "epoch": 0.02, + "learning_rate": 1.4049786814668812e-05, + "logits/chosen": -3.2334144115448, + "logits/rejected": -3.146683931350708, + "logps/chosen": -503.9326477050781, + "logps/rejected": -385.52008056640625, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1735550165176392, + "rewards/margins": 4.1636247634887695, + "rewards/rejected": -5.337179660797119, + "step": 133 + }, + { + "epoch": 0.02, + "learning_rate": 1.4049053374137664e-05, + "logits/chosen": -3.1464531421661377, + "logits/rejected": -2.704549551010132, + "logps/chosen": -148.33807373046875, + "logps/rejected": -94.45451354980469, + "loss": 4.4598, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.084069728851318, + "rewards/margins": -4.433653354644775, + "rewards/rejected": -0.6504164934158325, + "step": 134 + }, + { + "epoch": 0.02, + "learning_rate": 1.4048319933606515e-05, + "logits/chosen": -2.8916501998901367, + "logits/rejected": -3.2173585891723633, + "logps/chosen": -196.97488403320312, + "logps/rejected": -271.2926025390625, + "loss": 1.9979, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.614396810531616, + "rewards/margins": -0.47247135639190674, + "rewards/rejected": -2.14192533493042, + "step": 135 + }, + { + "epoch": 0.02, + "learning_rate": 1.4047586493075367e-05, + "logits/chosen": -2.9821224212646484, + "logits/rejected": -3.1145167350769043, + "logps/chosen": -321.15789794921875, + "logps/rejected": -182.53341674804688, + "loss": 7.0448, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.20854377746582, + "rewards/margins": -7.043896675109863, + "rewards/rejected": -0.16464653611183167, + "step": 136 + }, + { + "epoch": 0.02, + "learning_rate": 1.404685305254422e-05, + "logits/chosen": -3.1035823822021484, + "logits/rejected": -3.169919013977051, + "logps/chosen": -46.41209411621094, + "logps/rejected": -159.25555419921875, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.44947120547294617, + "rewards/margins": 4.079174995422363, + "rewards/rejected": -4.528645992279053, + "step": 137 + }, + { + "epoch": 0.02, + "learning_rate": 1.4046119612013071e-05, + "logits/chosen": -1.5671418905258179, + "logits/rejected": -3.0444717407226562, + "logps/chosen": -237.23165893554688, + "logps/rejected": -468.1154479980469, + "loss": 1.1714, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.3149696588516235, + "rewards/margins": 3.195079803466797, + "rewards/rejected": -4.510049343109131, + "step": 138 + }, + { + "epoch": 0.02, + "learning_rate": 1.4045386171481923e-05, + "logits/chosen": -2.6871392726898193, + "logits/rejected": -3.185950994491577, + "logps/chosen": -669.3670043945312, + "logps/rejected": -670.3478393554688, + "loss": 0.0612, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.343021422624588, + "rewards/margins": 5.745822906494141, + "rewards/rejected": -6.088844299316406, + "step": 139 + }, + { + "epoch": 0.02, + "learning_rate": 1.4044652730950777e-05, + "logits/chosen": -2.7135262489318848, + "logits/rejected": -2.99619460105896, + "logps/chosen": -140.79605102539062, + "logps/rejected": -176.24903869628906, + "loss": 0.7455, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.0222373008728027, + "rewards/margins": 2.823230266571045, + "rewards/rejected": -4.845467567443848, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 1.4043919290419628e-05, + "logits/chosen": -2.033722162246704, + "logits/rejected": -2.996286630630493, + "logps/chosen": -222.9698486328125, + "logps/rejected": -333.42633056640625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3981155455112457, + "rewards/margins": 6.359478950500488, + "rewards/rejected": -6.757594108581543, + "step": 141 + }, + { + "epoch": 0.02, + "learning_rate": 1.404318584988848e-05, + "logits/chosen": -3.2094123363494873, + "logits/rejected": -3.252929210662842, + "logps/chosen": -320.0847473144531, + "logps/rejected": -235.1302947998047, + "loss": 0.0315, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20695114135742188, + "rewards/margins": 3.4612648487091064, + "rewards/rejected": -3.6682159900665283, + "step": 142 + }, + { + "epoch": 0.02, + "learning_rate": 1.4042452409357332e-05, + "logits/chosen": -1.3514900207519531, + "logits/rejected": -3.114370584487915, + "logps/chosen": -184.26551818847656, + "logps/rejected": -475.26263427734375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7291362881660461, + "rewards/margins": 6.619921684265137, + "rewards/rejected": -7.349058151245117, + "step": 143 + }, + { + "epoch": 0.02, + "learning_rate": 1.4041718968826184e-05, + "logits/chosen": -3.0558784008026123, + "logits/rejected": -2.070467233657837, + "logps/chosen": -110.82780456542969, + "logps/rejected": -155.31814575195312, + "loss": 0.2597, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.613146424293518, + "rewards/margins": 2.036968231201172, + "rewards/rejected": -3.6501145362854004, + "step": 144 + }, + { + "epoch": 0.02, + "learning_rate": 1.4040985528295036e-05, + "logits/chosen": -3.143052816390991, + "logits/rejected": -3.2887673377990723, + "logps/chosen": -187.4743194580078, + "logps/rejected": -237.90084838867188, + "loss": 0.1609, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7280426025390625, + "rewards/margins": 2.665557622909546, + "rewards/rejected": -3.3936002254486084, + "step": 145 + }, + { + "epoch": 0.02, + "learning_rate": 1.4040252087763888e-05, + "logits/chosen": -2.58248233795166, + "logits/rejected": -2.9897232055664062, + "logps/chosen": -480.3372802734375, + "logps/rejected": -310.873291015625, + "loss": 3.4672, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.529890537261963, + "rewards/margins": 0.2914729118347168, + "rewards/rejected": -4.8213629722595215, + "step": 146 + }, + { + "epoch": 0.02, + "learning_rate": 1.403951864723274e-05, + "logits/chosen": -0.740471363067627, + "logits/rejected": -3.153139114379883, + "logps/chosen": -53.352577209472656, + "logps/rejected": -449.3125305175781, + "loss": 0.2115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6508352756500244, + "rewards/margins": 3.756040573120117, + "rewards/rejected": -4.4068756103515625, + "step": 147 + }, + { + "epoch": 0.02, + "learning_rate": 1.4038785206701592e-05, + "logits/chosen": -3.163011074066162, + "logits/rejected": -3.2026965618133545, + "logps/chosen": -727.489990234375, + "logps/rejected": -686.5921630859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19287031888961792, + "rewards/margins": 9.318321228027344, + "rewards/rejected": -9.511191368103027, + "step": 148 + }, + { + "epoch": 0.02, + "learning_rate": 1.4038051766170445e-05, + "logits/chosen": -3.0778968334198, + "logits/rejected": -3.225226879119873, + "logps/chosen": -19.67176055908203, + "logps/rejected": -99.86713409423828, + "loss": 0.0783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3690433204174042, + "rewards/margins": 2.569936752319336, + "rewards/rejected": -2.9389801025390625, + "step": 149 + }, + { + "epoch": 0.02, + "learning_rate": 1.4037318325639297e-05, + "logits/chosen": -1.4018080234527588, + "logits/rejected": -3.2425036430358887, + "logps/chosen": -55.99711608886719, + "logps/rejected": -417.175048828125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.47940266132354736, + "rewards/margins": 7.1457319259643555, + "rewards/rejected": -7.625134468078613, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 1.4036584885108149e-05, + "logits/chosen": -2.4461357593536377, + "logits/rejected": -3.2207515239715576, + "logps/chosen": -28.2572078704834, + "logps/rejected": -167.1986083984375, + "loss": 0.2167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5945075750350952, + "rewards/margins": 2.5371510982513428, + "rewards/rejected": -3.1316587924957275, + "step": 151 + }, + { + "epoch": 0.02, + "learning_rate": 1.4035851444577e-05, + "logits/chosen": -1.6984515190124512, + "logits/rejected": -2.9718549251556396, + "logps/chosen": -153.30714416503906, + "logps/rejected": -292.871337890625, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5461689233779907, + "rewards/margins": 5.211982250213623, + "rewards/rejected": -5.758151054382324, + "step": 152 + }, + { + "epoch": 0.02, + "learning_rate": 1.4035118004045853e-05, + "logits/chosen": -2.3423922061920166, + "logits/rejected": -3.0584089756011963, + "logps/chosen": -167.05386352539062, + "logps/rejected": -384.7779541015625, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7688385248184204, + "rewards/margins": 7.721070766448975, + "rewards/rejected": -8.489909172058105, + "step": 153 + }, + { + "epoch": 0.02, + "learning_rate": 1.4034384563514704e-05, + "logits/chosen": -3.1607983112335205, + "logits/rejected": -2.5857608318328857, + "logps/chosen": -394.6033630371094, + "logps/rejected": -321.24981689453125, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.330152988433838, + "rewards/margins": 5.429242134094238, + "rewards/rejected": -6.759394645690918, + "step": 154 + }, + { + "epoch": 0.02, + "learning_rate": 1.4033651122983556e-05, + "logits/chosen": -2.244684934616089, + "logits/rejected": -3.192662000656128, + "logps/chosen": -277.6309814453125, + "logps/rejected": -278.3343505859375, + "loss": 6.0111, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.330122947692871, + "rewards/margins": -1.990980625152588, + "rewards/rejected": -4.339142322540283, + "step": 155 + }, + { + "epoch": 0.02, + "learning_rate": 1.4032917682452408e-05, + "logits/chosen": -2.858048677444458, + "logits/rejected": -3.138115167617798, + "logps/chosen": -484.6504211425781, + "logps/rejected": -620.673583984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.22979736328125, + "rewards/margins": 8.402729034423828, + "rewards/rejected": -9.632526397705078, + "step": 156 + }, + { + "epoch": 0.02, + "learning_rate": 1.4032184241921262e-05, + "logits/chosen": -2.3370301723480225, + "logits/rejected": -3.1018831729888916, + "logps/chosen": -165.13783264160156, + "logps/rejected": -157.1363525390625, + "loss": 4.4347, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.7890625, + "rewards/margins": -2.1940624713897705, + "rewards/rejected": -2.5950000286102295, + "step": 157 + }, + { + "epoch": 0.02, + "learning_rate": 1.4031450801390114e-05, + "logits/chosen": -3.1308815479278564, + "logits/rejected": -3.1610026359558105, + "logps/chosen": -204.57997131347656, + "logps/rejected": -71.40421295166016, + "loss": 4.0757, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.23699951171875, + "rewards/margins": -1.662895917892456, + "rewards/rejected": -2.5741031169891357, + "step": 158 + }, + { + "epoch": 0.02, + "learning_rate": 1.4030717360858966e-05, + "logits/chosen": -2.8525502681732178, + "logits/rejected": -3.2082810401916504, + "logps/chosen": -139.2957305908203, + "logps/rejected": -101.78530883789062, + "loss": 2.3598, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1307520866394043, + "rewards/margins": -0.9452277421951294, + "rewards/rejected": -2.1855242252349854, + "step": 159 + }, + { + "epoch": 0.02, + "learning_rate": 1.4029983920327817e-05, + "logits/chosen": -1.3927249908447266, + "logits/rejected": -3.2480645179748535, + "logps/chosen": -215.73660278320312, + "logps/rejected": -481.8995666503906, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.366964340209961, + "rewards/margins": 9.164051055908203, + "rewards/rejected": -10.531015396118164, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 1.402925047979667e-05, + "logits/chosen": -3.170358180999756, + "logits/rejected": -1.9528805017471313, + "logps/chosen": -344.59222412109375, + "logps/rejected": -301.1311950683594, + "loss": 2.4363, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.899723768234253, + "rewards/margins": 1.5337166786193848, + "rewards/rejected": -3.4334404468536377, + "step": 161 + }, + { + "epoch": 0.03, + "learning_rate": 1.4028517039265523e-05, + "logits/chosen": -3.2794854640960693, + "logits/rejected": -3.4230825901031494, + "logps/chosen": -32.24395751953125, + "logps/rejected": -67.48942565917969, + "loss": 0.1625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4664277136325836, + "rewards/margins": 1.7411216497421265, + "rewards/rejected": -2.2075493335723877, + "step": 162 + }, + { + "epoch": 0.03, + "learning_rate": 1.4027783598734375e-05, + "logits/chosen": -3.180250883102417, + "logits/rejected": -2.933677911758423, + "logps/chosen": -181.1337890625, + "logps/rejected": -161.4140167236328, + "loss": 2.5264, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.4345855712890625, + "rewards/margins": -2.442751407623291, + "rewards/rejected": -0.991834282875061, + "step": 163 + }, + { + "epoch": 0.03, + "learning_rate": 1.4027050158203227e-05, + "logits/chosen": -3.2740097045898438, + "logits/rejected": -2.5141959190368652, + "logps/chosen": -653.0733642578125, + "logps/rejected": -319.4391784667969, + "loss": 0.8838, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4367378950119019, + "rewards/margins": 1.680279016494751, + "rewards/rejected": -3.1170170307159424, + "step": 164 + }, + { + "epoch": 0.03, + "learning_rate": 1.4026316717672079e-05, + "logits/chosen": -2.8845391273498535, + "logits/rejected": -3.2027604579925537, + "logps/chosen": -170.2748260498047, + "logps/rejected": -335.0379943847656, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45937347412109375, + "rewards/margins": 3.9830048084259033, + "rewards/rejected": -4.442378044128418, + "step": 165 + }, + { + "epoch": 0.03, + "learning_rate": 1.4025583277140932e-05, + "logits/chosen": -3.262019634246826, + "logits/rejected": -2.4432671070098877, + "logps/chosen": -511.94091796875, + "logps/rejected": -230.57630920410156, + "loss": 3.9406, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.577499866485596, + "rewards/margins": -1.3492319583892822, + "rewards/rejected": -3.2282676696777344, + "step": 166 + }, + { + "epoch": 0.03, + "learning_rate": 1.4024849836609784e-05, + "logits/chosen": -2.6538307666778564, + "logits/rejected": -3.274165153503418, + "logps/chosen": -111.40486907958984, + "logps/rejected": -295.8091735839844, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3387433886528015, + "rewards/margins": 5.497929096221924, + "rewards/rejected": -5.159185886383057, + "step": 167 + }, + { + "epoch": 0.03, + "learning_rate": 1.4024116396078636e-05, + "logits/chosen": -3.201803684234619, + "logits/rejected": -3.1272976398468018, + "logps/chosen": -158.65713500976562, + "logps/rejected": -120.02027130126953, + "loss": 1.8904, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0216803550720215, + "rewards/margins": -0.9451591968536377, + "rewards/rejected": -2.0765209197998047, + "step": 168 + }, + { + "epoch": 0.03, + "learning_rate": 1.4023382955547488e-05, + "logits/chosen": -3.18961238861084, + "logits/rejected": -2.4527065753936768, + "logps/chosen": -487.9825439453125, + "logps/rejected": -348.3439636230469, + "loss": 0.1828, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5345367193222046, + "rewards/margins": 2.949368953704834, + "rewards/rejected": -3.483905792236328, + "step": 169 + }, + { + "epoch": 0.03, + "learning_rate": 1.402264951501634e-05, + "logits/chosen": -3.219999313354492, + "logits/rejected": -2.6342580318450928, + "logps/chosen": -78.6775131225586, + "logps/rejected": -49.05897521972656, + "loss": 1.9113, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.231576681137085, + "rewards/margins": -0.6624884605407715, + "rewards/rejected": -1.5690882205963135, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 1.4021916074485192e-05, + "logits/chosen": -2.6074976921081543, + "logits/rejected": -3.108506202697754, + "logps/chosen": -113.25102233886719, + "logps/rejected": -197.483154296875, + "loss": 0.0675, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8993004560470581, + "rewards/margins": 2.713906764984131, + "rewards/rejected": -3.6132073402404785, + "step": 171 + }, + { + "epoch": 0.03, + "learning_rate": 1.4021182633954043e-05, + "logits/chosen": -3.219654083251953, + "logits/rejected": -1.7438279390335083, + "logps/chosen": -423.0234680175781, + "logps/rejected": -199.55166625976562, + "loss": 4.4235, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.73944091796875, + "rewards/margins": -4.380421161651611, + "rewards/rejected": -0.3590196669101715, + "step": 172 + }, + { + "epoch": 0.03, + "learning_rate": 1.4020449193422895e-05, + "logits/chosen": -2.283191442489624, + "logits/rejected": -3.0281760692596436, + "logps/chosen": -91.82456970214844, + "logps/rejected": -398.3910217285156, + "loss": 0.0308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9143945574760437, + "rewards/margins": 5.421634674072266, + "rewards/rejected": -6.336029052734375, + "step": 173 + }, + { + "epoch": 0.03, + "learning_rate": 1.4019715752891747e-05, + "logits/chosen": -2.6594109535217285, + "logits/rejected": -2.84454083442688, + "logps/chosen": -240.54190063476562, + "logps/rejected": -381.72772216796875, + "loss": 2.7099, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.742281198501587, + "rewards/margins": 1.9954323768615723, + "rewards/rejected": -5.737713813781738, + "step": 174 + }, + { + "epoch": 0.03, + "learning_rate": 1.40189823123606e-05, + "logits/chosen": -3.2058839797973633, + "logits/rejected": -2.696855068206787, + "logps/chosen": -95.35279846191406, + "logps/rejected": -114.14395141601562, + "loss": 1.0226, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.505282998085022, + "rewards/margins": 0.9722380638122559, + "rewards/rejected": -2.4775209426879883, + "step": 175 + }, + { + "epoch": 0.03, + "learning_rate": 1.4018248871829453e-05, + "logits/chosen": -2.59051251411438, + "logits/rejected": -3.2057833671569824, + "logps/chosen": -25.70195770263672, + "logps/rejected": -132.85379028320312, + "loss": 0.1924, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9495477676391602, + "rewards/margins": 1.5504459142684937, + "rewards/rejected": -2.4999938011169434, + "step": 176 + }, + { + "epoch": 0.03, + "learning_rate": 1.4017515431298304e-05, + "logits/chosen": -3.0927317142486572, + "logits/rejected": -2.9161274433135986, + "logps/chosen": -542.6322021484375, + "logps/rejected": -390.4591979980469, + "loss": 0.8087, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.07867312431335449, + "rewards/margins": 2.0380964279174805, + "rewards/rejected": -2.116769313812256, + "step": 177 + }, + { + "epoch": 0.03, + "learning_rate": 1.4016781990767156e-05, + "logits/chosen": -2.5693600177764893, + "logits/rejected": -3.2705814838409424, + "logps/chosen": -266.7143249511719, + "logps/rejected": -303.3470458984375, + "loss": 0.0845, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3578506410121918, + "rewards/margins": 3.2811355590820312, + "rewards/rejected": -2.9232850074768066, + "step": 178 + }, + { + "epoch": 0.03, + "learning_rate": 1.4016048550236008e-05, + "logits/chosen": -2.3647427558898926, + "logits/rejected": -3.2081775665283203, + "logps/chosen": -69.60836791992188, + "logps/rejected": -262.4646301269531, + "loss": 0.0613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44122153520584106, + "rewards/margins": 6.47877836227417, + "rewards/rejected": -6.037557125091553, + "step": 179 + }, + { + "epoch": 0.03, + "learning_rate": 1.401531510970486e-05, + "logits/chosen": -2.9002785682678223, + "logits/rejected": -3.1912384033203125, + "logps/chosen": -25.212514877319336, + "logps/rejected": -142.0551300048828, + "loss": 0.1025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9173253178596497, + "rewards/margins": 2.226381301879883, + "rewards/rejected": -3.1437065601348877, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 1.4014581669173712e-05, + "logits/chosen": -3.2523458003997803, + "logits/rejected": -2.937786102294922, + "logps/chosen": -310.7915344238281, + "logps/rejected": -212.21591186523438, + "loss": 0.7122, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.1561126708984375, + "rewards/margins": 1.6543456315994263, + "rewards/rejected": -1.4982330799102783, + "step": 181 + }, + { + "epoch": 0.03, + "learning_rate": 1.4013848228642564e-05, + "logits/chosen": -2.832043170928955, + "logits/rejected": -3.240903854370117, + "logps/chosen": -390.07281494140625, + "logps/rejected": -510.6436767578125, + "loss": 2.0158, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.060680866241455, + "rewards/margins": -0.3550534248352051, + "rewards/rejected": -0.7056273818016052, + "step": 182 + }, + { + "epoch": 0.03, + "learning_rate": 1.4013114788111416e-05, + "logits/chosen": -2.605855941772461, + "logits/rejected": -3.1888647079467773, + "logps/chosen": -52.90601348876953, + "logps/rejected": -237.97817993164062, + "loss": 0.0734, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2511052191257477, + "rewards/margins": 2.6594486236572266, + "rewards/rejected": -2.9105539321899414, + "step": 183 + }, + { + "epoch": 0.03, + "learning_rate": 1.401238134758027e-05, + "logits/chosen": -2.753803014755249, + "logits/rejected": -3.299407720565796, + "logps/chosen": -266.0102233886719, + "logps/rejected": -325.5157470703125, + "loss": 0.0752, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11007004976272583, + "rewards/margins": 3.213939666748047, + "rewards/rejected": -3.324009895324707, + "step": 184 + }, + { + "epoch": 0.03, + "learning_rate": 1.4011647907049121e-05, + "logits/chosen": -3.0950851440429688, + "logits/rejected": -2.0598299503326416, + "logps/chosen": -678.3980712890625, + "logps/rejected": -353.960205078125, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9190540313720703, + "rewards/margins": 4.494198799133301, + "rewards/rejected": -3.5751450061798096, + "step": 185 + }, + { + "epoch": 0.03, + "learning_rate": 1.4010914466517973e-05, + "logits/chosen": -2.71305775642395, + "logits/rejected": -3.0011305809020996, + "logps/chosen": -225.72579956054688, + "logps/rejected": -306.3951416015625, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3637050688266754, + "rewards/margins": 4.1303558349609375, + "rewards/rejected": -4.494061470031738, + "step": 186 + }, + { + "epoch": 0.03, + "learning_rate": 1.4010181025986825e-05, + "logits/chosen": -1.6881204843521118, + "logits/rejected": -3.1725332736968994, + "logps/chosen": -53.09040832519531, + "logps/rejected": -224.08071899414062, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39502623677253723, + "rewards/margins": 4.587936878204346, + "rewards/rejected": -4.9829630851745605, + "step": 187 + }, + { + "epoch": 0.03, + "learning_rate": 1.4009447585455677e-05, + "logits/chosen": -2.680356740951538, + "logits/rejected": -3.1289560794830322, + "logps/chosen": -169.55955505371094, + "logps/rejected": -181.77481079101562, + "loss": 0.957, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8712161779403687, + "rewards/margins": 0.769504189491272, + "rewards/rejected": -2.6407203674316406, + "step": 188 + }, + { + "epoch": 0.03, + "learning_rate": 1.4008714144924529e-05, + "logits/chosen": -1.9220157861709595, + "logits/rejected": -3.1640982627868652, + "logps/chosen": -125.5991439819336, + "logps/rejected": -300.2742919921875, + "loss": 0.2975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5281667709350586, + "rewards/margins": 3.0934150218963623, + "rewards/rejected": -3.621581792831421, + "step": 189 + }, + { + "epoch": 0.03, + "learning_rate": 1.400798070439338e-05, + "logits/chosen": -2.5842506885528564, + "logits/rejected": -3.2656729221343994, + "logps/chosen": -310.7033996582031, + "logps/rejected": -416.71124267578125, + "loss": 0.1301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2461908906698227, + "rewards/margins": 5.029735088348389, + "rewards/rejected": -4.783544540405273, + "step": 190 + }, + { + "epoch": 0.03, + "learning_rate": 1.4007247263862232e-05, + "logits/chosen": -2.474775791168213, + "logits/rejected": -3.224919080734253, + "logps/chosen": -543.6678466796875, + "logps/rejected": -465.404541015625, + "loss": 0.049, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16098783910274506, + "rewards/margins": 3.090977430343628, + "rewards/rejected": -2.9299895763397217, + "step": 191 + }, + { + "epoch": 0.03, + "learning_rate": 1.4006513823331084e-05, + "logits/chosen": -1.1019113063812256, + "logits/rejected": -2.764784812927246, + "logps/chosen": -15.122116088867188, + "logps/rejected": -386.07464599609375, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2468574494123459, + "rewards/margins": 4.354513168334961, + "rewards/rejected": -4.601370334625244, + "step": 192 + }, + { + "epoch": 0.03, + "learning_rate": 1.4005780382799938e-05, + "logits/chosen": -2.568516731262207, + "logits/rejected": -3.217529296875, + "logps/chosen": -29.422672271728516, + "logps/rejected": -257.1299133300781, + "loss": 0.0392, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10423406958580017, + "rewards/margins": 4.922812461853027, + "rewards/rejected": -5.027046203613281, + "step": 193 + }, + { + "epoch": 0.03, + "learning_rate": 1.400504694226879e-05, + "logits/chosen": -3.1003148555755615, + "logits/rejected": -1.341787576675415, + "logps/chosen": -152.62261962890625, + "logps/rejected": -104.31842041015625, + "loss": 2.2085, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.04732346534729, + "rewards/margins": -0.10112667083740234, + "rewards/rejected": -2.9461967945098877, + "step": 194 + }, + { + "epoch": 0.03, + "learning_rate": 1.4004313501737642e-05, + "logits/chosen": -3.08681321144104, + "logits/rejected": -1.3091222047805786, + "logps/chosen": -511.8238830566406, + "logps/rejected": -409.16168212890625, + "loss": 3.5134, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1327927112579346, + "rewards/margins": -0.4347977638244629, + "rewards/rejected": -2.6979949474334717, + "step": 195 + }, + { + "epoch": 0.03, + "learning_rate": 1.4003580061206495e-05, + "logits/chosen": -2.4435698986053467, + "logits/rejected": -3.00534987449646, + "logps/chosen": -217.54092407226562, + "logps/rejected": -276.362060546875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6971519589424133, + "rewards/margins": 5.987970352172852, + "rewards/rejected": -5.290818214416504, + "step": 196 + }, + { + "epoch": 0.03, + "learning_rate": 1.4002846620675347e-05, + "logits/chosen": -1.5879443883895874, + "logits/rejected": -2.724151134490967, + "logps/chosen": -132.21115112304688, + "logps/rejected": -349.9459228515625, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8769833445549011, + "rewards/margins": 4.078869819641113, + "rewards/rejected": -4.955853462219238, + "step": 197 + }, + { + "epoch": 0.03, + "learning_rate": 1.4002113180144199e-05, + "logits/chosen": -3.263244390487671, + "logits/rejected": -3.370572566986084, + "logps/chosen": -124.45972442626953, + "logps/rejected": -215.35647583007812, + "loss": 0.1965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.906731903553009, + "rewards/margins": 3.3886146545410156, + "rewards/rejected": -4.295346736907959, + "step": 198 + }, + { + "epoch": 0.03, + "learning_rate": 1.4001379739613051e-05, + "logits/chosen": -2.1546785831451416, + "logits/rejected": -3.160074234008789, + "logps/chosen": -32.00113296508789, + "logps/rejected": -136.52484130859375, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5689853429794312, + "rewards/margins": 4.4740095138549805, + "rewards/rejected": -5.042995452880859, + "step": 199 + }, + { + "epoch": 0.03, + "learning_rate": 1.4000646299081903e-05, + "logits/chosen": -3.066455602645874, + "logits/rejected": -1.907586932182312, + "logps/chosen": -120.34658813476562, + "logps/rejected": -137.93511962890625, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8716362118721008, + "rewards/margins": 2.056596040725708, + "rewards/rejected": -2.928232192993164, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 1.3999912858550755e-05, + "logits/chosen": -2.8877885341644287, + "logits/rejected": -3.160905599594116, + "logps/chosen": -90.42741394042969, + "logps/rejected": -326.0619812011719, + "loss": 0.9758, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.6991305351257324, + "rewards/margins": 0.938274085521698, + "rewards/rejected": -1.6374046802520752, + "step": 201 + }, + { + "epoch": 0.03, + "learning_rate": 1.3999179418019608e-05, + "logits/chosen": -2.0221428871154785, + "logits/rejected": -2.9591310024261475, + "logps/chosen": -68.44993591308594, + "logps/rejected": -288.23077392578125, + "loss": 0.0244, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0403834581375122, + "rewards/margins": 3.852280616760254, + "rewards/rejected": -4.892664432525635, + "step": 202 + }, + { + "epoch": 0.03, + "learning_rate": 1.399844597748846e-05, + "logits/chosen": -2.6060564517974854, + "logits/rejected": -3.0946414470672607, + "logps/chosen": -148.9664306640625, + "logps/rejected": -226.91380310058594, + "loss": 0.3643, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5923221111297607, + "rewards/margins": 1.9916720390319824, + "rewards/rejected": -3.5839943885803223, + "step": 203 + }, + { + "epoch": 0.03, + "learning_rate": 1.3997712536957312e-05, + "logits/chosen": -2.6714131832122803, + "logits/rejected": -3.3451430797576904, + "logps/chosen": -131.95033264160156, + "logps/rejected": -235.36587524414062, + "loss": 0.0343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03590773791074753, + "rewards/margins": 3.7521591186523438, + "rewards/rejected": -3.788066864013672, + "step": 204 + }, + { + "epoch": 0.03, + "learning_rate": 1.3996979096426164e-05, + "logits/chosen": -2.9365339279174805, + "logits/rejected": -3.140393018722534, + "logps/chosen": -19.56757354736328, + "logps/rejected": -140.2930908203125, + "loss": 0.0572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6483761668205261, + "rewards/margins": 4.816903114318848, + "rewards/rejected": -5.4652791023254395, + "step": 205 + }, + { + "epoch": 0.03, + "learning_rate": 1.3996245655895016e-05, + "logits/chosen": -2.9637067317962646, + "logits/rejected": -3.154275894165039, + "logps/chosen": -54.21587371826172, + "logps/rejected": -267.4487609863281, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6161869764328003, + "rewards/margins": 5.658080577850342, + "rewards/rejected": -6.274267673492432, + "step": 206 + }, + { + "epoch": 0.03, + "learning_rate": 1.3995512215363868e-05, + "logits/chosen": -3.008659601211548, + "logits/rejected": -3.085279941558838, + "logps/chosen": -670.210693359375, + "logps/rejected": -594.05126953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5828338861465454, + "rewards/margins": 10.519340515136719, + "rewards/rejected": -8.936506271362305, + "step": 207 + }, + { + "epoch": 0.03, + "learning_rate": 1.399477877483272e-05, + "logits/chosen": -3.0365495681762695, + "logits/rejected": -2.9392454624176025, + "logps/chosen": -168.81634521484375, + "logps/rejected": -229.75213623046875, + "loss": 0.0946, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1172035932540894, + "rewards/margins": 3.7309913635253906, + "rewards/rejected": -4.8481950759887695, + "step": 208 + }, + { + "epoch": 0.03, + "learning_rate": 1.3994045334301571e-05, + "logits/chosen": -2.701586961746216, + "logits/rejected": -2.6334357261657715, + "logps/chosen": -208.02407836914062, + "logps/rejected": -332.49554443359375, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.099469393491745, + "rewards/margins": 7.170982837677002, + "rewards/rejected": -7.27045202255249, + "step": 209 + }, + { + "epoch": 0.03, + "learning_rate": 1.3993311893770423e-05, + "logits/chosen": -2.6404409408569336, + "logits/rejected": -3.0476839542388916, + "logps/chosen": -278.85296630859375, + "logps/rejected": -334.5871887207031, + "loss": 2.6562, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.329782247543335, + "rewards/margins": 2.351048469543457, + "rewards/rejected": -4.680830478668213, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 1.3992578453239277e-05, + "logits/chosen": -1.619014859199524, + "logits/rejected": -3.0314786434173584, + "logps/chosen": -70.62420654296875, + "logps/rejected": -324.6544189453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13514328002929688, + "rewards/margins": 9.187169075012207, + "rewards/rejected": -9.05202579498291, + "step": 211 + }, + { + "epoch": 0.03, + "learning_rate": 1.3991845012708129e-05, + "logits/chosen": -2.7789466381073, + "logits/rejected": -3.2639307975769043, + "logps/chosen": -69.2427978515625, + "logps/rejected": -252.5205535888672, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8678086400032043, + "rewards/margins": 6.620849609375, + "rewards/rejected": -7.4886579513549805, + "step": 212 + }, + { + "epoch": 0.03, + "learning_rate": 1.399111157217698e-05, + "logits/chosen": -2.492037296295166, + "logits/rejected": -3.2014987468719482, + "logps/chosen": -44.46752166748047, + "logps/rejected": -279.579345703125, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.699627697467804, + "rewards/margins": 5.607601165771484, + "rewards/rejected": -6.307229042053223, + "step": 213 + }, + { + "epoch": 0.03, + "learning_rate": 1.3990378131645832e-05, + "logits/chosen": -3.0538463592529297, + "logits/rejected": -0.44703972339630127, + "logps/chosen": -533.6542358398438, + "logps/rejected": -13.421316146850586, + "loss": 6.5005, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.484054088592529, + "rewards/margins": -6.435161590576172, + "rewards/rejected": -0.04889194667339325, + "step": 214 + }, + { + "epoch": 0.03, + "learning_rate": 1.3989644691114684e-05, + "logits/chosen": -3.127373695373535, + "logits/rejected": -3.183122158050537, + "logps/chosen": -68.73934173583984, + "logps/rejected": -146.9935760498047, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46612119674682617, + "rewards/margins": 4.314206123352051, + "rewards/rejected": -4.780326843261719, + "step": 215 + }, + { + "epoch": 0.03, + "learning_rate": 1.3988911250583536e-05, + "logits/chosen": -3.2620596885681152, + "logits/rejected": -2.9649360179901123, + "logps/chosen": -534.937744140625, + "logps/rejected": -436.7746887207031, + "loss": 2.3006, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.583859443664551, + "rewards/margins": -0.4160217046737671, + "rewards/rejected": -3.167837619781494, + "step": 216 + }, + { + "epoch": 0.03, + "learning_rate": 1.3988177810052388e-05, + "logits/chosen": -2.5683951377868652, + "logits/rejected": -2.9338369369506836, + "logps/chosen": -190.89862060546875, + "logps/rejected": -214.34613037109375, + "loss": 4.3547, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.468705177307129, + "rewards/margins": 0.1472454071044922, + "rewards/rejected": -4.615950584411621, + "step": 217 + }, + { + "epoch": 0.03, + "learning_rate": 1.398744436952124e-05, + "logits/chosen": -3.1349384784698486, + "logits/rejected": -2.796931266784668, + "logps/chosen": -206.73367309570312, + "logps/rejected": -58.88450622558594, + "loss": 3.6214, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.955432891845703, + "rewards/margins": -3.5905568599700928, + "rewards/rejected": -0.3648759722709656, + "step": 218 + }, + { + "epoch": 0.03, + "learning_rate": 1.3986710928990092e-05, + "logits/chosen": -2.635080337524414, + "logits/rejected": -3.065330743789673, + "logps/chosen": -435.6900634765625, + "logps/rejected": -513.1083984375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9805496335029602, + "rewards/margins": 6.546184062957764, + "rewards/rejected": -7.5267333984375, + "step": 219 + }, + { + "epoch": 0.03, + "learning_rate": 1.3985977488458945e-05, + "logits/chosen": -3.1550021171569824, + "logits/rejected": -2.986222267150879, + "logps/chosen": -89.83547973632812, + "logps/rejected": -23.187129974365234, + "loss": 1.071, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.0118048191070557, + "rewards/margins": -0.4102352559566498, + "rewards/rejected": -0.6015695333480835, + "step": 220 + }, + { + "epoch": 0.03, + "learning_rate": 1.3985244047927797e-05, + "logits/chosen": -2.1839213371276855, + "logits/rejected": -3.0340211391448975, + "logps/chosen": -204.53628540039062, + "logps/rejected": -192.31396484375, + "loss": 4.7991, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.00675630569458, + "rewards/margins": -3.0641086101531982, + "rewards/rejected": -1.9426475763320923, + "step": 221 + }, + { + "epoch": 0.03, + "learning_rate": 1.398451060739665e-05, + "logits/chosen": -3.2816286087036133, + "logits/rejected": -3.290320873260498, + "logps/chosen": -134.47744750976562, + "logps/rejected": -159.31423950195312, + "loss": 0.2294, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.009393334388733, + "rewards/margins": 1.4785152673721313, + "rewards/rejected": -2.4879086017608643, + "step": 222 + }, + { + "epoch": 0.03, + "learning_rate": 1.3983777166865501e-05, + "logits/chosen": -3.0597314834594727, + "logits/rejected": -3.2184860706329346, + "logps/chosen": -89.27241516113281, + "logps/rejected": -260.58050537109375, + "loss": 0.1137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43154487013816833, + "rewards/margins": 4.728482246398926, + "rewards/rejected": -5.160027027130127, + "step": 223 + }, + { + "epoch": 0.03, + "learning_rate": 1.3983043726334353e-05, + "logits/chosen": -2.413797616958618, + "logits/rejected": -1.4205691814422607, + "logps/chosen": -377.2537536621094, + "logps/rejected": -269.91748046875, + "loss": 0.0898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24395103752613068, + "rewards/margins": 3.824895143508911, + "rewards/rejected": -4.068846225738525, + "step": 224 + }, + { + "epoch": 0.03, + "learning_rate": 1.3982310285803205e-05, + "logits/chosen": -2.961749792098999, + "logits/rejected": -2.0103445053100586, + "logps/chosen": -233.58497619628906, + "logps/rejected": -137.92333984375, + "loss": 2.5073, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1065735816955566, + "rewards/margins": -0.4633615016937256, + "rewards/rejected": -2.643212080001831, + "step": 225 + }, + { + "epoch": 0.04, + "learning_rate": 1.3981576845272057e-05, + "logits/chosen": -3.0870230197906494, + "logits/rejected": -3.1625959873199463, + "logps/chosen": -97.34003448486328, + "logps/rejected": -157.75115966796875, + "loss": 0.07, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43612366914749146, + "rewards/margins": 2.684535264968872, + "rewards/rejected": -3.1206588745117188, + "step": 226 + }, + { + "epoch": 0.04, + "learning_rate": 1.3980843404740909e-05, + "logits/chosen": -2.837679862976074, + "logits/rejected": -3.096672296524048, + "logps/chosen": -490.1009826660156, + "logps/rejected": -498.202880859375, + "loss": 0.0248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10872650891542435, + "rewards/margins": 4.425142765045166, + "rewards/rejected": -4.316416263580322, + "step": 227 + }, + { + "epoch": 0.04, + "learning_rate": 1.3980109964209762e-05, + "logits/chosen": -2.561997890472412, + "logits/rejected": -3.0783851146698, + "logps/chosen": -189.58621215820312, + "logps/rejected": -243.90631103515625, + "loss": 0.1102, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1363006830215454, + "rewards/margins": 2.726850986480713, + "rewards/rejected": -3.8631515502929688, + "step": 228 + }, + { + "epoch": 0.04, + "learning_rate": 1.3979376523678614e-05, + "logits/chosen": -2.520702362060547, + "logits/rejected": -3.2125771045684814, + "logps/chosen": -88.36930847167969, + "logps/rejected": -279.96435546875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08207741379737854, + "rewards/margins": 6.975037574768066, + "rewards/rejected": -7.057115077972412, + "step": 229 + }, + { + "epoch": 0.04, + "learning_rate": 1.3978643083147468e-05, + "logits/chosen": -2.9426515102386475, + "logits/rejected": -1.6930222511291504, + "logps/chosen": -248.01589965820312, + "logps/rejected": -143.98069763183594, + "loss": 3.7205, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.658071994781494, + "rewards/margins": -2.4158334732055664, + "rewards/rejected": -1.2422386407852173, + "step": 230 + }, + { + "epoch": 0.04, + "learning_rate": 1.397790964261632e-05, + "logits/chosen": -2.480340003967285, + "logits/rejected": -3.1181881427764893, + "logps/chosen": -141.0540771484375, + "logps/rejected": -111.4599380493164, + "loss": 3.9855, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.25268030166626, + "rewards/margins": -3.9446873664855957, + "rewards/rejected": -0.3079930245876312, + "step": 231 + }, + { + "epoch": 0.04, + "learning_rate": 1.3977176202085171e-05, + "logits/chosen": -2.3658759593963623, + "logits/rejected": -2.3975260257720947, + "logps/chosen": -311.2043762207031, + "logps/rejected": -483.9167785644531, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8981644511222839, + "rewards/margins": 5.361284255981445, + "rewards/rejected": -6.259448528289795, + "step": 232 + }, + { + "epoch": 0.04, + "learning_rate": 1.3976442761554023e-05, + "logits/chosen": -2.9515302181243896, + "logits/rejected": -2.063830614089966, + "logps/chosen": -160.89231872558594, + "logps/rejected": -183.71673583984375, + "loss": 0.0645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7535064816474915, + "rewards/margins": 3.9668798446655273, + "rewards/rejected": -4.720386505126953, + "step": 233 + }, + { + "epoch": 0.04, + "learning_rate": 1.3975709321022875e-05, + "logits/chosen": -3.1039326190948486, + "logits/rejected": -2.0475313663482666, + "logps/chosen": -216.59976196289062, + "logps/rejected": -133.0999298095703, + "loss": 1.6583, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1381468772888184, + "rewards/margins": 1.4127979278564453, + "rewards/rejected": -3.5509448051452637, + "step": 234 + }, + { + "epoch": 0.04, + "learning_rate": 1.3974975880491727e-05, + "logits/chosen": -3.1353304386138916, + "logits/rejected": -2.931157112121582, + "logps/chosen": -889.2889404296875, + "logps/rejected": -465.30975341796875, + "loss": 3.1234, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.046218156814575, + "rewards/margins": 0.2745227813720703, + "rewards/rejected": -3.3207409381866455, + "step": 235 + }, + { + "epoch": 0.04, + "learning_rate": 1.3974242439960579e-05, + "logits/chosen": -3.174116373062134, + "logits/rejected": -2.0327420234680176, + "logps/chosen": -497.9042053222656, + "logps/rejected": -321.4927978515625, + "loss": 0.039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03082885593175888, + "rewards/margins": 3.228926181793213, + "rewards/rejected": -3.1980972290039062, + "step": 236 + }, + { + "epoch": 0.04, + "learning_rate": 1.397350899942943e-05, + "logits/chosen": -3.2290563583374023, + "logits/rejected": -2.5461337566375732, + "logps/chosen": -459.3538818359375, + "logps/rejected": -122.95880126953125, + "loss": 2.1068, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.248584032058716, + "rewards/margins": -1.9258549213409424, + "rewards/rejected": -1.3227291107177734, + "step": 237 + }, + { + "epoch": 0.04, + "learning_rate": 1.3972775558898284e-05, + "logits/chosen": -2.5375845432281494, + "logits/rejected": -3.0951805114746094, + "logps/chosen": -430.4708251953125, + "logps/rejected": -418.226318359375, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21710053086280823, + "rewards/margins": 5.0109663009643555, + "rewards/rejected": -4.793866157531738, + "step": 238 + }, + { + "epoch": 0.04, + "learning_rate": 1.3972042118367136e-05, + "logits/chosen": -2.181722640991211, + "logits/rejected": -3.143584966659546, + "logps/chosen": -81.01676177978516, + "logps/rejected": -165.35458374023438, + "loss": 1.126, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4404077529907227, + "rewards/margins": 1.2684144973754883, + "rewards/rejected": -2.708822250366211, + "step": 239 + }, + { + "epoch": 0.04, + "learning_rate": 1.3971308677835988e-05, + "logits/chosen": -3.195403814315796, + "logits/rejected": -2.879711151123047, + "logps/chosen": -284.89898681640625, + "logps/rejected": -195.6822967529297, + "loss": 0.6439, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.5387802124023438, + "rewards/margins": 2.7631139755249023, + "rewards/rejected": -3.301894187927246, + "step": 240 + }, + { + "epoch": 0.04, + "learning_rate": 1.397057523730484e-05, + "logits/chosen": -2.7849924564361572, + "logits/rejected": -3.0741422176361084, + "logps/chosen": -107.73532104492188, + "logps/rejected": -230.48602294921875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7548045516014099, + "rewards/margins": 5.221304893493652, + "rewards/rejected": -5.976109504699707, + "step": 241 + }, + { + "epoch": 0.04, + "learning_rate": 1.3969841796773692e-05, + "logits/chosen": -2.4438328742980957, + "logits/rejected": -3.235691785812378, + "logps/chosen": -808.9854736328125, + "logps/rejected": -771.3998413085938, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3680984377861023, + "rewards/margins": 8.38176441192627, + "rewards/rejected": -8.749862670898438, + "step": 242 + }, + { + "epoch": 0.04, + "learning_rate": 1.3969108356242544e-05, + "logits/chosen": -1.182057499885559, + "logits/rejected": -3.16318678855896, + "logps/chosen": -31.229862213134766, + "logps/rejected": -235.73880004882812, + "loss": 0.0596, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.373024582862854, + "rewards/margins": 2.987273693084717, + "rewards/rejected": -4.360298156738281, + "step": 243 + }, + { + "epoch": 0.04, + "learning_rate": 1.3968374915711396e-05, + "logits/chosen": -2.1500244140625, + "logits/rejected": -3.180072069168091, + "logps/chosen": -65.92660522460938, + "logps/rejected": -230.43276977539062, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006489945575594902, + "rewards/margins": 5.405932903289795, + "rewards/rejected": -5.412423133850098, + "step": 244 + }, + { + "epoch": 0.04, + "learning_rate": 1.3967641475180247e-05, + "logits/chosen": -3.2497262954711914, + "logits/rejected": -2.342787742614746, + "logps/chosen": -492.3577880859375, + "logps/rejected": -277.4607238769531, + "loss": 3.6779, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.672879219055176, + "rewards/margins": -1.8377320766448975, + "rewards/rejected": -2.8351471424102783, + "step": 245 + }, + { + "epoch": 0.04, + "learning_rate": 1.3966908034649101e-05, + "logits/chosen": -2.751016855239868, + "logits/rejected": -3.2185587882995605, + "logps/chosen": -14.177921295166016, + "logps/rejected": -83.85748291015625, + "loss": 0.1246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4179031252861023, + "rewards/margins": 2.176410675048828, + "rewards/rejected": -2.594313859939575, + "step": 246 + }, + { + "epoch": 0.04, + "learning_rate": 1.3966174594117953e-05, + "logits/chosen": -3.042198419570923, + "logits/rejected": -3.0270538330078125, + "logps/chosen": -133.8990020751953, + "logps/rejected": -233.056640625, + "loss": 2.3423, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.890568733215332, + "rewards/margins": -1.0166627168655396, + "rewards/rejected": -1.8739060163497925, + "step": 247 + }, + { + "epoch": 0.04, + "learning_rate": 1.3965441153586805e-05, + "logits/chosen": -3.134228229522705, + "logits/rejected": -2.4634649753570557, + "logps/chosen": -827.51220703125, + "logps/rejected": -445.3179016113281, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9212433099746704, + "rewards/margins": 5.58433723449707, + "rewards/rejected": -4.663093566894531, + "step": 248 + }, + { + "epoch": 0.04, + "learning_rate": 1.3964707713055657e-05, + "logits/chosen": -3.222931146621704, + "logits/rejected": -2.609100103378296, + "logps/chosen": -122.98188781738281, + "logps/rejected": -70.39000701904297, + "loss": 0.4357, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.08654403686523438, + "rewards/margins": 2.191671133041382, + "rewards/rejected": -2.1051270961761475, + "step": 249 + }, + { + "epoch": 0.04, + "learning_rate": 1.3963974272524509e-05, + "logits/chosen": -1.1537407636642456, + "logits/rejected": -2.9607772827148438, + "logps/chosen": -127.12583923339844, + "logps/rejected": -264.89764404296875, + "loss": 2.0804, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4236243963241577, + "rewards/margins": 0.8813657760620117, + "rewards/rejected": -2.30499005317688, + "step": 250 + }, + { + "epoch": 0.04, + "learning_rate": 1.396324083199336e-05, + "logits/chosen": -2.394007682800293, + "logits/rejected": -3.0529189109802246, + "logps/chosen": -188.07884216308594, + "logps/rejected": -480.168212890625, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6425155401229858, + "rewards/margins": 9.998583793640137, + "rewards/rejected": -10.64109992980957, + "step": 251 + }, + { + "epoch": 0.04, + "learning_rate": 1.3962507391462212e-05, + "logits/chosen": -1.955869436264038, + "logits/rejected": -3.2626686096191406, + "logps/chosen": -723.205078125, + "logps/rejected": -636.7788696289062, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.424896240234375, + "rewards/margins": 6.657057285308838, + "rewards/rejected": -7.081953525543213, + "step": 252 + }, + { + "epoch": 0.04, + "learning_rate": 1.3961773950931064e-05, + "logits/chosen": -2.8036251068115234, + "logits/rejected": -3.012864828109741, + "logps/chosen": -188.1822509765625, + "logps/rejected": -374.60882568359375, + "loss": 0.1172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8888259530067444, + "rewards/margins": 8.350152969360352, + "rewards/rejected": -9.238978385925293, + "step": 253 + }, + { + "epoch": 0.04, + "learning_rate": 1.3961040510399916e-05, + "logits/chosen": -2.2010462284088135, + "logits/rejected": -3.124453544616699, + "logps/chosen": -88.0716552734375, + "logps/rejected": -303.52618408203125, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3929317593574524, + "rewards/margins": 5.521196365356445, + "rewards/rejected": -5.914128303527832, + "step": 254 + }, + { + "epoch": 0.04, + "learning_rate": 1.396030706986877e-05, + "logits/chosen": -2.868206262588501, + "logits/rejected": -3.2133443355560303, + "logps/chosen": -76.8046875, + "logps/rejected": -200.0888671875, + "loss": 0.1574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7102587819099426, + "rewards/margins": 1.8341153860092163, + "rewards/rejected": -2.5443742275238037, + "step": 255 + }, + { + "epoch": 0.04, + "learning_rate": 1.3959573629337621e-05, + "logits/chosen": -2.807352066040039, + "logits/rejected": -3.1015167236328125, + "logps/chosen": -226.18551635742188, + "logps/rejected": -341.35552978515625, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11465111374855042, + "rewards/margins": 5.429648399353027, + "rewards/rejected": -5.544299125671387, + "step": 256 + }, + { + "epoch": 0.04, + "learning_rate": 1.3958840188806473e-05, + "logits/chosen": -2.648805618286133, + "logits/rejected": -2.7441813945770264, + "logps/chosen": -197.70535278320312, + "logps/rejected": -344.862548828125, + "loss": 0.7713, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.898618459701538, + "rewards/margins": 7.611955642700195, + "rewards/rejected": -9.510574340820312, + "step": 257 + }, + { + "epoch": 0.04, + "learning_rate": 1.3958106748275325e-05, + "logits/chosen": -2.5490829944610596, + "logits/rejected": -3.0532469749450684, + "logps/chosen": -217.1167755126953, + "logps/rejected": -332.23065185546875, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6476211547851562, + "rewards/margins": 4.416913986206055, + "rewards/rejected": -3.7692925930023193, + "step": 258 + }, + { + "epoch": 0.04, + "learning_rate": 1.3957373307744177e-05, + "logits/chosen": -2.7834112644195557, + "logits/rejected": -3.1691324710845947, + "logps/chosen": -187.52352905273438, + "logps/rejected": -295.3063659667969, + "loss": 0.0388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9675415754318237, + "rewards/margins": 3.880840301513672, + "rewards/rejected": -4.848381996154785, + "step": 259 + }, + { + "epoch": 0.04, + "learning_rate": 1.3956639867213029e-05, + "logits/chosen": -3.1530344486236572, + "logits/rejected": -3.1165804862976074, + "logps/chosen": -147.75355529785156, + "logps/rejected": -318.78533935546875, + "loss": 0.1165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4244979918003082, + "rewards/margins": 3.531832218170166, + "rewards/rejected": -3.9563302993774414, + "step": 260 + }, + { + "epoch": 0.04, + "learning_rate": 1.3955906426681881e-05, + "logits/chosen": -1.9944334030151367, + "logits/rejected": -2.1047284603118896, + "logps/chosen": -342.2174072265625, + "logps/rejected": -275.38232421875, + "loss": 0.0435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2671752870082855, + "rewards/margins": 5.477038383483887, + "rewards/rejected": -5.744214057922363, + "step": 261 + }, + { + "epoch": 0.04, + "learning_rate": 1.3955172986150734e-05, + "logits/chosen": -2.3887453079223633, + "logits/rejected": -2.9074411392211914, + "logps/chosen": -356.37548828125, + "logps/rejected": -516.1868896484375, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9042602777481079, + "rewards/margins": 4.821948051452637, + "rewards/rejected": -3.9176881313323975, + "step": 262 + }, + { + "epoch": 0.04, + "learning_rate": 1.3954439545619586e-05, + "logits/chosen": -1.9161189794540405, + "logits/rejected": -2.747464179992676, + "logps/chosen": -447.53802490234375, + "logps/rejected": -556.4004516601562, + "loss": 3.8046, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.020085334777832, + "rewards/margins": 0.3531324863433838, + "rewards/rejected": -4.373218059539795, + "step": 263 + }, + { + "epoch": 0.04, + "learning_rate": 1.395370610508844e-05, + "logits/chosen": -2.979192018508911, + "logits/rejected": -1.64603590965271, + "logps/chosen": -320.3489074707031, + "logps/rejected": -252.11355590820312, + "loss": 3.3126, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6180131435394287, + "rewards/margins": -0.1980888843536377, + "rewards/rejected": -3.419924259185791, + "step": 264 + }, + { + "epoch": 0.04, + "learning_rate": 1.3952972664557292e-05, + "logits/chosen": -1.8090285062789917, + "logits/rejected": -3.137571334838867, + "logps/chosen": -256.44921875, + "logps/rejected": -556.7885131835938, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5403488278388977, + "rewards/margins": 5.953668117523193, + "rewards/rejected": -6.494017124176025, + "step": 265 + }, + { + "epoch": 0.04, + "learning_rate": 1.3952239224026144e-05, + "logits/chosen": -3.2560107707977295, + "logits/rejected": -2.4310998916625977, + "logps/chosen": -379.17315673828125, + "logps/rejected": -253.3798370361328, + "loss": 0.2327, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5517022609710693, + "rewards/margins": 2.094087600708008, + "rewards/rejected": -3.6457901000976562, + "step": 266 + }, + { + "epoch": 0.04, + "learning_rate": 1.3951505783494996e-05, + "logits/chosen": -2.0756564140319824, + "logits/rejected": -3.145324230194092, + "logps/chosen": -120.60350036621094, + "logps/rejected": -265.9230651855469, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06808853149414062, + "rewards/margins": 5.42453670501709, + "rewards/rejected": -5.4926252365112305, + "step": 267 + }, + { + "epoch": 0.04, + "learning_rate": 1.3950772342963847e-05, + "logits/chosen": -2.7587077617645264, + "logits/rejected": -3.220017671585083, + "logps/chosen": -312.66680908203125, + "logps/rejected": -323.3785705566406, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6317901611328125, + "rewards/margins": 4.851534843444824, + "rewards/rejected": -5.483325004577637, + "step": 268 + }, + { + "epoch": 0.04, + "learning_rate": 1.39500389024327e-05, + "logits/chosen": -2.0976312160491943, + "logits/rejected": -3.0231361389160156, + "logps/chosen": -221.99790954589844, + "logps/rejected": -282.1643981933594, + "loss": 0.0511, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4652343690395355, + "rewards/margins": 3.4617652893066406, + "rewards/rejected": -3.926999807357788, + "step": 269 + }, + { + "epoch": 0.04, + "learning_rate": 1.3949305461901551e-05, + "logits/chosen": -2.8766119480133057, + "logits/rejected": -1.5870869159698486, + "logps/chosen": -252.11227416992188, + "logps/rejected": -121.19239807128906, + "loss": 2.9876, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.327744483947754, + "rewards/margins": -1.3888529539108276, + "rewards/rejected": -2.9388914108276367, + "step": 270 + }, + { + "epoch": 0.04, + "learning_rate": 1.3948572021370403e-05, + "logits/chosen": -2.9710655212402344, + "logits/rejected": -3.134204149246216, + "logps/chosen": -140.60916137695312, + "logps/rejected": -60.383419036865234, + "loss": 2.116, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.414308547973633, + "rewards/margins": -1.2949786186218262, + "rewards/rejected": -1.119329810142517, + "step": 271 + }, + { + "epoch": 0.04, + "learning_rate": 1.3947838580839255e-05, + "logits/chosen": -2.397721767425537, + "logits/rejected": -2.5841267108917236, + "logps/chosen": -213.93072509765625, + "logps/rejected": -305.09039306640625, + "loss": 0.0559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18279573321342468, + "rewards/margins": 5.4978861808776855, + "rewards/rejected": -5.6806817054748535, + "step": 272 + }, + { + "epoch": 0.04, + "learning_rate": 1.3947105140308109e-05, + "logits/chosen": -3.052168846130371, + "logits/rejected": -1.589404582977295, + "logps/chosen": -414.9770202636719, + "logps/rejected": -264.3679504394531, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7568401098251343, + "rewards/margins": 4.065433502197266, + "rewards/rejected": -4.8222737312316895, + "step": 273 + }, + { + "epoch": 0.04, + "learning_rate": 1.394637169977696e-05, + "logits/chosen": -2.8210058212280273, + "logits/rejected": -3.1193153858184814, + "logps/chosen": -196.60382080078125, + "logps/rejected": -398.3277893066406, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0010230988264083862, + "rewards/margins": 5.460845470428467, + "rewards/rejected": -5.459822177886963, + "step": 274 + }, + { + "epoch": 0.04, + "learning_rate": 1.3945638259245812e-05, + "logits/chosen": -2.996802806854248, + "logits/rejected": -2.4378323554992676, + "logps/chosen": -518.42138671875, + "logps/rejected": -420.64703369140625, + "loss": 0.1857, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6191848516464233, + "rewards/margins": 3.43829345703125, + "rewards/rejected": -5.057478427886963, + "step": 275 + }, + { + "epoch": 0.04, + "learning_rate": 1.3944904818714664e-05, + "logits/chosen": -1.5241467952728271, + "logits/rejected": -3.1648764610290527, + "logps/chosen": -136.31312561035156, + "logps/rejected": -315.9339599609375, + "loss": 0.0805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6736640930175781, + "rewards/margins": 3.698582649230957, + "rewards/rejected": -4.372246742248535, + "step": 276 + }, + { + "epoch": 0.04, + "learning_rate": 1.3944171378183516e-05, + "logits/chosen": -3.2321159839630127, + "logits/rejected": -3.184389591217041, + "logps/chosen": -106.54560089111328, + "logps/rejected": -94.71903991699219, + "loss": 0.7327, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.818821668624878, + "rewards/margins": 0.9599000215530396, + "rewards/rejected": -2.778721809387207, + "step": 277 + }, + { + "epoch": 0.04, + "learning_rate": 1.3943437937652368e-05, + "logits/chosen": -2.8224258422851562, + "logits/rejected": -3.13932728767395, + "logps/chosen": -243.83352661132812, + "logps/rejected": -316.38665771484375, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9381788969039917, + "rewards/margins": 4.382724285125732, + "rewards/rejected": -5.3209028244018555, + "step": 278 + }, + { + "epoch": 0.04, + "learning_rate": 1.394270449712122e-05, + "logits/chosen": -2.467745542526245, + "logits/rejected": -3.1837663650512695, + "logps/chosen": -181.009033203125, + "logps/rejected": -325.8335876464844, + "loss": 0.1366, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.859030544757843, + "rewards/margins": 3.7170236110687256, + "rewards/rejected": -4.576054096221924, + "step": 279 + }, + { + "epoch": 0.04, + "learning_rate": 1.3941971056590072e-05, + "logits/chosen": -2.327786922454834, + "logits/rejected": -2.869560718536377, + "logps/chosen": -174.73556518554688, + "logps/rejected": -294.3075256347656, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8744598031044006, + "rewards/margins": 4.877983093261719, + "rewards/rejected": -5.752442836761475, + "step": 280 + }, + { + "epoch": 0.04, + "learning_rate": 1.3941237616058924e-05, + "logits/chosen": -2.3123326301574707, + "logits/rejected": -3.1498966217041016, + "logps/chosen": -219.3203125, + "logps/rejected": -285.0863037109375, + "loss": 0.0394, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5130456686019897, + "rewards/margins": 3.219677448272705, + "rewards/rejected": -3.7327232360839844, + "step": 281 + }, + { + "epoch": 0.04, + "learning_rate": 1.3940504175527777e-05, + "logits/chosen": -2.8098626136779785, + "logits/rejected": -2.868931293487549, + "logps/chosen": -92.00486755371094, + "logps/rejected": -423.3739318847656, + "loss": 0.0277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27562370896339417, + "rewards/margins": 4.785912036895752, + "rewards/rejected": -5.061535835266113, + "step": 282 + }, + { + "epoch": 0.04, + "learning_rate": 1.3939770734996629e-05, + "logits/chosen": -2.2538633346557617, + "logits/rejected": -3.152438163757324, + "logps/chosen": -139.80970764160156, + "logps/rejected": -283.7694091796875, + "loss": 0.1232, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1997387409210205, + "rewards/margins": 3.387063980102539, + "rewards/rejected": -4.5868024826049805, + "step": 283 + }, + { + "epoch": 0.04, + "learning_rate": 1.3939037294465481e-05, + "logits/chosen": -2.7902615070343018, + "logits/rejected": -3.061946392059326, + "logps/chosen": -162.14759826660156, + "logps/rejected": -197.0005340576172, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4920269250869751, + "rewards/margins": 3.9309377670288086, + "rewards/rejected": -4.422965049743652, + "step": 284 + }, + { + "epoch": 0.04, + "learning_rate": 1.3938303853934333e-05, + "logits/chosen": -2.4168760776519775, + "logits/rejected": -3.035595655441284, + "logps/chosen": -260.1217956542969, + "logps/rejected": -358.29339599609375, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7685661315917969, + "rewards/margins": 6.5384345054626465, + "rewards/rejected": -7.307001113891602, + "step": 285 + }, + { + "epoch": 0.04, + "learning_rate": 1.3937570413403185e-05, + "logits/chosen": -2.9990851879119873, + "logits/rejected": -2.5690488815307617, + "logps/chosen": -301.9977722167969, + "logps/rejected": -261.83551025390625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5119113922119141, + "rewards/margins": 5.678307056427002, + "rewards/rejected": -6.190218448638916, + "step": 286 + }, + { + "epoch": 0.04, + "learning_rate": 1.3936836972872036e-05, + "logits/chosen": -2.8179638385772705, + "logits/rejected": -3.3045501708984375, + "logps/chosen": -22.86819839477539, + "logps/rejected": -239.15992736816406, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.159670114517212, + "rewards/margins": 6.455106735229492, + "rewards/rejected": -7.614776611328125, + "step": 287 + }, + { + "epoch": 0.04, + "learning_rate": 1.3936103532340888e-05, + "logits/chosen": -2.085380792617798, + "logits/rejected": -2.9822778701782227, + "logps/chosen": -282.0330810546875, + "logps/rejected": -323.7939758300781, + "loss": 3.7488, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.4137043952941895, + "rewards/margins": -0.2965514659881592, + "rewards/rejected": -4.117153167724609, + "step": 288 + }, + { + "epoch": 0.04, + "learning_rate": 1.393537009180974e-05, + "logits/chosen": -1.930430293083191, + "logits/rejected": -2.8380579948425293, + "logps/chosen": -55.48210906982422, + "logps/rejected": -171.34866333007812, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9242584705352783, + "rewards/margins": 4.3359174728393555, + "rewards/rejected": -5.260176181793213, + "step": 289 + }, + { + "epoch": 0.05, + "learning_rate": 1.3934636651278592e-05, + "logits/chosen": -2.6011407375335693, + "logits/rejected": -3.17004132270813, + "logps/chosen": -164.99063110351562, + "logps/rejected": -310.3304748535156, + "loss": 2.7553, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.350515842437744, + "rewards/margins": 1.5835134983062744, + "rewards/rejected": -4.9340291023254395, + "step": 290 + }, + { + "epoch": 0.05, + "learning_rate": 1.3933903210747446e-05, + "logits/chosen": -2.974061965942383, + "logits/rejected": -3.2008230686187744, + "logps/chosen": -277.6763916015625, + "logps/rejected": -279.84210205078125, + "loss": 3.2526, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.009398937225342, + "rewards/margins": -2.370060443878174, + "rewards/rejected": -1.639338493347168, + "step": 291 + }, + { + "epoch": 0.05, + "learning_rate": 1.3933169770216298e-05, + "logits/chosen": -2.8337199687957764, + "logits/rejected": -3.1439504623413086, + "logps/chosen": -136.13995361328125, + "logps/rejected": -243.44271850585938, + "loss": 0.0328, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6755799055099487, + "rewards/margins": 3.453845977783203, + "rewards/rejected": -5.129426002502441, + "step": 292 + }, + { + "epoch": 0.05, + "learning_rate": 1.393243632968515e-05, + "logits/chosen": -2.580453395843506, + "logits/rejected": -3.0182974338531494, + "logps/chosen": -49.49711608886719, + "logps/rejected": -156.51638793945312, + "loss": 0.054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7334760427474976, + "rewards/margins": 2.919590950012207, + "rewards/rejected": -3.653067111968994, + "step": 293 + }, + { + "epoch": 0.05, + "learning_rate": 1.3931702889154001e-05, + "logits/chosen": -3.259427070617676, + "logits/rejected": -2.2712066173553467, + "logps/chosen": -432.7691345214844, + "logps/rejected": -124.67796325683594, + "loss": 5.2566, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.661901473999023, + "rewards/margins": -3.8170008659362793, + "rewards/rejected": -1.8449004888534546, + "step": 294 + }, + { + "epoch": 0.05, + "learning_rate": 1.3930969448622853e-05, + "logits/chosen": -3.1756832599639893, + "logits/rejected": -2.8678202629089355, + "logps/chosen": -118.48837280273438, + "logps/rejected": -165.39993286132812, + "loss": 0.0231, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.560815453529358, + "rewards/margins": 4.173524856567383, + "rewards/rejected": -5.734340667724609, + "step": 295 + }, + { + "epoch": 0.05, + "learning_rate": 1.3930236008091707e-05, + "logits/chosen": -3.047617197036743, + "logits/rejected": -2.504485845565796, + "logps/chosen": -191.2159881591797, + "logps/rejected": -200.3509979248047, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0865447521209717, + "rewards/margins": 6.088534355163574, + "rewards/rejected": -7.175079345703125, + "step": 296 + }, + { + "epoch": 0.05, + "learning_rate": 1.3929502567560559e-05, + "logits/chosen": -3.1392862796783447, + "logits/rejected": -3.1981911659240723, + "logps/chosen": -339.31292724609375, + "logps/rejected": -314.64825439453125, + "loss": 0.0343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5734526515007019, + "rewards/margins": 3.3967466354370117, + "rewards/rejected": -3.9701991081237793, + "step": 297 + }, + { + "epoch": 0.05, + "learning_rate": 1.392876912702941e-05, + "logits/chosen": -2.3051254749298096, + "logits/rejected": -2.908782958984375, + "logps/chosen": -207.52896118164062, + "logps/rejected": -381.28216552734375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1332291066646576, + "rewards/margins": 9.030173301696777, + "rewards/rejected": -8.896944046020508, + "step": 298 + }, + { + "epoch": 0.05, + "learning_rate": 1.3928035686498262e-05, + "logits/chosen": -0.36619892716407776, + "logits/rejected": -3.002427339553833, + "logps/chosen": -35.083988189697266, + "logps/rejected": -804.5609130859375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3973360061645508, + "rewards/margins": 7.878911018371582, + "rewards/rejected": -9.276247024536133, + "step": 299 + }, + { + "epoch": 0.05, + "learning_rate": 1.3927302245967116e-05, + "logits/chosen": -3.1223292350769043, + "logits/rejected": -2.8428683280944824, + "logps/chosen": -203.9774627685547, + "logps/rejected": -139.37515258789062, + "loss": 2.7843, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.528777837753296, + "rewards/margins": -0.3858366012573242, + "rewards/rejected": -3.1429412364959717, + "step": 300 + }, + { + "epoch": 0.05, + "learning_rate": 1.3926568805435968e-05, + "logits/chosen": -2.99192214012146, + "logits/rejected": -3.1802446842193604, + "logps/chosen": -51.55027770996094, + "logps/rejected": -170.973876953125, + "loss": 0.2795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4691520631313324, + "rewards/margins": 1.3181796073913574, + "rewards/rejected": -1.7873315811157227, + "step": 301 + }, + { + "epoch": 0.05, + "learning_rate": 1.392583536490482e-05, + "logits/chosen": -2.1061878204345703, + "logits/rejected": -3.0287156105041504, + "logps/chosen": -143.13682556152344, + "logps/rejected": -296.6378173828125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7784379720687866, + "rewards/margins": 6.017709732055664, + "rewards/rejected": -7.796147346496582, + "step": 302 + }, + { + "epoch": 0.05, + "learning_rate": 1.3925101924373672e-05, + "logits/chosen": -1.2993807792663574, + "logits/rejected": -2.889724016189575, + "logps/chosen": -161.6129150390625, + "logps/rejected": -380.44000244140625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8469116687774658, + "rewards/margins": 7.17529296875, + "rewards/rejected": -8.022204399108887, + "step": 303 + }, + { + "epoch": 0.05, + "learning_rate": 1.3924368483842524e-05, + "logits/chosen": -2.813121795654297, + "logits/rejected": -1.6395188570022583, + "logps/chosen": -202.22451782226562, + "logps/rejected": -198.1426239013672, + "loss": 5.3374, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.607664585113525, + "rewards/margins": -1.2318401336669922, + "rewards/rejected": -4.375824451446533, + "step": 304 + }, + { + "epoch": 0.05, + "learning_rate": 1.3923635043311375e-05, + "logits/chosen": -2.6399471759796143, + "logits/rejected": -3.083216905593872, + "logps/chosen": -334.1250915527344, + "logps/rejected": -514.7235717773438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1674665212631226, + "rewards/margins": 9.431511878967285, + "rewards/rejected": -10.598978042602539, + "step": 305 + }, + { + "epoch": 0.05, + "learning_rate": 1.3922901602780227e-05, + "logits/chosen": -2.969339609146118, + "logits/rejected": -2.1820483207702637, + "logps/chosen": -212.90745544433594, + "logps/rejected": -227.4326934814453, + "loss": 3.2987, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.189547538757324, + "rewards/margins": -1.8053319454193115, + "rewards/rejected": -2.3842155933380127, + "step": 306 + }, + { + "epoch": 0.05, + "learning_rate": 1.3922168162249079e-05, + "logits/chosen": -2.9838075637817383, + "logits/rejected": -3.1955082416534424, + "logps/chosen": -161.84890747070312, + "logps/rejected": -295.96435546875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9405765533447266, + "rewards/margins": 8.477974891662598, + "rewards/rejected": -9.418551445007324, + "step": 307 + }, + { + "epoch": 0.05, + "learning_rate": 1.3921434721717931e-05, + "logits/chosen": -1.9699825048446655, + "logits/rejected": -3.1413090229034424, + "logps/chosen": -247.52859497070312, + "logps/rejected": -282.74163818359375, + "loss": 4.8538, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.375044345855713, + "rewards/margins": -2.1864547729492188, + "rewards/rejected": -4.188589572906494, + "step": 308 + }, + { + "epoch": 0.05, + "learning_rate": 1.3920701281186785e-05, + "logits/chosen": -3.183572292327881, + "logits/rejected": -2.198529005050659, + "logps/chosen": -542.6114501953125, + "logps/rejected": -427.7409362792969, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.455698847770691, + "rewards/margins": 4.6350908279418945, + "rewards/rejected": -6.090789794921875, + "step": 309 + }, + { + "epoch": 0.05, + "learning_rate": 1.3919967840655636e-05, + "logits/chosen": -3.1992666721343994, + "logits/rejected": -2.5796549320220947, + "logps/chosen": -785.2682495117188, + "logps/rejected": -394.9787292480469, + "loss": 3.3305, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.615727424621582, + "rewards/margins": -0.26107072830200195, + "rewards/rejected": -2.35465669631958, + "step": 310 + }, + { + "epoch": 0.05, + "learning_rate": 1.3919234400124488e-05, + "logits/chosen": -2.1106536388397217, + "logits/rejected": -3.0159170627593994, + "logps/chosen": -83.18609619140625, + "logps/rejected": -166.05361938476562, + "loss": 0.483, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6252739429473877, + "rewards/margins": 3.3926284313201904, + "rewards/rejected": -6.017902374267578, + "step": 311 + }, + { + "epoch": 0.05, + "learning_rate": 1.391850095959334e-05, + "logits/chosen": -2.0164268016815186, + "logits/rejected": -3.309534788131714, + "logps/chosen": -90.04408264160156, + "logps/rejected": -376.08673095703125, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3999983072280884, + "rewards/margins": 5.363059997558594, + "rewards/rejected": -5.763058185577393, + "step": 312 + }, + { + "epoch": 0.05, + "learning_rate": 1.3917767519062192e-05, + "logits/chosen": -2.423795700073242, + "logits/rejected": -2.2373251914978027, + "logps/chosen": -892.5393676757812, + "logps/rejected": -272.5159606933594, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013949811458587646, + "rewards/margins": 5.303926467895508, + "rewards/rejected": -5.317875862121582, + "step": 313 + }, + { + "epoch": 0.05, + "learning_rate": 1.3917034078531044e-05, + "logits/chosen": -3.166862964630127, + "logits/rejected": -2.3993399143218994, + "logps/chosen": -305.32696533203125, + "logps/rejected": -231.65032958984375, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0012489557266235, + "rewards/margins": 5.387609481811523, + "rewards/rejected": -6.388857841491699, + "step": 314 + }, + { + "epoch": 0.05, + "learning_rate": 1.3916300637999896e-05, + "logits/chosen": -2.0632834434509277, + "logits/rejected": -3.141233205795288, + "logps/chosen": -100.88878631591797, + "logps/rejected": -550.4349365234375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.824505627155304, + "rewards/margins": 7.937870979309082, + "rewards/rejected": -8.76237678527832, + "step": 315 + }, + { + "epoch": 0.05, + "learning_rate": 1.3915567197468748e-05, + "logits/chosen": -3.202329635620117, + "logits/rejected": -3.067138433456421, + "logps/chosen": -327.99871826171875, + "logps/rejected": -390.73443603515625, + "loss": 4.5899, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.398050785064697, + "rewards/margins": -2.2519259452819824, + "rewards/rejected": -2.146125078201294, + "step": 316 + }, + { + "epoch": 0.05, + "learning_rate": 1.39148337569376e-05, + "logits/chosen": -2.2745401859283447, + "logits/rejected": -3.069648027420044, + "logps/chosen": -129.94769287109375, + "logps/rejected": -254.77978515625, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7660755515098572, + "rewards/margins": 3.613171339035034, + "rewards/rejected": -4.379246711730957, + "step": 317 + }, + { + "epoch": 0.05, + "learning_rate": 1.3914100316406453e-05, + "logits/chosen": -2.3127553462982178, + "logits/rejected": -3.1088552474975586, + "logps/chosen": -115.4085464477539, + "logps/rejected": -260.591552734375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8831904530525208, + "rewards/margins": 7.6686787605285645, + "rewards/rejected": -8.55186939239502, + "step": 318 + }, + { + "epoch": 0.05, + "learning_rate": 1.3913366875875305e-05, + "logits/chosen": -2.4022703170776367, + "logits/rejected": -3.160193681716919, + "logps/chosen": -415.7076416015625, + "logps/rejected": -629.3311767578125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8619011640548706, + "rewards/margins": 6.820209503173828, + "rewards/rejected": -8.682110786437988, + "step": 319 + }, + { + "epoch": 0.05, + "learning_rate": 1.3912633435344157e-05, + "logits/chosen": -1.2108962535858154, + "logits/rejected": -2.909191608428955, + "logps/chosen": -56.102542877197266, + "logps/rejected": -430.65447998046875, + "loss": 0.0572, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.580448627471924, + "rewards/margins": 10.806973457336426, + "rewards/rejected": -13.387422561645508, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 1.3911899994813009e-05, + "logits/chosen": -2.262115955352783, + "logits/rejected": -3.193601369857788, + "logps/chosen": -42.22088623046875, + "logps/rejected": -248.2942352294922, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08381520211696625, + "rewards/margins": 5.481444358825684, + "rewards/rejected": -5.5652594566345215, + "step": 321 + }, + { + "epoch": 0.05, + "learning_rate": 1.391116655428186e-05, + "logits/chosen": -3.098529815673828, + "logits/rejected": -2.7321293354034424, + "logps/chosen": -462.6239013671875, + "logps/rejected": -329.63653564453125, + "loss": 0.7736, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.30173659324646, + "rewards/margins": 1.011918544769287, + "rewards/rejected": -3.313655138015747, + "step": 322 + }, + { + "epoch": 0.05, + "learning_rate": 1.3910433113750713e-05, + "logits/chosen": -1.9654450416564941, + "logits/rejected": -3.132944345474243, + "logps/chosen": -120.78205871582031, + "logps/rejected": -417.2001037597656, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5581893920898438, + "rewards/margins": 5.601334571838379, + "rewards/rejected": -6.159523963928223, + "step": 323 + }, + { + "epoch": 0.05, + "learning_rate": 1.3909699673219564e-05, + "logits/chosen": -2.2349538803100586, + "logits/rejected": -3.0398056507110596, + "logps/chosen": -84.37515258789062, + "logps/rejected": -347.834228515625, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6481631994247437, + "rewards/margins": 7.268343925476074, + "rewards/rejected": -8.91650676727295, + "step": 324 + }, + { + "epoch": 0.05, + "learning_rate": 1.3908966232688416e-05, + "logits/chosen": -2.4443488121032715, + "logits/rejected": -3.206267833709717, + "logps/chosen": -23.715408325195312, + "logps/rejected": -206.01187133789062, + "loss": 0.0538, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.960675060749054, + "rewards/margins": 4.511430740356445, + "rewards/rejected": -5.472105979919434, + "step": 325 + }, + { + "epoch": 0.05, + "learning_rate": 1.3908232792157268e-05, + "logits/chosen": -3.009488821029663, + "logits/rejected": -2.35675311088562, + "logps/chosen": -319.3578186035156, + "logps/rejected": -92.66446685791016, + "loss": 10.3131, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.793739318847656, + "rewards/margins": -9.857070922851562, + "rewards/rejected": -1.936668872833252, + "step": 326 + }, + { + "epoch": 0.05, + "learning_rate": 1.3907499351626122e-05, + "logits/chosen": -1.797061562538147, + "logits/rejected": -2.964250087738037, + "logps/chosen": -207.94215393066406, + "logps/rejected": -454.75018310546875, + "loss": 1.5693, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4953436851501465, + "rewards/margins": 6.7401814460754395, + "rewards/rejected": -10.235525131225586, + "step": 327 + }, + { + "epoch": 0.05, + "learning_rate": 1.3906765911094974e-05, + "logits/chosen": -1.92696213722229, + "logits/rejected": -3.132702589035034, + "logps/chosen": -293.04974365234375, + "logps/rejected": -426.9845886230469, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7516693472862244, + "rewards/margins": 6.153307914733887, + "rewards/rejected": -6.904977798461914, + "step": 328 + }, + { + "epoch": 0.05, + "learning_rate": 1.3906032470563826e-05, + "logits/chosen": -2.9441816806793213, + "logits/rejected": -2.877976417541504, + "logps/chosen": -201.14405822753906, + "logps/rejected": -308.94024658203125, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.879084825515747, + "rewards/margins": 5.324343681335449, + "rewards/rejected": -7.203428268432617, + "step": 329 + }, + { + "epoch": 0.05, + "learning_rate": 1.3905299030032679e-05, + "logits/chosen": -3.1366043090820312, + "logits/rejected": -2.1660215854644775, + "logps/chosen": -508.33868408203125, + "logps/rejected": -167.5116729736328, + "loss": 2.8742, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.608146905899048, + "rewards/margins": -0.4351682662963867, + "rewards/rejected": -2.172978639602661, + "step": 330 + }, + { + "epoch": 0.05, + "learning_rate": 1.3904565589501531e-05, + "logits/chosen": -3.1269304752349854, + "logits/rejected": -2.497406005859375, + "logps/chosen": -267.2000427246094, + "logps/rejected": -191.25563049316406, + "loss": 0.0815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8175184726715088, + "rewards/margins": 2.832998275756836, + "rewards/rejected": -3.6505167484283447, + "step": 331 + }, + { + "epoch": 0.05, + "learning_rate": 1.3903832148970383e-05, + "logits/chosen": -3.2190093994140625, + "logits/rejected": -2.399416923522949, + "logps/chosen": -640.666748046875, + "logps/rejected": -492.4984436035156, + "loss": 2.2379, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9455597400665283, + "rewards/margins": 1.3988187313079834, + "rewards/rejected": -4.344378471374512, + "step": 332 + }, + { + "epoch": 0.05, + "learning_rate": 1.3903098708439235e-05, + "logits/chosen": -3.1519086360931396, + "logits/rejected": -2.8339293003082275, + "logps/chosen": -594.7197265625, + "logps/rejected": -199.202880859375, + "loss": 3.9896, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.929996013641357, + "rewards/margins": -0.20678949356079102, + "rewards/rejected": -5.723206520080566, + "step": 333 + }, + { + "epoch": 0.05, + "learning_rate": 1.3902365267908087e-05, + "logits/chosen": -3.110962390899658, + "logits/rejected": -2.732717752456665, + "logps/chosen": -172.54563903808594, + "logps/rejected": -220.791748046875, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.360377550125122, + "rewards/margins": 5.314233779907227, + "rewards/rejected": -6.674611568450928, + "step": 334 + }, + { + "epoch": 0.05, + "learning_rate": 1.3901631827376939e-05, + "logits/chosen": -2.9093315601348877, + "logits/rejected": -2.2884953022003174, + "logps/chosen": -159.49436950683594, + "logps/rejected": -168.18356323242188, + "loss": 2.8911, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.204570770263672, + "rewards/margins": 1.6992840766906738, + "rewards/rejected": -5.903854846954346, + "step": 335 + }, + { + "epoch": 0.05, + "learning_rate": 1.3900898386845792e-05, + "logits/chosen": -2.8552448749542236, + "logits/rejected": -3.3715178966522217, + "logps/chosen": -92.52399444580078, + "logps/rejected": -239.33517456054688, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21505603194236755, + "rewards/margins": 4.89462947845459, + "rewards/rejected": -5.109685897827148, + "step": 336 + }, + { + "epoch": 0.05, + "learning_rate": 1.3900164946314644e-05, + "logits/chosen": -3.0400097370147705, + "logits/rejected": -2.3672661781311035, + "logps/chosen": -445.45977783203125, + "logps/rejected": -548.5006713867188, + "loss": 3.7419, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.351350784301758, + "rewards/margins": -0.7125461101531982, + "rewards/rejected": -3.6388046741485596, + "step": 337 + }, + { + "epoch": 0.05, + "learning_rate": 1.3899431505783496e-05, + "logits/chosen": -1.230692744255066, + "logits/rejected": -2.57480525970459, + "logps/chosen": -349.24029541015625, + "logps/rejected": -736.2906494140625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4904129207134247, + "rewards/margins": 7.498170852661133, + "rewards/rejected": -7.988583564758301, + "step": 338 + }, + { + "epoch": 0.05, + "learning_rate": 1.3898698065252348e-05, + "logits/chosen": -2.7160804271698, + "logits/rejected": -3.2221732139587402, + "logps/chosen": -91.8559799194336, + "logps/rejected": -353.4632873535156, + "loss": 0.133, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5104032754898071, + "rewards/margins": 2.7610459327697754, + "rewards/rejected": -4.271449089050293, + "step": 339 + }, + { + "epoch": 0.05, + "learning_rate": 1.38979646247212e-05, + "logits/chosen": -2.356886625289917, + "logits/rejected": -3.2815144062042236, + "logps/chosen": -37.14722442626953, + "logps/rejected": -226.90989685058594, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5234076380729675, + "rewards/margins": 5.339783668518066, + "rewards/rejected": -5.863191604614258, + "step": 340 + }, + { + "epoch": 0.05, + "learning_rate": 1.3897231184190051e-05, + "logits/chosen": -3.1452159881591797, + "logits/rejected": -2.791757106781006, + "logps/chosen": -239.85069274902344, + "logps/rejected": -284.94110107421875, + "loss": 1.4803, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8679308891296387, + "rewards/margins": 0.6175233125686646, + "rewards/rejected": -2.4854543209075928, + "step": 341 + }, + { + "epoch": 0.05, + "learning_rate": 1.3896497743658903e-05, + "logits/chosen": -1.2319563627243042, + "logits/rejected": -2.8339996337890625, + "logps/chosen": -59.826881408691406, + "logps/rejected": -283.94085693359375, + "loss": 0.0562, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1474635601043701, + "rewards/margins": 3.7043724060058594, + "rewards/rejected": -4.85183572769165, + "step": 342 + }, + { + "epoch": 0.05, + "learning_rate": 1.3895764303127755e-05, + "logits/chosen": -2.523728132247925, + "logits/rejected": -2.6772940158843994, + "logps/chosen": -257.34423828125, + "logps/rejected": -339.3541259765625, + "loss": 0.0436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9045517444610596, + "rewards/margins": 3.298306941986084, + "rewards/rejected": -4.202858924865723, + "step": 343 + }, + { + "epoch": 0.05, + "learning_rate": 1.3895030862596609e-05, + "logits/chosen": -2.465141534805298, + "logits/rejected": -3.084933042526245, + "logps/chosen": -32.314598083496094, + "logps/rejected": -175.0936737060547, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9501100778579712, + "rewards/margins": 5.135483741760254, + "rewards/rejected": -7.085594177246094, + "step": 344 + }, + { + "epoch": 0.05, + "learning_rate": 1.389429742206546e-05, + "logits/chosen": -2.8936045169830322, + "logits/rejected": -3.218575954437256, + "logps/chosen": -30.91324806213379, + "logps/rejected": -214.43820190429688, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2420182228088379, + "rewards/margins": 5.61885929107666, + "rewards/rejected": -5.86087703704834, + "step": 345 + }, + { + "epoch": 0.05, + "learning_rate": 1.3893563981534313e-05, + "logits/chosen": -1.987146258354187, + "logits/rejected": -2.811357259750366, + "logps/chosen": -156.45936584472656, + "logps/rejected": -422.15960693359375, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3702764511108398, + "rewards/margins": 8.828950881958008, + "rewards/rejected": -10.199228286743164, + "step": 346 + }, + { + "epoch": 0.05, + "learning_rate": 1.3892830541003164e-05, + "logits/chosen": -1.853097915649414, + "logits/rejected": -3.0449962615966797, + "logps/chosen": -157.27342224121094, + "logps/rejected": -312.736572265625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0906250476837158, + "rewards/margins": 5.891899108886719, + "rewards/rejected": -6.9825239181518555, + "step": 347 + }, + { + "epoch": 0.05, + "learning_rate": 1.3892097100472016e-05, + "logits/chosen": -2.798245906829834, + "logits/rejected": -3.2234435081481934, + "logps/chosen": -515.6106567382812, + "logps/rejected": -526.1159057617188, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18090209364891052, + "rewards/margins": 7.072756767272949, + "rewards/rejected": -7.253659248352051, + "step": 348 + }, + { + "epoch": 0.05, + "learning_rate": 1.3891363659940868e-05, + "logits/chosen": -2.5509965419769287, + "logits/rejected": -3.1538007259368896, + "logps/chosen": -413.8829345703125, + "logps/rejected": -409.8868408203125, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8624237775802612, + "rewards/margins": 6.980221748352051, + "rewards/rejected": -8.842645645141602, + "step": 349 + }, + { + "epoch": 0.05, + "learning_rate": 1.389063021940972e-05, + "logits/chosen": -1.4352002143859863, + "logits/rejected": -2.9329285621643066, + "logps/chosen": -107.6964340209961, + "logps/rejected": -290.030517578125, + "loss": 0.1792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5743904113769531, + "rewards/margins": 3.9921350479125977, + "rewards/rejected": -4.566525459289551, + "step": 350 + }, + { + "epoch": 0.05, + "learning_rate": 1.3889896778878572e-05, + "logits/chosen": -2.8704118728637695, + "logits/rejected": -2.7392358779907227, + "logps/chosen": -171.7534637451172, + "logps/rejected": -353.4228515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8454341888427734, + "rewards/margins": 10.548528671264648, + "rewards/rejected": -11.393962860107422, + "step": 351 + }, + { + "epoch": 0.05, + "learning_rate": 1.3889163338347424e-05, + "logits/chosen": -3.2826240062713623, + "logits/rejected": -1.5942221879959106, + "logps/chosen": -416.2756042480469, + "logps/rejected": -109.0499038696289, + "loss": 5.8624, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.31352424621582, + "rewards/margins": -5.85935115814209, + "rewards/rejected": -0.4541727304458618, + "step": 352 + }, + { + "epoch": 0.05, + "learning_rate": 1.3888429897816277e-05, + "logits/chosen": -1.7584807872772217, + "logits/rejected": -3.0441222190856934, + "logps/chosen": -148.8487091064453, + "logps/rejected": -358.44342041015625, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9164444208145142, + "rewards/margins": 4.925248622894287, + "rewards/rejected": -5.841692924499512, + "step": 353 + }, + { + "epoch": 0.06, + "learning_rate": 1.388769645728513e-05, + "logits/chosen": -3.2760329246520996, + "logits/rejected": -3.061039924621582, + "logps/chosen": -673.3963623046875, + "logps/rejected": -517.527099609375, + "loss": 1.1291, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.7335342168807983, + "rewards/margins": 2.209045648574829, + "rewards/rejected": -2.942579746246338, + "step": 354 + }, + { + "epoch": 0.06, + "learning_rate": 1.3886963016753981e-05, + "logits/chosen": -2.4452974796295166, + "logits/rejected": -3.051382064819336, + "logps/chosen": -281.2966003417969, + "logps/rejected": -364.5290832519531, + "loss": 0.4936, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.9735153913497925, + "rewards/margins": 3.9624626636505127, + "rewards/rejected": -4.935977935791016, + "step": 355 + }, + { + "epoch": 0.06, + "learning_rate": 1.3886229576222833e-05, + "logits/chosen": -3.1122565269470215, + "logits/rejected": -2.168646812438965, + "logps/chosen": -437.88067626953125, + "logps/rejected": -203.58380126953125, + "loss": 4.9885, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.015489101409912, + "rewards/margins": -1.5386195182800293, + "rewards/rejected": -4.476870059967041, + "step": 356 + }, + { + "epoch": 0.06, + "learning_rate": 1.3885496135691685e-05, + "logits/chosen": -2.7898833751678467, + "logits/rejected": -3.0878076553344727, + "logps/chosen": -86.88899993896484, + "logps/rejected": -226.31065368652344, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9714404940605164, + "rewards/margins": 5.309360504150391, + "rewards/rejected": -6.280801296234131, + "step": 357 + }, + { + "epoch": 0.06, + "learning_rate": 1.3884762695160537e-05, + "logits/chosen": -2.3022871017456055, + "logits/rejected": -3.029289484024048, + "logps/chosen": -291.2488098144531, + "logps/rejected": -481.3355407714844, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5139496326446533, + "rewards/margins": 8.302385330200195, + "rewards/rejected": -9.81633472442627, + "step": 358 + }, + { + "epoch": 0.06, + "learning_rate": 1.3884029254629389e-05, + "logits/chosen": -2.1445059776306152, + "logits/rejected": -2.949674367904663, + "logps/chosen": -568.479248046875, + "logps/rejected": -373.9548645019531, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3852745294570923, + "rewards/margins": 4.387948036193848, + "rewards/rejected": -5.77322244644165, + "step": 359 + }, + { + "epoch": 0.06, + "learning_rate": 1.388329581409824e-05, + "logits/chosen": -2.7616186141967773, + "logits/rejected": -3.143683433532715, + "logps/chosen": -245.80023193359375, + "logps/rejected": -471.2894287109375, + "loss": 2.6471, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.474887371063232, + "rewards/margins": 1.1595227718353271, + "rewards/rejected": -5.634410381317139, + "step": 360 + }, + { + "epoch": 0.06, + "learning_rate": 1.3882562373567092e-05, + "logits/chosen": -1.8844701051712036, + "logits/rejected": -3.2416512966156006, + "logps/chosen": -39.538307189941406, + "logps/rejected": -418.7684326171875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4551520347595215, + "rewards/margins": 7.495622634887695, + "rewards/rejected": -7.950774192810059, + "step": 361 + }, + { + "epoch": 0.06, + "learning_rate": 1.3881828933035946e-05, + "logits/chosen": -3.1234259605407715, + "logits/rejected": -3.3034541606903076, + "logps/chosen": -93.56253051757812, + "logps/rejected": -234.58990478515625, + "loss": 0.0577, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8035198450088501, + "rewards/margins": 4.838437080383301, + "rewards/rejected": -5.6419572830200195, + "step": 362 + }, + { + "epoch": 0.06, + "learning_rate": 1.3881095492504798e-05, + "logits/chosen": -1.1133689880371094, + "logits/rejected": -3.105093002319336, + "logps/chosen": -110.93942260742188, + "logps/rejected": -268.1721496582031, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.654953956604004, + "rewards/margins": 5.098967552185059, + "rewards/rejected": -6.7539215087890625, + "step": 363 + }, + { + "epoch": 0.06, + "learning_rate": 1.3880362051973651e-05, + "logits/chosen": -3.2229127883911133, + "logits/rejected": -3.3010497093200684, + "logps/chosen": -81.72748565673828, + "logps/rejected": -184.8765869140625, + "loss": 0.0429, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1364672183990479, + "rewards/margins": 3.718323230743408, + "rewards/rejected": -4.854790687561035, + "step": 364 + }, + { + "epoch": 0.06, + "learning_rate": 1.3879628611442503e-05, + "logits/chosen": -2.5388565063476562, + "logits/rejected": -3.176757574081421, + "logps/chosen": -117.89166259765625, + "logps/rejected": -257.513916015625, + "loss": 0.0805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8296982049942017, + "rewards/margins": 4.1807966232299805, + "rewards/rejected": -5.010495185852051, + "step": 365 + }, + { + "epoch": 0.06, + "learning_rate": 1.3878895170911355e-05, + "logits/chosen": -1.9648011922836304, + "logits/rejected": -3.060030460357666, + "logps/chosen": -228.05435180664062, + "logps/rejected": -283.3526306152344, + "loss": 3.0853, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.228992938995361, + "rewards/margins": -1.6269919872283936, + "rewards/rejected": -2.6020009517669678, + "step": 366 + }, + { + "epoch": 0.06, + "learning_rate": 1.3878161730380207e-05, + "logits/chosen": -2.8140432834625244, + "logits/rejected": -3.2185418605804443, + "logps/chosen": -48.10844039916992, + "logps/rejected": -135.88401794433594, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4441550970077515, + "rewards/margins": 5.555589199066162, + "rewards/rejected": -6.999744415283203, + "step": 367 + }, + { + "epoch": 0.06, + "learning_rate": 1.3877428289849059e-05, + "logits/chosen": -1.2056593894958496, + "logits/rejected": -3.1678621768951416, + "logps/chosen": -141.59417724609375, + "logps/rejected": -354.16656494140625, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0855457782745361, + "rewards/margins": 5.919473171234131, + "rewards/rejected": -7.005019187927246, + "step": 368 + }, + { + "epoch": 0.06, + "learning_rate": 1.387669484931791e-05, + "logits/chosen": -3.1526896953582764, + "logits/rejected": -3.313671827316284, + "logps/chosen": -48.795589447021484, + "logps/rejected": -276.30194091796875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5707302093505859, + "rewards/margins": 5.715235233306885, + "rewards/rejected": -6.285965442657471, + "step": 369 + }, + { + "epoch": 0.06, + "learning_rate": 1.3875961408786763e-05, + "logits/chosen": -3.0915262699127197, + "logits/rejected": -3.1816439628601074, + "logps/chosen": -94.4470443725586, + "logps/rejected": -316.94781494140625, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11071924865245819, + "rewards/margins": 7.284276008605957, + "rewards/rejected": -7.173557281494141, + "step": 370 + }, + { + "epoch": 0.06, + "learning_rate": 1.3875227968255616e-05, + "logits/chosen": -2.675027370452881, + "logits/rejected": -3.096116781234741, + "logps/chosen": -107.30384826660156, + "logps/rejected": -181.7537841796875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1529422402381897, + "rewards/margins": 5.580485820770264, + "rewards/rejected": -5.733428001403809, + "step": 371 + }, + { + "epoch": 0.06, + "learning_rate": 1.3874494527724468e-05, + "logits/chosen": -2.9036011695861816, + "logits/rejected": -3.2649495601654053, + "logps/chosen": -262.85577392578125, + "logps/rejected": -337.01177978515625, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19396820664405823, + "rewards/margins": 4.802046775817871, + "rewards/rejected": -4.996015548706055, + "step": 372 + }, + { + "epoch": 0.06, + "learning_rate": 1.387376108719332e-05, + "logits/chosen": -2.9267423152923584, + "logits/rejected": -2.766957998275757, + "logps/chosen": -389.7217712402344, + "logps/rejected": -290.0395202636719, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7010154724121094, + "rewards/margins": 5.11144495010376, + "rewards/rejected": -6.812460422515869, + "step": 373 + }, + { + "epoch": 0.06, + "learning_rate": 1.3873027646662172e-05, + "logits/chosen": -2.3381035327911377, + "logits/rejected": -3.2057247161865234, + "logps/chosen": -13.6384916305542, + "logps/rejected": -187.68833923339844, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04576249048113823, + "rewards/margins": 5.672003746032715, + "rewards/rejected": -5.717766284942627, + "step": 374 + }, + { + "epoch": 0.06, + "learning_rate": 1.3872294206131024e-05, + "logits/chosen": -3.200685977935791, + "logits/rejected": -2.602396011352539, + "logps/chosen": -541.9927368164062, + "logps/rejected": -451.21771240234375, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.819040060043335, + "rewards/margins": 4.281093597412109, + "rewards/rejected": -5.100133419036865, + "step": 375 + }, + { + "epoch": 0.06, + "learning_rate": 1.3871560765599876e-05, + "logits/chosen": -3.033649444580078, + "logits/rejected": -3.058351993560791, + "logps/chosen": -185.0884246826172, + "logps/rejected": -243.47244262695312, + "loss": 0.083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.276240587234497, + "rewards/margins": 3.4753613471984863, + "rewards/rejected": -4.7516021728515625, + "step": 376 + }, + { + "epoch": 0.06, + "learning_rate": 1.3870827325068728e-05, + "logits/chosen": -3.008124828338623, + "logits/rejected": -3.2986466884613037, + "logps/chosen": -39.28913116455078, + "logps/rejected": -202.79379272460938, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.359976202249527, + "rewards/margins": 6.111150741577148, + "rewards/rejected": -6.471127033233643, + "step": 377 + }, + { + "epoch": 0.06, + "learning_rate": 1.387009388453758e-05, + "logits/chosen": -2.2917211055755615, + "logits/rejected": -3.0877645015716553, + "logps/chosen": -25.730886459350586, + "logps/rejected": -124.51174926757812, + "loss": 0.0405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4492414891719818, + "rewards/margins": 3.8482933044433594, + "rewards/rejected": -4.297534942626953, + "step": 378 + }, + { + "epoch": 0.06, + "learning_rate": 1.3869360444006431e-05, + "logits/chosen": -2.2198145389556885, + "logits/rejected": -2.8658053874969482, + "logps/chosen": -224.7145233154297, + "logps/rejected": -259.9518127441406, + "loss": 0.3377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.827920138835907, + "rewards/margins": 4.750278949737549, + "rewards/rejected": -5.5781989097595215, + "step": 379 + }, + { + "epoch": 0.06, + "learning_rate": 1.3868627003475285e-05, + "logits/chosen": -3.2907559871673584, + "logits/rejected": -1.7841503620147705, + "logps/chosen": -410.91656494140625, + "logps/rejected": -228.05624389648438, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6541347503662109, + "rewards/margins": 4.959400653839111, + "rewards/rejected": -5.613535404205322, + "step": 380 + }, + { + "epoch": 0.06, + "learning_rate": 1.3867893562944137e-05, + "logits/chosen": -2.5266902446746826, + "logits/rejected": -2.914670944213867, + "logps/chosen": -159.99227905273438, + "logps/rejected": -221.634765625, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.238777995109558, + "rewards/margins": 5.451568126678467, + "rewards/rejected": -6.6903462409973145, + "step": 381 + }, + { + "epoch": 0.06, + "learning_rate": 1.3867160122412989e-05, + "logits/chosen": -3.090696334838867, + "logits/rejected": -1.9469796419143677, + "logps/chosen": -359.1051330566406, + "logps/rejected": -406.6455993652344, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.138667494058609, + "rewards/margins": 9.035750389099121, + "rewards/rejected": -9.174417495727539, + "step": 382 + }, + { + "epoch": 0.06, + "learning_rate": 1.386642668188184e-05, + "logits/chosen": -1.8405832052230835, + "logits/rejected": -3.093553304672241, + "logps/chosen": -226.199462890625, + "logps/rejected": -298.64935302734375, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1157143115997314, + "rewards/margins": 4.943578243255615, + "rewards/rejected": -7.059292793273926, + "step": 383 + }, + { + "epoch": 0.06, + "learning_rate": 1.3865693241350692e-05, + "logits/chosen": -1.6176754236221313, + "logits/rejected": -3.0692808628082275, + "logps/chosen": -98.08980560302734, + "logps/rejected": -518.2933349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9676814079284668, + "rewards/margins": 10.191858291625977, + "rewards/rejected": -12.159540176391602, + "step": 384 + }, + { + "epoch": 0.06, + "learning_rate": 1.3864959800819544e-05, + "logits/chosen": -2.830990791320801, + "logits/rejected": -2.7046689987182617, + "logps/chosen": -258.4342346191406, + "logps/rejected": -288.8689880371094, + "loss": 4.8836, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.113336563110352, + "rewards/margins": -1.7435762882232666, + "rewards/rejected": -4.369760036468506, + "step": 385 + }, + { + "epoch": 0.06, + "learning_rate": 1.3864226360288396e-05, + "logits/chosen": -3.2687835693359375, + "logits/rejected": -3.0369043350219727, + "logps/chosen": -276.0753173828125, + "logps/rejected": -120.86508178710938, + "loss": 3.0137, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.84634256362915, + "rewards/margins": -1.7912074327468872, + "rewards/rejected": -3.0551350116729736, + "step": 386 + }, + { + "epoch": 0.06, + "learning_rate": 1.3863492919757248e-05, + "logits/chosen": -2.8731753826141357, + "logits/rejected": -3.2384696006774902, + "logps/chosen": -45.022918701171875, + "logps/rejected": -166.89459228515625, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0471770316362381, + "rewards/margins": 4.434938907623291, + "rewards/rejected": -4.482115745544434, + "step": 387 + }, + { + "epoch": 0.06, + "learning_rate": 1.38627594792261e-05, + "logits/chosen": -2.283043622970581, + "logits/rejected": -2.9262900352478027, + "logps/chosen": -438.66448974609375, + "logps/rejected": -394.91229248046875, + "loss": 3.648, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.620362758636475, + "rewards/margins": 1.7312302589416504, + "rewards/rejected": -8.351593017578125, + "step": 388 + }, + { + "epoch": 0.06, + "learning_rate": 1.3862026038694953e-05, + "logits/chosen": -2.8607001304626465, + "logits/rejected": -1.3488061428070068, + "logps/chosen": -448.06195068359375, + "logps/rejected": -158.30055236816406, + "loss": 8.5067, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.068336486816406, + "rewards/margins": -8.506431579589844, + "rewards/rejected": -0.5619049072265625, + "step": 389 + }, + { + "epoch": 0.06, + "learning_rate": 1.3861292598163805e-05, + "logits/chosen": -3.0792622566223145, + "logits/rejected": -3.1205291748046875, + "logps/chosen": -198.51788330078125, + "logps/rejected": -82.02416229248047, + "loss": 3.7999, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.743955612182617, + "rewards/margins": -1.5112736225128174, + "rewards/rejected": -3.2326819896698, + "step": 390 + }, + { + "epoch": 0.06, + "learning_rate": 1.3860559157632657e-05, + "logits/chosen": -3.250211477279663, + "logits/rejected": -2.797349691390991, + "logps/chosen": -123.37352752685547, + "logps/rejected": -119.28817749023438, + "loss": 0.3222, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3158782720565796, + "rewards/margins": 2.1105198860168457, + "rewards/rejected": -2.426398277282715, + "step": 391 + }, + { + "epoch": 0.06, + "learning_rate": 1.3859825717101509e-05, + "logits/chosen": -3.1484463214874268, + "logits/rejected": -2.253864288330078, + "logps/chosen": -801.155517578125, + "logps/rejected": -592.2935180664062, + "loss": 2.851, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.184008836746216, + "rewards/margins": 0.9053268432617188, + "rewards/rejected": -4.0893354415893555, + "step": 392 + }, + { + "epoch": 0.06, + "learning_rate": 1.3859092276570361e-05, + "logits/chosen": -1.0583549737930298, + "logits/rejected": -2.707937717437744, + "logps/chosen": -192.9579315185547, + "logps/rejected": -460.604736328125, + "loss": 3.5451, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2195188999176025, + "rewards/margins": 1.7097716331481934, + "rewards/rejected": -4.929290771484375, + "step": 393 + }, + { + "epoch": 0.06, + "learning_rate": 1.3858358836039213e-05, + "logits/chosen": -2.632096290588379, + "logits/rejected": -2.8802685737609863, + "logps/chosen": -85.0283203125, + "logps/rejected": -237.61990356445312, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0688507556915283, + "rewards/margins": 4.587612152099609, + "rewards/rejected": -5.656462669372559, + "step": 394 + }, + { + "epoch": 0.06, + "learning_rate": 1.3857625395508065e-05, + "logits/chosen": -2.999894857406616, + "logits/rejected": -3.234792947769165, + "logps/chosen": -37.044952392578125, + "logps/rejected": -123.432373046875, + "loss": 0.0904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2673472762107849, + "rewards/margins": 4.823686599731445, + "rewards/rejected": -4.556338787078857, + "step": 395 + }, + { + "epoch": 0.06, + "learning_rate": 1.3856891954976917e-05, + "logits/chosen": -3.093270778656006, + "logits/rejected": -2.1349501609802246, + "logps/chosen": -283.9753112792969, + "logps/rejected": -140.15174865722656, + "loss": 3.0026, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4605796337127686, + "rewards/margins": -1.6776371002197266, + "rewards/rejected": -1.782942533493042, + "step": 396 + }, + { + "epoch": 0.06, + "learning_rate": 1.385615851444577e-05, + "logits/chosen": -2.6963820457458496, + "logits/rejected": -3.20061993598938, + "logps/chosen": -22.143077850341797, + "logps/rejected": -208.57528686523438, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25853821635246277, + "rewards/margins": 5.645563125610352, + "rewards/rejected": -5.904101371765137, + "step": 397 + }, + { + "epoch": 0.06, + "learning_rate": 1.3855425073914624e-05, + "logits/chosen": -3.164577007293701, + "logits/rejected": -3.0851478576660156, + "logps/chosen": -45.580108642578125, + "logps/rejected": -66.65383911132812, + "loss": 0.1962, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12314853817224503, + "rewards/margins": 2.086228370666504, + "rewards/rejected": -2.209376811981201, + "step": 398 + }, + { + "epoch": 0.06, + "learning_rate": 1.3854691633383476e-05, + "logits/chosen": -2.6744983196258545, + "logits/rejected": -3.286475419998169, + "logps/chosen": -171.62823486328125, + "logps/rejected": -242.21420288085938, + "loss": 0.6959, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7813878059387207, + "rewards/margins": 3.037646532058716, + "rewards/rejected": -4.819034099578857, + "step": 399 + }, + { + "epoch": 0.06, + "learning_rate": 1.3853958192852328e-05, + "logits/chosen": -2.467362642288208, + "logits/rejected": -3.0278289318084717, + "logps/chosen": -106.10857391357422, + "logps/rejected": -235.82431030273438, + "loss": 0.1098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4043790102005005, + "rewards/margins": 2.8395113945007324, + "rewards/rejected": -4.243890285491943, + "step": 400 + }, + { + "epoch": 0.06, + "learning_rate": 1.385322475232118e-05, + "logits/chosen": -3.272069215774536, + "logits/rejected": -2.655240297317505, + "logps/chosen": -381.96539306640625, + "logps/rejected": -279.94573974609375, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4820987582206726, + "rewards/margins": 5.30349063873291, + "rewards/rejected": -5.785589218139648, + "step": 401 + }, + { + "epoch": 0.06, + "learning_rate": 1.3852491311790031e-05, + "logits/chosen": -3.237868070602417, + "logits/rejected": -2.490736484527588, + "logps/chosen": -259.866943359375, + "logps/rejected": -16.124311447143555, + "loss": 3.9098, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.5934035778045654, + "rewards/margins": -3.6252453327178955, + "rewards/rejected": 0.03184185177087784, + "step": 402 + }, + { + "epoch": 0.06, + "learning_rate": 1.3851757871258883e-05, + "logits/chosen": -2.9963622093200684, + "logits/rejected": -2.841689348220825, + "logps/chosen": -196.48768615722656, + "logps/rejected": -278.11114501953125, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4432964324951172, + "rewards/margins": 5.732486724853516, + "rewards/rejected": -6.175783157348633, + "step": 403 + }, + { + "epoch": 0.06, + "learning_rate": 1.3851024430727735e-05, + "logits/chosen": -3.2088255882263184, + "logits/rejected": -2.8085336685180664, + "logps/chosen": -692.4420166015625, + "logps/rejected": -506.3796081542969, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.318983554840088, + "rewards/margins": 4.4322919845581055, + "rewards/rejected": -5.751275539398193, + "step": 404 + }, + { + "epoch": 0.06, + "learning_rate": 1.3850290990196587e-05, + "logits/chosen": -2.481257438659668, + "logits/rejected": -3.309734344482422, + "logps/chosen": -293.635498046875, + "logps/rejected": -364.85137939453125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2159736156463623, + "rewards/margins": 5.590421676635742, + "rewards/rejected": -6.806395053863525, + "step": 405 + }, + { + "epoch": 0.06, + "learning_rate": 1.3849557549665439e-05, + "logits/chosen": -2.4517927169799805, + "logits/rejected": -3.1065778732299805, + "logps/chosen": -100.5552749633789, + "logps/rejected": -309.68194580078125, + "loss": 1.7274, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.060072422027588, + "rewards/margins": -0.11756587028503418, + "rewards/rejected": -2.9425063133239746, + "step": 406 + }, + { + "epoch": 0.06, + "learning_rate": 1.3848824109134292e-05, + "logits/chosen": -2.873090982437134, + "logits/rejected": -3.1416282653808594, + "logps/chosen": -29.466222763061523, + "logps/rejected": -226.7779998779297, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6604347825050354, + "rewards/margins": 8.883340835571289, + "rewards/rejected": -9.54377555847168, + "step": 407 + }, + { + "epoch": 0.06, + "learning_rate": 1.3848090668603144e-05, + "logits/chosen": -1.2297463417053223, + "logits/rejected": -3.23248553276062, + "logps/chosen": -175.638671875, + "logps/rejected": -495.1302795410156, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35448992252349854, + "rewards/margins": 7.924395561218262, + "rewards/rejected": -7.5699052810668945, + "step": 408 + }, + { + "epoch": 0.06, + "learning_rate": 1.3847357228071996e-05, + "logits/chosen": -3.1224124431610107, + "logits/rejected": -3.1997997760772705, + "logps/chosen": -14.364174842834473, + "logps/rejected": -189.40087890625, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27019447088241577, + "rewards/margins": 4.568615913391113, + "rewards/rejected": -4.838809967041016, + "step": 409 + }, + { + "epoch": 0.06, + "learning_rate": 1.3846623787540848e-05, + "logits/chosen": -2.79500675201416, + "logits/rejected": -2.7763195037841797, + "logps/chosen": -83.42253112792969, + "logps/rejected": -198.27639770507812, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9024225473403931, + "rewards/margins": 5.010154724121094, + "rewards/rejected": -5.912576675415039, + "step": 410 + }, + { + "epoch": 0.06, + "learning_rate": 1.38458903470097e-05, + "logits/chosen": -3.145453929901123, + "logits/rejected": -2.415428876876831, + "logps/chosen": -375.8213806152344, + "logps/rejected": -346.2965393066406, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3311712741851807, + "rewards/margins": 5.9617085456848145, + "rewards/rejected": -7.292880058288574, + "step": 411 + }, + { + "epoch": 0.06, + "learning_rate": 1.3845156906478552e-05, + "logits/chosen": -2.578777551651001, + "logits/rejected": -2.9757561683654785, + "logps/chosen": -257.5711669921875, + "logps/rejected": -557.7808837890625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7406967282295227, + "rewards/margins": 10.174388885498047, + "rewards/rejected": -10.91508674621582, + "step": 412 + }, + { + "epoch": 0.06, + "learning_rate": 1.3844423465947404e-05, + "logits/chosen": -2.108220100402832, + "logits/rejected": -2.767940044403076, + "logps/chosen": -58.18455505371094, + "logps/rejected": -389.59906005859375, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2151105403900146, + "rewards/margins": 8.571154594421387, + "rewards/rejected": -9.78626537322998, + "step": 413 + }, + { + "epoch": 0.06, + "learning_rate": 1.3843690025416256e-05, + "logits/chosen": -3.048795700073242, + "logits/rejected": -2.5912764072418213, + "logps/chosen": -466.4284973144531, + "logps/rejected": -405.8116760253906, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6406158804893494, + "rewards/margins": 7.3439741134643555, + "rewards/rejected": -7.984590530395508, + "step": 414 + }, + { + "epoch": 0.06, + "learning_rate": 1.3842956584885107e-05, + "logits/chosen": -2.3388636112213135, + "logits/rejected": -3.0889430046081543, + "logps/chosen": -405.72149658203125, + "logps/rejected": -368.14300537109375, + "loss": 0.1643, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2838226556777954, + "rewards/margins": 5.06150484085083, + "rewards/rejected": -6.345327377319336, + "step": 415 + }, + { + "epoch": 0.06, + "learning_rate": 1.3842223144353961e-05, + "logits/chosen": -2.694734573364258, + "logits/rejected": -3.1901657581329346, + "logps/chosen": -15.637422561645508, + "logps/rejected": -121.7634048461914, + "loss": 0.12, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3761458992958069, + "rewards/margins": 3.8307878971099854, + "rewards/rejected": -4.206933498382568, + "step": 416 + }, + { + "epoch": 0.06, + "learning_rate": 1.3841489703822813e-05, + "logits/chosen": -1.5254536867141724, + "logits/rejected": -3.065915584564209, + "logps/chosen": -222.50836181640625, + "logps/rejected": -296.7484130859375, + "loss": 3.2851, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.039559364318848, + "rewards/margins": 0.15955042839050293, + "rewards/rejected": -4.19911003112793, + "step": 417 + }, + { + "epoch": 0.07, + "learning_rate": 1.3840756263291665e-05, + "logits/chosen": -2.9825150966644287, + "logits/rejected": -3.0960817337036133, + "logps/chosen": -501.0368957519531, + "logps/rejected": -323.20928955078125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.049624621868133545, + "rewards/margins": 5.861800193786621, + "rewards/rejected": -5.9114251136779785, + "step": 418 + }, + { + "epoch": 0.07, + "learning_rate": 1.3840022822760517e-05, + "logits/chosen": -2.589991569519043, + "logits/rejected": -3.0776174068450928, + "logps/chosen": -213.76705932617188, + "logps/rejected": -239.23464965820312, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.186312198638916, + "rewards/margins": 4.228090286254883, + "rewards/rejected": -6.414402008056641, + "step": 419 + }, + { + "epoch": 0.07, + "learning_rate": 1.3839289382229368e-05, + "logits/chosen": -3.0268139839172363, + "logits/rejected": -3.1440670490264893, + "logps/chosen": -206.7677001953125, + "logps/rejected": -148.95651245117188, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5374491214752197, + "rewards/margins": 5.310769081115723, + "rewards/rejected": -6.848217964172363, + "step": 420 + }, + { + "epoch": 0.07, + "learning_rate": 1.383855594169822e-05, + "logits/chosen": -3.192070722579956, + "logits/rejected": -2.58718204498291, + "logps/chosen": -130.88034057617188, + "logps/rejected": -11.575170516967773, + "loss": 4.5049, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.673419952392578, + "rewards/margins": -4.490024089813232, + "rewards/rejected": -0.18339583277702332, + "step": 421 + }, + { + "epoch": 0.07, + "learning_rate": 1.3837822501167072e-05, + "logits/chosen": -1.6291321516036987, + "logits/rejected": -2.9478085041046143, + "logps/chosen": -196.00909423828125, + "logps/rejected": -125.71430969238281, + "loss": 5.21, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.068603515625, + "rewards/margins": -2.015087604522705, + "rewards/rejected": -4.053516387939453, + "step": 422 + }, + { + "epoch": 0.07, + "learning_rate": 1.3837089060635924e-05, + "logits/chosen": -3.2412302494049072, + "logits/rejected": -2.664537191390991, + "logps/chosen": -236.21237182617188, + "logps/rejected": -253.4356231689453, + "loss": 1.8376, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2971444129943848, + "rewards/margins": 1.9839704036712646, + "rewards/rejected": -5.28111457824707, + "step": 423 + }, + { + "epoch": 0.07, + "learning_rate": 1.3836355620104776e-05, + "logits/chosen": -2.969914674758911, + "logits/rejected": -3.129793405532837, + "logps/chosen": -729.0839233398438, + "logps/rejected": -680.266845703125, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7467193603515625, + "rewards/margins": 5.734152793884277, + "rewards/rejected": -6.48087215423584, + "step": 424 + }, + { + "epoch": 0.07, + "learning_rate": 1.383562217957363e-05, + "logits/chosen": -2.481675624847412, + "logits/rejected": -2.683037757873535, + "logps/chosen": -273.4399719238281, + "logps/rejected": -355.22021484375, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6885036826133728, + "rewards/margins": 6.996029376983643, + "rewards/rejected": -7.68453311920166, + "step": 425 + }, + { + "epoch": 0.07, + "learning_rate": 1.3834888739042481e-05, + "logits/chosen": -2.399651288986206, + "logits/rejected": -2.7526824474334717, + "logps/chosen": -194.88754272460938, + "logps/rejected": -385.895751953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.75189208984375, + "rewards/margins": 9.160139083862305, + "rewards/rejected": -10.912031173706055, + "step": 426 + }, + { + "epoch": 0.07, + "learning_rate": 1.3834155298511333e-05, + "logits/chosen": -3.022627830505371, + "logits/rejected": -3.121915578842163, + "logps/chosen": -130.38143920898438, + "logps/rejected": -273.833984375, + "loss": 0.0635, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8037052154541016, + "rewards/margins": 4.089498519897461, + "rewards/rejected": -5.8932037353515625, + "step": 427 + }, + { + "epoch": 0.07, + "learning_rate": 1.3833421857980185e-05, + "logits/chosen": -1.5959316492080688, + "logits/rejected": -2.951312303543091, + "logps/chosen": -121.94102478027344, + "logps/rejected": -524.22802734375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1780906915664673, + "rewards/margins": 7.04976749420166, + "rewards/rejected": -8.22785758972168, + "step": 428 + }, + { + "epoch": 0.07, + "learning_rate": 1.3832688417449037e-05, + "logits/chosen": -3.039442300796509, + "logits/rejected": -3.0536627769470215, + "logps/chosen": -190.77828979492188, + "logps/rejected": -327.9275207519531, + "loss": 0.0443, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9166272878646851, + "rewards/margins": 4.464962959289551, + "rewards/rejected": -5.381590366363525, + "step": 429 + }, + { + "epoch": 0.07, + "learning_rate": 1.3831954976917889e-05, + "logits/chosen": -2.94085955619812, + "logits/rejected": -1.857588768005371, + "logps/chosen": -250.1539764404297, + "logps/rejected": -122.54647827148438, + "loss": 1.5243, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.9811626076698303, + "rewards/margins": 0.9169731140136719, + "rewards/rejected": -1.8981356620788574, + "step": 430 + }, + { + "epoch": 0.07, + "learning_rate": 1.3831221536386743e-05, + "logits/chosen": -2.5890302658081055, + "logits/rejected": -3.036539077758789, + "logps/chosen": -132.5432586669922, + "logps/rejected": -145.6352996826172, + "loss": 1.2275, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.14359974861145, + "rewards/margins": 2.997474193572998, + "rewards/rejected": -5.141074180603027, + "step": 431 + }, + { + "epoch": 0.07, + "learning_rate": 1.3830488095855594e-05, + "logits/chosen": -2.1974520683288574, + "logits/rejected": -2.984174966812134, + "logps/chosen": -182.13507080078125, + "logps/rejected": -594.725830078125, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5134687423706055, + "rewards/margins": 9.681587219238281, + "rewards/rejected": -10.195056915283203, + "step": 432 + }, + { + "epoch": 0.07, + "learning_rate": 1.3829754655324448e-05, + "logits/chosen": -3.1379029750823975, + "logits/rejected": -2.6244826316833496, + "logps/chosen": -154.1754150390625, + "logps/rejected": -273.9195556640625, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5907379388809204, + "rewards/margins": 7.029916763305664, + "rewards/rejected": -7.620655059814453, + "step": 433 + }, + { + "epoch": 0.07, + "learning_rate": 1.38290212147933e-05, + "logits/chosen": -1.9082691669464111, + "logits/rejected": -3.084794521331787, + "logps/chosen": -34.9876708984375, + "logps/rejected": -210.90988159179688, + "loss": 0.031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6696212291717529, + "rewards/margins": 4.098773002624512, + "rewards/rejected": -4.768394470214844, + "step": 434 + }, + { + "epoch": 0.07, + "learning_rate": 1.3828287774262152e-05, + "logits/chosen": -3.0647172927856445, + "logits/rejected": -1.726072907447815, + "logps/chosen": -429.0281066894531, + "logps/rejected": -433.5974426269531, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.542029619216919, + "rewards/margins": 7.635122299194336, + "rewards/rejected": -8.177151679992676, + "step": 435 + }, + { + "epoch": 0.07, + "learning_rate": 1.3827554333731004e-05, + "logits/chosen": -1.5042780637741089, + "logits/rejected": -2.426194906234741, + "logps/chosen": -151.43902587890625, + "logps/rejected": -306.8212890625, + "loss": 0.7199, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7622129917144775, + "rewards/margins": 1.426504135131836, + "rewards/rejected": -3.1887171268463135, + "step": 436 + }, + { + "epoch": 0.07, + "learning_rate": 1.3826820893199856e-05, + "logits/chosen": -2.610442876815796, + "logits/rejected": -2.9660277366638184, + "logps/chosen": -105.47987365722656, + "logps/rejected": -310.73046875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7799515128135681, + "rewards/margins": 7.381687164306641, + "rewards/rejected": -8.161638259887695, + "step": 437 + }, + { + "epoch": 0.07, + "learning_rate": 1.3826087452668707e-05, + "logits/chosen": -2.058433771133423, + "logits/rejected": -2.898237943649292, + "logps/chosen": -188.85519409179688, + "logps/rejected": -621.8976440429688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4275333881378174, + "rewards/margins": 14.054248809814453, + "rewards/rejected": -14.481782913208008, + "step": 438 + }, + { + "epoch": 0.07, + "learning_rate": 1.382535401213756e-05, + "logits/chosen": -2.7979111671447754, + "logits/rejected": -3.309738874435425, + "logps/chosen": -274.5032958984375, + "logps/rejected": -411.43048095703125, + "loss": 0.0441, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4594998359680176, + "rewards/margins": 4.252135753631592, + "rewards/rejected": -4.711635589599609, + "step": 439 + }, + { + "epoch": 0.07, + "learning_rate": 1.3824620571606411e-05, + "logits/chosen": -1.9576725959777832, + "logits/rejected": -2.806368589401245, + "logps/chosen": -26.935192108154297, + "logps/rejected": -143.19033813476562, + "loss": 0.0531, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.269457221031189, + "rewards/margins": 2.9536890983581543, + "rewards/rejected": -4.223146438598633, + "step": 440 + }, + { + "epoch": 0.07, + "learning_rate": 1.3823887131075263e-05, + "logits/chosen": -2.4196159839630127, + "logits/rejected": -2.9893970489501953, + "logps/chosen": -147.9686737060547, + "logps/rejected": -183.18955993652344, + "loss": 2.0357, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5865883827209473, + "rewards/margins": 1.8182878494262695, + "rewards/rejected": -4.404876232147217, + "step": 441 + }, + { + "epoch": 0.07, + "learning_rate": 1.3823153690544117e-05, + "logits/chosen": -3.0324647426605225, + "logits/rejected": -3.137596368789673, + "logps/chosen": -21.965492248535156, + "logps/rejected": -251.53335571289062, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11273441463708878, + "rewards/margins": 9.280238151550293, + "rewards/rejected": -9.167503356933594, + "step": 442 + }, + { + "epoch": 0.07, + "learning_rate": 1.3822420250012968e-05, + "logits/chosen": -2.995051145553589, + "logits/rejected": -2.29590106010437, + "logps/chosen": -290.3197937011719, + "logps/rejected": -208.67501831054688, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3502476215362549, + "rewards/margins": 3.902137279510498, + "rewards/rejected": -5.252385139465332, + "step": 443 + }, + { + "epoch": 0.07, + "learning_rate": 1.382168680948182e-05, + "logits/chosen": -2.99214243888855, + "logits/rejected": -2.568025588989258, + "logps/chosen": -363.2644958496094, + "logps/rejected": -239.58401489257812, + "loss": 0.0231, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26186713576316833, + "rewards/margins": 4.741971492767334, + "rewards/rejected": -5.003838539123535, + "step": 444 + }, + { + "epoch": 0.07, + "learning_rate": 1.3820953368950672e-05, + "logits/chosen": -3.2270615100860596, + "logits/rejected": -2.968228816986084, + "logps/chosen": -641.7103271484375, + "logps/rejected": -382.2108154296875, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3975174129009247, + "rewards/margins": 5.0307512283325195, + "rewards/rejected": -5.428268909454346, + "step": 445 + }, + { + "epoch": 0.07, + "learning_rate": 1.3820219928419524e-05, + "logits/chosen": -3.0956997871398926, + "logits/rejected": -3.150193214416504, + "logps/chosen": -641.96337890625, + "logps/rejected": -712.916748046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3466399908065796, + "rewards/margins": 8.494434356689453, + "rewards/rejected": -8.841073989868164, + "step": 446 + }, + { + "epoch": 0.07, + "learning_rate": 1.3819486487888376e-05, + "logits/chosen": -2.0077016353607178, + "logits/rejected": -3.084429979324341, + "logps/chosen": -95.3428955078125, + "logps/rejected": -303.01275634765625, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0544794797897339, + "rewards/margins": 4.815617561340332, + "rewards/rejected": -5.8700971603393555, + "step": 447 + }, + { + "epoch": 0.07, + "learning_rate": 1.3818753047357228e-05, + "logits/chosen": -2.9239256381988525, + "logits/rejected": -1.5474227666854858, + "logps/chosen": -496.48175048828125, + "logps/rejected": -329.41510009765625, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5451114177703857, + "rewards/margins": 5.27068567276001, + "rewards/rejected": -6.815796852111816, + "step": 448 + }, + { + "epoch": 0.07, + "learning_rate": 1.381801960682608e-05, + "logits/chosen": -2.9513933658599854, + "logits/rejected": -3.2043657302856445, + "logps/chosen": -102.22151947021484, + "logps/rejected": -193.47604370117188, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4040944576263428, + "rewards/margins": 5.1809892654418945, + "rewards/rejected": -7.585083961486816, + "step": 449 + }, + { + "epoch": 0.07, + "learning_rate": 1.3817286166294932e-05, + "logits/chosen": -2.707810401916504, + "logits/rejected": -3.0572116374969482, + "logps/chosen": -573.2094116210938, + "logps/rejected": -578.748046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7244461178779602, + "rewards/margins": 8.157203674316406, + "rewards/rejected": -8.8816499710083, + "step": 450 + }, + { + "epoch": 0.07, + "learning_rate": 1.3816552725763785e-05, + "logits/chosen": -2.850553512573242, + "logits/rejected": -3.267911911010742, + "logps/chosen": -113.32877349853516, + "logps/rejected": -240.30596923828125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8728580474853516, + "rewards/margins": 5.8273210525512695, + "rewards/rejected": -6.700179100036621, + "step": 451 + }, + { + "epoch": 0.07, + "learning_rate": 1.3815819285232637e-05, + "logits/chosen": -1.90372896194458, + "logits/rejected": -2.9545552730560303, + "logps/chosen": -118.58152770996094, + "logps/rejected": -386.59356689453125, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.756077766418457, + "rewards/margins": 5.776089668273926, + "rewards/rejected": -8.532167434692383, + "step": 452 + }, + { + "epoch": 0.07, + "learning_rate": 1.3815085844701489e-05, + "logits/chosen": -2.598217010498047, + "logits/rejected": -3.024571180343628, + "logps/chosen": -112.12741088867188, + "logps/rejected": -92.9511489868164, + "loss": 3.1594, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.6162943840026855, + "rewards/margins": -2.0795986652374268, + "rewards/rejected": -2.536695718765259, + "step": 453 + }, + { + "epoch": 0.07, + "learning_rate": 1.381435240417034e-05, + "logits/chosen": -2.665752649307251, + "logits/rejected": -3.1477577686309814, + "logps/chosen": -87.22102355957031, + "logps/rejected": -228.9222412109375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7573478817939758, + "rewards/margins": 6.6948699951171875, + "rewards/rejected": -7.4522175788879395, + "step": 454 + }, + { + "epoch": 0.07, + "learning_rate": 1.3813618963639193e-05, + "logits/chosen": -1.9291077852249146, + "logits/rejected": -3.178718328475952, + "logps/chosen": -58.61846923828125, + "logps/rejected": -217.42535400390625, + "loss": 0.175, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.889295220375061, + "rewards/margins": 3.267270088195801, + "rewards/rejected": -5.156565189361572, + "step": 455 + }, + { + "epoch": 0.07, + "learning_rate": 1.3812885523108045e-05, + "logits/chosen": -2.9811861515045166, + "logits/rejected": -3.0156383514404297, + "logps/chosen": -294.8236999511719, + "logps/rejected": -290.8450927734375, + "loss": 2.1882, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.316793918609619, + "rewards/margins": 1.770540475845337, + "rewards/rejected": -5.087334156036377, + "step": 456 + }, + { + "epoch": 0.07, + "learning_rate": 1.3812152082576896e-05, + "logits/chosen": -3.0749452114105225, + "logits/rejected": -3.177244186401367, + "logps/chosen": -107.78006744384766, + "logps/rejected": -265.5882568359375, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10596317052841187, + "rewards/margins": 3.6445603370666504, + "rewards/rejected": -3.750523328781128, + "step": 457 + }, + { + "epoch": 0.07, + "learning_rate": 1.3811418642045748e-05, + "logits/chosen": -2.0630061626434326, + "logits/rejected": -2.8266966342926025, + "logps/chosen": -190.8152618408203, + "logps/rejected": -491.9664001464844, + "loss": 2.254, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.915077209472656, + "rewards/margins": 0.7906632423400879, + "rewards/rejected": -5.705740451812744, + "step": 458 + }, + { + "epoch": 0.07, + "learning_rate": 1.38106852015146e-05, + "logits/chosen": -1.7419694662094116, + "logits/rejected": -3.0985302925109863, + "logps/chosen": -130.5919189453125, + "logps/rejected": -332.6203308105469, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.786065697669983, + "rewards/margins": 8.174986839294434, + "rewards/rejected": -9.961051940917969, + "step": 459 + }, + { + "epoch": 0.07, + "learning_rate": 1.3809951760983454e-05, + "logits/chosen": -2.8533499240875244, + "logits/rejected": -3.143031597137451, + "logps/chosen": -31.006568908691406, + "logps/rejected": -224.36468505859375, + "loss": 0.0369, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3409910798072815, + "rewards/margins": 6.146725177764893, + "rewards/rejected": -6.4877166748046875, + "step": 460 + }, + { + "epoch": 0.07, + "learning_rate": 1.3809218320452306e-05, + "logits/chosen": -2.85288667678833, + "logits/rejected": -3.0576865673065186, + "logps/chosen": -25.874462127685547, + "logps/rejected": -137.494384765625, + "loss": 0.0405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8097925186157227, + "rewards/margins": 3.639524459838867, + "rewards/rejected": -4.44931697845459, + "step": 461 + }, + { + "epoch": 0.07, + "learning_rate": 1.3808484879921158e-05, + "logits/chosen": -1.902491807937622, + "logits/rejected": -3.0760555267333984, + "logps/chosen": -195.71145629882812, + "logps/rejected": -402.3197937011719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20568962395191193, + "rewards/margins": 10.59953784942627, + "rewards/rejected": -10.805227279663086, + "step": 462 + }, + { + "epoch": 0.07, + "learning_rate": 1.380775143939001e-05, + "logits/chosen": -2.9423749446868896, + "logits/rejected": -2.6823291778564453, + "logps/chosen": -453.55615234375, + "logps/rejected": -478.87347412109375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33404406905174255, + "rewards/margins": 6.939334869384766, + "rewards/rejected": -7.273379325866699, + "step": 463 + }, + { + "epoch": 0.07, + "learning_rate": 1.3807017998858861e-05, + "logits/chosen": -3.062361001968384, + "logits/rejected": -2.2837655544281006, + "logps/chosen": -228.8021240234375, + "logps/rejected": -360.6396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5188798904418945, + "rewards/margins": 10.157302856445312, + "rewards/rejected": -10.67618179321289, + "step": 464 + }, + { + "epoch": 0.07, + "learning_rate": 1.3806284558327715e-05, + "logits/chosen": -2.6640660762786865, + "logits/rejected": -3.1835074424743652, + "logps/chosen": -78.05970001220703, + "logps/rejected": -347.211669921875, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8897237181663513, + "rewards/margins": 10.194632530212402, + "rewards/rejected": -11.084356307983398, + "step": 465 + }, + { + "epoch": 0.07, + "learning_rate": 1.3805551117796567e-05, + "logits/chosen": -2.0803937911987305, + "logits/rejected": -3.069167375564575, + "logps/chosen": -110.5615463256836, + "logps/rejected": -384.5872802734375, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4093966484069824, + "rewards/margins": 7.394484043121338, + "rewards/rejected": -8.80388069152832, + "step": 466 + }, + { + "epoch": 0.07, + "learning_rate": 1.3804817677265419e-05, + "logits/chosen": -2.2411067485809326, + "logits/rejected": -3.022425889968872, + "logps/chosen": -112.195556640625, + "logps/rejected": -230.52706909179688, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2740411758422852, + "rewards/margins": 5.464106559753418, + "rewards/rejected": -6.738147735595703, + "step": 467 + }, + { + "epoch": 0.07, + "learning_rate": 1.380408423673427e-05, + "logits/chosen": -3.1413795948028564, + "logits/rejected": -2.5846292972564697, + "logps/chosen": -452.9453125, + "logps/rejected": -70.83848571777344, + "loss": 7.5526, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.362312316894531, + "rewards/margins": -7.537078380584717, + "rewards/rejected": -0.8252341151237488, + "step": 468 + }, + { + "epoch": 0.07, + "learning_rate": 1.3803350796203124e-05, + "logits/chosen": -3.0329771041870117, + "logits/rejected": -2.220135450363159, + "logps/chosen": -403.1234130859375, + "logps/rejected": -227.6131591796875, + "loss": 3.1884, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.6685967445373535, + "rewards/margins": -0.21074748039245605, + "rewards/rejected": -4.457849502563477, + "step": 469 + }, + { + "epoch": 0.07, + "learning_rate": 1.3802617355671976e-05, + "logits/chosen": -2.996924638748169, + "logits/rejected": -2.6838393211364746, + "logps/chosen": -496.1693115234375, + "logps/rejected": -449.77874755859375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2596373558044434, + "rewards/margins": 8.532489776611328, + "rewards/rejected": -10.792126655578613, + "step": 470 + }, + { + "epoch": 0.07, + "learning_rate": 1.3801883915140828e-05, + "logits/chosen": -3.224081516265869, + "logits/rejected": -3.3434183597564697, + "logps/chosen": -25.267871856689453, + "logps/rejected": -172.63311767578125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026294291019439697, + "rewards/margins": 6.681970596313477, + "rewards/rejected": -6.655675888061523, + "step": 471 + }, + { + "epoch": 0.07, + "learning_rate": 1.380115047460968e-05, + "logits/chosen": -3.197693109512329, + "logits/rejected": -1.7342365980148315, + "logps/chosen": -437.1882019042969, + "logps/rejected": -206.5771942138672, + "loss": 4.7533, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.338729381561279, + "rewards/margins": -1.9043619632720947, + "rewards/rejected": -4.4343671798706055, + "step": 472 + }, + { + "epoch": 0.07, + "learning_rate": 1.3800417034078532e-05, + "logits/chosen": -2.134209156036377, + "logits/rejected": -2.908581256866455, + "logps/chosen": -151.85536193847656, + "logps/rejected": -236.206298828125, + "loss": 1.4644, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5891549587249756, + "rewards/margins": 5.1676836013793945, + "rewards/rejected": -7.756838321685791, + "step": 473 + }, + { + "epoch": 0.07, + "learning_rate": 1.3799683593547383e-05, + "logits/chosen": -3.1637816429138184, + "logits/rejected": -2.496105670928955, + "logps/chosen": -271.5757141113281, + "logps/rejected": -126.80320739746094, + "loss": 2.9386, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.073879241943359, + "rewards/margins": -2.7125186920166016, + "rewards/rejected": -1.3613603115081787, + "step": 474 + }, + { + "epoch": 0.07, + "learning_rate": 1.3798950153016235e-05, + "logits/chosen": -0.9405056834220886, + "logits/rejected": -2.858987808227539, + "logps/chosen": -97.98667907714844, + "logps/rejected": -558.9778442382812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4303451776504517, + "rewards/margins": 11.723880767822266, + "rewards/rejected": -13.15422534942627, + "step": 475 + }, + { + "epoch": 0.07, + "learning_rate": 1.3798216712485087e-05, + "logits/chosen": -2.977098226547241, + "logits/rejected": -1.1348021030426025, + "logps/chosen": -386.3106994628906, + "logps/rejected": -65.02642822265625, + "loss": 7.9655, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.743760108947754, + "rewards/margins": -7.965056419372559, + "rewards/rejected": -0.7787038683891296, + "step": 476 + }, + { + "epoch": 0.07, + "learning_rate": 1.3797483271953939e-05, + "logits/chosen": -1.934203863143921, + "logits/rejected": -3.055651903152466, + "logps/chosen": -139.2400665283203, + "logps/rejected": -357.9558410644531, + "loss": 0.0694, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.643261432647705, + "rewards/margins": 7.1898064613342285, + "rewards/rejected": -9.833067893981934, + "step": 477 + }, + { + "epoch": 0.07, + "learning_rate": 1.3796749831422793e-05, + "logits/chosen": -2.6690824031829834, + "logits/rejected": -3.05375337600708, + "logps/chosen": -297.53912353515625, + "logps/rejected": -270.2738037109375, + "loss": 2.7296, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.112015724182129, + "rewards/margins": -0.533684492111206, + "rewards/rejected": -3.578331232070923, + "step": 478 + }, + { + "epoch": 0.07, + "learning_rate": 1.3796016390891645e-05, + "logits/chosen": -3.268080234527588, + "logits/rejected": -2.80897855758667, + "logps/chosen": -283.9917297363281, + "logps/rejected": -118.27947235107422, + "loss": 1.4951, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9278247356414795, + "rewards/margins": -0.20565640926361084, + "rewards/rejected": -2.722168445587158, + "step": 479 + }, + { + "epoch": 0.07, + "learning_rate": 1.3795282950360496e-05, + "logits/chosen": -1.2199063301086426, + "logits/rejected": -2.60066556930542, + "logps/chosen": -182.57325744628906, + "logps/rejected": -274.4941711425781, + "loss": 2.3753, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.35945463180542, + "rewards/margins": 1.4782953262329102, + "rewards/rejected": -4.837749481201172, + "step": 480 + }, + { + "epoch": 0.07, + "learning_rate": 1.3794549509829348e-05, + "logits/chosen": -2.97148060798645, + "logits/rejected": -2.810887575149536, + "logps/chosen": -485.58013916015625, + "logps/rejected": -430.5955810546875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7758781909942627, + "rewards/margins": 5.598987579345703, + "rewards/rejected": -6.374865531921387, + "step": 481 + }, + { + "epoch": 0.07, + "learning_rate": 1.37938160692982e-05, + "logits/chosen": -3.006610631942749, + "logits/rejected": -2.687999963760376, + "logps/chosen": -249.00064086914062, + "logps/rejected": -190.34182739257812, + "loss": 0.0458, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.836088180541992, + "rewards/margins": 4.185868740081787, + "rewards/rejected": -7.021956920623779, + "step": 482 + }, + { + "epoch": 0.08, + "learning_rate": 1.3793082628767052e-05, + "logits/chosen": -2.9014344215393066, + "logits/rejected": -3.1199240684509277, + "logps/chosen": -212.61839294433594, + "logps/rejected": -299.9679870605469, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6066024899482727, + "rewards/margins": 4.327699184417725, + "rewards/rejected": -4.934301853179932, + "step": 483 + }, + { + "epoch": 0.08, + "learning_rate": 1.3792349188235904e-05, + "logits/chosen": -1.55538010597229, + "logits/rejected": -2.9351770877838135, + "logps/chosen": -109.07803344726562, + "logps/rejected": -318.1282958984375, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7631868124008179, + "rewards/margins": 5.676630973815918, + "rewards/rejected": -6.439817428588867, + "step": 484 + }, + { + "epoch": 0.08, + "learning_rate": 1.3791615747704756e-05, + "logits/chosen": -2.4352357387542725, + "logits/rejected": -3.3314905166625977, + "logps/chosen": -97.17851257324219, + "logps/rejected": -166.86154174804688, + "loss": 1.9667, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3687033653259277, + "rewards/margins": 1.3734501600265503, + "rewards/rejected": -4.742153644561768, + "step": 485 + }, + { + "epoch": 0.08, + "learning_rate": 1.3790882307173608e-05, + "logits/chosen": -2.8140478134155273, + "logits/rejected": -3.1252102851867676, + "logps/chosen": -836.9286499023438, + "logps/rejected": -398.068115234375, + "loss": 0.0396, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1161407232284546, + "rewards/margins": 3.6485435962677, + "rewards/rejected": -4.764684200286865, + "step": 486 + }, + { + "epoch": 0.08, + "learning_rate": 1.3790148866642461e-05, + "logits/chosen": -0.8784336447715759, + "logits/rejected": -1.8942300081253052, + "logps/chosen": -134.61534118652344, + "logps/rejected": -341.32452392578125, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.500063419342041, + "rewards/margins": 4.034579753875732, + "rewards/rejected": -5.534643173217773, + "step": 487 + }, + { + "epoch": 0.08, + "learning_rate": 1.3789415426111313e-05, + "logits/chosen": -2.6236987113952637, + "logits/rejected": -3.1460134983062744, + "logps/chosen": -66.79254913330078, + "logps/rejected": -185.80630493164062, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2585541009902954, + "rewards/margins": 5.601048469543457, + "rewards/rejected": -6.859602451324463, + "step": 488 + }, + { + "epoch": 0.08, + "learning_rate": 1.3788681985580165e-05, + "logits/chosen": -3.2981390953063965, + "logits/rejected": -3.3215622901916504, + "logps/chosen": -43.585365295410156, + "logps/rejected": -251.03050231933594, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.261183261871338, + "rewards/margins": 4.471990585327148, + "rewards/rejected": -5.7331743240356445, + "step": 489 + }, + { + "epoch": 0.08, + "learning_rate": 1.3787948545049017e-05, + "logits/chosen": -2.7472140789031982, + "logits/rejected": -3.0404882431030273, + "logps/chosen": -68.70008850097656, + "logps/rejected": -255.7772216796875, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0279557704925537, + "rewards/margins": 7.0114054679870605, + "rewards/rejected": -9.039361000061035, + "step": 490 + }, + { + "epoch": 0.08, + "learning_rate": 1.3787215104517869e-05, + "logits/chosen": -1.582146167755127, + "logits/rejected": -2.0740339756011963, + "logps/chosen": -184.85763549804688, + "logps/rejected": -169.5072021484375, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6162397265434265, + "rewards/margins": 5.537215232849121, + "rewards/rejected": -6.153454780578613, + "step": 491 + }, + { + "epoch": 0.08, + "learning_rate": 1.378648166398672e-05, + "logits/chosen": -0.9023066163063049, + "logits/rejected": -3.1710915565490723, + "logps/chosen": -86.85079956054688, + "logps/rejected": -492.65618896484375, + "loss": 0.0437, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2111469507217407, + "rewards/margins": 3.1097347736358643, + "rewards/rejected": -4.3208818435668945, + "step": 492 + }, + { + "epoch": 0.08, + "learning_rate": 1.3785748223455573e-05, + "logits/chosen": -2.1632516384124756, + "logits/rejected": -3.03536319732666, + "logps/chosen": -267.8410339355469, + "logps/rejected": -459.3923645019531, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.251065492630005, + "rewards/margins": 6.2171478271484375, + "rewards/rejected": -8.468213081359863, + "step": 493 + }, + { + "epoch": 0.08, + "learning_rate": 1.3785014782924424e-05, + "logits/chosen": -2.6659622192382812, + "logits/rejected": -3.0115389823913574, + "logps/chosen": -274.876220703125, + "logps/rejected": -233.73556518554688, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9903682470321655, + "rewards/margins": 3.3639354705810547, + "rewards/rejected": -4.35430383682251, + "step": 494 + }, + { + "epoch": 0.08, + "learning_rate": 1.3784281342393276e-05, + "logits/chosen": -1.8056319952011108, + "logits/rejected": -3.0363950729370117, + "logps/chosen": -318.98681640625, + "logps/rejected": -534.0952758789062, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5586090087890625, + "rewards/margins": 6.493959426879883, + "rewards/rejected": -8.052568435668945, + "step": 495 + }, + { + "epoch": 0.08, + "learning_rate": 1.378354790186213e-05, + "logits/chosen": -2.5505411624908447, + "logits/rejected": -2.997237205505371, + "logps/chosen": -189.82093811035156, + "logps/rejected": -391.4273681640625, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8942874670028687, + "rewards/margins": 7.5073652267456055, + "rewards/rejected": -8.401652336120605, + "step": 496 + }, + { + "epoch": 0.08, + "learning_rate": 1.3782814461330982e-05, + "logits/chosen": -2.3062872886657715, + "logits/rejected": -3.136993885040283, + "logps/chosen": -215.96560668945312, + "logps/rejected": -253.09442138671875, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5323855876922607, + "rewards/margins": 3.710221767425537, + "rewards/rejected": -6.242607116699219, + "step": 497 + }, + { + "epoch": 0.08, + "learning_rate": 1.3782081020799834e-05, + "logits/chosen": -3.0829317569732666, + "logits/rejected": -2.604581356048584, + "logps/chosen": -302.32257080078125, + "logps/rejected": -274.5401916503906, + "loss": 0.034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.28266978263855, + "rewards/margins": 3.722525119781494, + "rewards/rejected": -6.005194664001465, + "step": 498 + }, + { + "epoch": 0.08, + "learning_rate": 1.3781347580268687e-05, + "logits/chosen": -2.853066921234131, + "logits/rejected": -2.4496278762817383, + "logps/chosen": -277.313232421875, + "logps/rejected": -369.953369140625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.82865571975708, + "rewards/margins": 6.684503078460693, + "rewards/rejected": -8.513158798217773, + "step": 499 + }, + { + "epoch": 0.08, + "learning_rate": 1.3780614139737539e-05, + "logits/chosen": -2.11911940574646, + "logits/rejected": -3.1079816818237305, + "logps/chosen": -264.4539794921875, + "logps/rejected": -401.7734680175781, + "loss": 3.7358, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.945178508758545, + "rewards/margins": 0.45841193199157715, + "rewards/rejected": -5.403590202331543, + "step": 500 + }, + { + "epoch": 0.08, + "learning_rate": 1.3779880699206391e-05, + "logits/chosen": -3.176748514175415, + "logits/rejected": -3.1021530628204346, + "logps/chosen": -187.3514404296875, + "logps/rejected": -144.38424682617188, + "loss": 0.0712, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7624073028564453, + "rewards/margins": 2.7648425102233887, + "rewards/rejected": -4.527249813079834, + "step": 501 + }, + { + "epoch": 0.08, + "learning_rate": 1.3779147258675243e-05, + "logits/chosen": -1.4295510053634644, + "logits/rejected": -2.602020025253296, + "logps/chosen": -68.33187866210938, + "logps/rejected": -419.3321533203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6072153449058533, + "rewards/margins": 9.604995727539062, + "rewards/rejected": -10.212211608886719, + "step": 502 + }, + { + "epoch": 0.08, + "learning_rate": 1.3778413818144095e-05, + "logits/chosen": -2.4287164211273193, + "logits/rejected": -3.153777599334717, + "logps/chosen": -302.41571044921875, + "logps/rejected": -370.5398864746094, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7136207818984985, + "rewards/margins": 6.60358190536499, + "rewards/rejected": -8.3172025680542, + "step": 503 + }, + { + "epoch": 0.08, + "learning_rate": 1.3777680377612947e-05, + "logits/chosen": -2.845146656036377, + "logits/rejected": -3.184713363647461, + "logps/chosen": -10.614585876464844, + "logps/rejected": -197.31930541992188, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0167996883392334, + "rewards/margins": 8.022881507873535, + "rewards/rejected": -8.039681434631348, + "step": 504 + }, + { + "epoch": 0.08, + "learning_rate": 1.37769469370818e-05, + "logits/chosen": -3.093980312347412, + "logits/rejected": -2.846322536468506, + "logps/chosen": -192.9917449951172, + "logps/rejected": -146.61111450195312, + "loss": 0.8726, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.870414972305298, + "rewards/margins": 2.302175760269165, + "rewards/rejected": -5.172590732574463, + "step": 505 + }, + { + "epoch": 0.08, + "learning_rate": 1.3776213496550652e-05, + "logits/chosen": -2.828658103942871, + "logits/rejected": -3.1142938137054443, + "logps/chosen": -270.4136962890625, + "logps/rejected": -217.82571411132812, + "loss": 2.4024, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.50265645980835, + "rewards/margins": 2.1904335021972656, + "rewards/rejected": -6.693090438842773, + "step": 506 + }, + { + "epoch": 0.08, + "learning_rate": 1.3775480056019504e-05, + "logits/chosen": -2.851731300354004, + "logits/rejected": -3.027531147003174, + "logps/chosen": -118.27056884765625, + "logps/rejected": -307.83062744140625, + "loss": 0.0407, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9641211032867432, + "rewards/margins": 4.059726238250732, + "rewards/rejected": -6.023847579956055, + "step": 507 + }, + { + "epoch": 0.08, + "learning_rate": 1.3774746615488356e-05, + "logits/chosen": -2.80940318107605, + "logits/rejected": -2.5676677227020264, + "logps/chosen": -346.19366455078125, + "logps/rejected": -375.9385681152344, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9881370663642883, + "rewards/margins": 6.904390335083008, + "rewards/rejected": -7.8925275802612305, + "step": 508 + }, + { + "epoch": 0.08, + "learning_rate": 1.3774013174957208e-05, + "logits/chosen": -1.3524305820465088, + "logits/rejected": -2.981827974319458, + "logps/chosen": -67.82550048828125, + "logps/rejected": -296.877197265625, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0870304107666016, + "rewards/margins": 5.990466117858887, + "rewards/rejected": -7.077496528625488, + "step": 509 + }, + { + "epoch": 0.08, + "learning_rate": 1.377327973442606e-05, + "logits/chosen": -2.122309446334839, + "logits/rejected": -2.7984678745269775, + "logps/chosen": -241.77505493164062, + "logps/rejected": -530.97509765625, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.260986328125, + "rewards/margins": 8.114384651184082, + "rewards/rejected": -10.375370979309082, + "step": 510 + }, + { + "epoch": 0.08, + "learning_rate": 1.3772546293894911e-05, + "logits/chosen": -2.710845708847046, + "logits/rejected": -3.1200783252716064, + "logps/chosen": -147.51785278320312, + "logps/rejected": -139.96665954589844, + "loss": 2.6216, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.1083598136901855, + "rewards/margins": 0.7471318244934082, + "rewards/rejected": -4.855491638183594, + "step": 511 + }, + { + "epoch": 0.08, + "learning_rate": 1.3771812853363763e-05, + "logits/chosen": -1.8085652589797974, + "logits/rejected": -3.130507230758667, + "logps/chosen": -32.383609771728516, + "logps/rejected": -292.4798889160156, + "loss": 0.0638, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8065105676651001, + "rewards/margins": 3.8704581260681152, + "rewards/rejected": -4.676968574523926, + "step": 512 + }, + { + "epoch": 0.08, + "learning_rate": 1.3771079412832615e-05, + "logits/chosen": -2.5351719856262207, + "logits/rejected": -3.0855605602264404, + "logps/chosen": -76.99761199951172, + "logps/rejected": -259.11065673828125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3428661823272705, + "rewards/margins": 6.286698341369629, + "rewards/rejected": -7.62956428527832, + "step": 513 + }, + { + "epoch": 0.08, + "learning_rate": 1.3770345972301469e-05, + "logits/chosen": -2.616234064102173, + "logits/rejected": -3.104029893875122, + "logps/chosen": -301.4141540527344, + "logps/rejected": -319.2886047363281, + "loss": 4.5402, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.471127510070801, + "rewards/margins": 0.21398687362670898, + "rewards/rejected": -6.68511438369751, + "step": 514 + }, + { + "epoch": 0.08, + "learning_rate": 1.376961253177032e-05, + "logits/chosen": -2.7766849994659424, + "logits/rejected": -3.1236836910247803, + "logps/chosen": -60.339115142822266, + "logps/rejected": -267.60821533203125, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7308233976364136, + "rewards/margins": 5.453793048858643, + "rewards/rejected": -7.184616565704346, + "step": 515 + }, + { + "epoch": 0.08, + "learning_rate": 1.3768879091239173e-05, + "logits/chosen": -2.9501986503601074, + "logits/rejected": -3.0922579765319824, + "logps/chosen": -141.09788513183594, + "logps/rejected": -212.74301147460938, + "loss": 0.2168, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5605833530426025, + "rewards/margins": 2.6617090702056885, + "rewards/rejected": -5.222292423248291, + "step": 516 + }, + { + "epoch": 0.08, + "learning_rate": 1.3768145650708024e-05, + "logits/chosen": -1.655928134918213, + "logits/rejected": -2.994053602218628, + "logps/chosen": -73.28124237060547, + "logps/rejected": -364.59442138671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0567299127578735, + "rewards/margins": 12.081727981567383, + "rewards/rejected": -13.138457298278809, + "step": 517 + }, + { + "epoch": 0.08, + "learning_rate": 1.3767412210176876e-05, + "logits/chosen": -1.2017379999160767, + "logits/rejected": -2.9822285175323486, + "logps/chosen": -51.66252517700195, + "logps/rejected": -313.09552001953125, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1353818029165268, + "rewards/margins": 4.70712947845459, + "rewards/rejected": -4.571747779846191, + "step": 518 + }, + { + "epoch": 0.08, + "learning_rate": 1.3766678769645728e-05, + "logits/chosen": -3.08410906791687, + "logits/rejected": -3.2225868701934814, + "logps/chosen": -127.74391174316406, + "logps/rejected": -348.49029541015625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1859080791473389, + "rewards/margins": 8.137834548950195, + "rewards/rejected": -9.323742866516113, + "step": 519 + }, + { + "epoch": 0.08, + "learning_rate": 1.376594532911458e-05, + "logits/chosen": -3.0263826847076416, + "logits/rejected": -1.5592800378799438, + "logps/chosen": -262.7222595214844, + "logps/rejected": -151.482177734375, + "loss": 0.0243, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4989500045776367, + "rewards/margins": 3.8981986045837402, + "rewards/rejected": -5.397149085998535, + "step": 520 + }, + { + "epoch": 0.08, + "learning_rate": 1.3765211888583432e-05, + "logits/chosen": -3.2113494873046875, + "logits/rejected": -2.874398946762085, + "logps/chosen": -1074.536865234375, + "logps/rejected": -680.5767822265625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.008349657058716, + "rewards/margins": 6.4866485595703125, + "rewards/rejected": -8.49499797821045, + "step": 521 + }, + { + "epoch": 0.08, + "learning_rate": 1.3764478448052284e-05, + "logits/chosen": -2.8985486030578613, + "logits/rejected": -3.1041336059570312, + "logps/chosen": -107.22237396240234, + "logps/rejected": -226.44049072265625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.49592325091362, + "rewards/margins": 6.987436771392822, + "rewards/rejected": -7.4833598136901855, + "step": 522 + }, + { + "epoch": 0.08, + "learning_rate": 1.3763745007521137e-05, + "logits/chosen": -0.9999324083328247, + "logits/rejected": -3.0379345417022705, + "logps/chosen": -52.26139831542969, + "logps/rejected": -362.4666748046875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.293783187866211, + "rewards/margins": 8.407327651977539, + "rewards/rejected": -9.70111083984375, + "step": 523 + }, + { + "epoch": 0.08, + "learning_rate": 1.376301156698999e-05, + "logits/chosen": -3.3227739334106445, + "logits/rejected": -3.4051616191864014, + "logps/chosen": -71.71842956542969, + "logps/rejected": -124.6612319946289, + "loss": 0.049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9563283920288086, + "rewards/margins": 4.676135540008545, + "rewards/rejected": -5.6324639320373535, + "step": 524 + }, + { + "epoch": 0.08, + "learning_rate": 1.3762278126458841e-05, + "logits/chosen": -1.9666444063186646, + "logits/rejected": -3.073773145675659, + "logps/chosen": -190.611328125, + "logps/rejected": -355.88433837890625, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.865992784500122, + "rewards/margins": 5.925532817840576, + "rewards/rejected": -7.791525840759277, + "step": 525 + }, + { + "epoch": 0.08, + "learning_rate": 1.3761544685927693e-05, + "logits/chosen": -2.7096505165100098, + "logits/rejected": -2.9335436820983887, + "logps/chosen": -110.83584594726562, + "logps/rejected": -80.80164337158203, + "loss": 1.5907, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3878610134124756, + "rewards/margins": 1.4970911741256714, + "rewards/rejected": -4.884952068328857, + "step": 526 + }, + { + "epoch": 0.08, + "learning_rate": 1.3760811245396545e-05, + "logits/chosen": -3.1093220710754395, + "logits/rejected": -2.063420534133911, + "logps/chosen": -354.9792175292969, + "logps/rejected": -453.8577575683594, + "loss": 0.0292, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.853738307952881, + "rewards/margins": 7.217663764953613, + "rewards/rejected": -10.071401596069336, + "step": 527 + }, + { + "epoch": 0.08, + "learning_rate": 1.3760077804865397e-05, + "logits/chosen": -3.140305995941162, + "logits/rejected": -1.4174878597259521, + "logps/chosen": -244.10824584960938, + "logps/rejected": -129.65908813476562, + "loss": 1.4626, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5612893104553223, + "rewards/margins": 0.822236180305481, + "rewards/rejected": -4.383525371551514, + "step": 528 + }, + { + "epoch": 0.08, + "learning_rate": 1.3759344364334249e-05, + "logits/chosen": -2.9910497665405273, + "logits/rejected": -2.530571699142456, + "logps/chosen": -289.5887451171875, + "logps/rejected": -378.97186279296875, + "loss": 4.0446, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.6191935539245605, + "rewards/margins": 0.3640298843383789, + "rewards/rejected": -4.9832234382629395, + "step": 529 + }, + { + "epoch": 0.08, + "learning_rate": 1.37586109238031e-05, + "logits/chosen": -3.2173073291778564, + "logits/rejected": -3.2813854217529297, + "logps/chosen": -133.38241577148438, + "logps/rejected": -242.12646484375, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7917773723602295, + "rewards/margins": 4.625782489776611, + "rewards/rejected": -6.41756010055542, + "step": 530 + }, + { + "epoch": 0.08, + "learning_rate": 1.3757877483271954e-05, + "logits/chosen": -3.0218100547790527, + "logits/rejected": -2.0679848194122314, + "logps/chosen": -157.59140014648438, + "logps/rejected": -57.166481018066406, + "loss": 3.8709, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.952013969421387, + "rewards/margins": -3.843864917755127, + "rewards/rejected": -1.1081494092941284, + "step": 531 + }, + { + "epoch": 0.08, + "learning_rate": 1.3757144042740806e-05, + "logits/chosen": -1.9339268207550049, + "logits/rejected": -2.9535796642303467, + "logps/chosen": -164.98863220214844, + "logps/rejected": -196.36683654785156, + "loss": 1.9231, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.563265323638916, + "rewards/margins": 1.0987170934677124, + "rewards/rejected": -4.661982536315918, + "step": 532 + }, + { + "epoch": 0.08, + "learning_rate": 1.375641060220966e-05, + "logits/chosen": -1.3253065347671509, + "logits/rejected": -3.159231662750244, + "logps/chosen": -282.871337890625, + "logps/rejected": -518.5775756835938, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6547272205352783, + "rewards/margins": 6.556499004364014, + "rewards/rejected": -8.211226463317871, + "step": 533 + }, + { + "epoch": 0.08, + "learning_rate": 1.3755677161678511e-05, + "logits/chosen": -3.014578104019165, + "logits/rejected": -2.685225009918213, + "logps/chosen": -255.67831420898438, + "logps/rejected": -273.7005310058594, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9122209548950195, + "rewards/margins": 5.412606239318848, + "rewards/rejected": -8.324827194213867, + "step": 534 + }, + { + "epoch": 0.08, + "learning_rate": 1.3754943721147363e-05, + "logits/chosen": -3.3047657012939453, + "logits/rejected": -3.236140489578247, + "logps/chosen": -154.04624938964844, + "logps/rejected": -298.53204345703125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2710158824920654, + "rewards/margins": 5.438289642333984, + "rewards/rejected": -6.709305286407471, + "step": 535 + }, + { + "epoch": 0.08, + "learning_rate": 1.3754210280616215e-05, + "logits/chosen": -1.2154711484909058, + "logits/rejected": -2.967883586883545, + "logps/chosen": -71.74598693847656, + "logps/rejected": -347.64697265625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2235822677612305, + "rewards/margins": 7.455417156219482, + "rewards/rejected": -8.678999900817871, + "step": 536 + }, + { + "epoch": 0.08, + "learning_rate": 1.3753476840085067e-05, + "logits/chosen": -3.0655930042266846, + "logits/rejected": -3.189976215362549, + "logps/chosen": -533.1141357421875, + "logps/rejected": -506.86981201171875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2907363176345825, + "rewards/margins": 7.675966262817383, + "rewards/rejected": -8.966702461242676, + "step": 537 + }, + { + "epoch": 0.08, + "learning_rate": 1.3752743399553919e-05, + "logits/chosen": -2.816925525665283, + "logits/rejected": -3.1873226165771484, + "logps/chosen": -21.600818634033203, + "logps/rejected": -191.8150177001953, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6863735318183899, + "rewards/margins": 5.602498531341553, + "rewards/rejected": -6.288872241973877, + "step": 538 + }, + { + "epoch": 0.08, + "learning_rate": 1.375200995902277e-05, + "logits/chosen": -2.290881633758545, + "logits/rejected": -3.059638023376465, + "logps/chosen": -172.3941192626953, + "logps/rejected": -191.57313537597656, + "loss": 1.3088, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.197324275970459, + "rewards/margins": 0.41442620754241943, + "rewards/rejected": -4.611750602722168, + "step": 539 + }, + { + "epoch": 0.08, + "learning_rate": 1.3751276518491624e-05, + "logits/chosen": -3.1082305908203125, + "logits/rejected": -2.658400774002075, + "logps/chosen": -259.807861328125, + "logps/rejected": -263.08758544921875, + "loss": 3.1681, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.720860958099365, + "rewards/margins": -1.6006301641464233, + "rewards/rejected": -3.1202309131622314, + "step": 540 + }, + { + "epoch": 0.08, + "learning_rate": 1.3750543077960476e-05, + "logits/chosen": -3.0189332962036133, + "logits/rejected": -3.236225128173828, + "logps/chosen": -199.81777954101562, + "logps/rejected": -330.0688781738281, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7714417576789856, + "rewards/margins": 8.973102569580078, + "rewards/rejected": -9.744543075561523, + "step": 541 + }, + { + "epoch": 0.08, + "learning_rate": 1.3749809637429328e-05, + "logits/chosen": -3.1433513164520264, + "logits/rejected": -1.8422805070877075, + "logps/chosen": -293.66949462890625, + "logps/rejected": -368.5772399902344, + "loss": 1.9789, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1901988983154297, + "rewards/margins": 5.068136215209961, + "rewards/rejected": -8.25833511352539, + "step": 542 + }, + { + "epoch": 0.08, + "learning_rate": 1.374907619689818e-05, + "logits/chosen": -2.068749189376831, + "logits/rejected": -3.14660906791687, + "logps/chosen": -180.6510009765625, + "logps/rejected": -444.2142333984375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0281521081924438, + "rewards/margins": 7.596301555633545, + "rewards/rejected": -8.6244535446167, + "step": 543 + }, + { + "epoch": 0.08, + "learning_rate": 1.3748342756367032e-05, + "logits/chosen": -2.959777593612671, + "logits/rejected": -3.0789337158203125, + "logps/chosen": -53.94057846069336, + "logps/rejected": -127.45608520507812, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6536338329315186, + "rewards/margins": 5.005664825439453, + "rewards/rejected": -6.659298896789551, + "step": 544 + }, + { + "epoch": 0.08, + "learning_rate": 1.3747609315835884e-05, + "logits/chosen": -3.175926685333252, + "logits/rejected": -3.2062461376190186, + "logps/chosen": -276.052978515625, + "logps/rejected": -345.41717529296875, + "loss": 3.6499, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.66455602645874, + "rewards/margins": -0.5316801071166992, + "rewards/rejected": -4.132875919342041, + "step": 545 + }, + { + "epoch": 0.08, + "learning_rate": 1.3746875875304736e-05, + "logits/chosen": -2.982403516769409, + "logits/rejected": -3.156614065170288, + "logps/chosen": -158.98733520507812, + "logps/rejected": -263.23699951171875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.69203519821167, + "rewards/margins": 7.206422805786133, + "rewards/rejected": -8.898458480834961, + "step": 546 + }, + { + "epoch": 0.09, + "learning_rate": 1.3746142434773588e-05, + "logits/chosen": -2.136805534362793, + "logits/rejected": -3.19640851020813, + "logps/chosen": -165.75384521484375, + "logps/rejected": -382.85845947265625, + "loss": 0.5045, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3630127906799316, + "rewards/margins": 1.4270042181015015, + "rewards/rejected": -3.7900168895721436, + "step": 547 + }, + { + "epoch": 0.09, + "learning_rate": 1.374540899424244e-05, + "logits/chosen": -1.5107840299606323, + "logits/rejected": -2.9651386737823486, + "logps/chosen": -121.77131652832031, + "logps/rejected": -295.2123718261719, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.552471399307251, + "rewards/margins": 6.814458847045898, + "rewards/rejected": -8.36693000793457, + "step": 548 + }, + { + "epoch": 0.09, + "learning_rate": 1.3744675553711293e-05, + "logits/chosen": -2.826827049255371, + "logits/rejected": -2.5848376750946045, + "logps/chosen": -213.7955322265625, + "logps/rejected": -122.93873596191406, + "loss": 4.5619, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.72116756439209, + "rewards/margins": -4.550411224365234, + "rewards/rejected": -1.170756220817566, + "step": 549 + }, + { + "epoch": 0.09, + "learning_rate": 1.3743942113180145e-05, + "logits/chosen": -3.0044567584991455, + "logits/rejected": -1.8728141784667969, + "logps/chosen": -155.5878448486328, + "logps/rejected": -191.43963623046875, + "loss": 2.6938, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.21026349067688, + "rewards/margins": 2.294015884399414, + "rewards/rejected": -5.504279136657715, + "step": 550 + }, + { + "epoch": 0.09, + "learning_rate": 1.3743208672648997e-05, + "logits/chosen": -3.0899901390075684, + "logits/rejected": -2.4900028705596924, + "logps/chosen": -213.9537353515625, + "logps/rejected": -189.45303344726562, + "loss": 3.2866, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.862743377685547, + "rewards/margins": 1.0327000617980957, + "rewards/rejected": -6.895443439483643, + "step": 551 + }, + { + "epoch": 0.09, + "learning_rate": 1.3742475232117849e-05, + "logits/chosen": -2.8493759632110596, + "logits/rejected": -3.248969554901123, + "logps/chosen": -38.716217041015625, + "logps/rejected": -176.65640258789062, + "loss": 0.153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7917417287826538, + "rewards/margins": 4.512180805206299, + "rewards/rejected": -5.303922653198242, + "step": 552 + }, + { + "epoch": 0.09, + "learning_rate": 1.37417417915867e-05, + "logits/chosen": -3.1721243858337402, + "logits/rejected": -3.2151241302490234, + "logps/chosen": -269.6120910644531, + "logps/rejected": -69.47520446777344, + "loss": 0.3782, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.0892562866210938, + "rewards/margins": 2.510915994644165, + "rewards/rejected": -4.600172519683838, + "step": 553 + }, + { + "epoch": 0.09, + "learning_rate": 1.3741008351055552e-05, + "logits/chosen": -3.1528327465057373, + "logits/rejected": -2.841904640197754, + "logps/chosen": -468.9495849609375, + "logps/rejected": -411.553466796875, + "loss": 2.3851, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.121066570281982, + "rewards/margins": -2.109957695007324, + "rewards/rejected": -2.011108875274658, + "step": 554 + }, + { + "epoch": 0.09, + "learning_rate": 1.3740274910524404e-05, + "logits/chosen": -2.7519915103912354, + "logits/rejected": -2.8114144802093506, + "logps/chosen": -311.199951171875, + "logps/rejected": -215.7843475341797, + "loss": 0.0658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6524345874786377, + "rewards/margins": 2.782038927078247, + "rewards/rejected": -3.4344735145568848, + "step": 555 + }, + { + "epoch": 0.09, + "learning_rate": 1.3739541469993256e-05, + "logits/chosen": -3.014206886291504, + "logits/rejected": -3.1790292263031006, + "logps/chosen": -197.37985229492188, + "logps/rejected": -185.66395568847656, + "loss": 2.2438, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6183314323425293, + "rewards/margins": 0.0765841007232666, + "rewards/rejected": -3.694915533065796, + "step": 556 + }, + { + "epoch": 0.09, + "learning_rate": 1.3738808029462108e-05, + "logits/chosen": -3.1512272357940674, + "logits/rejected": -2.482283115386963, + "logps/chosen": -103.09208679199219, + "logps/rejected": -154.7888641357422, + "loss": 0.9003, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1651604175567627, + "rewards/margins": 3.0593650341033936, + "rewards/rejected": -5.224525451660156, + "step": 557 + }, + { + "epoch": 0.09, + "learning_rate": 1.3738074588930962e-05, + "logits/chosen": -2.556640863418579, + "logits/rejected": -3.2697110176086426, + "logps/chosen": -26.284931182861328, + "logps/rejected": -120.7190933227539, + "loss": 0.1463, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3930164575576782, + "rewards/margins": 2.859549045562744, + "rewards/rejected": -4.252565383911133, + "step": 558 + }, + { + "epoch": 0.09, + "learning_rate": 1.3737341148399813e-05, + "logits/chosen": -1.9643863439559937, + "logits/rejected": -3.198641061782837, + "logps/chosen": -134.62359619140625, + "logps/rejected": -251.2902374267578, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1289035975933075, + "rewards/margins": 4.973156929016113, + "rewards/rejected": -5.102060317993164, + "step": 559 + }, + { + "epoch": 0.09, + "learning_rate": 1.3736607707868665e-05, + "logits/chosen": -3.2900900840759277, + "logits/rejected": -3.228623867034912, + "logps/chosen": -163.63656616210938, + "logps/rejected": -208.4311981201172, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1411266326904297, + "rewards/margins": 4.322317123413086, + "rewards/rejected": -4.463443756103516, + "step": 560 + }, + { + "epoch": 0.09, + "learning_rate": 1.3735874267337517e-05, + "logits/chosen": -1.943698763847351, + "logits/rejected": -2.7625527381896973, + "logps/chosen": -369.67596435546875, + "logps/rejected": -369.9947509765625, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3544367551803589, + "rewards/margins": 4.319520950317383, + "rewards/rejected": -5.673957824707031, + "step": 561 + }, + { + "epoch": 0.09, + "learning_rate": 1.3735140826806369e-05, + "logits/chosen": -3.1696817874908447, + "logits/rejected": -3.0820422172546387, + "logps/chosen": -140.32359313964844, + "logps/rejected": -256.8897705078125, + "loss": 0.9384, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.607146739959717, + "rewards/margins": 2.2038869857788086, + "rewards/rejected": -4.811033725738525, + "step": 562 + }, + { + "epoch": 0.09, + "learning_rate": 1.3734407386275221e-05, + "logits/chosen": -2.9801089763641357, + "logits/rejected": -2.512720823287964, + "logps/chosen": -467.87774658203125, + "logps/rejected": -595.5128173828125, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5350189208984375, + "rewards/margins": 6.674809455871582, + "rewards/rejected": -7.2098283767700195, + "step": 563 + }, + { + "epoch": 0.09, + "learning_rate": 1.3733673945744073e-05, + "logits/chosen": -2.7951254844665527, + "logits/rejected": -3.1062469482421875, + "logps/chosen": -496.82684326171875, + "logps/rejected": -726.3569946289062, + "loss": 2.9001, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.182102918624878, + "rewards/margins": 0.19887852668762207, + "rewards/rejected": -2.3809814453125, + "step": 564 + }, + { + "epoch": 0.09, + "learning_rate": 1.3732940505212926e-05, + "logits/chosen": -2.89870023727417, + "logits/rejected": -3.1374902725219727, + "logps/chosen": -65.09915161132812, + "logps/rejected": -181.69329833984375, + "loss": 0.0333, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7347856760025024, + "rewards/margins": 3.598677635192871, + "rewards/rejected": -5.333463668823242, + "step": 565 + }, + { + "epoch": 0.09, + "learning_rate": 1.3732207064681778e-05, + "logits/chosen": -3.2578694820404053, + "logits/rejected": -2.7316267490386963, + "logps/chosen": -180.58648681640625, + "logps/rejected": -165.4195556640625, + "loss": 2.1842, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7736077308654785, + "rewards/margins": -0.3417849540710449, + "rewards/rejected": -3.4318227767944336, + "step": 566 + }, + { + "epoch": 0.09, + "learning_rate": 1.3731473624150632e-05, + "logits/chosen": -1.4171996116638184, + "logits/rejected": -2.9025237560272217, + "logps/chosen": -111.02257537841797, + "logps/rejected": -309.5704345703125, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5437057614326477, + "rewards/margins": 5.60312557220459, + "rewards/rejected": -6.146831035614014, + "step": 567 + }, + { + "epoch": 0.09, + "learning_rate": 1.3730740183619484e-05, + "logits/chosen": -2.6315770149230957, + "logits/rejected": -3.3638086318969727, + "logps/chosen": -81.46694946289062, + "logps/rejected": -297.70343017578125, + "loss": 0.0566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.825859010219574, + "rewards/margins": 3.524040937423706, + "rewards/rejected": -4.349900245666504, + "step": 568 + }, + { + "epoch": 0.09, + "learning_rate": 1.3730006743088336e-05, + "logits/chosen": -3.2582032680511475, + "logits/rejected": -3.03568434715271, + "logps/chosen": -678.101318359375, + "logps/rejected": -397.67803955078125, + "loss": 1.4107, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.2584548592567444, + "rewards/margins": 3.245220899581909, + "rewards/rejected": -2.9867660999298096, + "step": 569 + }, + { + "epoch": 0.09, + "learning_rate": 1.3729273302557188e-05, + "logits/chosen": -3.155776023864746, + "logits/rejected": -2.7058327198028564, + "logps/chosen": -170.7063446044922, + "logps/rejected": -287.1402587890625, + "loss": 0.0326, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1883881092071533, + "rewards/margins": 4.249316215515137, + "rewards/rejected": -5.437704563140869, + "step": 570 + }, + { + "epoch": 0.09, + "learning_rate": 1.372853986202604e-05, + "logits/chosen": -2.4402194023132324, + "logits/rejected": -3.1685259342193604, + "logps/chosen": -344.12225341796875, + "logps/rejected": -403.95159912109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1884981393814087, + "rewards/margins": 8.446496963500977, + "rewards/rejected": -9.634994506835938, + "step": 571 + }, + { + "epoch": 0.09, + "learning_rate": 1.3727806421494891e-05, + "logits/chosen": -3.1798529624938965, + "logits/rejected": -2.044851064682007, + "logps/chosen": -762.5087890625, + "logps/rejected": -500.73150634765625, + "loss": 0.0616, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5943429470062256, + "rewards/margins": 2.766700029373169, + "rewards/rejected": -4.3610429763793945, + "step": 572 + }, + { + "epoch": 0.09, + "learning_rate": 1.3727072980963743e-05, + "logits/chosen": -3.05591082572937, + "logits/rejected": -2.478914260864258, + "logps/chosen": -192.42965698242188, + "logps/rejected": -118.59590911865234, + "loss": 1.9074, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2037100791931152, + "rewards/margins": -0.8296252489089966, + "rewards/rejected": -2.374084711074829, + "step": 573 + }, + { + "epoch": 0.09, + "learning_rate": 1.3726339540432595e-05, + "logits/chosen": -0.9557162523269653, + "logits/rejected": -2.736943244934082, + "logps/chosen": -130.3118896484375, + "logps/rejected": -226.7738494873047, + "loss": 0.1438, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.644674301147461, + "rewards/margins": 2.4178833961486816, + "rewards/rejected": -4.062557697296143, + "step": 574 + }, + { + "epoch": 0.09, + "learning_rate": 1.3725606099901447e-05, + "logits/chosen": -1.4507981538772583, + "logits/rejected": -3.087599039077759, + "logps/chosen": -211.67706298828125, + "logps/rejected": -332.30804443359375, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9925544261932373, + "rewards/margins": 4.16372013092041, + "rewards/rejected": -6.156274795532227, + "step": 575 + }, + { + "epoch": 0.09, + "learning_rate": 1.37248726593703e-05, + "logits/chosen": -3.1862361431121826, + "logits/rejected": -2.555081367492676, + "logps/chosen": -141.543701171875, + "logps/rejected": -231.5477294921875, + "loss": 0.1396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8442172408103943, + "rewards/margins": 3.5827648639678955, + "rewards/rejected": -4.4269819259643555, + "step": 576 + }, + { + "epoch": 0.09, + "learning_rate": 1.3724139218839152e-05, + "logits/chosen": -3.0464348793029785, + "logits/rejected": -2.9239509105682373, + "logps/chosen": -292.5823974609375, + "logps/rejected": -257.42059326171875, + "loss": 3.0941, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.137118339538574, + "rewards/margins": -2.033616542816162, + "rewards/rejected": -2.103501796722412, + "step": 577 + }, + { + "epoch": 0.09, + "learning_rate": 1.3723405778308004e-05, + "logits/chosen": -2.850971221923828, + "logits/rejected": -3.031500816345215, + "logps/chosen": -449.0883483886719, + "logps/rejected": -423.35833740234375, + "loss": 0.0533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8268760442733765, + "rewards/margins": 5.274957180023193, + "rewards/rejected": -6.101833343505859, + "step": 578 + }, + { + "epoch": 0.09, + "learning_rate": 1.3722672337776856e-05, + "logits/chosen": -2.646599769592285, + "logits/rejected": -2.8426551818847656, + "logps/chosen": -188.40957641601562, + "logps/rejected": -279.978515625, + "loss": 0.0653, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3240952491760254, + "rewards/margins": 6.2867112159729, + "rewards/rejected": -7.610806465148926, + "step": 579 + }, + { + "epoch": 0.09, + "learning_rate": 1.3721938897245708e-05, + "logits/chosen": -2.291614532470703, + "logits/rejected": -3.3033578395843506, + "logps/chosen": -691.8984375, + "logps/rejected": -848.2205810546875, + "loss": 3.0999, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.234715938568115, + "rewards/margins": 1.1269311904907227, + "rewards/rejected": -5.361647129058838, + "step": 580 + }, + { + "epoch": 0.09, + "learning_rate": 1.372120545671456e-05, + "logits/chosen": -2.82979154586792, + "logits/rejected": -3.148827075958252, + "logps/chosen": -111.45004272460938, + "logps/rejected": -230.75845336914062, + "loss": 0.0911, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6244999170303345, + "rewards/margins": 2.8883931636810303, + "rewards/rejected": -4.512892723083496, + "step": 581 + }, + { + "epoch": 0.09, + "learning_rate": 1.3720472016183412e-05, + "logits/chosen": -2.204576253890991, + "logits/rejected": -2.9288809299468994, + "logps/chosen": -99.67672729492188, + "logps/rejected": -175.94244384765625, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8989464044570923, + "rewards/margins": 4.302189826965332, + "rewards/rejected": -5.201136112213135, + "step": 582 + }, + { + "epoch": 0.09, + "learning_rate": 1.3719738575652264e-05, + "logits/chosen": -2.9705543518066406, + "logits/rejected": -2.6662116050720215, + "logps/chosen": -181.81851196289062, + "logps/rejected": -162.37757873535156, + "loss": 1.9575, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8107590675354004, + "rewards/margins": -0.40390777587890625, + "rewards/rejected": -2.406851291656494, + "step": 583 + }, + { + "epoch": 0.09, + "learning_rate": 1.3719005135121115e-05, + "logits/chosen": -1.370236873626709, + "logits/rejected": -3.0519707202911377, + "logps/chosen": -154.2199249267578, + "logps/rejected": -548.9427490234375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.743477463722229, + "rewards/margins": 7.227293014526367, + "rewards/rejected": -7.970770359039307, + "step": 584 + }, + { + "epoch": 0.09, + "learning_rate": 1.3718271694589969e-05, + "logits/chosen": -1.972825527191162, + "logits/rejected": -3.2038753032684326, + "logps/chosen": -65.7489013671875, + "logps/rejected": -390.82574462890625, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9017513990402222, + "rewards/margins": 4.215548515319824, + "rewards/rejected": -6.117300033569336, + "step": 585 + }, + { + "epoch": 0.09, + "learning_rate": 1.3717538254058821e-05, + "logits/chosen": -2.7396352291107178, + "logits/rejected": -3.2317159175872803, + "logps/chosen": -263.2033386230469, + "logps/rejected": -140.2040252685547, + "loss": 3.6741, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.5561933517456055, + "rewards/margins": -0.37686777114868164, + "rewards/rejected": -5.179325580596924, + "step": 586 + }, + { + "epoch": 0.09, + "learning_rate": 1.3716804813527673e-05, + "logits/chosen": -3.0606813430786133, + "logits/rejected": -2.0374693870544434, + "logps/chosen": -307.5671081542969, + "logps/rejected": -230.91595458984375, + "loss": 3.2879, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.558058261871338, + "rewards/margins": -3.2491114139556885, + "rewards/rejected": -0.30894699692726135, + "step": 587 + }, + { + "epoch": 0.09, + "learning_rate": 1.3716071372996525e-05, + "logits/chosen": -2.8882274627685547, + "logits/rejected": -2.5348498821258545, + "logps/chosen": -218.43544006347656, + "logps/rejected": -193.6925506591797, + "loss": 1.2309, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.763049840927124, + "rewards/margins": 2.2140302658081055, + "rewards/rejected": -4.97707986831665, + "step": 588 + }, + { + "epoch": 0.09, + "learning_rate": 1.3715337932465377e-05, + "logits/chosen": -2.273517370223999, + "logits/rejected": -3.1549246311187744, + "logps/chosen": -59.01618194580078, + "logps/rejected": -249.09869384765625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9257237911224365, + "rewards/margins": 7.465335369110107, + "rewards/rejected": -8.391058921813965, + "step": 589 + }, + { + "epoch": 0.09, + "learning_rate": 1.3714604491934228e-05, + "logits/chosen": -2.2020046710968018, + "logits/rejected": -3.170503854751587, + "logps/chosen": -81.4858169555664, + "logps/rejected": -301.85345458984375, + "loss": 0.7893, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.3546202182769775, + "rewards/margins": 3.366687297821045, + "rewards/rejected": -4.721307277679443, + "step": 590 + }, + { + "epoch": 0.09, + "learning_rate": 1.371387105140308e-05, + "logits/chosen": -2.9186854362487793, + "logits/rejected": -3.1824593544006348, + "logps/chosen": -51.498809814453125, + "logps/rejected": -247.1400604248047, + "loss": 1.0079, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6603190898895264, + "rewards/margins": 1.2002754211425781, + "rewards/rejected": -3.8605945110321045, + "step": 591 + }, + { + "epoch": 0.09, + "learning_rate": 1.3713137610871932e-05, + "logits/chosen": -2.6481611728668213, + "logits/rejected": -3.0954902172088623, + "logps/chosen": -66.10108184814453, + "logps/rejected": -287.0120849609375, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9179941415786743, + "rewards/margins": 6.689514636993408, + "rewards/rejected": -7.607508659362793, + "step": 592 + }, + { + "epoch": 0.09, + "learning_rate": 1.3712404170340784e-05, + "logits/chosen": -3.1744284629821777, + "logits/rejected": -3.0770018100738525, + "logps/chosen": -680.3023681640625, + "logps/rejected": -637.4017944335938, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4109329283237457, + "rewards/margins": 6.963229179382324, + "rewards/rejected": -7.374162673950195, + "step": 593 + }, + { + "epoch": 0.09, + "learning_rate": 1.3711670729809638e-05, + "logits/chosen": -1.7598901987075806, + "logits/rejected": -3.112787961959839, + "logps/chosen": -110.18441009521484, + "logps/rejected": -275.7664794921875, + "loss": 0.0344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8809909820556641, + "rewards/margins": 4.131436824798584, + "rewards/rejected": -5.012427806854248, + "step": 594 + }, + { + "epoch": 0.09, + "learning_rate": 1.371093728927849e-05, + "logits/chosen": -3.222935914993286, + "logits/rejected": -3.2811477184295654, + "logps/chosen": -116.61060333251953, + "logps/rejected": -169.05056762695312, + "loss": 0.7704, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.9707865715026855, + "rewards/margins": 1.8186414241790771, + "rewards/rejected": -2.7894279956817627, + "step": 595 + }, + { + "epoch": 0.09, + "learning_rate": 1.3710203848747341e-05, + "logits/chosen": -3.2028331756591797, + "logits/rejected": -3.0449516773223877, + "logps/chosen": -525.5845336914062, + "logps/rejected": -582.2369384765625, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26644840836524963, + "rewards/margins": 7.910278797149658, + "rewards/rejected": -8.176727294921875, + "step": 596 + }, + { + "epoch": 0.09, + "learning_rate": 1.3709470408216193e-05, + "logits/chosen": -3.0619754791259766, + "logits/rejected": -2.883923053741455, + "logps/chosen": -403.7517395019531, + "logps/rejected": -490.0301513671875, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.243553876876831, + "rewards/margins": 6.351446151733398, + "rewards/rejected": -7.59499979019165, + "step": 597 + }, + { + "epoch": 0.09, + "learning_rate": 1.3708736967685045e-05, + "logits/chosen": -2.672826051712036, + "logits/rejected": -3.094160318374634, + "logps/chosen": -202.06036376953125, + "logps/rejected": -180.69842529296875, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8402694463729858, + "rewards/margins": 3.9971563816070557, + "rewards/rejected": -4.83742618560791, + "step": 598 + }, + { + "epoch": 0.09, + "learning_rate": 1.3708003527153899e-05, + "logits/chosen": -2.6861371994018555, + "logits/rejected": -3.2959721088409424, + "logps/chosen": -49.39651870727539, + "logps/rejected": -234.2587432861328, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.103132963180542, + "rewards/margins": 6.421786308288574, + "rewards/rejected": -7.524919033050537, + "step": 599 + }, + { + "epoch": 0.09, + "learning_rate": 1.370727008662275e-05, + "logits/chosen": -1.831966519355774, + "logits/rejected": -3.129625082015991, + "logps/chosen": -190.89874267578125, + "logps/rejected": -287.3532409667969, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2177799940109253, + "rewards/margins": 5.000677108764648, + "rewards/rejected": -6.218457221984863, + "step": 600 + }, + { + "epoch": 0.09, + "learning_rate": 1.3706536646091603e-05, + "logits/chosen": -2.8520991802215576, + "logits/rejected": -3.0411367416381836, + "logps/chosen": -283.3871154785156, + "logps/rejected": -330.46197509765625, + "loss": 0.1352, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6651318073272705, + "rewards/margins": 2.512622356414795, + "rewards/rejected": -4.1777544021606445, + "step": 601 + }, + { + "epoch": 0.09, + "learning_rate": 1.3705803205560454e-05, + "logits/chosen": -2.9212050437927246, + "logits/rejected": -3.220228910446167, + "logps/chosen": -78.02900695800781, + "logps/rejected": -164.92298889160156, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6576694250106812, + "rewards/margins": 4.594609260559082, + "rewards/rejected": -5.2522783279418945, + "step": 602 + }, + { + "epoch": 0.09, + "learning_rate": 1.3705069765029308e-05, + "logits/chosen": -1.5184690952301025, + "logits/rejected": -2.9037020206451416, + "logps/chosen": -81.89019012451172, + "logps/rejected": -235.8068084716797, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013016890734434128, + "rewards/margins": 4.813329696655273, + "rewards/rejected": -4.8003129959106445, + "step": 603 + }, + { + "epoch": 0.09, + "learning_rate": 1.370433632449816e-05, + "logits/chosen": -3.2151851654052734, + "logits/rejected": -1.9182626008987427, + "logps/chosen": -308.4080505371094, + "logps/rejected": -112.66285705566406, + "loss": 1.6925, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5601556301116943, + "rewards/margins": 0.9981675148010254, + "rewards/rejected": -3.5583231449127197, + "step": 604 + }, + { + "epoch": 0.09, + "learning_rate": 1.3703602883967012e-05, + "logits/chosen": -0.9175253510475159, + "logits/rejected": -2.4543344974517822, + "logps/chosen": -141.61236572265625, + "logps/rejected": -242.4770965576172, + "loss": 2.6322, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6736485958099365, + "rewards/margins": -0.7885184288024902, + "rewards/rejected": -2.8851301670074463, + "step": 605 + }, + { + "epoch": 0.09, + "learning_rate": 1.3702869443435864e-05, + "logits/chosen": -1.715612769126892, + "logits/rejected": -3.175481081008911, + "logps/chosen": -72.78694152832031, + "logps/rejected": -277.65765380859375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0909829139709473, + "rewards/margins": 7.354184150695801, + "rewards/rejected": -9.445167541503906, + "step": 606 + }, + { + "epoch": 0.09, + "learning_rate": 1.3702136002904715e-05, + "logits/chosen": -2.564028024673462, + "logits/rejected": -3.160210371017456, + "logps/chosen": -179.19200134277344, + "logps/rejected": -265.4335021972656, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0415467023849487, + "rewards/margins": 6.56905460357666, + "rewards/rejected": -7.610601425170898, + "step": 607 + }, + { + "epoch": 0.09, + "learning_rate": 1.3701402562373567e-05, + "logits/chosen": -3.1228976249694824, + "logits/rejected": -2.563227891921997, + "logps/chosen": -108.91108703613281, + "logps/rejected": -184.92550659179688, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1126426458358765, + "rewards/margins": 5.294501304626465, + "rewards/rejected": -6.407143592834473, + "step": 608 + }, + { + "epoch": 0.09, + "learning_rate": 1.370066912184242e-05, + "logits/chosen": -3.1445491313934326, + "logits/rejected": -2.3280372619628906, + "logps/chosen": -551.9871826171875, + "logps/rejected": -505.8147277832031, + "loss": 4.3007, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.080203056335449, + "rewards/margins": -1.857020378112793, + "rewards/rejected": -3.2231826782226562, + "step": 609 + }, + { + "epoch": 0.09, + "learning_rate": 1.3699935681311271e-05, + "logits/chosen": -3.149275541305542, + "logits/rejected": -3.207993268966675, + "logps/chosen": -331.19580078125, + "logps/rejected": -360.94232177734375, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6609573364257812, + "rewards/margins": 4.502834320068359, + "rewards/rejected": -5.163791656494141, + "step": 610 + }, + { + "epoch": 0.1, + "learning_rate": 1.3699202240780123e-05, + "logits/chosen": -2.3057146072387695, + "logits/rejected": -3.003573417663574, + "logps/chosen": -276.4903869628906, + "logps/rejected": -281.5438232421875, + "loss": 0.2143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7094057202339172, + "rewards/margins": 3.18929123878479, + "rewards/rejected": -3.8986968994140625, + "step": 611 + }, + { + "epoch": 0.1, + "learning_rate": 1.3698468800248977e-05, + "logits/chosen": -3.265040874481201, + "logits/rejected": -3.067317485809326, + "logps/chosen": -620.0552978515625, + "logps/rejected": -905.5797729492188, + "loss": 2.9049, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.128118991851807, + "rewards/margins": -0.7338576316833496, + "rewards/rejected": -3.394261360168457, + "step": 612 + }, + { + "epoch": 0.1, + "learning_rate": 1.3697735359717828e-05, + "logits/chosen": -3.14945125579834, + "logits/rejected": -2.2029266357421875, + "logps/chosen": -325.80181884765625, + "logps/rejected": -312.3128967285156, + "loss": 3.1041, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8867745399475098, + "rewards/margins": -0.024187564849853516, + "rewards/rejected": -3.8625869750976562, + "step": 613 + }, + { + "epoch": 0.1, + "learning_rate": 1.369700191918668e-05, + "logits/chosen": -2.614997625350952, + "logits/rejected": -3.1092677116394043, + "logps/chosen": -417.7093505859375, + "logps/rejected": -409.35662841796875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6737518310546875, + "rewards/margins": 5.8021745681762695, + "rewards/rejected": -6.475926399230957, + "step": 614 + }, + { + "epoch": 0.1, + "learning_rate": 1.3696268478655532e-05, + "logits/chosen": -2.1680448055267334, + "logits/rejected": -2.932811975479126, + "logps/chosen": -128.3190155029297, + "logps/rejected": -284.4375305175781, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16296960413455963, + "rewards/margins": 6.8505120277404785, + "rewards/rejected": -7.013482093811035, + "step": 615 + }, + { + "epoch": 0.1, + "learning_rate": 1.3695535038124384e-05, + "logits/chosen": -2.1494147777557373, + "logits/rejected": -3.0235300064086914, + "logps/chosen": -78.99925231933594, + "logps/rejected": -286.1852111816406, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0528829097747803, + "rewards/margins": 5.622527122497559, + "rewards/rejected": -6.675410270690918, + "step": 616 + }, + { + "epoch": 0.1, + "learning_rate": 1.3694801597593236e-05, + "logits/chosen": -2.1118271350860596, + "logits/rejected": -3.122562885284424, + "logps/chosen": -46.777259826660156, + "logps/rejected": -275.5895080566406, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4997830390930176, + "rewards/margins": 6.281552314758301, + "rewards/rejected": -7.781335830688477, + "step": 617 + }, + { + "epoch": 0.1, + "learning_rate": 1.3694068157062088e-05, + "logits/chosen": -3.1141529083251953, + "logits/rejected": -1.8929080963134766, + "logps/chosen": -972.322021484375, + "logps/rejected": -426.91473388671875, + "loss": 1.014, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.9826507568359375, + "rewards/margins": 0.18693435192108154, + "rewards/rejected": -2.1695852279663086, + "step": 618 + }, + { + "epoch": 0.1, + "learning_rate": 1.369333471653094e-05, + "logits/chosen": -2.9883389472961426, + "logits/rejected": -3.1223526000976562, + "logps/chosen": -34.48210144042969, + "logps/rejected": -208.69757080078125, + "loss": 0.0772, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9088356494903564, + "rewards/margins": 5.731667518615723, + "rewards/rejected": -7.6405029296875, + "step": 619 + }, + { + "epoch": 0.1, + "learning_rate": 1.3692601275999793e-05, + "logits/chosen": -3.183220148086548, + "logits/rejected": -3.047109603881836, + "logps/chosen": -95.45455932617188, + "logps/rejected": -77.32060241699219, + "loss": 1.7161, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5408984422683716, + "rewards/margins": 0.05780649185180664, + "rewards/rejected": -1.5987049341201782, + "step": 620 + }, + { + "epoch": 0.1, + "learning_rate": 1.3691867835468645e-05, + "logits/chosen": -3.1679766178131104, + "logits/rejected": -2.4949729442596436, + "logps/chosen": -478.08203125, + "logps/rejected": -473.69439697265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.871245265007019, + "rewards/margins": 11.902688980102539, + "rewards/rejected": -12.773933410644531, + "step": 621 + }, + { + "epoch": 0.1, + "learning_rate": 1.3691134394937497e-05, + "logits/chosen": -3.154392957687378, + "logits/rejected": -3.222964286804199, + "logps/chosen": -249.32733154296875, + "logps/rejected": -367.7370910644531, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.495323657989502, + "rewards/margins": 5.410611152648926, + "rewards/rejected": -7.905935287475586, + "step": 622 + }, + { + "epoch": 0.1, + "learning_rate": 1.3690400954406349e-05, + "logits/chosen": -1.244024395942688, + "logits/rejected": -2.7204346656799316, + "logps/chosen": -115.73884582519531, + "logps/rejected": -700.55029296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4120380878448486, + "rewards/margins": 10.455677032470703, + "rewards/rejected": -11.867715835571289, + "step": 623 + }, + { + "epoch": 0.1, + "learning_rate": 1.36896675138752e-05, + "logits/chosen": -2.2176058292388916, + "logits/rejected": -3.011935234069824, + "logps/chosen": -252.31820678710938, + "logps/rejected": -311.4550476074219, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9807381629943848, + "rewards/margins": 3.9392313957214355, + "rewards/rejected": -5.91996955871582, + "step": 624 + }, + { + "epoch": 0.1, + "learning_rate": 1.3688934073344053e-05, + "logits/chosen": -1.7494251728057861, + "logits/rejected": -2.7162532806396484, + "logps/chosen": -129.9586639404297, + "logps/rejected": -243.80528259277344, + "loss": 0.1323, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.88716721534729, + "rewards/margins": 3.597156286239624, + "rewards/rejected": -5.484323501586914, + "step": 625 + }, + { + "epoch": 0.1, + "learning_rate": 1.3688200632812905e-05, + "logits/chosen": -2.948317527770996, + "logits/rejected": -2.301619529724121, + "logps/chosen": -344.04449462890625, + "logps/rejected": -184.42808532714844, + "loss": 6.4463, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.227148056030273, + "rewards/margins": -6.444649696350098, + "rewards/rejected": -1.7824980020523071, + "step": 626 + }, + { + "epoch": 0.1, + "learning_rate": 1.3687467192281756e-05, + "logits/chosen": -1.8091814517974854, + "logits/rejected": -3.091627836227417, + "logps/chosen": -136.92807006835938, + "logps/rejected": -340.7166748046875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1543937921524048, + "rewards/margins": 7.625659465789795, + "rewards/rejected": -8.78005313873291, + "step": 627 + }, + { + "epoch": 0.1, + "learning_rate": 1.3686733751750608e-05, + "logits/chosen": -2.0161759853363037, + "logits/rejected": -3.1229608058929443, + "logps/chosen": -145.64878845214844, + "logps/rejected": -313.287353515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3685508966445923, + "rewards/margins": 7.518052577972412, + "rewards/rejected": -8.886603355407715, + "step": 628 + }, + { + "epoch": 0.1, + "learning_rate": 1.3686000311219462e-05, + "logits/chosen": -2.5521585941314697, + "logits/rejected": -2.633329153060913, + "logps/chosen": -726.3740234375, + "logps/rejected": -519.6714477539062, + "loss": 3.0093, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.773851156234741, + "rewards/margins": -0.7983734607696533, + "rewards/rejected": -2.975477695465088, + "step": 629 + }, + { + "epoch": 0.1, + "learning_rate": 1.3685266870688314e-05, + "logits/chosen": -1.8987170457839966, + "logits/rejected": -2.8951094150543213, + "logps/chosen": -110.49093627929688, + "logps/rejected": -273.65887451171875, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0431327819824219, + "rewards/margins": 7.244024753570557, + "rewards/rejected": -8.28715705871582, + "step": 630 + }, + { + "epoch": 0.1, + "learning_rate": 1.3684533430157166e-05, + "logits/chosen": -3.0260634422302246, + "logits/rejected": -3.2265260219573975, + "logps/chosen": -127.05555725097656, + "logps/rejected": -146.86244201660156, + "loss": 1.1151, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.226640462875366, + "rewards/margins": 1.3568501472473145, + "rewards/rejected": -3.5834906101226807, + "step": 631 + }, + { + "epoch": 0.1, + "learning_rate": 1.3683799989626018e-05, + "logits/chosen": -3.2076706886291504, + "logits/rejected": -2.690979480743408, + "logps/chosen": -587.888427734375, + "logps/rejected": -409.6119079589844, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8699722290039062, + "rewards/margins": 5.9389543533325195, + "rewards/rejected": -6.808926582336426, + "step": 632 + }, + { + "epoch": 0.1, + "learning_rate": 1.3683066549094871e-05, + "logits/chosen": -2.981557846069336, + "logits/rejected": -3.180983066558838, + "logps/chosen": -161.41041564941406, + "logps/rejected": -245.88284301757812, + "loss": 0.6307, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5771427154541016, + "rewards/margins": 5.147810935974121, + "rewards/rejected": -7.724953651428223, + "step": 633 + }, + { + "epoch": 0.1, + "learning_rate": 1.3682333108563723e-05, + "logits/chosen": -3.112075090408325, + "logits/rejected": -2.1978940963745117, + "logps/chosen": -317.25579833984375, + "logps/rejected": -297.46112060546875, + "loss": 3.1299, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.854144096374512, + "rewards/margins": -1.7219481468200684, + "rewards/rejected": -3.1321961879730225, + "step": 634 + }, + { + "epoch": 0.1, + "learning_rate": 1.3681599668032575e-05, + "logits/chosen": -2.7402970790863037, + "logits/rejected": -2.921067714691162, + "logps/chosen": -58.132774353027344, + "logps/rejected": -159.48324584960938, + "loss": 0.0563, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1791623830795288, + "rewards/margins": 3.1893322467803955, + "rewards/rejected": -4.368494510650635, + "step": 635 + }, + { + "epoch": 0.1, + "learning_rate": 1.3680866227501427e-05, + "logits/chosen": -1.7741131782531738, + "logits/rejected": -3.2633771896362305, + "logps/chosen": -80.67306518554688, + "logps/rejected": -330.7607421875, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4262560606002808, + "rewards/margins": 5.510300159454346, + "rewards/rejected": -6.936556339263916, + "step": 636 + }, + { + "epoch": 0.1, + "learning_rate": 1.3680132786970279e-05, + "logits/chosen": -3.279648780822754, + "logits/rejected": -3.3650400638580322, + "logps/chosen": -87.28776550292969, + "logps/rejected": -180.43017578125, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7311261892318726, + "rewards/margins": 4.82628059387207, + "rewards/rejected": -5.557407379150391, + "step": 637 + }, + { + "epoch": 0.1, + "learning_rate": 1.3679399346439132e-05, + "logits/chosen": -1.0428069829940796, + "logits/rejected": -3.101527214050293, + "logps/chosen": -21.043163299560547, + "logps/rejected": -301.25946044921875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9546589851379395, + "rewards/margins": 6.886845588684082, + "rewards/rejected": -7.841504096984863, + "step": 638 + }, + { + "epoch": 0.1, + "learning_rate": 1.3678665905907984e-05, + "logits/chosen": -3.0898709297180176, + "logits/rejected": -2.584897756576538, + "logps/chosen": -397.3729248046875, + "logps/rejected": -457.64776611328125, + "loss": 2.5513, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.026383876800537, + "rewards/margins": -0.06809377670288086, + "rewards/rejected": -4.958290100097656, + "step": 639 + }, + { + "epoch": 0.1, + "learning_rate": 1.3677932465376836e-05, + "logits/chosen": -3.0292673110961914, + "logits/rejected": -3.279299736022949, + "logps/chosen": -481.4113464355469, + "logps/rejected": -473.68475341796875, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.043538212776184, + "rewards/margins": 6.26554012298584, + "rewards/rejected": -7.309078216552734, + "step": 640 + }, + { + "epoch": 0.1, + "learning_rate": 1.3677199024845688e-05, + "logits/chosen": -1.7763954401016235, + "logits/rejected": -3.187023401260376, + "logps/chosen": -91.26270294189453, + "logps/rejected": -384.04864501953125, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2273945808410645, + "rewards/margins": 6.759293556213379, + "rewards/rejected": -8.986688613891602, + "step": 641 + }, + { + "epoch": 0.1, + "learning_rate": 1.367646558431454e-05, + "logits/chosen": -1.9095760583877563, + "logits/rejected": -3.2304677963256836, + "logps/chosen": -111.30249786376953, + "logps/rejected": -413.6489562988281, + "loss": 1.2874, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.767293930053711, + "rewards/margins": 0.9290738105773926, + "rewards/rejected": -3.6963677406311035, + "step": 642 + }, + { + "epoch": 0.1, + "learning_rate": 1.3675732143783392e-05, + "logits/chosen": -1.6537253856658936, + "logits/rejected": -3.068779706954956, + "logps/chosen": -188.4862518310547, + "logps/rejected": -309.227294921875, + "loss": 0.0677, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4255526065826416, + "rewards/margins": 3.7348663806915283, + "rewards/rejected": -5.16041898727417, + "step": 643 + }, + { + "epoch": 0.1, + "learning_rate": 1.3674998703252243e-05, + "logits/chosen": -2.268660068511963, + "logits/rejected": -3.0284924507141113, + "logps/chosen": -262.3863525390625, + "logps/rejected": -295.97607421875, + "loss": 2.164, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6713356971740723, + "rewards/margins": -0.34911811351776123, + "rewards/rejected": -3.3222174644470215, + "step": 644 + }, + { + "epoch": 0.1, + "learning_rate": 1.3674265262721095e-05, + "logits/chosen": -2.2293882369995117, + "logits/rejected": -2.9227445125579834, + "logps/chosen": -134.45016479492188, + "logps/rejected": -441.7669372558594, + "loss": 0.0269, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.513399362564087, + "rewards/margins": 11.74238109588623, + "rewards/rejected": -13.255780220031738, + "step": 645 + }, + { + "epoch": 0.1, + "learning_rate": 1.3673531822189947e-05, + "logits/chosen": -2.842780828475952, + "logits/rejected": -2.8101119995117188, + "logps/chosen": -167.50381469726562, + "logps/rejected": -320.6292724609375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02093176543712616, + "rewards/margins": 5.829056262969971, + "rewards/rejected": -5.808124542236328, + "step": 646 + }, + { + "epoch": 0.1, + "learning_rate": 1.36727983816588e-05, + "logits/chosen": -3.0849931240081787, + "logits/rejected": -1.383854627609253, + "logps/chosen": -219.13270568847656, + "logps/rejected": -112.54186248779297, + "loss": 1.7462, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.809732437133789, + "rewards/margins": -0.8334407806396484, + "rewards/rejected": -3.9762914180755615, + "step": 647 + }, + { + "epoch": 0.1, + "learning_rate": 1.3672064941127653e-05, + "logits/chosen": -1.6588175296783447, + "logits/rejected": -3.140308141708374, + "logps/chosen": -78.77629089355469, + "logps/rejected": -533.307373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8768060207366943, + "rewards/margins": 15.521533966064453, + "rewards/rejected": -17.398340225219727, + "step": 648 + }, + { + "epoch": 0.1, + "learning_rate": 1.3671331500596505e-05, + "logits/chosen": -3.0595338344573975, + "logits/rejected": -1.5220510959625244, + "logps/chosen": -136.3072509765625, + "logps/rejected": -35.91450500488281, + "loss": 2.4186, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.006736755371094, + "rewards/margins": -2.314612865447998, + "rewards/rejected": -1.6921236515045166, + "step": 649 + }, + { + "epoch": 0.1, + "learning_rate": 1.3670598060065356e-05, + "logits/chosen": -2.942301034927368, + "logits/rejected": -2.935955286026001, + "logps/chosen": -33.50537109375, + "logps/rejected": -99.73579406738281, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.444188117980957, + "rewards/margins": 4.061349868774414, + "rewards/rejected": -5.505537986755371, + "step": 650 + }, + { + "epoch": 0.1, + "learning_rate": 1.3669864619534208e-05, + "logits/chosen": -3.1358463764190674, + "logits/rejected": -3.1865322589874268, + "logps/chosen": -238.5361785888672, + "logps/rejected": -422.7308349609375, + "loss": 0.1, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.855717658996582, + "rewards/margins": 8.249650001525879, + "rewards/rejected": -10.105367660522461, + "step": 651 + }, + { + "epoch": 0.1, + "learning_rate": 1.366913117900306e-05, + "logits/chosen": -2.5990681648254395, + "logits/rejected": -3.1681923866271973, + "logps/chosen": -27.677330017089844, + "logps/rejected": -223.6719512939453, + "loss": 0.0831, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1180719137191772, + "rewards/margins": 3.169235944747925, + "rewards/rejected": -4.2873077392578125, + "step": 652 + }, + { + "epoch": 0.1, + "learning_rate": 1.3668397738471912e-05, + "logits/chosen": -3.2190866470336914, + "logits/rejected": -3.0844264030456543, + "logps/chosen": -110.47041320800781, + "logps/rejected": -157.66592407226562, + "loss": 0.0367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7959408164024353, + "rewards/margins": 3.794268846511841, + "rewards/rejected": -4.590209484100342, + "step": 653 + }, + { + "epoch": 0.1, + "learning_rate": 1.3667664297940764e-05, + "logits/chosen": -3.2198615074157715, + "logits/rejected": -2.8665883541107178, + "logps/chosen": -907.5003662109375, + "logps/rejected": -617.9759521484375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2746734917163849, + "rewards/margins": 7.20135498046875, + "rewards/rejected": -6.9266815185546875, + "step": 654 + }, + { + "epoch": 0.1, + "learning_rate": 1.3666930857409616e-05, + "logits/chosen": -2.034518241882324, + "logits/rejected": -2.9923617839813232, + "logps/chosen": -107.77951049804688, + "logps/rejected": -315.9512634277344, + "loss": 1.0662, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2897543907165527, + "rewards/margins": 1.9025161266326904, + "rewards/rejected": -5.192270755767822, + "step": 655 + }, + { + "epoch": 0.1, + "learning_rate": 1.366619741687847e-05, + "logits/chosen": -3.0680551528930664, + "logits/rejected": -3.0771162509918213, + "logps/chosen": -214.15203857421875, + "logps/rejected": -251.10488891601562, + "loss": 0.7924, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4552993774414062, + "rewards/margins": 0.9289302825927734, + "rewards/rejected": -3.3842296600341797, + "step": 656 + }, + { + "epoch": 0.1, + "learning_rate": 1.3665463976347321e-05, + "logits/chosen": -1.2762280702590942, + "logits/rejected": -3.14817476272583, + "logps/chosen": -130.18922424316406, + "logps/rejected": -248.03469848632812, + "loss": 1.455, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8146250247955322, + "rewards/margins": 1.4943561553955078, + "rewards/rejected": -4.308980941772461, + "step": 657 + }, + { + "epoch": 0.1, + "learning_rate": 1.3664730535816173e-05, + "logits/chosen": -2.2208688259124756, + "logits/rejected": -3.249598264694214, + "logps/chosen": -358.57843017578125, + "logps/rejected": -544.192138671875, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6021348237991333, + "rewards/margins": 4.394120693206787, + "rewards/rejected": -5.996255397796631, + "step": 658 + }, + { + "epoch": 0.1, + "learning_rate": 1.3663997095285025e-05, + "logits/chosen": -3.117461919784546, + "logits/rejected": -1.9079164266586304, + "logps/chosen": -269.6214904785156, + "logps/rejected": -191.48321533203125, + "loss": 2.8191, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.484889268875122, + "rewards/margins": 0.5986251831054688, + "rewards/rejected": -4.08351469039917, + "step": 659 + }, + { + "epoch": 0.1, + "learning_rate": 1.3663263654753877e-05, + "logits/chosen": -2.969343662261963, + "logits/rejected": -3.0638487339019775, + "logps/chosen": -137.58175659179688, + "logps/rejected": -275.82080078125, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.804755449295044, + "rewards/margins": 4.953536033630371, + "rewards/rejected": -6.758291721343994, + "step": 660 + }, + { + "epoch": 0.1, + "learning_rate": 1.3662530214222729e-05, + "logits/chosen": -2.689846992492676, + "logits/rejected": -3.3003194332122803, + "logps/chosen": -74.89727020263672, + "logps/rejected": -354.62030029296875, + "loss": 0.0303, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9074859619140625, + "rewards/margins": 5.906257629394531, + "rewards/rejected": -7.813743591308594, + "step": 661 + }, + { + "epoch": 0.1, + "learning_rate": 1.366179677369158e-05, + "logits/chosen": -3.0721495151519775, + "logits/rejected": -3.201382875442505, + "logps/chosen": -41.20996856689453, + "logps/rejected": -226.41567993164062, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.364408254623413, + "rewards/margins": 8.972713470458984, + "rewards/rejected": -10.337121963500977, + "step": 662 + }, + { + "epoch": 0.1, + "learning_rate": 1.3661063333160433e-05, + "logits/chosen": -2.878797769546509, + "logits/rejected": -3.3297336101531982, + "logps/chosen": -311.25238037109375, + "logps/rejected": -100.59144592285156, + "loss": 5.048, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.43961238861084, + "rewards/margins": -2.916466474533081, + "rewards/rejected": -3.523145914077759, + "step": 663 + }, + { + "epoch": 0.1, + "learning_rate": 1.3660329892629284e-05, + "logits/chosen": -1.9385722875595093, + "logits/rejected": -3.0854039192199707, + "logps/chosen": -91.50806427001953, + "logps/rejected": -232.45013427734375, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7050444483757019, + "rewards/margins": 4.9799933433532715, + "rewards/rejected": -5.685038089752197, + "step": 664 + }, + { + "epoch": 0.1, + "learning_rate": 1.3659596452098138e-05, + "logits/chosen": -3.2557532787323, + "logits/rejected": -3.1155812740325928, + "logps/chosen": -106.74748992919922, + "logps/rejected": -148.823974609375, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8368406295776367, + "rewards/margins": 4.0666704177856445, + "rewards/rejected": -4.903511047363281, + "step": 665 + }, + { + "epoch": 0.1, + "learning_rate": 1.365886301156699e-05, + "logits/chosen": -3.129138469696045, + "logits/rejected": -2.5693230628967285, + "logps/chosen": -95.36527252197266, + "logps/rejected": -209.700439453125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1715015172958374, + "rewards/margins": 6.131647109985352, + "rewards/rejected": -7.30314826965332, + "step": 666 + }, + { + "epoch": 0.1, + "learning_rate": 1.3658129571035843e-05, + "logits/chosen": -1.6280591487884521, + "logits/rejected": -3.2824039459228516, + "logps/chosen": -49.562721252441406, + "logps/rejected": -426.7691650390625, + "loss": 0.1009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9824646711349487, + "rewards/margins": 4.7416887283325195, + "rewards/rejected": -6.724153518676758, + "step": 667 + }, + { + "epoch": 0.1, + "learning_rate": 1.3657396130504695e-05, + "logits/chosen": -1.2953439950942993, + "logits/rejected": -2.920592784881592, + "logps/chosen": -32.390750885009766, + "logps/rejected": -195.9396514892578, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5272996425628662, + "rewards/margins": 4.283400535583496, + "rewards/rejected": -5.810700416564941, + "step": 668 + }, + { + "epoch": 0.1, + "learning_rate": 1.3656662689973547e-05, + "logits/chosen": -2.9400620460510254, + "logits/rejected": -3.0434224605560303, + "logps/chosen": -343.0433349609375, + "logps/rejected": -390.69000244140625, + "loss": 2.197, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.288012981414795, + "rewards/margins": -1.1163095235824585, + "rewards/rejected": -3.171703338623047, + "step": 669 + }, + { + "epoch": 0.1, + "learning_rate": 1.3655929249442399e-05, + "logits/chosen": -3.0995993614196777, + "logits/rejected": -1.9259637594223022, + "logps/chosen": -357.3390808105469, + "logps/rejected": -157.9972381591797, + "loss": 3.3962, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.341400146484375, + "rewards/margins": -1.464233160018921, + "rewards/rejected": -2.877166986465454, + "step": 670 + }, + { + "epoch": 0.1, + "learning_rate": 1.3655195808911251e-05, + "logits/chosen": -3.3512229919433594, + "logits/rejected": -3.3330821990966797, + "logps/chosen": -571.0912475585938, + "logps/rejected": -584.6177978515625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3338109254837036, + "rewards/margins": 7.8903961181640625, + "rewards/rejected": -8.224206924438477, + "step": 671 + }, + { + "epoch": 0.1, + "learning_rate": 1.3654462368380103e-05, + "logits/chosen": -2.3720791339874268, + "logits/rejected": -3.166198968887329, + "logps/chosen": -63.070396423339844, + "logps/rejected": -93.28923797607422, + "loss": 0.0874, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.283307671546936, + "rewards/margins": 2.3941407203674316, + "rewards/rejected": -3.6774487495422363, + "step": 672 + }, + { + "epoch": 0.1, + "learning_rate": 1.3653728927848955e-05, + "logits/chosen": -3.1209933757781982, + "logits/rejected": -1.368618130683899, + "logps/chosen": -540.6761474609375, + "logps/rejected": -202.28689575195312, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3286889791488647, + "rewards/margins": 3.4979610443115234, + "rewards/rejected": -4.826650142669678, + "step": 673 + }, + { + "epoch": 0.1, + "learning_rate": 1.3652995487317808e-05, + "logits/chosen": -2.780590534210205, + "logits/rejected": -3.1925835609436035, + "logps/chosen": -520.2089233398438, + "logps/rejected": -547.1190795898438, + "loss": 0.0805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.503736138343811, + "rewards/margins": 8.370156288146973, + "rewards/rejected": -7.866420269012451, + "step": 674 + }, + { + "epoch": 0.1, + "learning_rate": 1.365226204678666e-05, + "logits/chosen": -2.108401298522949, + "logits/rejected": -2.915019989013672, + "logps/chosen": -33.0374641418457, + "logps/rejected": -133.95762634277344, + "loss": 0.0774, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1994578838348389, + "rewards/margins": 4.611993312835693, + "rewards/rejected": -5.811450958251953, + "step": 675 + }, + { + "epoch": 0.11, + "learning_rate": 1.3651528606255512e-05, + "logits/chosen": -2.2894270420074463, + "logits/rejected": -3.2997636795043945, + "logps/chosen": -323.38470458984375, + "logps/rejected": -488.475341796875, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7219680547714233, + "rewards/margins": 4.828518867492676, + "rewards/rejected": -6.5504865646362305, + "step": 676 + }, + { + "epoch": 0.11, + "learning_rate": 1.3650795165724364e-05, + "logits/chosen": -2.6117188930511475, + "logits/rejected": -3.3030636310577393, + "logps/chosen": -28.444971084594727, + "logps/rejected": -293.0326232910156, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5524647235870361, + "rewards/margins": 6.560603618621826, + "rewards/rejected": -8.113068580627441, + "step": 677 + }, + { + "epoch": 0.11, + "learning_rate": 1.3650061725193216e-05, + "logits/chosen": -3.1124751567840576, + "logits/rejected": -2.6975040435791016, + "logps/chosen": -384.8302917480469, + "logps/rejected": -301.0179748535156, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.805965781211853, + "rewards/margins": 6.268857002258301, + "rewards/rejected": -7.074823379516602, + "step": 678 + }, + { + "epoch": 0.11, + "learning_rate": 1.3649328284662068e-05, + "logits/chosen": -3.148757219314575, + "logits/rejected": -2.8889474868774414, + "logps/chosen": -234.218017578125, + "logps/rejected": -222.3436279296875, + "loss": 1.5556, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.147451877593994, + "rewards/margins": 1.5982438325881958, + "rewards/rejected": -5.7456955909729, + "step": 679 + }, + { + "epoch": 0.11, + "learning_rate": 1.364859484413092e-05, + "logits/chosen": -3.1588966846466064, + "logits/rejected": -3.2414019107818604, + "logps/chosen": -144.35195922851562, + "logps/rejected": -330.88006591796875, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3116676807403564, + "rewards/margins": 5.7593278884887695, + "rewards/rejected": -7.070995330810547, + "step": 680 + }, + { + "epoch": 0.11, + "learning_rate": 1.3647861403599771e-05, + "logits/chosen": -2.849785089492798, + "logits/rejected": -3.1691734790802, + "logps/chosen": -169.19996643066406, + "logps/rejected": -308.7735595703125, + "loss": 1.7503, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.765448093414307, + "rewards/margins": 0.4128544330596924, + "rewards/rejected": -5.178302764892578, + "step": 681 + }, + { + "epoch": 0.11, + "learning_rate": 1.3647127963068623e-05, + "logits/chosen": -2.093562602996826, + "logits/rejected": -3.089848756790161, + "logps/chosen": -231.82810974121094, + "logps/rejected": -428.83831787109375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2699780464172363, + "rewards/margins": 6.841884613037109, + "rewards/rejected": -9.111862182617188, + "step": 682 + }, + { + "epoch": 0.11, + "learning_rate": 1.3646394522537477e-05, + "logits/chosen": -3.26989483833313, + "logits/rejected": -3.104750394821167, + "logps/chosen": -270.5647888183594, + "logps/rejected": -346.33465576171875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.268716335296631, + "rewards/margins": 6.558528900146484, + "rewards/rejected": -8.827245712280273, + "step": 683 + }, + { + "epoch": 0.11, + "learning_rate": 1.3645661082006329e-05, + "logits/chosen": -3.242802619934082, + "logits/rejected": -2.803680181503296, + "logps/chosen": -121.2288818359375, + "logps/rejected": -130.06771850585938, + "loss": 0.0381, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7910157442092896, + "rewards/margins": 6.150829315185547, + "rewards/rejected": -7.941844940185547, + "step": 684 + }, + { + "epoch": 0.11, + "learning_rate": 1.364492764147518e-05, + "logits/chosen": -2.8093883991241455, + "logits/rejected": -3.20798659324646, + "logps/chosen": -88.5950698852539, + "logps/rejected": -268.0391845703125, + "loss": 0.4869, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4195621013641357, + "rewards/margins": 1.641683578491211, + "rewards/rejected": -4.061245918273926, + "step": 685 + }, + { + "epoch": 0.11, + "learning_rate": 1.3644194200944032e-05, + "logits/chosen": -1.685762643814087, + "logits/rejected": -3.2100157737731934, + "logps/chosen": -135.58038330078125, + "logps/rejected": -454.1636047363281, + "loss": 2.4065, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.350285053253174, + "rewards/margins": -0.05992245674133301, + "rewards/rejected": -3.290362596511841, + "step": 686 + }, + { + "epoch": 0.11, + "learning_rate": 1.3643460760412884e-05, + "logits/chosen": -1.764767050743103, + "logits/rejected": -2.965118885040283, + "logps/chosen": -184.27273559570312, + "logps/rejected": -392.9076843261719, + "loss": 0.048, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.884861946105957, + "rewards/margins": 4.25714111328125, + "rewards/rejected": -7.142003059387207, + "step": 687 + }, + { + "epoch": 0.11, + "learning_rate": 1.3642727319881736e-05, + "logits/chosen": -1.310533046722412, + "logits/rejected": -2.9289426803588867, + "logps/chosen": -63.819923400878906, + "logps/rejected": -271.8531799316406, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9127516746520996, + "rewards/margins": 5.721913814544678, + "rewards/rejected": -8.634665489196777, + "step": 688 + }, + { + "epoch": 0.11, + "learning_rate": 1.3641993879350588e-05, + "logits/chosen": -3.143211603164673, + "logits/rejected": -2.830362558364868, + "logps/chosen": -264.57135009765625, + "logps/rejected": -256.6038818359375, + "loss": 3.3528, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.690913200378418, + "rewards/margins": -0.538271427154541, + "rewards/rejected": -5.152642250061035, + "step": 689 + }, + { + "epoch": 0.11, + "learning_rate": 1.364126043881944e-05, + "logits/chosen": -3.037430763244629, + "logits/rejected": -3.068558692932129, + "logps/chosen": -163.4641571044922, + "logps/rejected": -197.10816955566406, + "loss": 3.8639, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.139421463012695, + "rewards/margins": -0.08453607559204102, + "rewards/rejected": -5.054885387420654, + "step": 690 + }, + { + "epoch": 0.11, + "learning_rate": 1.3640526998288292e-05, + "logits/chosen": -2.4352610111236572, + "logits/rejected": -3.073662519454956, + "logps/chosen": -295.01495361328125, + "logps/rejected": -401.7635803222656, + "loss": 0.1716, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0358822345733643, + "rewards/margins": 3.5757710933685303, + "rewards/rejected": -5.6116533279418945, + "step": 691 + }, + { + "epoch": 0.11, + "learning_rate": 1.3639793557757145e-05, + "logits/chosen": -3.0129337310791016, + "logits/rejected": -2.1438496112823486, + "logps/chosen": -238.7086181640625, + "logps/rejected": -205.12026977539062, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9841728210449219, + "rewards/margins": 4.892938137054443, + "rewards/rejected": -6.877110481262207, + "step": 692 + }, + { + "epoch": 0.11, + "learning_rate": 1.3639060117225997e-05, + "logits/chosen": -1.4869447946548462, + "logits/rejected": -3.129380464553833, + "logps/chosen": -122.5439224243164, + "logps/rejected": -339.98150634765625, + "loss": 0.0376, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4724643230438232, + "rewards/margins": 4.732921123504639, + "rewards/rejected": -7.205385208129883, + "step": 693 + }, + { + "epoch": 0.11, + "learning_rate": 1.363832667669485e-05, + "logits/chosen": -1.200215458869934, + "logits/rejected": -2.4563915729522705, + "logps/chosen": -582.3572998046875, + "logps/rejected": -384.34442138671875, + "loss": 10.01, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.418177604675293, + "rewards/margins": -8.328593254089355, + "rewards/rejected": -5.0895843505859375, + "step": 694 + }, + { + "epoch": 0.11, + "learning_rate": 1.3637593236163701e-05, + "logits/chosen": -2.759178876876831, + "logits/rejected": -2.1372008323669434, + "logps/chosen": -188.892822265625, + "logps/rejected": -216.06915283203125, + "loss": 2.915, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.387383460998535, + "rewards/margins": 0.7222476005554199, + "rewards/rejected": -6.109631061553955, + "step": 695 + }, + { + "epoch": 0.11, + "learning_rate": 1.3636859795632553e-05, + "logits/chosen": -2.397657871246338, + "logits/rejected": -3.166440963745117, + "logps/chosen": -498.2428283691406, + "logps/rejected": -828.4351806640625, + "loss": 0.0509, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.188127040863037, + "rewards/margins": 4.590062141418457, + "rewards/rejected": -6.778189659118652, + "step": 696 + }, + { + "epoch": 0.11, + "learning_rate": 1.3636126355101405e-05, + "logits/chosen": -2.565474033355713, + "logits/rejected": -3.1648504734039307, + "logps/chosen": -157.4207763671875, + "logps/rejected": -216.42510986328125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.175640344619751, + "rewards/margins": 6.864535331726074, + "rewards/rejected": -8.040176391601562, + "step": 697 + }, + { + "epoch": 0.11, + "learning_rate": 1.3635392914570257e-05, + "logits/chosen": -1.6160736083984375, + "logits/rejected": -3.050171375274658, + "logps/chosen": -119.946533203125, + "logps/rejected": -319.4556579589844, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.489219307899475, + "rewards/margins": 7.108882904052734, + "rewards/rejected": -8.598102569580078, + "step": 698 + }, + { + "epoch": 0.11, + "learning_rate": 1.363465947403911e-05, + "logits/chosen": -2.958482027053833, + "logits/rejected": -2.924013614654541, + "logps/chosen": -296.4183654785156, + "logps/rejected": -568.3208618164062, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.613093614578247, + "rewards/margins": 8.339642524719238, + "rewards/rejected": -10.952735900878906, + "step": 699 + }, + { + "epoch": 0.11, + "learning_rate": 1.3633926033507962e-05, + "logits/chosen": -3.06280779838562, + "logits/rejected": -3.211756944656372, + "logps/chosen": -32.368865966796875, + "logps/rejected": -167.574951171875, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4953255653381348, + "rewards/margins": 5.75441837310791, + "rewards/rejected": -7.249744415283203, + "step": 700 + }, + { + "epoch": 0.11, + "learning_rate": 1.3633192592976816e-05, + "logits/chosen": -1.8280515670776367, + "logits/rejected": -2.968653917312622, + "logps/chosen": -73.31221008300781, + "logps/rejected": -360.49066162109375, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2503631114959717, + "rewards/margins": 5.52210807800293, + "rewards/rejected": -7.7724714279174805, + "step": 701 + }, + { + "epoch": 0.11, + "learning_rate": 1.3632459152445668e-05, + "logits/chosen": -1.8269935846328735, + "logits/rejected": -3.0322437286376953, + "logps/chosen": -37.63182830810547, + "logps/rejected": -171.78736877441406, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8209660649299622, + "rewards/margins": 5.777923583984375, + "rewards/rejected": -6.598889350891113, + "step": 702 + }, + { + "epoch": 0.11, + "learning_rate": 1.363172571191452e-05, + "logits/chosen": -2.994385004043579, + "logits/rejected": -2.5944674015045166, + "logps/chosen": -407.5289306640625, + "logps/rejected": -275.12396240234375, + "loss": 3.3351, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.581156253814697, + "rewards/margins": -1.4647647142410278, + "rewards/rejected": -3.116391658782959, + "step": 703 + }, + { + "epoch": 0.11, + "learning_rate": 1.3630992271383371e-05, + "logits/chosen": -3.0327396392822266, + "logits/rejected": -3.3579728603363037, + "logps/chosen": -374.7530822753906, + "logps/rejected": -454.9599609375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4025788307189941, + "rewards/margins": 8.020752906799316, + "rewards/rejected": -9.423332214355469, + "step": 704 + }, + { + "epoch": 0.11, + "learning_rate": 1.3630258830852223e-05, + "logits/chosen": -2.302955389022827, + "logits/rejected": -2.91117525100708, + "logps/chosen": -107.71336364746094, + "logps/rejected": -201.68734741210938, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.507460832595825, + "rewards/margins": 3.9273860454559326, + "rewards/rejected": -6.434846878051758, + "step": 705 + }, + { + "epoch": 0.11, + "learning_rate": 1.3629525390321075e-05, + "logits/chosen": -2.6374428272247314, + "logits/rejected": -3.1846866607666016, + "logps/chosen": -70.65203857421875, + "logps/rejected": -287.215087890625, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3951829671859741, + "rewards/margins": 6.152449131011963, + "rewards/rejected": -7.547632217407227, + "step": 706 + }, + { + "epoch": 0.11, + "learning_rate": 1.3628791949789927e-05, + "logits/chosen": -3.2468514442443848, + "logits/rejected": -2.784843683242798, + "logps/chosen": -188.9674530029297, + "logps/rejected": -84.85929870605469, + "loss": 0.8168, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8837347030639648, + "rewards/margins": -0.07798323035240173, + "rewards/rejected": -1.8057514429092407, + "step": 707 + }, + { + "epoch": 0.11, + "learning_rate": 1.3628058509258779e-05, + "logits/chosen": -2.8651890754699707, + "logits/rejected": -2.308152675628662, + "logps/chosen": -243.0733642578125, + "logps/rejected": -188.9992218017578, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.040759325027466, + "rewards/margins": 4.39536190032959, + "rewards/rejected": -6.436120986938477, + "step": 708 + }, + { + "epoch": 0.11, + "learning_rate": 1.362732506872763e-05, + "logits/chosen": -2.999844551086426, + "logits/rejected": -3.135815382003784, + "logps/chosen": -152.94515991210938, + "logps/rejected": -256.40032958984375, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.604682207107544, + "rewards/margins": 4.944964408874512, + "rewards/rejected": -6.549646854400635, + "step": 709 + }, + { + "epoch": 0.11, + "learning_rate": 1.3626591628196484e-05, + "logits/chosen": -1.8716744184494019, + "logits/rejected": -2.8579578399658203, + "logps/chosen": -136.7195587158203, + "logps/rejected": -383.8059387207031, + "loss": 0.1967, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7186298370361328, + "rewards/margins": 4.479336261749268, + "rewards/rejected": -6.1979660987854, + "step": 710 + }, + { + "epoch": 0.11, + "learning_rate": 1.3625858187665336e-05, + "logits/chosen": -1.53165602684021, + "logits/rejected": -3.124199628829956, + "logps/chosen": -204.096923828125, + "logps/rejected": -528.508056640625, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3874573707580566, + "rewards/margins": 4.908760070800781, + "rewards/rejected": -7.296217441558838, + "step": 711 + }, + { + "epoch": 0.11, + "learning_rate": 1.3625124747134188e-05, + "logits/chosen": -2.909669876098633, + "logits/rejected": -3.2757153511047363, + "logps/chosen": -223.46875, + "logps/rejected": -292.41790771484375, + "loss": 1.602, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.804173707962036, + "rewards/margins": 1.3674404621124268, + "rewards/rejected": -4.171614170074463, + "step": 712 + }, + { + "epoch": 0.11, + "learning_rate": 1.362439130660304e-05, + "logits/chosen": -2.6795716285705566, + "logits/rejected": -3.0583581924438477, + "logps/chosen": -299.3695373535156, + "logps/rejected": -261.1458740234375, + "loss": 0.8147, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.992988109588623, + "rewards/margins": 1.7698211669921875, + "rewards/rejected": -4.762809753417969, + "step": 713 + }, + { + "epoch": 0.11, + "learning_rate": 1.3623657866071892e-05, + "logits/chosen": -3.018770694732666, + "logits/rejected": -3.3020684719085693, + "logps/chosen": -190.16339111328125, + "logps/rejected": -280.02374267578125, + "loss": 3.0364, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.539331912994385, + "rewards/margins": 1.3652994632720947, + "rewards/rejected": -5.9046311378479, + "step": 714 + }, + { + "epoch": 0.11, + "learning_rate": 1.3622924425540744e-05, + "logits/chosen": -1.897752285003662, + "logits/rejected": -2.7591702938079834, + "logps/chosen": -156.19790649414062, + "logps/rejected": -267.2516174316406, + "loss": 0.1151, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8381156921386719, + "rewards/margins": 4.857510089874268, + "rewards/rejected": -6.695625305175781, + "step": 715 + }, + { + "epoch": 0.11, + "learning_rate": 1.3622190985009596e-05, + "logits/chosen": -1.9045692682266235, + "logits/rejected": -3.1372697353363037, + "logps/chosen": -366.4244384765625, + "logps/rejected": -501.211181640625, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6724716424942017, + "rewards/margins": 5.292248725891113, + "rewards/rejected": -6.964720249176025, + "step": 716 + }, + { + "epoch": 0.11, + "learning_rate": 1.3621457544478447e-05, + "logits/chosen": -2.7081339359283447, + "logits/rejected": -2.994905471801758, + "logps/chosen": -134.66287231445312, + "logps/rejected": -240.7359161376953, + "loss": 0.0218, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4557337760925293, + "rewards/margins": 4.030972480773926, + "rewards/rejected": -6.486705780029297, + "step": 717 + }, + { + "epoch": 0.11, + "learning_rate": 1.3620724103947301e-05, + "logits/chosen": -3.0968968868255615, + "logits/rejected": -2.7093629837036133, + "logps/chosen": -181.857421875, + "logps/rejected": -247.88641357421875, + "loss": 0.0406, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.893157958984375, + "rewards/margins": 3.4663636684417725, + "rewards/rejected": -5.359521865844727, + "step": 718 + }, + { + "epoch": 0.11, + "learning_rate": 1.3619990663416153e-05, + "logits/chosen": -2.2070319652557373, + "logits/rejected": -3.0949833393096924, + "logps/chosen": -57.258140563964844, + "logps/rejected": -227.5447998046875, + "loss": 0.0507, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9480760097503662, + "rewards/margins": 4.112516403198242, + "rewards/rejected": -6.060592174530029, + "step": 719 + }, + { + "epoch": 0.11, + "learning_rate": 1.3619257222885005e-05, + "logits/chosen": -3.1592540740966797, + "logits/rejected": -2.9373819828033447, + "logps/chosen": -320.40771484375, + "logps/rejected": -396.113037109375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.097834825515747, + "rewards/margins": 5.914435386657715, + "rewards/rejected": -8.012269973754883, + "step": 720 + }, + { + "epoch": 0.11, + "learning_rate": 1.3618523782353857e-05, + "logits/chosen": -2.6764793395996094, + "logits/rejected": -3.0508031845092773, + "logps/chosen": -105.3978042602539, + "logps/rejected": -170.07479858398438, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8346889019012451, + "rewards/margins": 3.6024045944213867, + "rewards/rejected": -5.437093734741211, + "step": 721 + }, + { + "epoch": 0.11, + "learning_rate": 1.3617790341822709e-05, + "logits/chosen": -2.6806986331939697, + "logits/rejected": -3.0177295207977295, + "logps/chosen": -270.7159423828125, + "logps/rejected": -276.2884216308594, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.188234806060791, + "rewards/margins": 4.791343688964844, + "rewards/rejected": -6.979578495025635, + "step": 722 + }, + { + "epoch": 0.11, + "learning_rate": 1.361705690129156e-05, + "logits/chosen": -2.9503843784332275, + "logits/rejected": -3.0855281352996826, + "logps/chosen": -108.93486022949219, + "logps/rejected": -267.4547424316406, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5986114740371704, + "rewards/margins": 5.171643257141113, + "rewards/rejected": -6.770254611968994, + "step": 723 + }, + { + "epoch": 0.11, + "learning_rate": 1.3616323460760412e-05, + "logits/chosen": -2.708075761795044, + "logits/rejected": -3.1742453575134277, + "logps/chosen": -115.5200424194336, + "logps/rejected": -302.49871826171875, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8395071029663086, + "rewards/margins": 5.575664520263672, + "rewards/rejected": -7.4151716232299805, + "step": 724 + }, + { + "epoch": 0.11, + "learning_rate": 1.3615590020229264e-05, + "logits/chosen": -3.052938222885132, + "logits/rejected": -2.733625888824463, + "logps/chosen": -189.53916931152344, + "logps/rejected": -280.6137390136719, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.678266167640686, + "rewards/margins": 4.958371162414551, + "rewards/rejected": -6.636637210845947, + "step": 725 + }, + { + "epoch": 0.11, + "learning_rate": 1.3614856579698116e-05, + "logits/chosen": -2.961034059524536, + "logits/rejected": -2.222576379776001, + "logps/chosen": -264.5697021484375, + "logps/rejected": -86.39826202392578, + "loss": 7.0209, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.201177597045898, + "rewards/margins": -7.010911464691162, + "rewards/rejected": -1.1902663707733154, + "step": 726 + }, + { + "epoch": 0.11, + "learning_rate": 1.361412313916697e-05, + "logits/chosen": -3.1756532192230225, + "logits/rejected": -3.090752363204956, + "logps/chosen": -256.49261474609375, + "logps/rejected": -120.81342315673828, + "loss": 3.1438, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.257852077484131, + "rewards/margins": -2.630943775177002, + "rewards/rejected": -2.626908302307129, + "step": 727 + }, + { + "epoch": 0.11, + "learning_rate": 1.3613389698635822e-05, + "logits/chosen": -2.336059093475342, + "logits/rejected": -3.243701934814453, + "logps/chosen": -144.98106384277344, + "logps/rejected": -216.4014892578125, + "loss": 1.6447, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8095145225524902, + "rewards/margins": 1.6365468502044678, + "rewards/rejected": -5.446061134338379, + "step": 728 + }, + { + "epoch": 0.11, + "learning_rate": 1.3612656258104673e-05, + "logits/chosen": -1.4737789630889893, + "logits/rejected": -2.9059994220733643, + "logps/chosen": -94.2584228515625, + "logps/rejected": -415.81884765625, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6576824188232422, + "rewards/margins": 6.449153900146484, + "rewards/rejected": -8.106836318969727, + "step": 729 + }, + { + "epoch": 0.11, + "learning_rate": 1.3611922817573525e-05, + "logits/chosen": -2.397970676422119, + "logits/rejected": -2.9009451866149902, + "logps/chosen": -130.3492431640625, + "logps/rejected": -213.87991333007812, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.101404905319214, + "rewards/margins": 5.4727091789245605, + "rewards/rejected": -7.574113845825195, + "step": 730 + }, + { + "epoch": 0.11, + "learning_rate": 1.3611189377042377e-05, + "logits/chosen": -1.7677001953125, + "logits/rejected": -2.677987813949585, + "logps/chosen": -160.9908447265625, + "logps/rejected": -265.7969665527344, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.302046775817871, + "rewards/margins": 4.925952434539795, + "rewards/rejected": -7.227999210357666, + "step": 731 + }, + { + "epoch": 0.11, + "learning_rate": 1.3610455936511229e-05, + "logits/chosen": -0.9077907800674438, + "logits/rejected": -2.1282429695129395, + "logps/chosen": -79.6927261352539, + "logps/rejected": -325.0904846191406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8424834609031677, + "rewards/margins": 9.016862869262695, + "rewards/rejected": -9.859346389770508, + "step": 732 + }, + { + "epoch": 0.11, + "learning_rate": 1.3609722495980083e-05, + "logits/chosen": -3.2161247730255127, + "logits/rejected": -2.733600616455078, + "logps/chosen": -477.89776611328125, + "logps/rejected": -166.23782348632812, + "loss": 1.3353, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.814910888671875, + "rewards/margins": 0.40838778018951416, + "rewards/rejected": -4.223299026489258, + "step": 733 + }, + { + "epoch": 0.11, + "learning_rate": 1.3608989055448935e-05, + "logits/chosen": -2.719134569168091, + "logits/rejected": -3.1268885135650635, + "logps/chosen": -165.63226318359375, + "logps/rejected": -296.9040832519531, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0722942352294922, + "rewards/margins": 6.56038761138916, + "rewards/rejected": -7.632681846618652, + "step": 734 + }, + { + "epoch": 0.11, + "learning_rate": 1.3608255614917786e-05, + "logits/chosen": -3.2435760498046875, + "logits/rejected": -2.903964042663574, + "logps/chosen": -52.26857376098633, + "logps/rejected": -98.04254150390625, + "loss": 0.0604, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6106570959091187, + "rewards/margins": 3.9673705101013184, + "rewards/rejected": -5.578027725219727, + "step": 735 + }, + { + "epoch": 0.11, + "learning_rate": 1.360752217438664e-05, + "logits/chosen": -2.9164187908172607, + "logits/rejected": -3.061886787414551, + "logps/chosen": -74.33151245117188, + "logps/rejected": -275.0765380859375, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.225585699081421, + "rewards/margins": 4.497384071350098, + "rewards/rejected": -6.722970008850098, + "step": 736 + }, + { + "epoch": 0.11, + "learning_rate": 1.3606788733855492e-05, + "logits/chosen": -3.019143581390381, + "logits/rejected": -2.425581932067871, + "logps/chosen": -1111.4453125, + "logps/rejected": -860.699462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6646484136581421, + "rewards/margins": 11.66184139251709, + "rewards/rejected": -12.326489448547363, + "step": 737 + }, + { + "epoch": 0.11, + "learning_rate": 1.3606055293324344e-05, + "logits/chosen": -2.3388073444366455, + "logits/rejected": -2.969799041748047, + "logps/chosen": -111.92892456054688, + "logps/rejected": -261.84234619140625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3562235832214355, + "rewards/margins": 6.185622692108154, + "rewards/rejected": -8.54184627532959, + "step": 738 + }, + { + "epoch": 0.11, + "learning_rate": 1.3605321852793196e-05, + "logits/chosen": -2.655219078063965, + "logits/rejected": -3.255007028579712, + "logps/chosen": -146.15060424804688, + "logps/rejected": -411.54461669921875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.02335262298584, + "rewards/margins": 7.245641708374023, + "rewards/rejected": -9.268994331359863, + "step": 739 + }, + { + "epoch": 0.12, + "learning_rate": 1.3604588412262047e-05, + "logits/chosen": -2.9784724712371826, + "logits/rejected": -3.095690965652466, + "logps/chosen": -622.0779418945312, + "logps/rejected": -972.9514770507812, + "loss": 3.994, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.383780002593994, + "rewards/margins": 0.44013381004333496, + "rewards/rejected": -4.82391357421875, + "step": 740 + }, + { + "epoch": 0.12, + "learning_rate": 1.36038549717309e-05, + "logits/chosen": -2.570176362991333, + "logits/rejected": -2.9347634315490723, + "logps/chosen": -349.80126953125, + "logps/rejected": -398.7441101074219, + "loss": 3.3949, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.1535844802856445, + "rewards/margins": -1.1137452125549316, + "rewards/rejected": -6.039839267730713, + "step": 741 + }, + { + "epoch": 0.12, + "learning_rate": 1.3603121531199751e-05, + "logits/chosen": -2.9127235412597656, + "logits/rejected": -3.089822769165039, + "logps/chosen": -49.441471099853516, + "logps/rejected": -142.5437774658203, + "loss": 0.1184, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.956100583076477, + "rewards/margins": 4.081521034240723, + "rewards/rejected": -6.03762149810791, + "step": 742 + }, + { + "epoch": 0.12, + "learning_rate": 1.3602388090668603e-05, + "logits/chosen": -1.5530871152877808, + "logits/rejected": -2.952270746231079, + "logps/chosen": -79.05931091308594, + "logps/rejected": -258.1224670410156, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3969695568084717, + "rewards/margins": 7.018586158752441, + "rewards/rejected": -7.415555953979492, + "step": 743 + }, + { + "epoch": 0.12, + "learning_rate": 1.3601654650137455e-05, + "logits/chosen": -3.2012746334075928, + "logits/rejected": -3.140827178955078, + "logps/chosen": -415.5125732421875, + "logps/rejected": -244.8551025390625, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4987953901290894, + "rewards/margins": 5.571514129638672, + "rewards/rejected": -7.070309638977051, + "step": 744 + }, + { + "epoch": 0.12, + "learning_rate": 1.3600921209606309e-05, + "logits/chosen": -2.6865909099578857, + "logits/rejected": -3.0549094676971436, + "logps/chosen": -510.5403747558594, + "logps/rejected": -404.77435302734375, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6592986583709717, + "rewards/margins": 5.409289836883545, + "rewards/rejected": -8.068588256835938, + "step": 745 + }, + { + "epoch": 0.12, + "learning_rate": 1.360018776907516e-05, + "logits/chosen": -3.145824909210205, + "logits/rejected": -2.8804128170013428, + "logps/chosen": -280.38848876953125, + "logps/rejected": -263.793701171875, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.844825029373169, + "rewards/margins": 4.153092384338379, + "rewards/rejected": -6.997917175292969, + "step": 746 + }, + { + "epoch": 0.12, + "learning_rate": 1.3599454328544012e-05, + "logits/chosen": -2.5436787605285645, + "logits/rejected": -3.203115940093994, + "logps/chosen": -360.76043701171875, + "logps/rejected": -630.4730834960938, + "loss": 2.9119, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.80610466003418, + "rewards/margins": -0.45664381980895996, + "rewards/rejected": -4.349460601806641, + "step": 747 + }, + { + "epoch": 0.12, + "learning_rate": 1.3598720888012864e-05, + "logits/chosen": -1.7956820726394653, + "logits/rejected": -3.066174030303955, + "logps/chosen": -176.86038208007812, + "logps/rejected": -374.4722900390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6741267442703247, + "rewards/margins": 8.011783599853516, + "rewards/rejected": -9.68591022491455, + "step": 748 + }, + { + "epoch": 0.12, + "learning_rate": 1.3597987447481716e-05, + "logits/chosen": -3.0581588745117188, + "logits/rejected": -2.9487457275390625, + "logps/chosen": -218.4246826171875, + "logps/rejected": -94.85386657714844, + "loss": 4.3698, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.747041702270508, + "rewards/margins": -4.356352806091309, + "rewards/rejected": -2.390688896179199, + "step": 749 + }, + { + "epoch": 0.12, + "learning_rate": 1.3597254006950568e-05, + "logits/chosen": -3.1499931812286377, + "logits/rejected": -3.268418073654175, + "logps/chosen": -51.10933303833008, + "logps/rejected": -183.07586669921875, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7102112770080566, + "rewards/margins": 4.831571578979492, + "rewards/rejected": -6.541782855987549, + "step": 750 + }, + { + "epoch": 0.12, + "learning_rate": 1.359652056641942e-05, + "logits/chosen": -3.065908908843994, + "logits/rejected": -2.5342202186584473, + "logps/chosen": -452.5474548339844, + "logps/rejected": -547.2565307617188, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6167476177215576, + "rewards/margins": 4.919337749481201, + "rewards/rejected": -6.536085605621338, + "step": 751 + }, + { + "epoch": 0.12, + "learning_rate": 1.3595787125888272e-05, + "logits/chosen": -3.028773784637451, + "logits/rejected": -1.9593194723129272, + "logps/chosen": -319.7934875488281, + "logps/rejected": -185.9302978515625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3417707681655884, + "rewards/margins": 6.564620018005371, + "rewards/rejected": -7.90639066696167, + "step": 752 + }, + { + "epoch": 0.12, + "learning_rate": 1.3595053685357124e-05, + "logits/chosen": -2.8973820209503174, + "logits/rejected": -3.2832911014556885, + "logps/chosen": -40.85547637939453, + "logps/rejected": -240.42681884765625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7502018809318542, + "rewards/margins": 5.561142921447754, + "rewards/rejected": -6.311344623565674, + "step": 753 + }, + { + "epoch": 0.12, + "learning_rate": 1.3594320244825977e-05, + "logits/chosen": -2.0089852809906006, + "logits/rejected": -3.186033010482788, + "logps/chosen": -97.29618835449219, + "logps/rejected": -297.8473815917969, + "loss": 0.4915, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.161525011062622, + "rewards/margins": 2.244795799255371, + "rewards/rejected": -5.406320571899414, + "step": 754 + }, + { + "epoch": 0.12, + "learning_rate": 1.3593586804294829e-05, + "logits/chosen": -3.019643545150757, + "logits/rejected": -2.8844099044799805, + "logps/chosen": -122.71027374267578, + "logps/rejected": -124.54597473144531, + "loss": 0.9353, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.768979072570801, + "rewards/margins": 2.5218427181243896, + "rewards/rejected": -5.290821552276611, + "step": 755 + }, + { + "epoch": 0.12, + "learning_rate": 1.3592853363763681e-05, + "logits/chosen": -3.050192356109619, + "logits/rejected": -2.78413462638855, + "logps/chosen": -360.1427001953125, + "logps/rejected": -155.87525939941406, + "loss": 1.5736, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8010759353637695, + "rewards/margins": 0.619612455368042, + "rewards/rejected": -3.4206883907318115, + "step": 756 + }, + { + "epoch": 0.12, + "learning_rate": 1.3592119923232533e-05, + "logits/chosen": -0.6179492473602295, + "logits/rejected": -1.7360057830810547, + "logps/chosen": -38.07359313964844, + "logps/rejected": -448.1778259277344, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3310640752315521, + "rewards/margins": 7.2921905517578125, + "rewards/rejected": -7.623254776000977, + "step": 757 + }, + { + "epoch": 0.12, + "learning_rate": 1.3591386482701385e-05, + "logits/chosen": -3.0310537815093994, + "logits/rejected": -2.6989729404449463, + "logps/chosen": -391.20111083984375, + "logps/rejected": -324.98809814453125, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25731760263442993, + "rewards/margins": 5.989706993103027, + "rewards/rejected": -6.2470245361328125, + "step": 758 + }, + { + "epoch": 0.12, + "learning_rate": 1.3590653042170237e-05, + "logits/chosen": -2.7924952507019043, + "logits/rejected": -3.1427414417266846, + "logps/chosen": -139.78524780273438, + "logps/rejected": -140.6404266357422, + "loss": 2.6395, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8289310932159424, + "rewards/margins": 1.5453808307647705, + "rewards/rejected": -5.374311923980713, + "step": 759 + }, + { + "epoch": 0.12, + "learning_rate": 1.3589919601639088e-05, + "logits/chosen": -1.7111469507217407, + "logits/rejected": -2.802109718322754, + "logps/chosen": -43.42923355102539, + "logps/rejected": -365.2419738769531, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6952158212661743, + "rewards/margins": 8.320149421691895, + "rewards/rejected": -9.015365600585938, + "step": 760 + }, + { + "epoch": 0.12, + "learning_rate": 1.358918616110794e-05, + "logits/chosen": -2.4186453819274902, + "logits/rejected": -3.2048864364624023, + "logps/chosen": -413.9666748046875, + "logps/rejected": -501.802490234375, + "loss": 0.0243, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9194116592407227, + "rewards/margins": 5.305083751678467, + "rewards/rejected": -8.224494934082031, + "step": 761 + }, + { + "epoch": 0.12, + "learning_rate": 1.3588452720576792e-05, + "logits/chosen": -1.9383714199066162, + "logits/rejected": -2.9192187786102295, + "logps/chosen": -109.61971282958984, + "logps/rejected": -241.03099060058594, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2236378192901611, + "rewards/margins": 7.247008323669434, + "rewards/rejected": -8.470645904541016, + "step": 762 + }, + { + "epoch": 0.12, + "learning_rate": 1.3587719280045646e-05, + "logits/chosen": -3.1824803352355957, + "logits/rejected": -2.9402990341186523, + "logps/chosen": -279.0486755371094, + "logps/rejected": -82.78312683105469, + "loss": 4.7902, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.559541702270508, + "rewards/margins": -3.6797938346862793, + "rewards/rejected": -3.8797478675842285, + "step": 763 + }, + { + "epoch": 0.12, + "learning_rate": 1.3586985839514498e-05, + "logits/chosen": -3.272117853164673, + "logits/rejected": -2.425506353378296, + "logps/chosen": -175.96868896484375, + "logps/rejected": -57.92021179199219, + "loss": 4.3466, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.54640531539917, + "rewards/margins": -2.5540645122528076, + "rewards/rejected": -2.992340564727783, + "step": 764 + }, + { + "epoch": 0.12, + "learning_rate": 1.358625239898335e-05, + "logits/chosen": -3.054176092147827, + "logits/rejected": -2.3438055515289307, + "logps/chosen": -144.98414611816406, + "logps/rejected": -188.65264892578125, + "loss": 0.751, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.419785499572754, + "rewards/margins": 1.687070369720459, + "rewards/rejected": -4.106855869293213, + "step": 765 + }, + { + "epoch": 0.12, + "learning_rate": 1.3585518958452201e-05, + "logits/chosen": -2.9970388412475586, + "logits/rejected": -3.1226613521575928, + "logps/chosen": -72.7497329711914, + "logps/rejected": -153.80723571777344, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8620786666870117, + "rewards/margins": 7.414614677429199, + "rewards/rejected": -9.276693344116211, + "step": 766 + }, + { + "epoch": 0.12, + "learning_rate": 1.3584785517921053e-05, + "logits/chosen": -3.103180408477783, + "logits/rejected": -2.930652618408203, + "logps/chosen": -177.19454956054688, + "logps/rejected": -72.99114227294922, + "loss": 4.618, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.500460624694824, + "rewards/margins": -4.6080522537231445, + "rewards/rejected": -1.8924086093902588, + "step": 767 + }, + { + "epoch": 0.12, + "learning_rate": 1.3584052077389907e-05, + "logits/chosen": -3.1048102378845215, + "logits/rejected": -2.802426815032959, + "logps/chosen": -456.09375, + "logps/rejected": -304.2204284667969, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.641774296760559, + "rewards/margins": 5.746769428253174, + "rewards/rejected": -7.388544082641602, + "step": 768 + }, + { + "epoch": 0.12, + "learning_rate": 1.3583318636858759e-05, + "logits/chosen": -2.9329633712768555, + "logits/rejected": -3.031559467315674, + "logps/chosen": -401.5271301269531, + "logps/rejected": -442.32171630859375, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1944606304168701, + "rewards/margins": 5.657041072845459, + "rewards/rejected": -6.85150146484375, + "step": 769 + }, + { + "epoch": 0.12, + "learning_rate": 1.358258519632761e-05, + "logits/chosen": -2.877713680267334, + "logits/rejected": -3.111079216003418, + "logps/chosen": -34.73942184448242, + "logps/rejected": -154.21107482910156, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7305262088775635, + "rewards/margins": 6.274757385253906, + "rewards/rejected": -8.00528335571289, + "step": 770 + }, + { + "epoch": 0.12, + "learning_rate": 1.3581851755796462e-05, + "logits/chosen": -2.983104944229126, + "logits/rejected": -1.2126483917236328, + "logps/chosen": -403.40972900390625, + "logps/rejected": -283.3551025390625, + "loss": 3.3108, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7759971618652344, + "rewards/margins": 0.972912073135376, + "rewards/rejected": -4.748908996582031, + "step": 771 + }, + { + "epoch": 0.12, + "learning_rate": 1.3581118315265316e-05, + "logits/chosen": -2.8185577392578125, + "logits/rejected": -3.11539888381958, + "logps/chosen": -76.93763732910156, + "logps/rejected": -259.6943359375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.439334511756897, + "rewards/margins": 6.581804275512695, + "rewards/rejected": -8.021139144897461, + "step": 772 + }, + { + "epoch": 0.12, + "learning_rate": 1.3580384874734168e-05, + "logits/chosen": -2.3835229873657227, + "logits/rejected": -2.776801586151123, + "logps/chosen": -121.16381072998047, + "logps/rejected": -290.8045654296875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.286848783493042, + "rewards/margins": 6.420062065124512, + "rewards/rejected": -7.706910610198975, + "step": 773 + }, + { + "epoch": 0.12, + "learning_rate": 1.357965143420302e-05, + "logits/chosen": -2.2701714038848877, + "logits/rejected": -2.432309627532959, + "logps/chosen": -487.35394287109375, + "logps/rejected": -507.688720703125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28261566162109375, + "rewards/margins": 5.8416428565979, + "rewards/rejected": -5.559027194976807, + "step": 774 + }, + { + "epoch": 0.12, + "learning_rate": 1.3578917993671872e-05, + "logits/chosen": -2.9877631664276123, + "logits/rejected": -3.244218587875366, + "logps/chosen": -115.14315032958984, + "logps/rejected": -280.9347839355469, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1511626243591309, + "rewards/margins": 4.877101898193359, + "rewards/rejected": -6.028264999389648, + "step": 775 + }, + { + "epoch": 0.12, + "learning_rate": 1.3578184553140724e-05, + "logits/chosen": -3.1906888484954834, + "logits/rejected": -3.04707407951355, + "logps/chosen": -99.84645080566406, + "logps/rejected": -112.2783432006836, + "loss": 1.684, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7616071701049805, + "rewards/margins": -0.818085253238678, + "rewards/rejected": -1.9435219764709473, + "step": 776 + }, + { + "epoch": 0.12, + "learning_rate": 1.3577451112609575e-05, + "logits/chosen": -2.3436992168426514, + "logits/rejected": -3.025322437286377, + "logps/chosen": -451.20086669921875, + "logps/rejected": -570.4427490234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18128661811351776, + "rewards/margins": 8.565680503845215, + "rewards/rejected": -8.746967315673828, + "step": 777 + }, + { + "epoch": 0.12, + "learning_rate": 1.3576717672078427e-05, + "logits/chosen": -1.6473342180252075, + "logits/rejected": -2.935242176055908, + "logps/chosen": -67.72840881347656, + "logps/rejected": -239.46182250976562, + "loss": 0.0371, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7942004203796387, + "rewards/margins": 4.099061489105225, + "rewards/rejected": -5.893261909484863, + "step": 778 + }, + { + "epoch": 0.12, + "learning_rate": 1.357598423154728e-05, + "logits/chosen": -1.6825470924377441, + "logits/rejected": -3.021742820739746, + "logps/chosen": -172.34664916992188, + "logps/rejected": -343.9667053222656, + "loss": 2.0679, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.694810390472412, + "rewards/margins": 3.5286662578582764, + "rewards/rejected": -7.223476409912109, + "step": 779 + }, + { + "epoch": 0.12, + "learning_rate": 1.3575250791016131e-05, + "logits/chosen": -2.8227779865264893, + "logits/rejected": -3.0767364501953125, + "logps/chosen": -382.1719970703125, + "logps/rejected": -307.9638671875, + "loss": 0.0539, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3960785865783691, + "rewards/margins": 4.136833190917969, + "rewards/rejected": -5.532911777496338, + "step": 780 + }, + { + "epoch": 0.12, + "learning_rate": 1.3574517350484985e-05, + "logits/chosen": -1.6837537288665771, + "logits/rejected": -3.1123857498168945, + "logps/chosen": -112.07748413085938, + "logps/rejected": -323.67962646484375, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.012248992919922, + "rewards/margins": 5.992252826690674, + "rewards/rejected": -8.004502296447754, + "step": 781 + }, + { + "epoch": 0.12, + "learning_rate": 1.3573783909953837e-05, + "logits/chosen": -2.7708606719970703, + "logits/rejected": -3.211901903152466, + "logps/chosen": -126.3958511352539, + "logps/rejected": -281.18255615234375, + "loss": 0.1662, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.110795259475708, + "rewards/margins": 2.772623300552368, + "rewards/rejected": -4.883418560028076, + "step": 782 + }, + { + "epoch": 0.12, + "learning_rate": 1.3573050469422688e-05, + "logits/chosen": -3.2218756675720215, + "logits/rejected": -2.475933790206909, + "logps/chosen": -250.3980712890625, + "logps/rejected": -378.1069030761719, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.130403757095337, + "rewards/margins": 6.5936384201049805, + "rewards/rejected": -7.724041938781738, + "step": 783 + }, + { + "epoch": 0.12, + "learning_rate": 1.357231702889154e-05, + "logits/chosen": -2.7350995540618896, + "logits/rejected": -3.080040693283081, + "logps/chosen": -132.04600524902344, + "logps/rejected": -287.3489990234375, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6745600700378418, + "rewards/margins": 4.52200984954834, + "rewards/rejected": -6.196569919586182, + "step": 784 + }, + { + "epoch": 0.12, + "learning_rate": 1.3571583588360392e-05, + "logits/chosen": -3.1320066452026367, + "logits/rejected": -2.500650644302368, + "logps/chosen": -463.8009948730469, + "logps/rejected": -327.624755859375, + "loss": 0.0332, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6100265979766846, + "rewards/margins": 3.564194917678833, + "rewards/rejected": -5.174221515655518, + "step": 785 + }, + { + "epoch": 0.12, + "learning_rate": 1.3570850147829244e-05, + "logits/chosen": -3.249206781387329, + "logits/rejected": -3.018143892288208, + "logps/chosen": -158.84225463867188, + "logps/rejected": -252.74722290039062, + "loss": 1.1951, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8953566551208496, + "rewards/margins": 1.1123247146606445, + "rewards/rejected": -4.007681369781494, + "step": 786 + }, + { + "epoch": 0.12, + "learning_rate": 1.3570116707298096e-05, + "logits/chosen": -3.1120262145996094, + "logits/rejected": -2.1388285160064697, + "logps/chosen": -451.411376953125, + "logps/rejected": -322.28411865234375, + "loss": 2.6026, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1954238414764404, + "rewards/margins": 0.3853142261505127, + "rewards/rejected": -3.580738067626953, + "step": 787 + }, + { + "epoch": 0.12, + "learning_rate": 1.3569383266766948e-05, + "logits/chosen": -3.2768399715423584, + "logits/rejected": -2.825684070587158, + "logps/chosen": -202.0099639892578, + "logps/rejected": -175.7904052734375, + "loss": 2.0743, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9667959213256836, + "rewards/margins": 0.7447366714477539, + "rewards/rejected": -3.7115325927734375, + "step": 788 + }, + { + "epoch": 0.12, + "learning_rate": 1.35686498262358e-05, + "logits/chosen": -2.584474802017212, + "logits/rejected": -2.9669909477233887, + "logps/chosen": -124.78407287597656, + "logps/rejected": -315.8648681640625, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8729063272476196, + "rewards/margins": 4.359999656677246, + "rewards/rejected": -5.232905387878418, + "step": 789 + }, + { + "epoch": 0.12, + "learning_rate": 1.3567916385704653e-05, + "logits/chosen": -3.071800470352173, + "logits/rejected": -2.3855624198913574, + "logps/chosen": -321.25128173828125, + "logps/rejected": -286.3349914550781, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.290853500366211, + "rewards/margins": 4.546663284301758, + "rewards/rejected": -5.837516784667969, + "step": 790 + }, + { + "epoch": 0.12, + "learning_rate": 1.3567182945173505e-05, + "logits/chosen": -3.0111472606658936, + "logits/rejected": -2.4227309226989746, + "logps/chosen": -318.3901672363281, + "logps/rejected": -277.168212890625, + "loss": 0.0695, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3893463611602783, + "rewards/margins": 3.014151096343994, + "rewards/rejected": -4.403497219085693, + "step": 791 + }, + { + "epoch": 0.12, + "learning_rate": 1.3566449504642357e-05, + "logits/chosen": -0.6861141920089722, + "logits/rejected": -2.5915379524230957, + "logps/chosen": -75.59441375732422, + "logps/rejected": -364.5035400390625, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8360023498535156, + "rewards/margins": 5.006896018981934, + "rewards/rejected": -5.842898368835449, + "step": 792 + }, + { + "epoch": 0.12, + "learning_rate": 1.3565716064111209e-05, + "logits/chosen": -3.002706289291382, + "logits/rejected": -1.289991021156311, + "logps/chosen": -827.3475341796875, + "logps/rejected": -335.25445556640625, + "loss": 0.0686, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7453984022140503, + "rewards/margins": 3.3460798263549805, + "rewards/rejected": -5.09147834777832, + "step": 793 + }, + { + "epoch": 0.12, + "learning_rate": 1.356498262358006e-05, + "logits/chosen": -3.0426793098449707, + "logits/rejected": -3.2957522869110107, + "logps/chosen": -190.29547119140625, + "logps/rejected": -205.86489868164062, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6337788105010986, + "rewards/margins": 4.7376861572265625, + "rewards/rejected": -6.37146520614624, + "step": 794 + }, + { + "epoch": 0.12, + "learning_rate": 1.3564249183048913e-05, + "logits/chosen": -0.9766989350318909, + "logits/rejected": -2.9045965671539307, + "logps/chosen": -130.6246337890625, + "logps/rejected": -540.9398193359375, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.058269500732422, + "rewards/margins": 5.2679057121276855, + "rewards/rejected": -7.326174736022949, + "step": 795 + }, + { + "epoch": 0.12, + "learning_rate": 1.3563515742517765e-05, + "logits/chosen": -3.2560744285583496, + "logits/rejected": -2.354905128479004, + "logps/chosen": -573.8976440429688, + "logps/rejected": -308.39312744140625, + "loss": 1.9614, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0425689220428467, + "rewards/margins": -0.11191689968109131, + "rewards/rejected": -2.930651903152466, + "step": 796 + }, + { + "epoch": 0.12, + "learning_rate": 1.3562782301986616e-05, + "logits/chosen": -2.2447025775909424, + "logits/rejected": -2.9805428981781006, + "logps/chosen": -135.4912872314453, + "logps/rejected": -445.5499267578125, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6347503662109375, + "rewards/margins": 9.05239486694336, + "rewards/rejected": -11.68714427947998, + "step": 797 + }, + { + "epoch": 0.12, + "learning_rate": 1.3562048861455468e-05, + "logits/chosen": -1.4476467370986938, + "logits/rejected": -3.0297622680664062, + "logps/chosen": -370.1444091796875, + "logps/rejected": -392.51763916015625, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.964188814163208, + "rewards/margins": 4.984358787536621, + "rewards/rejected": -5.94854736328125, + "step": 798 + }, + { + "epoch": 0.12, + "learning_rate": 1.3561315420924322e-05, + "logits/chosen": -3.10461688041687, + "logits/rejected": -2.9954919815063477, + "logps/chosen": -466.28863525390625, + "logps/rejected": -251.42344665527344, + "loss": 3.5776, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.028273582458496, + "rewards/margins": -0.8850736618041992, + "rewards/rejected": -4.143200397491455, + "step": 799 + }, + { + "epoch": 0.12, + "learning_rate": 1.3560581980393174e-05, + "logits/chosen": -2.5484414100646973, + "logits/rejected": -3.122560739517212, + "logps/chosen": -95.13434600830078, + "logps/rejected": -164.15792846679688, + "loss": 0.0594, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8147964477539062, + "rewards/margins": 3.8231542110443115, + "rewards/rejected": -5.637950897216797, + "step": 800 + }, + { + "epoch": 0.12, + "learning_rate": 1.3559848539862026e-05, + "logits/chosen": -2.6122806072235107, + "logits/rejected": -3.1330296993255615, + "logps/chosen": -300.1451110839844, + "logps/rejected": -417.47564697265625, + "loss": 3.5041, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.809527397155762, + "rewards/margins": -0.39479827880859375, + "rewards/rejected": -4.414729595184326, + "step": 801 + }, + { + "epoch": 0.12, + "learning_rate": 1.355911509933088e-05, + "logits/chosen": -3.0748982429504395, + "logits/rejected": -2.611595869064331, + "logps/chosen": -144.76119995117188, + "logps/rejected": -226.18753051757812, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0265259742736816, + "rewards/margins": 4.6252946853637695, + "rewards/rejected": -6.651820659637451, + "step": 802 + }, + { + "epoch": 0.12, + "learning_rate": 1.3558381658799731e-05, + "logits/chosen": -3.1959590911865234, + "logits/rejected": -2.9793646335601807, + "logps/chosen": -218.12109375, + "logps/rejected": -212.31838989257812, + "loss": 3.539, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.032627582550049, + "rewards/margins": -1.3067994117736816, + "rewards/rejected": -3.725828170776367, + "step": 803 + }, + { + "epoch": 0.13, + "learning_rate": 1.3557648218268583e-05, + "logits/chosen": -2.119037389755249, + "logits/rejected": -3.0088117122650146, + "logps/chosen": -210.5069122314453, + "logps/rejected": -279.28985595703125, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.152673363685608, + "rewards/margins": 4.6488356590271, + "rewards/rejected": -5.801508903503418, + "step": 804 + }, + { + "epoch": 0.13, + "learning_rate": 1.3556914777737435e-05, + "logits/chosen": -3.150696277618408, + "logits/rejected": -3.149850845336914, + "logps/chosen": -119.23744201660156, + "logps/rejected": -279.04052734375, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.056230068206787, + "rewards/margins": 4.909871578216553, + "rewards/rejected": -6.96610164642334, + "step": 805 + }, + { + "epoch": 0.13, + "learning_rate": 1.3556181337206287e-05, + "logits/chosen": -1.6339170932769775, + "logits/rejected": -2.8598215579986572, + "logps/chosen": -186.20599365234375, + "logps/rejected": -415.5313720703125, + "loss": 3.6095, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.147932052612305, + "rewards/margins": -0.6451215744018555, + "rewards/rejected": -4.502810478210449, + "step": 806 + }, + { + "epoch": 0.13, + "learning_rate": 1.355544789667514e-05, + "logits/chosen": -3.2754404544830322, + "logits/rejected": -3.2402374744415283, + "logps/chosen": -303.8418884277344, + "logps/rejected": -206.96241760253906, + "loss": 2.5311, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.188465118408203, + "rewards/margins": -0.5280245542526245, + "rewards/rejected": -2.660440444946289, + "step": 807 + }, + { + "epoch": 0.13, + "learning_rate": 1.3554714456143992e-05, + "logits/chosen": -1.664503574371338, + "logits/rejected": -2.848510265350342, + "logps/chosen": -88.33676147460938, + "logps/rejected": -253.6476593017578, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0000486373901367, + "rewards/margins": 5.75696325302124, + "rewards/rejected": -7.757011413574219, + "step": 808 + }, + { + "epoch": 0.13, + "learning_rate": 1.3553981015612844e-05, + "logits/chosen": -3.2327539920806885, + "logits/rejected": -3.01586651802063, + "logps/chosen": -409.874755859375, + "logps/rejected": -262.6158447265625, + "loss": 0.053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8402597904205322, + "rewards/margins": 3.7533724308013916, + "rewards/rejected": -5.593632221221924, + "step": 809 + }, + { + "epoch": 0.13, + "learning_rate": 1.3553247575081696e-05, + "logits/chosen": -3.2541730403900146, + "logits/rejected": -2.5330379009246826, + "logps/chosen": -628.8211669921875, + "logps/rejected": -547.2007446289062, + "loss": 4.6493, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.953636169433594, + "rewards/margins": -4.5755767822265625, + "rewards/rejected": -1.3780593872070312, + "step": 810 + }, + { + "epoch": 0.13, + "learning_rate": 1.3552514134550548e-05, + "logits/chosen": -3.023336887359619, + "logits/rejected": -2.6397995948791504, + "logps/chosen": -263.05242919921875, + "logps/rejected": -317.93023681640625, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.552727222442627, + "rewards/margins": 6.095338344573975, + "rewards/rejected": -7.648065567016602, + "step": 811 + }, + { + "epoch": 0.13, + "learning_rate": 1.35517806940194e-05, + "logits/chosen": -2.837862491607666, + "logits/rejected": -2.93005108833313, + "logps/chosen": -470.46783447265625, + "logps/rejected": -526.1090087890625, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7370292544364929, + "rewards/margins": 4.471416473388672, + "rewards/rejected": -5.2084455490112305, + "step": 812 + }, + { + "epoch": 0.13, + "learning_rate": 1.3551047253488252e-05, + "logits/chosen": -2.4162516593933105, + "logits/rejected": -3.0384061336517334, + "logps/chosen": -165.94021606445312, + "logps/rejected": -255.86573791503906, + "loss": 0.0768, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.63262140750885, + "rewards/margins": 3.8362996578216553, + "rewards/rejected": -5.468921184539795, + "step": 813 + }, + { + "epoch": 0.13, + "learning_rate": 1.3550313812957103e-05, + "logits/chosen": -3.1192808151245117, + "logits/rejected": -1.629333257675171, + "logps/chosen": -441.81573486328125, + "logps/rejected": -338.32666015625, + "loss": 1.8312, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4539520740509033, + "rewards/margins": 3.1946897506713867, + "rewards/rejected": -6.648642063140869, + "step": 814 + }, + { + "epoch": 0.13, + "learning_rate": 1.3549580372425955e-05, + "logits/chosen": -3.1037044525146484, + "logits/rejected": -2.0196096897125244, + "logps/chosen": -201.64874267578125, + "logps/rejected": -124.2532958984375, + "loss": 4.0851, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.875535011291504, + "rewards/margins": -2.5736916065216064, + "rewards/rejected": -3.3018434047698975, + "step": 815 + }, + { + "epoch": 0.13, + "learning_rate": 1.3548846931894809e-05, + "logits/chosen": -2.4348819255828857, + "logits/rejected": -3.054917573928833, + "logps/chosen": -141.4295196533203, + "logps/rejected": -344.6048278808594, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8426957130432129, + "rewards/margins": 6.846774101257324, + "rewards/rejected": -7.689469814300537, + "step": 816 + }, + { + "epoch": 0.13, + "learning_rate": 1.354811349136366e-05, + "logits/chosen": -2.8663270473480225, + "logits/rejected": -3.24173903465271, + "logps/chosen": -306.6064453125, + "logps/rejected": -508.9228210449219, + "loss": 0.069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1374831199645996, + "rewards/margins": 3.0839684009552, + "rewards/rejected": -4.221451759338379, + "step": 817 + }, + { + "epoch": 0.13, + "learning_rate": 1.3547380050832513e-05, + "logits/chosen": -1.7049589157104492, + "logits/rejected": -2.631648302078247, + "logps/chosen": -177.85850524902344, + "logps/rejected": -229.49684143066406, + "loss": 2.9641, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.358814239501953, + "rewards/margins": -1.8577768802642822, + "rewards/rejected": -2.50103759765625, + "step": 818 + }, + { + "epoch": 0.13, + "learning_rate": 1.3546646610301364e-05, + "logits/chosen": -3.0751230716705322, + "logits/rejected": -2.880504608154297, + "logps/chosen": -545.30029296875, + "logps/rejected": -357.08721923828125, + "loss": 3.5658, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.956596374511719, + "rewards/margins": -1.3456454277038574, + "rewards/rejected": -3.6109514236450195, + "step": 819 + }, + { + "epoch": 0.13, + "learning_rate": 1.3545913169770216e-05, + "logits/chosen": -3.1633732318878174, + "logits/rejected": -2.843226909637451, + "logps/chosen": -527.2022094726562, + "logps/rejected": -555.519287109375, + "loss": 2.3094, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.337533950805664, + "rewards/margins": 0.9657435417175293, + "rewards/rejected": -4.303277492523193, + "step": 820 + }, + { + "epoch": 0.13, + "learning_rate": 1.3545179729239068e-05, + "logits/chosen": -0.3390876054763794, + "logits/rejected": -2.756946325302124, + "logps/chosen": -60.48039245605469, + "logps/rejected": -745.49951171875, + "loss": 0.0442, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4007481336593628, + "rewards/margins": 4.70893669128418, + "rewards/rejected": -6.109684944152832, + "step": 821 + }, + { + "epoch": 0.13, + "learning_rate": 1.354444628870792e-05, + "logits/chosen": -3.2927825450897217, + "logits/rejected": -3.0448994636535645, + "logps/chosen": -335.3277587890625, + "logps/rejected": -209.25875854492188, + "loss": 0.0671, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9987956881523132, + "rewards/margins": 3.2494568824768066, + "rewards/rejected": -4.248252868652344, + "step": 822 + }, + { + "epoch": 0.13, + "learning_rate": 1.3543712848176772e-05, + "logits/chosen": -2.474059820175171, + "logits/rejected": -3.153470039367676, + "logps/chosen": -148.6142578125, + "logps/rejected": -212.496337890625, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8011940717697144, + "rewards/margins": 4.421808242797852, + "rewards/rejected": -5.2230024337768555, + "step": 823 + }, + { + "epoch": 0.13, + "learning_rate": 1.3542979407645624e-05, + "logits/chosen": -3.0102834701538086, + "logits/rejected": -3.119494676589966, + "logps/chosen": -54.989845275878906, + "logps/rejected": -137.22073364257812, + "loss": 0.0745, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6722264289855957, + "rewards/margins": 2.6781883239746094, + "rewards/rejected": -5.350414752960205, + "step": 824 + }, + { + "epoch": 0.13, + "learning_rate": 1.3542245967114477e-05, + "logits/chosen": -3.2110352516174316, + "logits/rejected": -3.2058684825897217, + "logps/chosen": -136.11898803710938, + "logps/rejected": -129.80003356933594, + "loss": 0.071, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7620790004730225, + "rewards/margins": 2.712794780731201, + "rewards/rejected": -4.4748735427856445, + "step": 825 + }, + { + "epoch": 0.13, + "learning_rate": 1.354151252658333e-05, + "logits/chosen": -2.820136308670044, + "logits/rejected": -3.1331229209899902, + "logps/chosen": -219.7324981689453, + "logps/rejected": -441.7222900390625, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3361198902130127, + "rewards/margins": 5.268808364868164, + "rewards/rejected": -6.604928493499756, + "step": 826 + }, + { + "epoch": 0.13, + "learning_rate": 1.3540779086052181e-05, + "logits/chosen": -3.1346826553344727, + "logits/rejected": -3.1274375915527344, + "logps/chosen": -195.8754425048828, + "logps/rejected": -257.84234619140625, + "loss": 2.1916, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.350275993347168, + "rewards/margins": -1.4424123764038086, + "rewards/rejected": -2.9078636169433594, + "step": 827 + }, + { + "epoch": 0.13, + "learning_rate": 1.3540045645521033e-05, + "logits/chosen": -2.2736384868621826, + "logits/rejected": -3.1977686882019043, + "logps/chosen": -111.0964126586914, + "logps/rejected": -280.221435546875, + "loss": 1.6329, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.526336193084717, + "rewards/margins": 1.5569998025894165, + "rewards/rejected": -5.083335876464844, + "step": 828 + }, + { + "epoch": 0.13, + "learning_rate": 1.3539312204989885e-05, + "logits/chosen": -3.2802722454071045, + "logits/rejected": -3.173156261444092, + "logps/chosen": -163.84686279296875, + "logps/rejected": -207.47000122070312, + "loss": 1.6281, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.144035577774048, + "rewards/margins": 1.7671881914138794, + "rewards/rejected": -4.911223888397217, + "step": 829 + }, + { + "epoch": 0.13, + "learning_rate": 1.3538578764458737e-05, + "logits/chosen": -2.951054573059082, + "logits/rejected": -3.10809588432312, + "logps/chosen": -140.0281982421875, + "logps/rejected": -121.31304168701172, + "loss": 0.3767, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5386531352996826, + "rewards/margins": 1.5791114568710327, + "rewards/rejected": -4.117764472961426, + "step": 830 + }, + { + "epoch": 0.13, + "learning_rate": 1.3537845323927589e-05, + "logits/chosen": -2.9832582473754883, + "logits/rejected": -2.9253621101379395, + "logps/chosen": -207.10781860351562, + "logps/rejected": -111.73223876953125, + "loss": 1.4477, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6924285888671875, + "rewards/margins": 1.6146246194839478, + "rewards/rejected": -4.307053089141846, + "step": 831 + }, + { + "epoch": 0.13, + "learning_rate": 1.353711188339644e-05, + "logits/chosen": -2.4705440998077393, + "logits/rejected": -3.289438247680664, + "logps/chosen": -80.30021667480469, + "logps/rejected": -363.30621337890625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.595820665359497, + "rewards/margins": 6.18206787109375, + "rewards/rejected": -7.777888298034668, + "step": 832 + }, + { + "epoch": 0.13, + "learning_rate": 1.3536378442865292e-05, + "logits/chosen": -3.2667598724365234, + "logits/rejected": -3.380819320678711, + "logps/chosen": -48.79621887207031, + "logps/rejected": -125.78428649902344, + "loss": 0.0658, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8640332221984863, + "rewards/margins": 4.210719585418701, + "rewards/rejected": -6.0747528076171875, + "step": 833 + }, + { + "epoch": 0.13, + "learning_rate": 1.3535645002334146e-05, + "logits/chosen": -1.7859559059143066, + "logits/rejected": -3.047415018081665, + "logps/chosen": -211.5377960205078, + "logps/rejected": -271.29754638671875, + "loss": 4.2492, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.218243598937988, + "rewards/margins": -1.4023280143737793, + "rewards/rejected": -4.815915584564209, + "step": 834 + }, + { + "epoch": 0.13, + "learning_rate": 1.3534911561802998e-05, + "logits/chosen": -2.0436134338378906, + "logits/rejected": -3.0120151042938232, + "logps/chosen": -91.69776153564453, + "logps/rejected": -269.0013427734375, + "loss": 0.0248, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9942821264266968, + "rewards/margins": 4.977700233459473, + "rewards/rejected": -5.971982955932617, + "step": 835 + }, + { + "epoch": 0.13, + "learning_rate": 1.3534178121271852e-05, + "logits/chosen": -1.5394412279129028, + "logits/rejected": -3.1324851512908936, + "logps/chosen": -72.19633483886719, + "logps/rejected": -248.56842041015625, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.429274082183838, + "rewards/margins": 4.632236003875732, + "rewards/rejected": -8.06151008605957, + "step": 836 + }, + { + "epoch": 0.13, + "learning_rate": 1.3533444680740703e-05, + "logits/chosen": -3.3350276947021484, + "logits/rejected": -2.8861074447631836, + "logps/chosen": -189.49053955078125, + "logps/rejected": -185.48959350585938, + "loss": 1.218, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6669669151306152, + "rewards/margins": 0.8504183292388916, + "rewards/rejected": -3.517385244369507, + "step": 837 + }, + { + "epoch": 0.13, + "learning_rate": 1.3532711240209555e-05, + "logits/chosen": -3.1216626167297363, + "logits/rejected": -2.8520185947418213, + "logps/chosen": -391.5675048828125, + "logps/rejected": -300.65509033203125, + "loss": 2.3337, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.607154846191406, + "rewards/margins": -0.13180899620056152, + "rewards/rejected": -4.475345611572266, + "step": 838 + }, + { + "epoch": 0.13, + "learning_rate": 1.3531977799678407e-05, + "logits/chosen": -2.1032302379608154, + "logits/rejected": -3.1950693130493164, + "logps/chosen": -383.6916809082031, + "logps/rejected": -339.767333984375, + "loss": 0.1102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8435611724853516, + "rewards/margins": 2.444640874862671, + "rewards/rejected": -3.2882018089294434, + "step": 839 + }, + { + "epoch": 0.13, + "learning_rate": 1.3531244359147259e-05, + "logits/chosen": -2.9341728687286377, + "logits/rejected": -3.236889123916626, + "logps/chosen": -197.02438354492188, + "logps/rejected": -241.608642578125, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1446661949157715, + "rewards/margins": 6.145638465881348, + "rewards/rejected": -7.290305137634277, + "step": 840 + }, + { + "epoch": 0.13, + "learning_rate": 1.3530510918616111e-05, + "logits/chosen": -2.441629409790039, + "logits/rejected": -2.9519190788269043, + "logps/chosen": -85.30217742919922, + "logps/rejected": -379.71319580078125, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.134068489074707, + "rewards/margins": 7.706472873687744, + "rewards/rejected": -8.84054183959961, + "step": 841 + }, + { + "epoch": 0.13, + "learning_rate": 1.3529777478084963e-05, + "logits/chosen": -3.138469696044922, + "logits/rejected": -3.073910713195801, + "logps/chosen": -260.17791748046875, + "logps/rejected": -192.62246704101562, + "loss": 1.6954, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.328160285949707, + "rewards/margins": 0.6815941333770752, + "rewards/rejected": -4.009754180908203, + "step": 842 + }, + { + "epoch": 0.13, + "learning_rate": 1.3529044037553816e-05, + "logits/chosen": -3.1846773624420166, + "logits/rejected": -2.2992184162139893, + "logps/chosen": -955.0287475585938, + "logps/rejected": -452.26837158203125, + "loss": 2.0977, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.702221632003784, + "rewards/margins": 0.29386067390441895, + "rewards/rejected": -2.996082305908203, + "step": 843 + }, + { + "epoch": 0.13, + "learning_rate": 1.3528310597022668e-05, + "logits/chosen": -3.147766590118408, + "logits/rejected": -3.068946123123169, + "logps/chosen": -373.9651794433594, + "logps/rejected": -377.6228332519531, + "loss": 0.0388, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.38177227973938, + "rewards/margins": 3.365457057952881, + "rewards/rejected": -5.74722957611084, + "step": 844 + }, + { + "epoch": 0.13, + "learning_rate": 1.352757715649152e-05, + "logits/chosen": -3.100574254989624, + "logits/rejected": -2.581319808959961, + "logps/chosen": -263.51910400390625, + "logps/rejected": -216.1345672607422, + "loss": 2.3313, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2775650024414062, + "rewards/margins": -0.2297961711883545, + "rewards/rejected": -3.0477688312530518, + "step": 845 + }, + { + "epoch": 0.13, + "learning_rate": 1.3526843715960372e-05, + "logits/chosen": -3.0999398231506348, + "logits/rejected": -3.0906567573547363, + "logps/chosen": -583.362060546875, + "logps/rejected": -613.9033813476562, + "loss": 0.1258, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6771163940429688, + "rewards/margins": 3.2635436058044434, + "rewards/rejected": -3.940659999847412, + "step": 846 + }, + { + "epoch": 0.13, + "learning_rate": 1.3526110275429224e-05, + "logits/chosen": -2.4219369888305664, + "logits/rejected": -2.8390307426452637, + "logps/chosen": -110.63906860351562, + "logps/rejected": -253.0104522705078, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8601438999176025, + "rewards/margins": 5.094519138336182, + "rewards/rejected": -5.954663276672363, + "step": 847 + }, + { + "epoch": 0.13, + "learning_rate": 1.3525376834898076e-05, + "logits/chosen": -3.1431190967559814, + "logits/rejected": -2.4606025218963623, + "logps/chosen": -280.96051025390625, + "logps/rejected": -374.0875549316406, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3757290840148926, + "rewards/margins": 6.0012078285217285, + "rewards/rejected": -7.376936912536621, + "step": 848 + }, + { + "epoch": 0.13, + "learning_rate": 1.3524643394366928e-05, + "logits/chosen": -3.1620404720306396, + "logits/rejected": -1.7868930101394653, + "logps/chosen": -320.010498046875, + "logps/rejected": -77.79531860351562, + "loss": 2.1787, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.867079734802246, + "rewards/margins": -2.053131103515625, + "rewards/rejected": -2.813948631286621, + "step": 849 + }, + { + "epoch": 0.13, + "learning_rate": 1.352390995383578e-05, + "logits/chosen": -3.067439556121826, + "logits/rejected": -2.7790372371673584, + "logps/chosen": -181.90069580078125, + "logps/rejected": -309.2286071777344, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6517210006713867, + "rewards/margins": 6.984875679016113, + "rewards/rejected": -8.6365966796875, + "step": 850 + }, + { + "epoch": 0.13, + "learning_rate": 1.3523176513304631e-05, + "logits/chosen": -3.1987669467926025, + "logits/rejected": -2.8434669971466064, + "logps/chosen": -128.7362518310547, + "logps/rejected": -241.53683471679688, + "loss": 2.2503, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.525149822235107, + "rewards/margins": 1.5637826919555664, + "rewards/rejected": -6.088932991027832, + "step": 851 + }, + { + "epoch": 0.13, + "learning_rate": 1.3522443072773485e-05, + "logits/chosen": -2.265413522720337, + "logits/rejected": -2.988797664642334, + "logps/chosen": -185.94631958007812, + "logps/rejected": -286.447998046875, + "loss": 1.5973, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.690915107727051, + "rewards/margins": 1.4895448684692383, + "rewards/rejected": -6.180459976196289, + "step": 852 + }, + { + "epoch": 0.13, + "learning_rate": 1.3521709632242337e-05, + "logits/chosen": -3.2607855796813965, + "logits/rejected": -2.5893146991729736, + "logps/chosen": -690.1863403320312, + "logps/rejected": -518.6631469726562, + "loss": 0.2113, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3791548013687134, + "rewards/margins": 1.8550370931625366, + "rewards/rejected": -3.23419189453125, + "step": 853 + }, + { + "epoch": 0.13, + "learning_rate": 1.3520976191711189e-05, + "logits/chosen": -3.116533041000366, + "logits/rejected": -3.248661518096924, + "logps/chosen": -95.35911560058594, + "logps/rejected": -218.8645477294922, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.095623016357422, + "rewards/margins": 4.333588123321533, + "rewards/rejected": -6.429211139678955, + "step": 854 + }, + { + "epoch": 0.13, + "learning_rate": 1.352024275118004e-05, + "logits/chosen": -2.53572678565979, + "logits/rejected": -3.1350276470184326, + "logps/chosen": -44.568153381347656, + "logps/rejected": -268.57989501953125, + "loss": 0.0362, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0763630867004395, + "rewards/margins": 3.621048927307129, + "rewards/rejected": -5.697412490844727, + "step": 855 + }, + { + "epoch": 0.13, + "learning_rate": 1.3519509310648892e-05, + "logits/chosen": -1.5580928325653076, + "logits/rejected": -3.09458327293396, + "logps/chosen": -272.7974853515625, + "logps/rejected": -463.821044921875, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4940712451934814, + "rewards/margins": 6.130108833312988, + "rewards/rejected": -7.624180793762207, + "step": 856 + }, + { + "epoch": 0.13, + "learning_rate": 1.3518775870117744e-05, + "logits/chosen": -3.2379443645477295, + "logits/rejected": -2.629626989364624, + "logps/chosen": -655.03125, + "logps/rejected": -466.6448974609375, + "loss": 1.289, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.930712938308716, + "rewards/margins": 1.3115981817245483, + "rewards/rejected": -4.242311000823975, + "step": 857 + }, + { + "epoch": 0.13, + "learning_rate": 1.3518042429586596e-05, + "logits/chosen": -2.9579617977142334, + "logits/rejected": -3.1726999282836914, + "logps/chosen": -68.27163696289062, + "logps/rejected": -349.08380126953125, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8880277872085571, + "rewards/margins": 4.9205803871154785, + "rewards/rejected": -6.808608055114746, + "step": 858 + }, + { + "epoch": 0.13, + "learning_rate": 1.3517308989055448e-05, + "logits/chosen": -3.1246418952941895, + "logits/rejected": -3.3341009616851807, + "logps/chosen": -201.54043579101562, + "logps/rejected": -366.9518127441406, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37602049112319946, + "rewards/margins": 6.113772392272949, + "rewards/rejected": -6.489792823791504, + "step": 859 + }, + { + "epoch": 0.13, + "learning_rate": 1.35165755485243e-05, + "logits/chosen": -3.2670860290527344, + "logits/rejected": -3.309561014175415, + "logps/chosen": -283.1492919921875, + "logps/rejected": -464.67388916015625, + "loss": 0.2076, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9051151275634766, + "rewards/margins": 2.4718356132507324, + "rewards/rejected": -4.376951217651367, + "step": 860 + }, + { + "epoch": 0.13, + "learning_rate": 1.3515842107993154e-05, + "logits/chosen": -3.2263600826263428, + "logits/rejected": -3.0101866722106934, + "logps/chosen": -433.16168212890625, + "logps/rejected": -294.4318542480469, + "loss": 0.0526, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9484093189239502, + "rewards/margins": 2.932206630706787, + "rewards/rejected": -4.880616188049316, + "step": 861 + }, + { + "epoch": 0.13, + "learning_rate": 1.3515108667462005e-05, + "logits/chosen": -2.616901397705078, + "logits/rejected": -3.150355100631714, + "logps/chosen": -113.50657653808594, + "logps/rejected": -214.8811492919922, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4667438268661499, + "rewards/margins": 5.546049118041992, + "rewards/rejected": -6.012793064117432, + "step": 862 + }, + { + "epoch": 0.13, + "learning_rate": 1.3514375226930857e-05, + "logits/chosen": -2.701646089553833, + "logits/rejected": -3.2791669368743896, + "logps/chosen": -49.238624572753906, + "logps/rejected": -315.58831787109375, + "loss": 0.0541, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.197913646697998, + "rewards/margins": 3.189357042312622, + "rewards/rejected": -7.387270927429199, + "step": 863 + }, + { + "epoch": 0.13, + "learning_rate": 1.351364178639971e-05, + "logits/chosen": -1.6671370267868042, + "logits/rejected": -3.1764626502990723, + "logps/chosen": -263.8768615722656, + "logps/rejected": -499.52691650390625, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9327722787857056, + "rewards/margins": 7.631495475769043, + "rewards/rejected": -9.564268112182617, + "step": 864 + }, + { + "epoch": 0.13, + "learning_rate": 1.3512908345868561e-05, + "logits/chosen": -1.3720197677612305, + "logits/rejected": -3.100170850753784, + "logps/chosen": -128.56179809570312, + "logps/rejected": -400.091552734375, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3761515617370605, + "rewards/margins": 3.7957639694213867, + "rewards/rejected": -7.171915531158447, + "step": 865 + }, + { + "epoch": 0.13, + "learning_rate": 1.3512174905337413e-05, + "logits/chosen": -2.608412742614746, + "logits/rejected": -3.1107890605926514, + "logps/chosen": -117.33053588867188, + "logps/rejected": -218.59165954589844, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2938904762268066, + "rewards/margins": 4.026058673858643, + "rewards/rejected": -6.319949150085449, + "step": 866 + }, + { + "epoch": 0.13, + "learning_rate": 1.3511441464806265e-05, + "logits/chosen": -2.8231327533721924, + "logits/rejected": -3.279705047607422, + "logps/chosen": -200.37527465820312, + "logps/rejected": -367.77923583984375, + "loss": 0.0312, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.936505913734436, + "rewards/margins": 3.516108512878418, + "rewards/rejected": -5.4526143074035645, + "step": 867 + }, + { + "epoch": 0.13, + "learning_rate": 1.3510708024275118e-05, + "logits/chosen": -3.01967191696167, + "logits/rejected": -1.7402534484863281, + "logps/chosen": -426.52935791015625, + "logps/rejected": -343.9852600097656, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9679007530212402, + "rewards/margins": 5.024806976318359, + "rewards/rejected": -7.992708206176758, + "step": 868 + }, + { + "epoch": 0.14, + "learning_rate": 1.350997458374397e-05, + "logits/chosen": -3.244365930557251, + "logits/rejected": -3.055349826812744, + "logps/chosen": -229.9159698486328, + "logps/rejected": -299.071533203125, + "loss": 0.0343, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4256527423858643, + "rewards/margins": 3.369050979614258, + "rewards/rejected": -5.794703483581543, + "step": 869 + }, + { + "epoch": 0.14, + "learning_rate": 1.3509241143212824e-05, + "logits/chosen": -1.835890769958496, + "logits/rejected": -3.1259377002716064, + "logps/chosen": -194.75625610351562, + "logps/rejected": -439.10333251953125, + "loss": 0.0467, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7016091346740723, + "rewards/margins": 3.3777718544006348, + "rewards/rejected": -6.079380989074707, + "step": 870 + }, + { + "epoch": 0.14, + "learning_rate": 1.3508507702681676e-05, + "logits/chosen": -2.3652894496917725, + "logits/rejected": -3.0749940872192383, + "logps/chosen": -400.59478759765625, + "logps/rejected": -281.79339599609375, + "loss": 5.074, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.940969944000244, + "rewards/margins": -2.32611083984375, + "rewards/rejected": -5.614859104156494, + "step": 871 + }, + { + "epoch": 0.14, + "learning_rate": 1.3507774262150528e-05, + "logits/chosen": -3.084557056427002, + "logits/rejected": -1.9132416248321533, + "logps/chosen": -233.7763214111328, + "logps/rejected": -166.118896484375, + "loss": 3.1185, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.4696831703186035, + "rewards/margins": 1.4096739292144775, + "rewards/rejected": -6.87935733795166, + "step": 872 + }, + { + "epoch": 0.14, + "learning_rate": 1.350704082161938e-05, + "logits/chosen": -1.4125038385391235, + "logits/rejected": -2.1846721172332764, + "logps/chosen": -135.9716796875, + "logps/rejected": -313.7278137207031, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8839683532714844, + "rewards/margins": 6.43377685546875, + "rewards/rejected": -8.317745208740234, + "step": 873 + }, + { + "epoch": 0.14, + "learning_rate": 1.3506307381088231e-05, + "logits/chosen": -2.866114377975464, + "logits/rejected": -2.8847174644470215, + "logps/chosen": -190.3300018310547, + "logps/rejected": -165.81063842773438, + "loss": 1.9464, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.177217483520508, + "rewards/margins": 0.4857161045074463, + "rewards/rejected": -4.662933826446533, + "step": 874 + }, + { + "epoch": 0.14, + "learning_rate": 1.3505573940557083e-05, + "logits/chosen": -2.347571849822998, + "logits/rejected": -3.190380573272705, + "logps/chosen": -36.146209716796875, + "logps/rejected": -308.3041076660156, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.733154535293579, + "rewards/margins": 7.067442417144775, + "rewards/rejected": -8.800596237182617, + "step": 875 + }, + { + "epoch": 0.14, + "learning_rate": 1.3504840500025935e-05, + "logits/chosen": -2.620095729827881, + "logits/rejected": -3.133626937866211, + "logps/chosen": -684.6450805664062, + "logps/rejected": -719.609130859375, + "loss": 0.0967, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2317914962768555, + "rewards/margins": 3.208454132080078, + "rewards/rejected": -6.440245628356934, + "step": 876 + }, + { + "epoch": 0.14, + "learning_rate": 1.3504107059494787e-05, + "logits/chosen": -3.071702241897583, + "logits/rejected": -3.089226245880127, + "logps/chosen": -234.204345703125, + "logps/rejected": -200.23382568359375, + "loss": 2.0562, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.655269145965576, + "rewards/margins": 0.38941144943237305, + "rewards/rejected": -5.044680595397949, + "step": 877 + }, + { + "epoch": 0.14, + "learning_rate": 1.3503373618963639e-05, + "logits/chosen": -1.9260700941085815, + "logits/rejected": -0.8764755725860596, + "logps/chosen": -233.1286163330078, + "logps/rejected": -148.91091918945312, + "loss": 3.9926, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.593987941741943, + "rewards/margins": -0.8178541660308838, + "rewards/rejected": -4.776134014129639, + "step": 878 + }, + { + "epoch": 0.14, + "learning_rate": 1.3502640178432492e-05, + "logits/chosen": -2.797637462615967, + "logits/rejected": -3.1852731704711914, + "logps/chosen": -56.59856414794922, + "logps/rejected": -267.2527160644531, + "loss": 0.0319, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1288318634033203, + "rewards/margins": 4.07358980178833, + "rewards/rejected": -6.20242166519165, + "step": 879 + }, + { + "epoch": 0.14, + "learning_rate": 1.3501906737901344e-05, + "logits/chosen": -3.2989578247070312, + "logits/rejected": -3.0378305912017822, + "logps/chosen": -639.5302124023438, + "logps/rejected": -565.0554809570312, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5358710289001465, + "rewards/margins": 7.368269920349121, + "rewards/rejected": -8.904141426086426, + "step": 880 + }, + { + "epoch": 0.14, + "learning_rate": 1.3501173297370196e-05, + "logits/chosen": -3.1044118404388428, + "logits/rejected": -2.344294548034668, + "logps/chosen": -379.37127685546875, + "logps/rejected": -162.39959716796875, + "loss": 4.0899, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.40914249420166, + "rewards/margins": -2.238961696624756, + "rewards/rejected": -3.1701810359954834, + "step": 881 + }, + { + "epoch": 0.14, + "learning_rate": 1.3500439856839048e-05, + "logits/chosen": -2.81330943107605, + "logits/rejected": -3.2048792839050293, + "logps/chosen": -153.94876098632812, + "logps/rejected": -248.96530151367188, + "loss": 0.0771, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5779740810394287, + "rewards/margins": 3.247966766357422, + "rewards/rejected": -4.8259406089782715, + "step": 882 + }, + { + "epoch": 0.14, + "learning_rate": 1.34997064163079e-05, + "logits/chosen": -1.6559072732925415, + "logits/rejected": -2.7322018146514893, + "logps/chosen": -184.7309112548828, + "logps/rejected": -465.04937744140625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0725185871124268, + "rewards/margins": 6.398687362670898, + "rewards/rejected": -9.471205711364746, + "step": 883 + }, + { + "epoch": 0.14, + "learning_rate": 1.3498972975776752e-05, + "logits/chosen": -2.360391139984131, + "logits/rejected": -3.0148401260375977, + "logps/chosen": -527.4671020507812, + "logps/rejected": -707.65380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.335843086242676, + "rewards/margins": 9.97571086883545, + "rewards/rejected": -12.311553955078125, + "step": 884 + }, + { + "epoch": 0.14, + "learning_rate": 1.3498239535245604e-05, + "logits/chosen": -2.6949617862701416, + "logits/rejected": -3.064018726348877, + "logps/chosen": -396.4322509765625, + "logps/rejected": -400.88946533203125, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0386369228363037, + "rewards/margins": 3.6581268310546875, + "rewards/rejected": -5.69676399230957, + "step": 885 + }, + { + "epoch": 0.14, + "learning_rate": 1.3497506094714456e-05, + "logits/chosen": -3.2985734939575195, + "logits/rejected": -2.2770543098449707, + "logps/chosen": -666.666015625, + "logps/rejected": -312.03759765625, + "loss": 2.5962, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.390306234359741, + "rewards/margins": 1.4577500820159912, + "rewards/rejected": -4.848055839538574, + "step": 886 + }, + { + "epoch": 0.14, + "learning_rate": 1.3496772654183307e-05, + "logits/chosen": -2.6879560947418213, + "logits/rejected": -3.147901773452759, + "logps/chosen": -36.32789611816406, + "logps/rejected": -129.17440795898438, + "loss": 0.0294, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1967315673828125, + "rewards/margins": 3.7894062995910645, + "rewards/rejected": -5.986137866973877, + "step": 887 + }, + { + "epoch": 0.14, + "learning_rate": 1.3496039213652161e-05, + "logits/chosen": -2.864332675933838, + "logits/rejected": -2.2059853076934814, + "logps/chosen": -236.59100341796875, + "logps/rejected": -294.6620178222656, + "loss": 0.1888, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0147500038146973, + "rewards/margins": 3.6736059188842773, + "rewards/rejected": -5.688355922698975, + "step": 888 + }, + { + "epoch": 0.14, + "learning_rate": 1.3495305773121013e-05, + "logits/chosen": -3.144529342651367, + "logits/rejected": -2.009268283843994, + "logps/chosen": -232.74317932128906, + "logps/rejected": -223.54302978515625, + "loss": 2.9498, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.7298197746276855, + "rewards/margins": 1.2358293533325195, + "rewards/rejected": -5.965648651123047, + "step": 889 + }, + { + "epoch": 0.14, + "learning_rate": 1.3494572332589865e-05, + "logits/chosen": -3.0827133655548096, + "logits/rejected": -1.2011851072311401, + "logps/chosen": -182.41665649414062, + "logps/rejected": -155.4373321533203, + "loss": 1.157, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0652589797973633, + "rewards/margins": 1.4340853691101074, + "rewards/rejected": -4.499344348907471, + "step": 890 + }, + { + "epoch": 0.14, + "learning_rate": 1.3493838892058717e-05, + "logits/chosen": -3.0307674407958984, + "logits/rejected": -3.042557716369629, + "logps/chosen": -71.36637878417969, + "logps/rejected": -196.71902465820312, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1975488662719727, + "rewards/margins": 4.632724761962891, + "rewards/rejected": -6.830273628234863, + "step": 891 + }, + { + "epoch": 0.14, + "learning_rate": 1.3493105451527569e-05, + "logits/chosen": -3.171908378601074, + "logits/rejected": -2.5586721897125244, + "logps/chosen": -332.9779968261719, + "logps/rejected": -244.23883056640625, + "loss": 2.3937, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.727585792541504, + "rewards/margins": -0.0693197250366211, + "rewards/rejected": -5.658266067504883, + "step": 892 + }, + { + "epoch": 0.14, + "learning_rate": 1.349237201099642e-05, + "logits/chosen": -3.096395254135132, + "logits/rejected": -2.2697372436523438, + "logps/chosen": -143.0289306640625, + "logps/rejected": -208.66184997558594, + "loss": 2.9601, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.953656196594238, + "rewards/margins": 1.421949863433838, + "rewards/rejected": -6.375606060028076, + "step": 893 + }, + { + "epoch": 0.14, + "learning_rate": 1.3491638570465272e-05, + "logits/chosen": -2.6790714263916016, + "logits/rejected": -3.187962055206299, + "logps/chosen": -103.29310607910156, + "logps/rejected": -411.7303771972656, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6615066528320312, + "rewards/margins": 7.173314094543457, + "rewards/rejected": -8.834820747375488, + "step": 894 + }, + { + "epoch": 0.14, + "learning_rate": 1.3490905129934124e-05, + "logits/chosen": -2.64864182472229, + "logits/rejected": -3.0764963626861572, + "logps/chosen": -669.36865234375, + "logps/rejected": -586.0709228515625, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.024405002593994, + "rewards/margins": 4.800773620605469, + "rewards/rejected": -6.825178623199463, + "step": 895 + }, + { + "epoch": 0.14, + "learning_rate": 1.3490171689402976e-05, + "logits/chosen": -3.008892059326172, + "logits/rejected": -1.4801465272903442, + "logps/chosen": -772.5718994140625, + "logps/rejected": -381.03314208984375, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5911216735839844, + "rewards/margins": 4.4112653732299805, + "rewards/rejected": -6.002387046813965, + "step": 896 + }, + { + "epoch": 0.14, + "learning_rate": 1.348943824887183e-05, + "logits/chosen": -2.3138020038604736, + "logits/rejected": -2.916926145553589, + "logps/chosen": -250.89920043945312, + "logps/rejected": -427.59844970703125, + "loss": 3.7998, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.553122043609619, + "rewards/margins": -0.6344084739685059, + "rewards/rejected": -3.9187135696411133, + "step": 897 + }, + { + "epoch": 0.14, + "learning_rate": 1.3488704808340682e-05, + "logits/chosen": -2.6149885654449463, + "logits/rejected": -3.0225582122802734, + "logps/chosen": -853.2959594726562, + "logps/rejected": -594.896728515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.289224237203598, + "rewards/margins": 9.715892791748047, + "rewards/rejected": -9.426668167114258, + "step": 898 + }, + { + "epoch": 0.14, + "learning_rate": 1.3487971367809533e-05, + "logits/chosen": -1.961280107498169, + "logits/rejected": -2.9906866550445557, + "logps/chosen": -102.74833679199219, + "logps/rejected": -176.6856689453125, + "loss": 1.6804, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3032944202423096, + "rewards/margins": 1.2270736694335938, + "rewards/rejected": -4.530367851257324, + "step": 899 + }, + { + "epoch": 0.14, + "learning_rate": 1.3487237927278385e-05, + "logits/chosen": -3.269519090652466, + "logits/rejected": -2.7400808334350586, + "logps/chosen": -431.8030700683594, + "logps/rejected": -208.4232177734375, + "loss": 0.9693, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5297973155975342, + "rewards/margins": 1.719139814376831, + "rewards/rejected": -3.2489373683929443, + "step": 900 + }, + { + "epoch": 0.14, + "learning_rate": 1.3486504486747237e-05, + "logits/chosen": -2.6236138343811035, + "logits/rejected": -3.1861777305603027, + "logps/chosen": -133.3762969970703, + "logps/rejected": -331.02777099609375, + "loss": 0.0463, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4691969156265259, + "rewards/margins": 4.128577709197998, + "rewards/rejected": -5.597774505615234, + "step": 901 + }, + { + "epoch": 0.14, + "learning_rate": 1.348577104621609e-05, + "logits/chosen": -2.885424852371216, + "logits/rejected": -3.0539050102233887, + "logps/chosen": -226.11245727539062, + "logps/rejected": -452.15521240234375, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3917083740234375, + "rewards/margins": 6.47426176071167, + "rewards/rejected": -8.86596965789795, + "step": 902 + }, + { + "epoch": 0.14, + "learning_rate": 1.3485037605684943e-05, + "logits/chosen": -2.6663994789123535, + "logits/rejected": -2.2707393169403076, + "logps/chosen": -458.6169738769531, + "logps/rejected": -292.3715515136719, + "loss": 3.2803, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.291131973266602, + "rewards/margins": -2.6766440868377686, + "rewards/rejected": -2.614487648010254, + "step": 903 + }, + { + "epoch": 0.14, + "learning_rate": 1.3484304165153794e-05, + "logits/chosen": -3.176528215408325, + "logits/rejected": -3.3554880619049072, + "logps/chosen": -54.09550094604492, + "logps/rejected": -154.48504638671875, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.903944492340088, + "rewards/margins": 4.575868606567383, + "rewards/rejected": -7.4798126220703125, + "step": 904 + }, + { + "epoch": 0.14, + "learning_rate": 1.3483570724622648e-05, + "logits/chosen": -2.4180989265441895, + "logits/rejected": -3.110942840576172, + "logps/chosen": -103.91207885742188, + "logps/rejected": -226.63894653320312, + "loss": 2.1822, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.9886345863342285, + "rewards/margins": 0.4976949691772461, + "rewards/rejected": -5.486329078674316, + "step": 905 + }, + { + "epoch": 0.14, + "learning_rate": 1.34828372840915e-05, + "logits/chosen": -1.8130180835723877, + "logits/rejected": -2.9334497451782227, + "logps/chosen": -76.84527587890625, + "logps/rejected": -420.24554443359375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9848341941833496, + "rewards/margins": 8.426412582397461, + "rewards/rejected": -11.411246299743652, + "step": 906 + }, + { + "epoch": 0.14, + "learning_rate": 1.3482103843560352e-05, + "logits/chosen": -1.2311662435531616, + "logits/rejected": -2.9833526611328125, + "logps/chosen": -214.22018432617188, + "logps/rejected": -533.8328247070312, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6426339149475098, + "rewards/margins": 7.106761932373047, + "rewards/rejected": -9.749395370483398, + "step": 907 + }, + { + "epoch": 0.14, + "learning_rate": 1.3481370403029204e-05, + "logits/chosen": -2.4793450832366943, + "logits/rejected": -3.1639180183410645, + "logps/chosen": -880.1002197265625, + "logps/rejected": -838.4697265625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.070214867591858, + "rewards/margins": 7.023135185241699, + "rewards/rejected": -8.093350410461426, + "step": 908 + }, + { + "epoch": 0.14, + "learning_rate": 1.3480636962498056e-05, + "logits/chosen": -3.1153299808502197, + "logits/rejected": -2.0867772102355957, + "logps/chosen": -324.56011962890625, + "logps/rejected": -107.55577850341797, + "loss": 4.5128, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.341387748718262, + "rewards/margins": -4.4487104415893555, + "rewards/rejected": -2.892677068710327, + "step": 909 + }, + { + "epoch": 0.14, + "learning_rate": 1.3479903521966907e-05, + "logits/chosen": -3.0331735610961914, + "logits/rejected": -3.1215322017669678, + "logps/chosen": -68.21076965332031, + "logps/rejected": -195.18270874023438, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7883468270301819, + "rewards/margins": 5.125888347625732, + "rewards/rejected": -5.9142351150512695, + "step": 910 + }, + { + "epoch": 0.14, + "learning_rate": 1.347917008143576e-05, + "logits/chosen": -1.1544151306152344, + "logits/rejected": -2.989692449569702, + "logps/chosen": -34.73590850830078, + "logps/rejected": -433.06005859375, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5188409090042114, + "rewards/margins": 5.806795120239258, + "rewards/rejected": -7.325636386871338, + "step": 911 + }, + { + "epoch": 0.14, + "learning_rate": 1.3478436640904611e-05, + "logits/chosen": -1.9308617115020752, + "logits/rejected": -2.95452618598938, + "logps/chosen": -60.94776153564453, + "logps/rejected": -139.31590270996094, + "loss": 1.1945, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1620278358459473, + "rewards/margins": 0.536507248878479, + "rewards/rejected": -3.6985349655151367, + "step": 912 + }, + { + "epoch": 0.14, + "learning_rate": 1.3477703200373463e-05, + "logits/chosen": -3.134916305541992, + "logits/rejected": -3.0569937229156494, + "logps/chosen": -160.5797119140625, + "logps/rejected": -255.37570190429688, + "loss": 1.273, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.146993637084961, + "rewards/margins": 1.031596302986145, + "rewards/rejected": -5.178589820861816, + "step": 913 + }, + { + "epoch": 0.14, + "learning_rate": 1.3476969759842317e-05, + "logits/chosen": -2.8898472785949707, + "logits/rejected": -2.864654541015625, + "logps/chosen": -169.49525451660156, + "logps/rejected": -373.8451843261719, + "loss": 2.1813, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5163724422454834, + "rewards/margins": 0.8082547187805176, + "rewards/rejected": -4.324626922607422, + "step": 914 + }, + { + "epoch": 0.14, + "learning_rate": 1.3476236319311169e-05, + "logits/chosen": -2.8720834255218506, + "logits/rejected": -2.7047317028045654, + "logps/chosen": -362.3338928222656, + "logps/rejected": -544.688720703125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9340332746505737, + "rewards/margins": 6.270040512084961, + "rewards/rejected": -8.204073905944824, + "step": 915 + }, + { + "epoch": 0.14, + "learning_rate": 1.347550287878002e-05, + "logits/chosen": -2.6997439861297607, + "logits/rejected": -2.507976770401001, + "logps/chosen": -211.47573852539062, + "logps/rejected": -345.1946105957031, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4083137512207031, + "rewards/margins": 6.032084941864014, + "rewards/rejected": -7.440398693084717, + "step": 916 + }, + { + "epoch": 0.14, + "learning_rate": 1.3474769438248872e-05, + "logits/chosen": -2.7968714237213135, + "logits/rejected": -3.217371702194214, + "logps/chosen": -104.96282958984375, + "logps/rejected": -273.5144958496094, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.225754737854004, + "rewards/margins": 5.140748500823975, + "rewards/rejected": -7.3665032386779785, + "step": 917 + }, + { + "epoch": 0.14, + "learning_rate": 1.3474035997717724e-05, + "logits/chosen": -2.111074447631836, + "logits/rejected": -2.986931324005127, + "logps/chosen": -188.94932556152344, + "logps/rejected": -375.921875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6355586647987366, + "rewards/margins": 6.007321357727051, + "rewards/rejected": -6.642880439758301, + "step": 918 + }, + { + "epoch": 0.14, + "learning_rate": 1.3473302557186576e-05, + "logits/chosen": -3.027364492416382, + "logits/rejected": -3.159461259841919, + "logps/chosen": -82.8377456665039, + "logps/rejected": -231.05572509765625, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6465549468994141, + "rewards/margins": 5.566336631774902, + "rewards/rejected": -6.212891578674316, + "step": 919 + }, + { + "epoch": 0.14, + "learning_rate": 1.3472569116655428e-05, + "logits/chosen": -1.4465291500091553, + "logits/rejected": -3.0362846851348877, + "logps/chosen": -92.69041442871094, + "logps/rejected": -305.1114501953125, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9753649234771729, + "rewards/margins": 5.289577960968018, + "rewards/rejected": -7.2649431228637695, + "step": 920 + }, + { + "epoch": 0.14, + "learning_rate": 1.347183567612428e-05, + "logits/chosen": -2.716838836669922, + "logits/rejected": -2.9513978958129883, + "logps/chosen": -131.86911010742188, + "logps/rejected": -409.0745544433594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.811618447303772, + "rewards/margins": 9.182964324951172, + "rewards/rejected": -9.994583129882812, + "step": 921 + }, + { + "epoch": 0.14, + "learning_rate": 1.3471102235593132e-05, + "logits/chosen": -2.4146881103515625, + "logits/rejected": -3.039580821990967, + "logps/chosen": -364.1150817871094, + "logps/rejected": -565.96044921875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6941025257110596, + "rewards/margins": 8.022026062011719, + "rewards/rejected": -9.716129302978516, + "step": 922 + }, + { + "epoch": 0.14, + "learning_rate": 1.3470368795061985e-05, + "logits/chosen": -2.973662853240967, + "logits/rejected": -2.4021129608154297, + "logps/chosen": -217.4969024658203, + "logps/rejected": -193.8546600341797, + "loss": 2.8218, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.0361762046813965, + "rewards/margins": -0.028263568878173828, + "rewards/rejected": -4.007912635803223, + "step": 923 + }, + { + "epoch": 0.14, + "learning_rate": 1.3469635354530837e-05, + "logits/chosen": -1.881808876991272, + "logits/rejected": -3.0688672065734863, + "logps/chosen": -91.4753189086914, + "logps/rejected": -316.5804443359375, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4647350311279297, + "rewards/margins": 4.399582386016846, + "rewards/rejected": -5.864317417144775, + "step": 924 + }, + { + "epoch": 0.14, + "learning_rate": 1.3468901913999689e-05, + "logits/chosen": -3.031670331954956, + "logits/rejected": -2.137674570083618, + "logps/chosen": -186.9307861328125, + "logps/rejected": -133.94546508789062, + "loss": 0.0888, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.905707359313965, + "rewards/margins": 3.4223580360412598, + "rewards/rejected": -6.328065395355225, + "step": 925 + }, + { + "epoch": 0.14, + "learning_rate": 1.3468168473468541e-05, + "logits/chosen": -2.785395383834839, + "logits/rejected": -3.193114757537842, + "logps/chosen": -116.689453125, + "logps/rejected": -123.54066467285156, + "loss": 1.0607, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4806294441223145, + "rewards/margins": 0.33422791957855225, + "rewards/rejected": -3.8148574829101562, + "step": 926 + }, + { + "epoch": 0.14, + "learning_rate": 1.3467435032937393e-05, + "logits/chosen": -2.854090690612793, + "logits/rejected": -2.9700100421905518, + "logps/chosen": -721.3082275390625, + "logps/rejected": -696.6288452148438, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.523465156555176, + "rewards/margins": 7.861236572265625, + "rewards/rejected": -10.3847017288208, + "step": 927 + }, + { + "epoch": 0.14, + "learning_rate": 1.3466701592406245e-05, + "logits/chosen": -3.018272638320923, + "logits/rejected": -3.019028663635254, + "logps/chosen": -682.3461303710938, + "logps/rejected": -468.1175537109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2954132556915283, + "rewards/margins": 7.2826128005981445, + "rewards/rejected": -8.578025817871094, + "step": 928 + }, + { + "epoch": 0.14, + "learning_rate": 1.3465968151875097e-05, + "logits/chosen": -2.637847661972046, + "logits/rejected": -2.949704647064209, + "logps/chosen": -145.4412841796875, + "logps/rejected": -346.5396728515625, + "loss": 0.1706, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5203895568847656, + "rewards/margins": 4.396847724914551, + "rewards/rejected": -6.917237281799316, + "step": 929 + }, + { + "epoch": 0.14, + "learning_rate": 1.3465234711343948e-05, + "logits/chosen": -1.940616250038147, + "logits/rejected": -3.0459485054016113, + "logps/chosen": -252.29026794433594, + "logps/rejected": -311.18218994140625, + "loss": 1.049, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.781277656555176, + "rewards/margins": 2.3530631065368652, + "rewards/rejected": -5.134340763092041, + "step": 930 + }, + { + "epoch": 0.14, + "learning_rate": 1.34645012708128e-05, + "logits/chosen": -2.690262794494629, + "logits/rejected": -3.0833306312561035, + "logps/chosen": -97.24950408935547, + "logps/rejected": -351.77313232421875, + "loss": 0.0206, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0951664447784424, + "rewards/margins": 7.818049907684326, + "rewards/rejected": -9.913216590881348, + "step": 931 + }, + { + "epoch": 0.14, + "learning_rate": 1.3463767830281654e-05, + "logits/chosen": -1.798823595046997, + "logits/rejected": -2.617490530014038, + "logps/chosen": -186.2882843017578, + "logps/rejected": -329.1983642578125, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.868499755859375, + "rewards/margins": 3.982036590576172, + "rewards/rejected": -6.850536346435547, + "step": 932 + }, + { + "epoch": 0.15, + "learning_rate": 1.3463034389750506e-05, + "logits/chosen": -2.4990339279174805, + "logits/rejected": -2.7186272144317627, + "logps/chosen": -85.84310913085938, + "logps/rejected": -193.20230102539062, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.058377742767334, + "rewards/margins": 4.8162641525268555, + "rewards/rejected": -6.874642372131348, + "step": 933 + }, + { + "epoch": 0.15, + "learning_rate": 1.3462300949219358e-05, + "logits/chosen": -2.671412467956543, + "logits/rejected": -3.1070661544799805, + "logps/chosen": -193.1884765625, + "logps/rejected": -435.16461181640625, + "loss": 3.4147, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.390747547149658, + "rewards/margins": -1.518154501914978, + "rewards/rejected": -1.8725929260253906, + "step": 934 + }, + { + "epoch": 0.15, + "learning_rate": 1.346156750868821e-05, + "logits/chosen": -3.1483521461486816, + "logits/rejected": -2.9244256019592285, + "logps/chosen": -125.91165161132812, + "logps/rejected": -213.16427612304688, + "loss": 0.0947, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7436611652374268, + "rewards/margins": 3.8024988174438477, + "rewards/rejected": -5.5461602210998535, + "step": 935 + }, + { + "epoch": 0.15, + "learning_rate": 1.3460834068157063e-05, + "logits/chosen": -2.402880907058716, + "logits/rejected": -3.012812852859497, + "logps/chosen": -626.7802124023438, + "logps/rejected": -641.1202392578125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6018967628479004, + "rewards/margins": 6.74801778793335, + "rewards/rejected": -8.34991455078125, + "step": 936 + }, + { + "epoch": 0.15, + "learning_rate": 1.3460100627625915e-05, + "logits/chosen": -3.063361883163452, + "logits/rejected": -3.0486221313476562, + "logps/chosen": -418.6708984375, + "logps/rejected": -420.2943420410156, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8747024536132812, + "rewards/margins": 4.872059345245361, + "rewards/rejected": -6.746761798858643, + "step": 937 + }, + { + "epoch": 0.15, + "learning_rate": 1.3459367187094767e-05, + "logits/chosen": -3.1528265476226807, + "logits/rejected": -3.0521159172058105, + "logps/chosen": -44.339630126953125, + "logps/rejected": -151.71566772460938, + "loss": 0.0808, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.310502290725708, + "rewards/margins": 2.5823490619659424, + "rewards/rejected": -3.8928513526916504, + "step": 938 + }, + { + "epoch": 0.15, + "learning_rate": 1.3458633746563619e-05, + "logits/chosen": -3.0052478313446045, + "logits/rejected": -2.5812394618988037, + "logps/chosen": -200.43309020996094, + "logps/rejected": -209.90708923339844, + "loss": 1.8256, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.881934404373169, + "rewards/margins": -0.25773608684539795, + "rewards/rejected": -3.6241984367370605, + "step": 939 + }, + { + "epoch": 0.15, + "learning_rate": 1.345790030603247e-05, + "logits/chosen": -1.3507208824157715, + "logits/rejected": -2.944453477859497, + "logps/chosen": -145.45529174804688, + "logps/rejected": -360.97222900390625, + "loss": 0.1922, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6368587017059326, + "rewards/margins": 3.3972368240356445, + "rewards/rejected": -6.034095764160156, + "step": 940 + }, + { + "epoch": 0.15, + "learning_rate": 1.3457166865501324e-05, + "logits/chosen": -2.1999082565307617, + "logits/rejected": -2.9418928623199463, + "logps/chosen": -324.15496826171875, + "logps/rejected": -391.84674072265625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9922844171524048, + "rewards/margins": 6.895103454589844, + "rewards/rejected": -7.887388229370117, + "step": 941 + }, + { + "epoch": 0.15, + "learning_rate": 1.3456433424970176e-05, + "logits/chosen": -2.2997231483459473, + "logits/rejected": -2.8078041076660156, + "logps/chosen": -55.247528076171875, + "logps/rejected": -127.31207275390625, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4036775827407837, + "rewards/margins": 4.386163234710693, + "rewards/rejected": -5.7898406982421875, + "step": 942 + }, + { + "epoch": 0.15, + "learning_rate": 1.3455699984439028e-05, + "logits/chosen": -2.893094778060913, + "logits/rejected": -1.4687609672546387, + "logps/chosen": -442.7125244140625, + "logps/rejected": -324.65533447265625, + "loss": 0.0245, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.972964882850647, + "rewards/margins": 3.6980509757995605, + "rewards/rejected": -5.671015739440918, + "step": 943 + }, + { + "epoch": 0.15, + "learning_rate": 1.345496654390788e-05, + "logits/chosen": -3.0773422718048096, + "logits/rejected": -2.697129011154175, + "logps/chosen": -511.16510009765625, + "logps/rejected": -334.21527099609375, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1438095569610596, + "rewards/margins": 5.224214553833008, + "rewards/rejected": -7.368023872375488, + "step": 944 + }, + { + "epoch": 0.15, + "learning_rate": 1.3454233103376732e-05, + "logits/chosen": -3.079218864440918, + "logits/rejected": -1.7623982429504395, + "logps/chosen": -240.7735595703125, + "logps/rejected": -30.00877571105957, + "loss": 4.3049, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.613073825836182, + "rewards/margins": -4.265116214752197, + "rewards/rejected": -1.3479573726654053, + "step": 945 + }, + { + "epoch": 0.15, + "learning_rate": 1.3453499662845584e-05, + "logits/chosen": -2.654348134994507, + "logits/rejected": -2.978424310684204, + "logps/chosen": -297.55340576171875, + "logps/rejected": -281.7110290527344, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.661067247390747, + "rewards/margins": 5.5474700927734375, + "rewards/rejected": -8.208538055419922, + "step": 946 + }, + { + "epoch": 0.15, + "learning_rate": 1.3452766222314435e-05, + "logits/chosen": -3.100759506225586, + "logits/rejected": -2.8279197216033936, + "logps/chosen": -322.8085632324219, + "logps/rejected": -605.8980712890625, + "loss": 1.8105, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.739311695098877, + "rewards/margins": 2.1292612552642822, + "rewards/rejected": -5.86857271194458, + "step": 947 + }, + { + "epoch": 0.15, + "learning_rate": 1.3452032781783287e-05, + "logits/chosen": -3.1211674213409424, + "logits/rejected": -3.159886598587036, + "logps/chosen": -173.56561279296875, + "logps/rejected": -235.90835571289062, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7006548047065735, + "rewards/margins": 5.188518047332764, + "rewards/rejected": -5.889172554016113, + "step": 948 + }, + { + "epoch": 0.15, + "learning_rate": 1.345129934125214e-05, + "logits/chosen": -2.1562516689300537, + "logits/rejected": -2.8826904296875, + "logps/chosen": -86.29751586914062, + "logps/rejected": -305.4505615234375, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1366515159606934, + "rewards/margins": 7.096964359283447, + "rewards/rejected": -8.23361587524414, + "step": 949 + }, + { + "epoch": 0.15, + "learning_rate": 1.3450565900720993e-05, + "logits/chosen": -3.0613670349121094, + "logits/rejected": -3.0609264373779297, + "logps/chosen": -221.6486053466797, + "logps/rejected": -281.923828125, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.992512583732605, + "rewards/margins": 4.341635704040527, + "rewards/rejected": -6.334147930145264, + "step": 950 + }, + { + "epoch": 0.15, + "learning_rate": 1.3449832460189845e-05, + "logits/chosen": -1.604068636894226, + "logits/rejected": -2.9683125019073486, + "logps/chosen": -99.72869873046875, + "logps/rejected": -405.1046142578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.219698905944824, + "rewards/margins": 8.328925132751465, + "rewards/rejected": -10.548624038696289, + "step": 951 + }, + { + "epoch": 0.15, + "learning_rate": 1.3449099019658696e-05, + "logits/chosen": -2.386878252029419, + "logits/rejected": -2.995845317840576, + "logps/chosen": -509.51690673828125, + "logps/rejected": -636.8885498046875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.104588270187378, + "rewards/margins": 8.541017532348633, + "rewards/rejected": -10.64560604095459, + "step": 952 + }, + { + "epoch": 0.15, + "learning_rate": 1.3448365579127548e-05, + "logits/chosen": -2.618037462234497, + "logits/rejected": -2.9369819164276123, + "logps/chosen": -379.50579833984375, + "logps/rejected": -346.592041015625, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.678274631500244, + "rewards/margins": 4.777393341064453, + "rewards/rejected": -7.455667495727539, + "step": 953 + }, + { + "epoch": 0.15, + "learning_rate": 1.34476321385964e-05, + "logits/chosen": -2.048084020614624, + "logits/rejected": -2.942304849624634, + "logps/chosen": -342.8201904296875, + "logps/rejected": -460.8028564453125, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2520084381103516, + "rewards/margins": 5.554142475128174, + "rewards/rejected": -7.806150913238525, + "step": 954 + }, + { + "epoch": 0.15, + "learning_rate": 1.3446898698065252e-05, + "logits/chosen": -2.145498037338257, + "logits/rejected": -3.095024347305298, + "logps/chosen": -112.76997375488281, + "logps/rejected": -264.164794921875, + "loss": 0.0531, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.094167470932007, + "rewards/margins": 3.764730215072632, + "rewards/rejected": -5.8588972091674805, + "step": 955 + }, + { + "epoch": 0.15, + "learning_rate": 1.3446165257534104e-05, + "logits/chosen": -2.763540744781494, + "logits/rejected": -3.070608615875244, + "logps/chosen": -58.89140319824219, + "logps/rejected": -172.40292358398438, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2921440601348877, + "rewards/margins": 4.070094108581543, + "rewards/rejected": -5.36223840713501, + "step": 956 + }, + { + "epoch": 0.15, + "learning_rate": 1.3445431817002956e-05, + "logits/chosen": -3.1899027824401855, + "logits/rejected": -2.362272024154663, + "logps/chosen": -161.40264892578125, + "logps/rejected": -292.37274169921875, + "loss": 2.9788, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.402918338775635, + "rewards/margins": 0.9613869190216064, + "rewards/rejected": -5.36430549621582, + "step": 957 + }, + { + "epoch": 0.15, + "learning_rate": 1.3444698376471808e-05, + "logits/chosen": -2.68978214263916, + "logits/rejected": -2.732036590576172, + "logps/chosen": -475.5437927246094, + "logps/rejected": -583.499267578125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.960181474685669, + "rewards/margins": 8.077669143676758, + "rewards/rejected": -11.037851333618164, + "step": 958 + }, + { + "epoch": 0.15, + "learning_rate": 1.3443964935940661e-05, + "logits/chosen": -3.111633062362671, + "logits/rejected": -3.0165131092071533, + "logps/chosen": -138.92967224121094, + "logps/rejected": -172.2808837890625, + "loss": 0.1684, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.909425973892212, + "rewards/margins": 3.28027606010437, + "rewards/rejected": -5.189702033996582, + "step": 959 + }, + { + "epoch": 0.15, + "learning_rate": 1.3443231495409513e-05, + "logits/chosen": -2.968993902206421, + "logits/rejected": -2.7386696338653564, + "logps/chosen": -289.76800537109375, + "logps/rejected": -415.394287109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3620352745056152, + "rewards/margins": 8.942049980163574, + "rewards/rejected": -11.304084777832031, + "step": 960 + }, + { + "epoch": 0.15, + "learning_rate": 1.3442498054878365e-05, + "logits/chosen": -2.9714598655700684, + "logits/rejected": -1.9595600366592407, + "logps/chosen": -249.90139770507812, + "logps/rejected": -96.10215759277344, + "loss": 3.5465, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.9232587814331055, + "rewards/margins": -2.6753358840942383, + "rewards/rejected": -3.247922420501709, + "step": 961 + }, + { + "epoch": 0.15, + "learning_rate": 1.3441764614347217e-05, + "logits/chosen": -2.224045515060425, + "logits/rejected": -3.0974485874176025, + "logps/chosen": -160.07925415039062, + "logps/rejected": -270.4988098144531, + "loss": 0.8029, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0651626586914062, + "rewards/margins": 0.9148083925247192, + "rewards/rejected": -3.979970932006836, + "step": 962 + }, + { + "epoch": 0.15, + "learning_rate": 1.3441031173816069e-05, + "logits/chosen": -2.719886064529419, + "logits/rejected": -2.90958833694458, + "logps/chosen": -59.00075149536133, + "logps/rejected": -133.5406494140625, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7809613943099976, + "rewards/margins": 4.318364143371582, + "rewards/rejected": -6.099325656890869, + "step": 963 + }, + { + "epoch": 0.15, + "learning_rate": 1.344029773328492e-05, + "logits/chosen": -2.5511906147003174, + "logits/rejected": -1.5249196290969849, + "logps/chosen": -357.5921630859375, + "logps/rejected": -209.80142211914062, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5909774899482727, + "rewards/margins": 6.065243721008301, + "rewards/rejected": -6.656221389770508, + "step": 964 + }, + { + "epoch": 0.15, + "learning_rate": 1.3439564292753773e-05, + "logits/chosen": -0.7796417474746704, + "logits/rejected": -2.4043164253234863, + "logps/chosen": -203.92071533203125, + "logps/rejected": -354.59881591796875, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9157230854034424, + "rewards/margins": 5.290746688842773, + "rewards/rejected": -6.206469535827637, + "step": 965 + }, + { + "epoch": 0.15, + "learning_rate": 1.3438830852222624e-05, + "logits/chosen": -2.500314474105835, + "logits/rejected": -3.185187578201294, + "logps/chosen": -543.7037353515625, + "logps/rejected": -506.404052734375, + "loss": 1.9419, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.10546875, + "rewards/margins": 2.7554047107696533, + "rewards/rejected": -5.860873222351074, + "step": 966 + }, + { + "epoch": 0.15, + "learning_rate": 1.3438097411691476e-05, + "logits/chosen": -2.0813891887664795, + "logits/rejected": -2.8812034130096436, + "logps/chosen": -237.3910369873047, + "logps/rejected": -325.9895324707031, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3864798545837402, + "rewards/margins": 4.964146137237549, + "rewards/rejected": -7.350625991821289, + "step": 967 + }, + { + "epoch": 0.15, + "learning_rate": 1.343736397116033e-05, + "logits/chosen": -2.7839133739471436, + "logits/rejected": -3.1982460021972656, + "logps/chosen": -59.171234130859375, + "logps/rejected": -150.10757446289062, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.26628041267395, + "rewards/margins": 5.158475399017334, + "rewards/rejected": -7.424756050109863, + "step": 968 + }, + { + "epoch": 0.15, + "learning_rate": 1.3436630530629182e-05, + "logits/chosen": -1.262977957725525, + "logits/rejected": -3.082663059234619, + "logps/chosen": -262.33038330078125, + "logps/rejected": -578.2178955078125, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2776306867599487, + "rewards/margins": 8.986071586608887, + "rewards/rejected": -10.263702392578125, + "step": 969 + }, + { + "epoch": 0.15, + "learning_rate": 1.3435897090098035e-05, + "logits/chosen": -2.976912021636963, + "logits/rejected": -2.0476279258728027, + "logps/chosen": -202.76995849609375, + "logps/rejected": -183.4424591064453, + "loss": 0.0732, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2219491004943848, + "rewards/margins": 4.5249762535095215, + "rewards/rejected": -6.746925354003906, + "step": 970 + }, + { + "epoch": 0.15, + "learning_rate": 1.3435163649566887e-05, + "logits/chosen": -1.3116304874420166, + "logits/rejected": -2.856515407562256, + "logps/chosen": -217.59812927246094, + "logps/rejected": -465.6683349609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.547625780105591, + "rewards/margins": 8.057295799255371, + "rewards/rejected": -10.604921340942383, + "step": 971 + }, + { + "epoch": 0.15, + "learning_rate": 1.3434430209035739e-05, + "logits/chosen": -3.0888993740081787, + "logits/rejected": -2.870736598968506, + "logps/chosen": -393.0875549316406, + "logps/rejected": -352.7376708984375, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.226025104522705, + "rewards/margins": 5.846262454986572, + "rewards/rejected": -9.072287559509277, + "step": 972 + }, + { + "epoch": 0.15, + "learning_rate": 1.3433696768504591e-05, + "logits/chosen": -1.3731938600540161, + "logits/rejected": -2.5740466117858887, + "logps/chosen": -59.508785247802734, + "logps/rejected": -281.7380676269531, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.722881555557251, + "rewards/margins": 8.809382438659668, + "rewards/rejected": -10.532264709472656, + "step": 973 + }, + { + "epoch": 0.15, + "learning_rate": 1.3432963327973443e-05, + "logits/chosen": -3.1293883323669434, + "logits/rejected": -2.799170732498169, + "logps/chosen": -209.28097534179688, + "logps/rejected": -176.34817504882812, + "loss": 4.9474, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.507863998413086, + "rewards/margins": 0.25258541107177734, + "rewards/rejected": -6.760449409484863, + "step": 974 + }, + { + "epoch": 0.15, + "learning_rate": 1.3432229887442295e-05, + "logits/chosen": -2.9265270233154297, + "logits/rejected": -2.2779622077941895, + "logps/chosen": -204.88389587402344, + "logps/rejected": -214.89309692382812, + "loss": 2.4771, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.315905809402466, + "rewards/margins": 2.5861635208129883, + "rewards/rejected": -5.902069091796875, + "step": 975 + }, + { + "epoch": 0.15, + "learning_rate": 1.3431496446911147e-05, + "logits/chosen": -1.9513853788375854, + "logits/rejected": -2.8536088466644287, + "logps/chosen": -142.00283813476562, + "logps/rejected": -305.64801025390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9648008346557617, + "rewards/margins": 8.529337882995605, + "rewards/rejected": -11.494138717651367, + "step": 976 + }, + { + "epoch": 0.15, + "learning_rate": 1.343076300638e-05, + "logits/chosen": -2.886969566345215, + "logits/rejected": -1.7234522104263306, + "logps/chosen": -218.06785583496094, + "logps/rejected": -102.70822143554688, + "loss": 4.196, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.586045742034912, + "rewards/margins": -3.9437530040740967, + "rewards/rejected": -3.6422927379608154, + "step": 977 + }, + { + "epoch": 0.15, + "learning_rate": 1.3430029565848852e-05, + "logits/chosen": -2.0735254287719727, + "logits/rejected": -2.969200849533081, + "logps/chosen": -265.4456481933594, + "logps/rejected": -247.4735870361328, + "loss": 3.4612, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.668156147003174, + "rewards/margins": -0.6230947971343994, + "rewards/rejected": -6.045061111450195, + "step": 978 + }, + { + "epoch": 0.15, + "learning_rate": 1.3429296125317704e-05, + "logits/chosen": -1.9722559452056885, + "logits/rejected": -2.8536322116851807, + "logps/chosen": -116.20330047607422, + "logps/rejected": -258.1092224121094, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.46828556060791, + "rewards/margins": 5.309717655181885, + "rewards/rejected": -8.778003692626953, + "step": 979 + }, + { + "epoch": 0.15, + "learning_rate": 1.3428562684786556e-05, + "logits/chosen": -2.4847781658172607, + "logits/rejected": -2.955247163772583, + "logps/chosen": -98.79388427734375, + "logps/rejected": -177.13548278808594, + "loss": 0.0935, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.046215295791626, + "rewards/margins": 3.1119985580444336, + "rewards/rejected": -5.1582136154174805, + "step": 980 + }, + { + "epoch": 0.15, + "learning_rate": 1.3427829244255408e-05, + "logits/chosen": -2.808295965194702, + "logits/rejected": -1.3332587480545044, + "logps/chosen": -626.4384765625, + "logps/rejected": -262.07904052734375, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.968876838684082, + "rewards/margins": 6.513495445251465, + "rewards/rejected": -9.482372283935547, + "step": 981 + }, + { + "epoch": 0.15, + "learning_rate": 1.342709580372426e-05, + "logits/chosen": -2.917942762374878, + "logits/rejected": -1.7207589149475098, + "logps/chosen": -353.9032897949219, + "logps/rejected": -263.823974609375, + "loss": 0.172, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.373059034347534, + "rewards/margins": 4.826189994812012, + "rewards/rejected": -7.199249267578125, + "step": 982 + }, + { + "epoch": 0.15, + "learning_rate": 1.3426362363193111e-05, + "logits/chosen": -2.9674994945526123, + "logits/rejected": -2.0444114208221436, + "logps/chosen": -627.4159545898438, + "logps/rejected": -366.070556640625, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7841553688049316, + "rewards/margins": 4.736436367034912, + "rewards/rejected": -7.520591735839844, + "step": 983 + }, + { + "epoch": 0.15, + "learning_rate": 1.3425628922661963e-05, + "logits/chosen": -2.4033474922180176, + "logits/rejected": -2.82580828666687, + "logps/chosen": -172.11192321777344, + "logps/rejected": -170.50140380859375, + "loss": 2.5604, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.283535480499268, + "rewards/margins": 0.9546966552734375, + "rewards/rejected": -6.238232135772705, + "step": 984 + }, + { + "epoch": 0.15, + "learning_rate": 1.3424895482130815e-05, + "logits/chosen": -2.9456193447113037, + "logits/rejected": -1.331101894378662, + "logps/chosen": -903.284912109375, + "logps/rejected": -322.59136962890625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8772903680801392, + "rewards/margins": 7.608437538146973, + "rewards/rejected": -8.485727310180664, + "step": 985 + }, + { + "epoch": 0.15, + "learning_rate": 1.3424162041599669e-05, + "logits/chosen": -2.960782527923584, + "logits/rejected": -2.8490729331970215, + "logps/chosen": -664.549072265625, + "logps/rejected": -615.5443115234375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9113373160362244, + "rewards/margins": 8.742510795593262, + "rewards/rejected": -9.653848648071289, + "step": 986 + }, + { + "epoch": 0.15, + "learning_rate": 1.342342860106852e-05, + "logits/chosen": -2.8776967525482178, + "logits/rejected": -3.116264820098877, + "logps/chosen": -289.5096130371094, + "logps/rejected": -180.552490234375, + "loss": 4.5666, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.9902663230896, + "rewards/margins": -3.5521445274353027, + "rewards/rejected": -2.438121795654297, + "step": 987 + }, + { + "epoch": 0.15, + "learning_rate": 1.3422695160537373e-05, + "logits/chosen": -2.4515960216522217, + "logits/rejected": -3.0357296466827393, + "logps/chosen": -489.17596435546875, + "logps/rejected": -376.21356201171875, + "loss": 3.1214, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.161281585693359, + "rewards/margins": 1.4516382217407227, + "rewards/rejected": -6.612919807434082, + "step": 988 + }, + { + "epoch": 0.15, + "learning_rate": 1.3421961720006224e-05, + "logits/chosen": -2.5764858722686768, + "logits/rejected": -2.7811038494110107, + "logps/chosen": -377.57501220703125, + "logps/rejected": -257.8905334472656, + "loss": 2.4659, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.704792499542236, + "rewards/margins": 2.0560364723205566, + "rewards/rejected": -6.760828971862793, + "step": 989 + }, + { + "epoch": 0.15, + "learning_rate": 1.3421228279475076e-05, + "logits/chosen": -2.164632558822632, + "logits/rejected": -1.2807132005691528, + "logps/chosen": -869.99609375, + "logps/rejected": -337.51641845703125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6390350461006165, + "rewards/margins": 6.848684310913086, + "rewards/rejected": -7.487719535827637, + "step": 990 + }, + { + "epoch": 0.15, + "learning_rate": 1.3420494838943928e-05, + "logits/chosen": -3.0644333362579346, + "logits/rejected": -2.0347867012023926, + "logps/chosen": -464.28363037109375, + "logps/rejected": -346.35919189453125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4412189722061157, + "rewards/margins": 6.6122260093688965, + "rewards/rejected": -8.053444862365723, + "step": 991 + }, + { + "epoch": 0.15, + "learning_rate": 1.341976139841278e-05, + "logits/chosen": -2.4884116649627686, + "logits/rejected": -3.0658297538757324, + "logps/chosen": -39.259464263916016, + "logps/rejected": -173.06503295898438, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.651953935623169, + "rewards/margins": 4.855432987213135, + "rewards/rejected": -6.507387161254883, + "step": 992 + }, + { + "epoch": 0.15, + "learning_rate": 1.3419027957881632e-05, + "logits/chosen": -3.1396539211273193, + "logits/rejected": -1.853620171546936, + "logps/chosen": -277.46917724609375, + "logps/rejected": -135.5343017578125, + "loss": 3.9835, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.513834476470947, + "rewards/margins": -3.8986377716064453, + "rewards/rejected": -2.615196704864502, + "step": 993 + }, + { + "epoch": 0.15, + "learning_rate": 1.3418294517350486e-05, + "logits/chosen": -2.9029123783111572, + "logits/rejected": -2.5388829708099365, + "logps/chosen": -194.40403747558594, + "logps/rejected": -289.03375244140625, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.31290864944458, + "rewards/margins": 4.915584087371826, + "rewards/rejected": -9.228492736816406, + "step": 994 + }, + { + "epoch": 0.15, + "learning_rate": 1.3417561076819337e-05, + "logits/chosen": -1.2585221529006958, + "logits/rejected": -2.9478752613067627, + "logps/chosen": -151.91200256347656, + "logps/rejected": -473.1123046875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3235828876495361, + "rewards/margins": 6.4897871017456055, + "rewards/rejected": -7.8133697509765625, + "step": 995 + }, + { + "epoch": 0.15, + "learning_rate": 1.341682763628819e-05, + "logits/chosen": -2.8672521114349365, + "logits/rejected": -1.4539953470230103, + "logps/chosen": -365.97833251953125, + "logps/rejected": -225.50582885742188, + "loss": 4.0166, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.645706653594971, + "rewards/margins": -0.5144963264465332, + "rewards/rejected": -5.1312103271484375, + "step": 996 + }, + { + "epoch": 0.16, + "learning_rate": 1.3416094195757041e-05, + "logits/chosen": -2.6429226398468018, + "logits/rejected": -3.0702292919158936, + "logps/chosen": -142.1456298828125, + "logps/rejected": -141.9205322265625, + "loss": 2.1528, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.190125465393066, + "rewards/margins": -0.29360413551330566, + "rewards/rejected": -4.89652156829834, + "step": 997 + }, + { + "epoch": 0.16, + "learning_rate": 1.3415360755225893e-05, + "logits/chosen": -2.747176170349121, + "logits/rejected": -3.0797982215881348, + "logps/chosen": -55.557342529296875, + "logps/rejected": -293.76226806640625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.588905692100525, + "rewards/margins": 7.569928169250488, + "rewards/rejected": -9.158834457397461, + "step": 998 + }, + { + "epoch": 0.16, + "learning_rate": 1.3414627314694745e-05, + "logits/chosen": -2.235434055328369, + "logits/rejected": -3.1083340644836426, + "logps/chosen": -220.71681213378906, + "logps/rejected": -263.9876708984375, + "loss": 2.6947, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.2798943519592285, + "rewards/margins": 0.28757810592651367, + "rewards/rejected": -5.567472457885742, + "step": 999 + }, + { + "epoch": 0.16, + "learning_rate": 1.3413893874163597e-05, + "logits/chosen": -2.1784863471984863, + "logits/rejected": -2.8192965984344482, + "logps/chosen": -186.1019287109375, + "logps/rejected": -180.61964416503906, + "loss": 2.7297, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.86777663230896, + "rewards/margins": 0.13480591773986816, + "rewards/rejected": -4.002582550048828, + "step": 1000 + }, + { + "epoch": 0.16, + "learning_rate": 1.3413160433632449e-05, + "logits/chosen": -2.171968936920166, + "logits/rejected": -3.005101442337036, + "logps/chosen": -434.8150634765625, + "logps/rejected": -492.0318298339844, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.43780517578125, + "rewards/margins": 5.509051322937012, + "rewards/rejected": -7.946856498718262, + "step": 1001 + }, + { + "epoch": 0.16, + "learning_rate": 1.3412426993101302e-05, + "logits/chosen": -2.582646369934082, + "logits/rejected": -2.9851319789886475, + "logps/chosen": -468.5435791015625, + "logps/rejected": -584.0890502929688, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.367560625076294, + "rewards/margins": 8.139633178710938, + "rewards/rejected": -9.507193565368652, + "step": 1002 + }, + { + "epoch": 0.16, + "learning_rate": 1.3411693552570154e-05, + "logits/chosen": -3.166443347930908, + "logits/rejected": -1.7153563499450684, + "logps/chosen": -754.4130249023438, + "logps/rejected": -305.8725891113281, + "loss": 0.4595, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1004669666290283, + "rewards/margins": 1.7280328273773193, + "rewards/rejected": -4.828499794006348, + "step": 1003 + }, + { + "epoch": 0.16, + "learning_rate": 1.3410960112039008e-05, + "logits/chosen": -2.3310976028442383, + "logits/rejected": -2.706557512283325, + "logps/chosen": -95.81278991699219, + "logps/rejected": -154.5025177001953, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6463110446929932, + "rewards/margins": 4.401035308837891, + "rewards/rejected": -6.047346115112305, + "step": 1004 + }, + { + "epoch": 0.16, + "learning_rate": 1.341022667150786e-05, + "logits/chosen": -2.4638071060180664, + "logits/rejected": -3.010420322418213, + "logps/chosen": -228.8708953857422, + "logps/rejected": -250.53787231445312, + "loss": 2.3503, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8051254749298096, + "rewards/margins": 1.8886616230010986, + "rewards/rejected": -5.693787574768066, + "step": 1005 + }, + { + "epoch": 0.16, + "learning_rate": 1.3409493230976711e-05, + "logits/chosen": -2.810527801513672, + "logits/rejected": -3.28987717628479, + "logps/chosen": -182.96774291992188, + "logps/rejected": -318.11102294921875, + "loss": 0.1216, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.772852897644043, + "rewards/margins": 2.058189868927002, + "rewards/rejected": -4.831042766571045, + "step": 1006 + }, + { + "epoch": 0.16, + "learning_rate": 1.3408759790445563e-05, + "logits/chosen": -2.9034738540649414, + "logits/rejected": -3.080096483230591, + "logps/chosen": -156.50079345703125, + "logps/rejected": -186.18499755859375, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1922478675842285, + "rewards/margins": 3.7591261863708496, + "rewards/rejected": -7.951374053955078, + "step": 1007 + }, + { + "epoch": 0.16, + "learning_rate": 1.3408026349914415e-05, + "logits/chosen": -2.666947603225708, + "logits/rejected": -3.0639312267303467, + "logps/chosen": -97.76860046386719, + "logps/rejected": -219.92068481445312, + "loss": 0.0483, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9848090410232544, + "rewards/margins": 4.762668132781982, + "rewards/rejected": -6.7474775314331055, + "step": 1008 + }, + { + "epoch": 0.16, + "learning_rate": 1.3407292909383267e-05, + "logits/chosen": -3.118752956390381, + "logits/rejected": -2.26457142829895, + "logps/chosen": -267.61041259765625, + "logps/rejected": -457.7978515625, + "loss": 2.3773, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3175530433654785, + "rewards/margins": 0.3870353698730469, + "rewards/rejected": -3.7045884132385254, + "step": 1009 + }, + { + "epoch": 0.16, + "learning_rate": 1.3406559468852119e-05, + "logits/chosen": -2.9785549640655518, + "logits/rejected": -2.917733907699585, + "logps/chosen": -594.847412109375, + "logps/rejected": -593.9771118164062, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33165591955184937, + "rewards/margins": 7.254732131958008, + "rewards/rejected": -7.586387634277344, + "step": 1010 + }, + { + "epoch": 0.16, + "learning_rate": 1.3405826028320971e-05, + "logits/chosen": -2.9067542552948, + "logits/rejected": -2.7087721824645996, + "logps/chosen": -297.7349548339844, + "logps/rejected": -367.25616455078125, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2826294898986816, + "rewards/margins": 5.664883613586426, + "rewards/rejected": -7.947512626647949, + "step": 1011 + }, + { + "epoch": 0.16, + "learning_rate": 1.3405092587789824e-05, + "logits/chosen": -2.896745204925537, + "logits/rejected": -3.0766167640686035, + "logps/chosen": -39.45521545410156, + "logps/rejected": -173.41552734375, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5827255249023438, + "rewards/margins": 5.657005786895752, + "rewards/rejected": -7.2397308349609375, + "step": 1012 + }, + { + "epoch": 0.16, + "learning_rate": 1.3404359147258676e-05, + "logits/chosen": -3.1419503688812256, + "logits/rejected": -2.9536731243133545, + "logps/chosen": -178.00022888183594, + "logps/rejected": -536.4361572265625, + "loss": 2.0084, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.108758449554443, + "rewards/margins": -1.0226802825927734, + "rewards/rejected": -3.08607816696167, + "step": 1013 + }, + { + "epoch": 0.16, + "learning_rate": 1.3403625706727528e-05, + "logits/chosen": -2.7964978218078613, + "logits/rejected": -2.9589803218841553, + "logps/chosen": -137.61221313476562, + "logps/rejected": -360.16168212890625, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6420825719833374, + "rewards/margins": 6.008534908294678, + "rewards/rejected": -6.650617599487305, + "step": 1014 + }, + { + "epoch": 0.16, + "learning_rate": 1.340289226619638e-05, + "logits/chosen": -1.8009397983551025, + "logits/rejected": -2.7565605640411377, + "logps/chosen": -205.02366638183594, + "logps/rejected": -338.9035949707031, + "loss": 1.4765, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.8210129737854, + "rewards/margins": 2.91701602935791, + "rewards/rejected": -7.738029479980469, + "step": 1015 + }, + { + "epoch": 0.16, + "learning_rate": 1.3402158825665232e-05, + "logits/chosen": -1.8516877889633179, + "logits/rejected": -2.9147276878356934, + "logps/chosen": -62.68084716796875, + "logps/rejected": -268.536865234375, + "loss": 0.0308, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6904404163360596, + "rewards/margins": 3.4813523292541504, + "rewards/rejected": -6.171792984008789, + "step": 1016 + }, + { + "epoch": 0.16, + "learning_rate": 1.3401425385134084e-05, + "logits/chosen": -2.9254138469696045, + "logits/rejected": -2.247177839279175, + "logps/chosen": -322.2373046875, + "logps/rejected": -285.83929443359375, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5434539318084717, + "rewards/margins": 4.694679260253906, + "rewards/rejected": -6.238133430480957, + "step": 1017 + }, + { + "epoch": 0.16, + "learning_rate": 1.3400691944602936e-05, + "logits/chosen": -3.219374418258667, + "logits/rejected": -2.5621345043182373, + "logps/chosen": -357.658203125, + "logps/rejected": -267.7857666015625, + "loss": 0.4074, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3727431297302246, + "rewards/margins": 3.489187240600586, + "rewards/rejected": -6.8619303703308105, + "step": 1018 + }, + { + "epoch": 0.16, + "learning_rate": 1.3399958504071788e-05, + "logits/chosen": -1.3701783418655396, + "logits/rejected": -2.8303463459014893, + "logps/chosen": -315.10272216796875, + "logps/rejected": -377.36187744140625, + "loss": 2.8511, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.2737531661987305, + "rewards/margins": 0.7720687389373779, + "rewards/rejected": -8.045822143554688, + "step": 1019 + }, + { + "epoch": 0.16, + "learning_rate": 1.339922506354064e-05, + "logits/chosen": -3.159438133239746, + "logits/rejected": -2.2181966304779053, + "logps/chosen": -487.8572082519531, + "logps/rejected": -238.25343322753906, + "loss": 1.9431, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2628142833709717, + "rewards/margins": 1.6765426397323608, + "rewards/rejected": -4.939356803894043, + "step": 1020 + }, + { + "epoch": 0.16, + "learning_rate": 1.3398491623009493e-05, + "logits/chosen": -2.758598566055298, + "logits/rejected": -3.132688522338867, + "logps/chosen": -86.44502258300781, + "logps/rejected": -287.40338134765625, + "loss": 0.1129, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6054461002349854, + "rewards/margins": 3.1358132362365723, + "rewards/rejected": -5.741259574890137, + "step": 1021 + }, + { + "epoch": 0.16, + "learning_rate": 1.3397758182478345e-05, + "logits/chosen": -2.7554378509521484, + "logits/rejected": -3.0385468006134033, + "logps/chosen": -610.2958984375, + "logps/rejected": -344.32025146484375, + "loss": 2.5716, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6826720237731934, + "rewards/margins": 1.0860998630523682, + "rewards/rejected": -4.768771648406982, + "step": 1022 + }, + { + "epoch": 0.16, + "learning_rate": 1.3397024741947197e-05, + "logits/chosen": -2.9521474838256836, + "logits/rejected": -3.098078727722168, + "logps/chosen": -132.7664794921875, + "logps/rejected": -256.41937255859375, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3647552728652954, + "rewards/margins": 4.559924125671387, + "rewards/rejected": -5.924679279327393, + "step": 1023 + }, + { + "epoch": 0.16, + "learning_rate": 1.3396291301416049e-05, + "logits/chosen": -2.4808714389801025, + "logits/rejected": -2.0160093307495117, + "logps/chosen": -462.80889892578125, + "logps/rejected": -394.2989196777344, + "loss": 2.1339, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.149301052093506, + "rewards/margins": -0.6627914905548096, + "rewards/rejected": -3.4865097999572754, + "step": 1024 + }, + { + "epoch": 0.16, + "learning_rate": 1.33955578608849e-05, + "logits/chosen": -1.0125747919082642, + "logits/rejected": -3.085667848587036, + "logps/chosen": -60.32366943359375, + "logps/rejected": -449.30712890625, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.026623010635376, + "rewards/margins": 5.964064121246338, + "rewards/rejected": -7.990686893463135, + "step": 1025 + }, + { + "epoch": 0.16, + "learning_rate": 1.3394824420353752e-05, + "logits/chosen": -1.7926894426345825, + "logits/rejected": -2.8970985412597656, + "logps/chosen": -189.37786865234375, + "logps/rejected": -191.58590698242188, + "loss": 2.7445, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.261348724365234, + "rewards/margins": -1.0927720069885254, + "rewards/rejected": -5.168576717376709, + "step": 1026 + }, + { + "epoch": 0.16, + "learning_rate": 1.3394090979822604e-05, + "logits/chosen": -2.3772411346435547, + "logits/rejected": -3.208871603012085, + "logps/chosen": -181.84901428222656, + "logps/rejected": -308.6582336425781, + "loss": 1.4884, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.003770589828491, + "rewards/margins": 0.9116239547729492, + "rewards/rejected": -3.9153945446014404, + "step": 1027 + }, + { + "epoch": 0.16, + "learning_rate": 1.3393357539291456e-05, + "logits/chosen": -2.1331419944763184, + "logits/rejected": -2.9736249446868896, + "logps/chosen": -92.14118194580078, + "logps/rejected": -247.63467407226562, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9069606065750122, + "rewards/margins": 5.720290184020996, + "rewards/rejected": -7.627250671386719, + "step": 1028 + }, + { + "epoch": 0.16, + "learning_rate": 1.3392624098760308e-05, + "logits/chosen": -2.350653886795044, + "logits/rejected": -2.882716417312622, + "logps/chosen": -101.76345825195312, + "logps/rejected": -167.3673553466797, + "loss": 0.2361, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.771293878555298, + "rewards/margins": 3.0285227298736572, + "rewards/rejected": -6.799816608428955, + "step": 1029 + }, + { + "epoch": 0.16, + "learning_rate": 1.3391890658229162e-05, + "logits/chosen": -3.116262197494507, + "logits/rejected": -2.1730313301086426, + "logps/chosen": -316.7887878417969, + "logps/rejected": -136.4501953125, + "loss": 0.0394, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4975234270095825, + "rewards/margins": 4.526785373687744, + "rewards/rejected": -6.024309158325195, + "step": 1030 + }, + { + "epoch": 0.16, + "learning_rate": 1.3391157217698014e-05, + "logits/chosen": -1.5390033721923828, + "logits/rejected": -2.9609220027923584, + "logps/chosen": -143.38580322265625, + "logps/rejected": -183.4684600830078, + "loss": 0.1276, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4199681282043457, + "rewards/margins": 2.4703259468078613, + "rewards/rejected": -4.890294075012207, + "step": 1031 + }, + { + "epoch": 0.16, + "learning_rate": 1.3390423777166865e-05, + "logits/chosen": -2.918602705001831, + "logits/rejected": -3.276233196258545, + "logps/chosen": -114.72848510742188, + "logps/rejected": -264.47125244140625, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.197063446044922, + "rewards/margins": 5.389032363891602, + "rewards/rejected": -7.586095809936523, + "step": 1032 + }, + { + "epoch": 0.16, + "learning_rate": 1.3389690336635717e-05, + "logits/chosen": -3.1578564643859863, + "logits/rejected": -2.37811541557312, + "logps/chosen": -338.7564392089844, + "logps/rejected": -429.658203125, + "loss": 1.5474, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.0401930809021, + "rewards/margins": 1.119805932044983, + "rewards/rejected": -5.159998893737793, + "step": 1033 + }, + { + "epoch": 0.16, + "learning_rate": 1.3388956896104569e-05, + "logits/chosen": -1.1730530261993408, + "logits/rejected": -2.8364546298980713, + "logps/chosen": -97.77935791015625, + "logps/rejected": -435.0847473144531, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6966125965118408, + "rewards/margins": 6.470241069793701, + "rewards/rejected": -8.166853904724121, + "step": 1034 + }, + { + "epoch": 0.16, + "learning_rate": 1.3388223455573421e-05, + "logits/chosen": -2.931770086288452, + "logits/rejected": -3.2807865142822266, + "logps/chosen": -102.40400695800781, + "logps/rejected": -254.93023681640625, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.772706985473633, + "rewards/margins": 3.273494243621826, + "rewards/rejected": -6.046201229095459, + "step": 1035 + }, + { + "epoch": 0.16, + "learning_rate": 1.3387490015042275e-05, + "logits/chosen": -2.0326972007751465, + "logits/rejected": -2.977858781814575, + "logps/chosen": -129.60708618164062, + "logps/rejected": -268.60882568359375, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.65205454826355, + "rewards/margins": 5.399960041046143, + "rewards/rejected": -8.052014350891113, + "step": 1036 + }, + { + "epoch": 0.16, + "learning_rate": 1.3386756574511126e-05, + "logits/chosen": -3.097930669784546, + "logits/rejected": -3.271625518798828, + "logps/chosen": -183.80960083007812, + "logps/rejected": -247.3042755126953, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2592453956604004, + "rewards/margins": 5.3848066329956055, + "rewards/rejected": -6.644052505493164, + "step": 1037 + }, + { + "epoch": 0.16, + "learning_rate": 1.3386023133979978e-05, + "logits/chosen": -2.6427102088928223, + "logits/rejected": -3.0941362380981445, + "logps/chosen": -534.5371704101562, + "logps/rejected": -462.8670959472656, + "loss": 0.0567, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6934494972229004, + "rewards/margins": 2.913837432861328, + "rewards/rejected": -6.6072869300842285, + "step": 1038 + }, + { + "epoch": 0.16, + "learning_rate": 1.3385289693448832e-05, + "logits/chosen": -2.943603038787842, + "logits/rejected": -2.2784860134124756, + "logps/chosen": -236.6003875732422, + "logps/rejected": -90.64324188232422, + "loss": 3.7619, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.872286796569824, + "rewards/margins": -2.8826351165771484, + "rewards/rejected": -2.989652156829834, + "step": 1039 + }, + { + "epoch": 0.16, + "learning_rate": 1.3384556252917684e-05, + "logits/chosen": -2.2974588871002197, + "logits/rejected": -3.045393943786621, + "logps/chosen": -49.17144775390625, + "logps/rejected": -179.2117156982422, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8815113306045532, + "rewards/margins": 4.988719940185547, + "rewards/rejected": -6.8702311515808105, + "step": 1040 + }, + { + "epoch": 0.16, + "learning_rate": 1.3383822812386536e-05, + "logits/chosen": -3.074847459793091, + "logits/rejected": -0.6599029898643494, + "logps/chosen": -431.0475769042969, + "logps/rejected": -117.59259033203125, + "loss": 4.4164, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.487776756286621, + "rewards/margins": -4.404077529907227, + "rewards/rejected": -4.083698749542236, + "step": 1041 + }, + { + "epoch": 0.16, + "learning_rate": 1.3383089371855388e-05, + "logits/chosen": -0.9919411540031433, + "logits/rejected": -2.2499561309814453, + "logps/chosen": -210.03912353515625, + "logps/rejected": -457.3226623535156, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8081146478652954, + "rewards/margins": 4.9026689529418945, + "rewards/rejected": -6.710783958435059, + "step": 1042 + }, + { + "epoch": 0.16, + "learning_rate": 1.338235593132424e-05, + "logits/chosen": -3.0610315799713135, + "logits/rejected": -2.740281105041504, + "logps/chosen": -374.4788513183594, + "logps/rejected": -266.00885009765625, + "loss": 2.1696, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.437854051589966, + "rewards/margins": 1.39571213722229, + "rewards/rejected": -4.833566188812256, + "step": 1043 + }, + { + "epoch": 0.16, + "learning_rate": 1.3381622490793091e-05, + "logits/chosen": -1.5559463500976562, + "logits/rejected": -3.0483498573303223, + "logps/chosen": -99.86848449707031, + "logps/rejected": -529.955322265625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43158647418022156, + "rewards/margins": 7.678915023803711, + "rewards/rejected": -8.110501289367676, + "step": 1044 + }, + { + "epoch": 0.16, + "learning_rate": 1.3380889050261943e-05, + "logits/chosen": -3.0352025032043457, + "logits/rejected": -2.2169156074523926, + "logps/chosen": -255.26951599121094, + "logps/rejected": -253.26800537109375, + "loss": 3.168, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.4025983810424805, + "rewards/margins": -0.8934192657470703, + "rewards/rejected": -5.50917911529541, + "step": 1045 + }, + { + "epoch": 0.16, + "learning_rate": 1.3380155609730795e-05, + "logits/chosen": -2.3938465118408203, + "logits/rejected": -3.066650152206421, + "logps/chosen": -272.72039794921875, + "logps/rejected": -257.826171875, + "loss": 0.1171, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.885833740234375, + "rewards/margins": 3.7356691360473633, + "rewards/rejected": -5.621502876281738, + "step": 1046 + }, + { + "epoch": 0.16, + "learning_rate": 1.3379422169199647e-05, + "logits/chosen": -1.8886709213256836, + "logits/rejected": -3.2221763134002686, + "logps/chosen": -170.79977416992188, + "logps/rejected": -470.6864318847656, + "loss": 0.5367, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.388232946395874, + "rewards/margins": 3.546926259994507, + "rewards/rejected": -5.935159206390381, + "step": 1047 + }, + { + "epoch": 0.16, + "learning_rate": 1.33786887286685e-05, + "logits/chosen": -2.0090994834899902, + "logits/rejected": -3.178983688354492, + "logps/chosen": -199.66986083984375, + "logps/rejected": -257.14666748046875, + "loss": 2.7289, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.386436939239502, + "rewards/margins": -0.948123574256897, + "rewards/rejected": -4.4383134841918945, + "step": 1048 + }, + { + "epoch": 0.16, + "learning_rate": 1.3377955288137352e-05, + "logits/chosen": -1.3437741994857788, + "logits/rejected": -2.968451976776123, + "logps/chosen": -107.48042297363281, + "logps/rejected": -272.04119873046875, + "loss": 0.1425, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1692676544189453, + "rewards/margins": 2.5062832832336426, + "rewards/rejected": -3.675550937652588, + "step": 1049 + }, + { + "epoch": 0.16, + "learning_rate": 1.3377221847606204e-05, + "logits/chosen": -2.868190288543701, + "logits/rejected": -2.3309898376464844, + "logps/chosen": -499.86199951171875, + "logps/rejected": -379.2830810546875, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0166008472442627, + "rewards/margins": 4.874518394470215, + "rewards/rejected": -5.891119480133057, + "step": 1050 + }, + { + "epoch": 0.16, + "learning_rate": 1.3376488407075056e-05, + "logits/chosen": -2.9873998165130615, + "logits/rejected": -1.9948793649673462, + "logps/chosen": -158.2305450439453, + "logps/rejected": -167.35720825195312, + "loss": 1.8113, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.313268184661865, + "rewards/margins": 1.6508315801620483, + "rewards/rejected": -5.964099884033203, + "step": 1051 + }, + { + "epoch": 0.16, + "learning_rate": 1.3375754966543908e-05, + "logits/chosen": -2.7214481830596924, + "logits/rejected": -3.1524875164031982, + "logps/chosen": -82.85350036621094, + "logps/rejected": -197.768798828125, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4667906761169434, + "rewards/margins": 4.337545871734619, + "rewards/rejected": -7.8043365478515625, + "step": 1052 + }, + { + "epoch": 0.16, + "learning_rate": 1.337502152601276e-05, + "logits/chosen": -2.2474567890167236, + "logits/rejected": -2.9561009407043457, + "logps/chosen": -100.01016998291016, + "logps/rejected": -264.3147277832031, + "loss": 0.0339, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1954102516174316, + "rewards/margins": 3.3857741355895996, + "rewards/rejected": -5.581184387207031, + "step": 1053 + }, + { + "epoch": 0.16, + "learning_rate": 1.3374288085481612e-05, + "logits/chosen": -2.5247766971588135, + "logits/rejected": -3.113938093185425, + "logps/chosen": -90.4134521484375, + "logps/rejected": -252.9073028564453, + "loss": 0.1723, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6541903018951416, + "rewards/margins": 3.72628116607666, + "rewards/rejected": -6.380471229553223, + "step": 1054 + }, + { + "epoch": 0.16, + "learning_rate": 1.3373554644950464e-05, + "logits/chosen": -2.7219064235687256, + "logits/rejected": -3.1547861099243164, + "logps/chosen": -102.45760345458984, + "logps/rejected": -375.20379638671875, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8873398303985596, + "rewards/margins": 4.563820838928223, + "rewards/rejected": -6.451160430908203, + "step": 1055 + }, + { + "epoch": 0.16, + "learning_rate": 1.3372821204419316e-05, + "logits/chosen": -3.1801490783691406, + "logits/rejected": -3.053363561630249, + "logps/chosen": -159.80319213867188, + "logps/rejected": -215.7152099609375, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0061888694763184, + "rewards/margins": 4.664425849914551, + "rewards/rejected": -6.670614719390869, + "step": 1056 + }, + { + "epoch": 0.16, + "learning_rate": 1.3372087763888169e-05, + "logits/chosen": -2.6136529445648193, + "logits/rejected": -2.854264497756958, + "logps/chosen": -63.23332214355469, + "logps/rejected": -260.24249267578125, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8503177165985107, + "rewards/margins": 5.08802604675293, + "rewards/rejected": -6.9383440017700195, + "step": 1057 + }, + { + "epoch": 0.16, + "learning_rate": 1.3371354323357021e-05, + "logits/chosen": -1.693810224533081, + "logits/rejected": -2.652498960494995, + "logps/chosen": -295.9326477050781, + "logps/rejected": -612.9661865234375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.631822109222412, + "rewards/margins": 5.908036708831787, + "rewards/rejected": -7.539858818054199, + "step": 1058 + }, + { + "epoch": 0.16, + "learning_rate": 1.3370620882825873e-05, + "logits/chosen": -2.4843204021453857, + "logits/rejected": -3.1978795528411865, + "logps/chosen": -44.76003646850586, + "logps/rejected": -247.58773803710938, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2574524879455566, + "rewards/margins": 6.570855617523193, + "rewards/rejected": -8.82830810546875, + "step": 1059 + }, + { + "epoch": 0.16, + "learning_rate": 1.3369887442294725e-05, + "logits/chosen": -2.5353517532348633, + "logits/rejected": -2.8408589363098145, + "logps/chosen": -320.4774169921875, + "logps/rejected": -348.81951904296875, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.392613172531128, + "rewards/margins": 4.590122222900391, + "rewards/rejected": -5.982735633850098, + "step": 1060 + }, + { + "epoch": 0.17, + "learning_rate": 1.3369154001763577e-05, + "logits/chosen": -1.8414429426193237, + "logits/rejected": -2.7876412868499756, + "logps/chosen": -281.3769836425781, + "logps/rejected": -453.5696716308594, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3533257246017456, + "rewards/margins": 4.929767608642578, + "rewards/rejected": -6.283093452453613, + "step": 1061 + }, + { + "epoch": 0.17, + "learning_rate": 1.3368420561232429e-05, + "logits/chosen": -2.933856248855591, + "logits/rejected": -2.242361545562744, + "logps/chosen": -391.97100830078125, + "logps/rejected": -350.0135803222656, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.195559024810791, + "rewards/margins": 6.307277679443359, + "rewards/rejected": -8.502836227416992, + "step": 1062 + }, + { + "epoch": 0.17, + "learning_rate": 1.336768712070128e-05, + "logits/chosen": -1.4224048852920532, + "logits/rejected": -2.939053535461426, + "logps/chosen": -92.79244995117188, + "logps/rejected": -354.9570617675781, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6841578483581543, + "rewards/margins": 3.9161882400512695, + "rewards/rejected": -7.600346565246582, + "step": 1063 + }, + { + "epoch": 0.17, + "learning_rate": 1.3366953680170132e-05, + "logits/chosen": -2.5819625854492188, + "logits/rejected": -2.7166450023651123, + "logps/chosen": -187.32374572753906, + "logps/rejected": -275.4146423339844, + "loss": 3.3593, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.963430404663086, + "rewards/margins": 0.6562013626098633, + "rewards/rejected": -6.619631767272949, + "step": 1064 + }, + { + "epoch": 0.17, + "learning_rate": 1.3366220239638984e-05, + "logits/chosen": -2.957258462905884, + "logits/rejected": -2.1838104724884033, + "logps/chosen": -375.20416259765625, + "logps/rejected": -245.70790100097656, + "loss": 3.1492, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.077398300170898, + "rewards/margins": -0.014678955078125, + "rewards/rejected": -4.062718868255615, + "step": 1065 + }, + { + "epoch": 0.17, + "learning_rate": 1.3365486799107838e-05, + "logits/chosen": -3.1363954544067383, + "logits/rejected": -2.8654489517211914, + "logps/chosen": -510.7588806152344, + "logps/rejected": -411.0018005371094, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5875133872032166, + "rewards/margins": 6.996408462524414, + "rewards/rejected": -7.583921909332275, + "step": 1066 + }, + { + "epoch": 0.17, + "learning_rate": 1.336475335857669e-05, + "logits/chosen": -3.087812662124634, + "logits/rejected": -3.050734043121338, + "logps/chosen": -96.21919250488281, + "logps/rejected": -207.87057495117188, + "loss": 0.6156, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.645904064178467, + "rewards/margins": 0.22200453281402588, + "rewards/rejected": -3.867908477783203, + "step": 1067 + }, + { + "epoch": 0.17, + "learning_rate": 1.3364019918045541e-05, + "logits/chosen": -1.279405117034912, + "logits/rejected": -2.934006690979004, + "logps/chosen": -92.88665771484375, + "logps/rejected": -341.22216796875, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.694145679473877, + "rewards/margins": 5.051743507385254, + "rewards/rejected": -7.745889663696289, + "step": 1068 + }, + { + "epoch": 0.17, + "learning_rate": 1.3363286477514393e-05, + "logits/chosen": -2.6829030513763428, + "logits/rejected": -3.258314609527588, + "logps/chosen": -284.82415771484375, + "logps/rejected": -316.3948974609375, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7856531143188477, + "rewards/margins": 4.399155139923096, + "rewards/rejected": -6.184808731079102, + "step": 1069 + }, + { + "epoch": 0.17, + "learning_rate": 1.3362553036983247e-05, + "logits/chosen": -1.8995431661605835, + "logits/rejected": -2.6996653079986572, + "logps/chosen": -359.72564697265625, + "logps/rejected": -684.68017578125, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6535812616348267, + "rewards/margins": 4.32431697845459, + "rewards/rejected": -5.977897644042969, + "step": 1070 + }, + { + "epoch": 0.17, + "learning_rate": 1.3361819596452099e-05, + "logits/chosen": -2.8229868412017822, + "logits/rejected": -3.111919641494751, + "logps/chosen": -158.35182189941406, + "logps/rejected": -151.5294952392578, + "loss": 1.3129, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2781200408935547, + "rewards/margins": -0.19622313976287842, + "rewards/rejected": -3.081897020339966, + "step": 1071 + }, + { + "epoch": 0.17, + "learning_rate": 1.336108615592095e-05, + "logits/chosen": -2.7351014614105225, + "logits/rejected": -2.913400173187256, + "logps/chosen": -62.27861022949219, + "logps/rejected": -287.82073974609375, + "loss": 0.0691, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.159545660018921, + "rewards/margins": 6.606349468231201, + "rewards/rejected": -8.765894889831543, + "step": 1072 + }, + { + "epoch": 0.17, + "learning_rate": 1.3360352715389803e-05, + "logits/chosen": -3.1203999519348145, + "logits/rejected": -3.197687864303589, + "logps/chosen": -149.44541931152344, + "logps/rejected": -246.38206481933594, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.084019899368286, + "rewards/margins": 5.916457176208496, + "rewards/rejected": -9.000476837158203, + "step": 1073 + }, + { + "epoch": 0.17, + "learning_rate": 1.3359619274858654e-05, + "logits/chosen": -2.918046474456787, + "logits/rejected": -2.1972100734710693, + "logps/chosen": -303.59613037109375, + "logps/rejected": -179.61721801757812, + "loss": 3.0027, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.698740482330322, + "rewards/margins": 0.7432551383972168, + "rewards/rejected": -6.441995620727539, + "step": 1074 + }, + { + "epoch": 0.17, + "learning_rate": 1.3358885834327508e-05, + "logits/chosen": -2.4249281883239746, + "logits/rejected": -2.8154661655426025, + "logps/chosen": -57.32133102416992, + "logps/rejected": -190.2915496826172, + "loss": 0.0552, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.048684597015381, + "rewards/margins": 4.6286725997924805, + "rewards/rejected": -7.6773576736450195, + "step": 1075 + }, + { + "epoch": 0.17, + "learning_rate": 1.335815239379636e-05, + "logits/chosen": -2.6625828742980957, + "logits/rejected": -2.5809600353240967, + "logps/chosen": -217.38406372070312, + "logps/rejected": -380.2894287109375, + "loss": 0.0352, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8172024488449097, + "rewards/margins": 4.951900482177734, + "rewards/rejected": -6.769103050231934, + "step": 1076 + }, + { + "epoch": 0.17, + "learning_rate": 1.3357418953265212e-05, + "logits/chosen": -2.5675506591796875, + "logits/rejected": -3.040334463119507, + "logps/chosen": -72.7222900390625, + "logps/rejected": -210.39126586914062, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7669240236282349, + "rewards/margins": 5.767064571380615, + "rewards/rejected": -6.533988952636719, + "step": 1077 + }, + { + "epoch": 0.17, + "learning_rate": 1.3356685512734064e-05, + "logits/chosen": -2.7557573318481445, + "logits/rejected": -3.028982639312744, + "logps/chosen": -63.23306655883789, + "logps/rejected": -179.99313354492188, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.511275291442871, + "rewards/margins": 4.743412017822266, + "rewards/rejected": -7.254687309265137, + "step": 1078 + }, + { + "epoch": 0.17, + "learning_rate": 1.3355952072202916e-05, + "logits/chosen": -1.6785746812820435, + "logits/rejected": -2.768620729446411, + "logps/chosen": -162.90206909179688, + "logps/rejected": -407.54852294921875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.700930118560791, + "rewards/margins": 8.087690353393555, + "rewards/rejected": -10.788619995117188, + "step": 1079 + }, + { + "epoch": 0.17, + "learning_rate": 1.3355218631671767e-05, + "logits/chosen": -1.8478937149047852, + "logits/rejected": -2.963545083999634, + "logps/chosen": -162.39535522460938, + "logps/rejected": -566.8896484375, + "loss": 2.5636, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.464856147766113, + "rewards/margins": 1.798041820526123, + "rewards/rejected": -6.2628984451293945, + "step": 1080 + }, + { + "epoch": 0.17, + "learning_rate": 1.335448519114062e-05, + "logits/chosen": -3.068720817565918, + "logits/rejected": -2.983346700668335, + "logps/chosen": -414.94696044921875, + "logps/rejected": -404.8901672363281, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.605242967605591, + "rewards/margins": 5.817152500152588, + "rewards/rejected": -8.422395706176758, + "step": 1081 + }, + { + "epoch": 0.17, + "learning_rate": 1.3353751750609471e-05, + "logits/chosen": -3.2604804039001465, + "logits/rejected": -2.1927382946014404, + "logps/chosen": -469.79620361328125, + "logps/rejected": -248.85736083984375, + "loss": 2.0058, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.140041351318359, + "rewards/margins": -0.13241887092590332, + "rewards/rejected": -4.007622718811035, + "step": 1082 + }, + { + "epoch": 0.17, + "learning_rate": 1.3353018310078323e-05, + "logits/chosen": -1.1865184307098389, + "logits/rejected": -2.9443914890289307, + "logps/chosen": -135.97061157226562, + "logps/rejected": -377.49163818359375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1398940086364746, + "rewards/margins": 6.9255266189575195, + "rewards/rejected": -9.065420150756836, + "step": 1083 + }, + { + "epoch": 0.17, + "learning_rate": 1.3352284869547177e-05, + "logits/chosen": -2.4876537322998047, + "logits/rejected": -2.9066872596740723, + "logps/chosen": -30.630416870117188, + "logps/rejected": -170.22250366210938, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6834707260131836, + "rewards/margins": 8.323878288269043, + "rewards/rejected": -10.007349014282227, + "step": 1084 + }, + { + "epoch": 0.17, + "learning_rate": 1.3351551429016028e-05, + "logits/chosen": -2.177034378051758, + "logits/rejected": -3.1337013244628906, + "logps/chosen": -382.398681640625, + "logps/rejected": -319.79888916015625, + "loss": 1.8276, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.638991355895996, + "rewards/margins": 2.841536521911621, + "rewards/rejected": -7.480527877807617, + "step": 1085 + }, + { + "epoch": 0.17, + "learning_rate": 1.335081798848488e-05, + "logits/chosen": -2.2239480018615723, + "logits/rejected": -2.49043607711792, + "logps/chosen": -364.42218017578125, + "logps/rejected": -405.2640380859375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4544448852539062, + "rewards/margins": 6.987616062164307, + "rewards/rejected": -9.442060470581055, + "step": 1086 + }, + { + "epoch": 0.17, + "learning_rate": 1.3350084547953732e-05, + "logits/chosen": -3.1230647563934326, + "logits/rejected": -3.099973678588867, + "logps/chosen": -487.7981262207031, + "logps/rejected": -361.15386962890625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5568907260894775, + "rewards/margins": 7.097688674926758, + "rewards/rejected": -9.654579162597656, + "step": 1087 + }, + { + "epoch": 0.17, + "learning_rate": 1.3349351107422584e-05, + "logits/chosen": -1.4251958131790161, + "logits/rejected": -2.9981706142425537, + "logps/chosen": -63.47876739501953, + "logps/rejected": -386.7073974609375, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8799076080322266, + "rewards/margins": 5.004763603210449, + "rewards/rejected": -7.884671211242676, + "step": 1088 + }, + { + "epoch": 0.17, + "learning_rate": 1.3348617666891436e-05, + "logits/chosen": -2.1597838401794434, + "logits/rejected": -2.8761308193206787, + "logps/chosen": -214.62783813476562, + "logps/rejected": -341.66619873046875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6747360229492188, + "rewards/margins": 7.853079319000244, + "rewards/rejected": -9.527814865112305, + "step": 1089 + }, + { + "epoch": 0.17, + "learning_rate": 1.3347884226360288e-05, + "logits/chosen": -1.8596034049987793, + "logits/rejected": -2.980874538421631, + "logps/chosen": -207.98959350585938, + "logps/rejected": -330.4920654296875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6633487939834595, + "rewards/margins": 9.101529121398926, + "rewards/rejected": -10.764878273010254, + "step": 1090 + }, + { + "epoch": 0.17, + "learning_rate": 1.334715078582914e-05, + "logits/chosen": -1.2103469371795654, + "logits/rejected": -2.326812505722046, + "logps/chosen": -189.12469482421875, + "logps/rejected": -598.7490234375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1030735969543457, + "rewards/margins": 6.059696674346924, + "rewards/rejected": -8.16277027130127, + "step": 1091 + }, + { + "epoch": 0.17, + "learning_rate": 1.3346417345297993e-05, + "logits/chosen": -1.392159104347229, + "logits/rejected": -2.8906025886535645, + "logps/chosen": -134.96888732910156, + "logps/rejected": -403.69598388671875, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4852206707000732, + "rewards/margins": 6.472295761108398, + "rewards/rejected": -8.95751667022705, + "step": 1092 + }, + { + "epoch": 0.17, + "learning_rate": 1.3345683904766845e-05, + "logits/chosen": -2.562624454498291, + "logits/rejected": -3.0295069217681885, + "logps/chosen": -360.79986572265625, + "logps/rejected": -466.7261657714844, + "loss": 0.0503, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.231292247772217, + "rewards/margins": 3.1592671871185303, + "rewards/rejected": -7.390559196472168, + "step": 1093 + }, + { + "epoch": 0.17, + "learning_rate": 1.3344950464235697e-05, + "logits/chosen": -2.389608383178711, + "logits/rejected": -2.9359748363494873, + "logps/chosen": -162.77024841308594, + "logps/rejected": -269.49072265625, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2137322425842285, + "rewards/margins": 7.492884159088135, + "rewards/rejected": -9.706616401672363, + "step": 1094 + }, + { + "epoch": 0.17, + "learning_rate": 1.3344217023704549e-05, + "logits/chosen": -2.6073334217071533, + "logits/rejected": -2.880673408508301, + "logps/chosen": -181.74298095703125, + "logps/rejected": -272.28338623046875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.575038194656372, + "rewards/margins": 6.981163024902344, + "rewards/rejected": -8.556200981140137, + "step": 1095 + }, + { + "epoch": 0.17, + "learning_rate": 1.3343483583173401e-05, + "logits/chosen": -2.9176266193389893, + "logits/rejected": -2.9284651279449463, + "logps/chosen": -30.94048500061035, + "logps/rejected": -262.9013977050781, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2356171607971191, + "rewards/margins": 6.0272979736328125, + "rewards/rejected": -7.26291561126709, + "step": 1096 + }, + { + "epoch": 0.17, + "learning_rate": 1.3342750142642253e-05, + "logits/chosen": -3.0603537559509277, + "logits/rejected": -2.5097997188568115, + "logps/chosen": -499.46990966796875, + "logps/rejected": -341.7607421875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4188549518585205, + "rewards/margins": 8.208433151245117, + "rewards/rejected": -10.627288818359375, + "step": 1097 + }, + { + "epoch": 0.17, + "learning_rate": 1.3342016702111105e-05, + "logits/chosen": -0.5772959589958191, + "logits/rejected": -2.709162950515747, + "logps/chosen": -86.40837097167969, + "logps/rejected": -470.8092041015625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4151604175567627, + "rewards/margins": 8.39082145690918, + "rewards/rejected": -11.805981636047363, + "step": 1098 + }, + { + "epoch": 0.17, + "learning_rate": 1.3341283261579956e-05, + "logits/chosen": -2.9647438526153564, + "logits/rejected": -2.951199769973755, + "logps/chosen": -442.97100830078125, + "logps/rejected": -411.2285461425781, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0066776275634766, + "rewards/margins": 5.629975318908691, + "rewards/rejected": -8.636652946472168, + "step": 1099 + }, + { + "epoch": 0.17, + "learning_rate": 1.3340549821048808e-05, + "logits/chosen": -2.804828405380249, + "logits/rejected": -1.9113281965255737, + "logps/chosen": -320.2662658691406, + "logps/rejected": -72.49141693115234, + "loss": 8.8395, + "rewards/accuracies": 0.0, + "rewards/chosen": -10.523638725280762, + "rewards/margins": -8.839380264282227, + "rewards/rejected": -1.6842575073242188, + "step": 1100 + }, + { + "epoch": 0.17, + "learning_rate": 1.3339816380517662e-05, + "logits/chosen": -2.47058367729187, + "logits/rejected": -2.7349798679351807, + "logps/chosen": -127.05390930175781, + "logps/rejected": -292.46343994140625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1247639656066895, + "rewards/margins": 6.266449928283691, + "rewards/rejected": -9.391214370727539, + "step": 1101 + }, + { + "epoch": 0.17, + "learning_rate": 1.3339082939986514e-05, + "logits/chosen": -2.998354911804199, + "logits/rejected": -2.5837717056274414, + "logps/chosen": -529.8317260742188, + "logps/rejected": -406.2620849609375, + "loss": 0.4971, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7467880249023438, + "rewards/margins": 2.5687828063964844, + "rewards/rejected": -6.315570831298828, + "step": 1102 + }, + { + "epoch": 0.17, + "learning_rate": 1.3338349499455366e-05, + "logits/chosen": -3.0381765365600586, + "logits/rejected": -2.958623170852661, + "logps/chosen": -258.6785888671875, + "logps/rejected": -255.8770751953125, + "loss": 2.998, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.511691093444824, + "rewards/margins": -1.0300381183624268, + "rewards/rejected": -4.481653213500977, + "step": 1103 + }, + { + "epoch": 0.17, + "learning_rate": 1.333761605892422e-05, + "logits/chosen": -2.5955967903137207, + "logits/rejected": -3.0845823287963867, + "logps/chosen": -255.93614196777344, + "logps/rejected": -352.51910400390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1945035457611084, + "rewards/margins": 7.829001426696777, + "rewards/rejected": -9.023504257202148, + "step": 1104 + }, + { + "epoch": 0.17, + "learning_rate": 1.3336882618393071e-05, + "logits/chosen": -2.659930467605591, + "logits/rejected": -3.1478617191314697, + "logps/chosen": -37.8245735168457, + "logps/rejected": -226.5395965576172, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5145338773727417, + "rewards/margins": 5.979807376861572, + "rewards/rejected": -7.4943413734436035, + "step": 1105 + }, + { + "epoch": 0.17, + "learning_rate": 1.3336149177861923e-05, + "logits/chosen": -2.46712589263916, + "logits/rejected": -2.5006179809570312, + "logps/chosen": -123.66259002685547, + "logps/rejected": -261.61712646484375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9406392574310303, + "rewards/margins": 6.511241912841797, + "rewards/rejected": -9.451881408691406, + "step": 1106 + }, + { + "epoch": 0.17, + "learning_rate": 1.3335415737330775e-05, + "logits/chosen": -1.8894027471542358, + "logits/rejected": -3.034132719039917, + "logps/chosen": -64.7822265625, + "logps/rejected": -240.66209411621094, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7661445140838623, + "rewards/margins": 6.178708553314209, + "rewards/rejected": -7.94485330581665, + "step": 1107 + }, + { + "epoch": 0.17, + "learning_rate": 1.3334682296799627e-05, + "logits/chosen": -2.5523157119750977, + "logits/rejected": -3.028430938720703, + "logps/chosen": -82.85308074951172, + "logps/rejected": -288.05181884765625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3367650508880615, + "rewards/margins": 6.8203816413879395, + "rewards/rejected": -8.157146453857422, + "step": 1108 + }, + { + "epoch": 0.17, + "learning_rate": 1.3333948856268479e-05, + "logits/chosen": -2.8203511238098145, + "logits/rejected": -2.8542401790618896, + "logps/chosen": -167.12326049804688, + "logps/rejected": -127.1633529663086, + "loss": 3.4269, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.944316387176514, + "rewards/margins": -1.8931258916854858, + "rewards/rejected": -4.051190376281738, + "step": 1109 + }, + { + "epoch": 0.17, + "learning_rate": 1.3333215415737332e-05, + "logits/chosen": -2.832237958908081, + "logits/rejected": -2.8937997817993164, + "logps/chosen": -214.98117065429688, + "logps/rejected": -442.814453125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9906058311462402, + "rewards/margins": 5.381246089935303, + "rewards/rejected": -9.371851921081543, + "step": 1110 + }, + { + "epoch": 0.17, + "learning_rate": 1.3332481975206184e-05, + "logits/chosen": -2.262561082839966, + "logits/rejected": -2.8417739868164062, + "logps/chosen": -273.30230712890625, + "logps/rejected": -310.4088439941406, + "loss": 3.0307, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.680787563323975, + "rewards/margins": 0.8204753398895264, + "rewards/rejected": -6.501262664794922, + "step": 1111 + }, + { + "epoch": 0.17, + "learning_rate": 1.3331748534675036e-05, + "logits/chosen": -1.566980004310608, + "logits/rejected": -2.8858189582824707, + "logps/chosen": -112.32533264160156, + "logps/rejected": -362.46893310546875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4502651691436768, + "rewards/margins": 7.457302093505859, + "rewards/rejected": -10.907567977905273, + "step": 1112 + }, + { + "epoch": 0.17, + "learning_rate": 1.3331015094143888e-05, + "logits/chosen": -2.2883846759796143, + "logits/rejected": -2.911806106567383, + "logps/chosen": -130.83761596679688, + "logps/rejected": -310.56146240234375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0301108360290527, + "rewards/margins": 6.732028961181641, + "rewards/rejected": -8.762140274047852, + "step": 1113 + }, + { + "epoch": 0.17, + "learning_rate": 1.333028165361274e-05, + "logits/chosen": -2.791477680206299, + "logits/rejected": -3.035649299621582, + "logps/chosen": -81.1080322265625, + "logps/rejected": -176.28256225585938, + "loss": 0.0522, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.411572456359863, + "rewards/margins": 3.169001817703247, + "rewards/rejected": -7.580574035644531, + "step": 1114 + }, + { + "epoch": 0.17, + "learning_rate": 1.3329548213081592e-05, + "logits/chosen": -3.105238914489746, + "logits/rejected": -2.3678948879241943, + "logps/chosen": -318.94110107421875, + "logps/rejected": -163.09609985351562, + "loss": 2.8702, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.962533950805664, + "rewards/margins": -0.3722724914550781, + "rewards/rejected": -4.590261459350586, + "step": 1115 + }, + { + "epoch": 0.17, + "learning_rate": 1.3328814772550443e-05, + "logits/chosen": -3.017406463623047, + "logits/rejected": -2.8530876636505127, + "logps/chosen": -633.896484375, + "logps/rejected": -467.572021484375, + "loss": 2.3539, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.7650146484375, + "rewards/margins": 1.8204691410064697, + "rewards/rejected": -7.585483551025391, + "step": 1116 + }, + { + "epoch": 0.17, + "learning_rate": 1.3328081332019295e-05, + "logits/chosen": -1.8959227800369263, + "logits/rejected": -2.6181509494781494, + "logps/chosen": -89.22532653808594, + "logps/rejected": -269.8732604980469, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.474175453186035, + "rewards/margins": 6.559853553771973, + "rewards/rejected": -9.034029006958008, + "step": 1117 + }, + { + "epoch": 0.17, + "learning_rate": 1.3327347891488147e-05, + "logits/chosen": -3.0225601196289062, + "logits/rejected": -2.539720296859741, + "logps/chosen": -708.1885375976562, + "logps/rejected": -516.2527465820312, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.166285276412964, + "rewards/margins": 4.8503851890563965, + "rewards/rejected": -7.016670227050781, + "step": 1118 + }, + { + "epoch": 0.17, + "learning_rate": 1.3326614450957e-05, + "logits/chosen": -2.8180317878723145, + "logits/rejected": -1.4174891710281372, + "logps/chosen": -557.1571655273438, + "logps/rejected": -239.96743774414062, + "loss": 3.1989, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.246786594390869, + "rewards/margins": -0.818394660949707, + "rewards/rejected": -6.428391933441162, + "step": 1119 + }, + { + "epoch": 0.17, + "learning_rate": 1.3325881010425853e-05, + "logits/chosen": -2.882680654525757, + "logits/rejected": -1.995618224143982, + "logps/chosen": -281.7009582519531, + "logps/rejected": -180.2041473388672, + "loss": 2.4192, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.064291477203369, + "rewards/margins": 2.764557123184204, + "rewards/rejected": -6.828848361968994, + "step": 1120 + }, + { + "epoch": 0.17, + "learning_rate": 1.3325147569894705e-05, + "logits/chosen": -2.8730247020721436, + "logits/rejected": -3.119032621383667, + "logps/chosen": -225.98141479492188, + "logps/rejected": -253.06564331054688, + "loss": 0.0329, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3907310962677, + "rewards/margins": 3.749563455581665, + "rewards/rejected": -7.140294551849365, + "step": 1121 + }, + { + "epoch": 0.17, + "learning_rate": 1.3324414129363556e-05, + "logits/chosen": -3.0073392391204834, + "logits/rejected": -2.947951555252075, + "logps/chosen": -447.1112060546875, + "logps/rejected": -492.9811096191406, + "loss": 0.0469, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5814361572265625, + "rewards/margins": 4.753190994262695, + "rewards/rejected": -9.334627151489258, + "step": 1122 + }, + { + "epoch": 0.17, + "learning_rate": 1.3323680688832408e-05, + "logits/chosen": -1.7403804063796997, + "logits/rejected": -2.701084852218628, + "logps/chosen": -114.96446228027344, + "logps/rejected": -286.963623046875, + "loss": 0.0515, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5410141944885254, + "rewards/margins": 4.583471298217773, + "rewards/rejected": -8.12448501586914, + "step": 1123 + }, + { + "epoch": 0.17, + "learning_rate": 1.332294724830126e-05, + "logits/chosen": -2.1637701988220215, + "logits/rejected": -2.9417800903320312, + "logps/chosen": -139.08804321289062, + "logps/rejected": -267.4525451660156, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3386496305465698, + "rewards/margins": 4.55428409576416, + "rewards/rejected": -5.8929338455200195, + "step": 1124 + }, + { + "epoch": 0.17, + "learning_rate": 1.3322213807770112e-05, + "logits/chosen": -2.8983590602874756, + "logits/rejected": -2.8216962814331055, + "logps/chosen": -402.61724853515625, + "logps/rejected": -408.88238525390625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.884746551513672, + "rewards/margins": 6.57071590423584, + "rewards/rejected": -10.455463409423828, + "step": 1125 + }, + { + "epoch": 0.18, + "learning_rate": 1.3321480367238964e-05, + "logits/chosen": -2.749382495880127, + "logits/rejected": -2.9305574893951416, + "logps/chosen": -163.20706176757812, + "logps/rejected": -225.49847412109375, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.019582748413086, + "rewards/margins": 4.786154747009277, + "rewards/rejected": -6.805737495422363, + "step": 1126 + }, + { + "epoch": 0.18, + "learning_rate": 1.3320746926707816e-05, + "logits/chosen": -2.82315993309021, + "logits/rejected": -2.992710590362549, + "logps/chosen": -86.60662841796875, + "logps/rejected": -254.5076141357422, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7824245691299438, + "rewards/margins": 6.979273319244385, + "rewards/rejected": -8.761697769165039, + "step": 1127 + }, + { + "epoch": 0.18, + "learning_rate": 1.332001348617667e-05, + "logits/chosen": -2.020112991333008, + "logits/rejected": -3.025158643722534, + "logps/chosen": -53.94084548950195, + "logps/rejected": -255.94537353515625, + "loss": 0.0243, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3271509408950806, + "rewards/margins": 3.8224449157714844, + "rewards/rejected": -5.149595737457275, + "step": 1128 + }, + { + "epoch": 0.18, + "learning_rate": 1.3319280045645521e-05, + "logits/chosen": -2.877714157104492, + "logits/rejected": -2.9864988327026367, + "logps/chosen": -157.72666931152344, + "logps/rejected": -283.644287109375, + "loss": 0.1294, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6211347579956055, + "rewards/margins": 3.1037392616271973, + "rewards/rejected": -6.724874019622803, + "step": 1129 + }, + { + "epoch": 0.18, + "learning_rate": 1.3318546605114373e-05, + "logits/chosen": -1.9996814727783203, + "logits/rejected": -2.8261780738830566, + "logps/chosen": -277.26263427734375, + "logps/rejected": -540.712158203125, + "loss": 1.6438, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.791887283325195, + "rewards/margins": -0.0294952392578125, + "rewards/rejected": -5.762392044067383, + "step": 1130 + }, + { + "epoch": 0.18, + "learning_rate": 1.3317813164583225e-05, + "logits/chosen": -3.0409395694732666, + "logits/rejected": -2.9702885150909424, + "logps/chosen": -164.79415893554688, + "logps/rejected": -348.275146484375, + "loss": 1.6585, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.5844879150390625, + "rewards/margins": 1.7927346229553223, + "rewards/rejected": -6.377222537994385, + "step": 1131 + }, + { + "epoch": 0.18, + "learning_rate": 1.3317079724052077e-05, + "logits/chosen": -2.8691000938415527, + "logits/rejected": -2.2176692485809326, + "logps/chosen": -174.11672973632812, + "logps/rejected": -234.92446899414062, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.058072090148926, + "rewards/margins": 5.600486755371094, + "rewards/rejected": -7.6585588455200195, + "step": 1132 + }, + { + "epoch": 0.18, + "learning_rate": 1.3316346283520929e-05, + "logits/chosen": -2.3864638805389404, + "logits/rejected": -2.916764736175537, + "logps/chosen": -264.5694580078125, + "logps/rejected": -313.9791564941406, + "loss": 2.0538, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2592568397521973, + "rewards/margins": 1.0535988807678223, + "rewards/rejected": -4.3128557205200195, + "step": 1133 + }, + { + "epoch": 0.18, + "learning_rate": 1.331561284298978e-05, + "logits/chosen": -1.7101331949234009, + "logits/rejected": -2.6703310012817383, + "logps/chosen": -345.960205078125, + "logps/rejected": -452.2327880859375, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.370962619781494, + "rewards/margins": 6.0912933349609375, + "rewards/rejected": -8.462255477905273, + "step": 1134 + }, + { + "epoch": 0.18, + "learning_rate": 1.3314879402458633e-05, + "logits/chosen": -2.9262044429779053, + "logits/rejected": -2.7691128253936768, + "logps/chosen": -310.4753112792969, + "logps/rejected": -281.115966796875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6184568405151367, + "rewards/margins": 6.0274248123168945, + "rewards/rejected": -8.645881652832031, + "step": 1135 + }, + { + "epoch": 0.18, + "learning_rate": 1.3314145961927484e-05, + "logits/chosen": -3.1019887924194336, + "logits/rejected": -2.1034483909606934, + "logps/chosen": -321.4767150878906, + "logps/rejected": -362.62652587890625, + "loss": 4.3303, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.722919940948486, + "rewards/margins": -0.43405866622924805, + "rewards/rejected": -6.288861274719238, + "step": 1136 + }, + { + "epoch": 0.18, + "learning_rate": 1.3313412521396338e-05, + "logits/chosen": -2.7192795276641846, + "logits/rejected": -2.5308096408843994, + "logps/chosen": -193.1251983642578, + "logps/rejected": -239.20095825195312, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.400395393371582, + "rewards/margins": 4.761258602142334, + "rewards/rejected": -7.161653995513916, + "step": 1137 + }, + { + "epoch": 0.18, + "learning_rate": 1.3312679080865192e-05, + "logits/chosen": -2.6550912857055664, + "logits/rejected": -2.7094507217407227, + "logps/chosen": -292.0428466796875, + "logps/rejected": -457.6878662109375, + "loss": 3.8599, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.188839912414551, + "rewards/margins": 0.8233475685119629, + "rewards/rejected": -6.012187480926514, + "step": 1138 + }, + { + "epoch": 0.18, + "learning_rate": 1.3311945640334043e-05, + "logits/chosen": -2.7779860496520996, + "logits/rejected": -2.1609392166137695, + "logps/chosen": -192.29864501953125, + "logps/rejected": -250.55914306640625, + "loss": 2.8373, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.211457252502441, + "rewards/margins": 0.54878830909729, + "rewards/rejected": -4.760245323181152, + "step": 1139 + }, + { + "epoch": 0.18, + "learning_rate": 1.3311212199802895e-05, + "logits/chosen": -2.43070650100708, + "logits/rejected": -3.07165789604187, + "logps/chosen": -278.3564758300781, + "logps/rejected": -332.9877014160156, + "loss": 1.7777, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8391165733337402, + "rewards/margins": -0.542597770690918, + "rewards/rejected": -3.2965188026428223, + "step": 1140 + }, + { + "epoch": 0.18, + "learning_rate": 1.3310478759271747e-05, + "logits/chosen": -3.0009701251983643, + "logits/rejected": -2.7358367443084717, + "logps/chosen": -208.40174865722656, + "logps/rejected": -328.1743469238281, + "loss": 2.4599, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.878769874572754, + "rewards/margins": 1.2581441402435303, + "rewards/rejected": -5.136914253234863, + "step": 1141 + }, + { + "epoch": 0.18, + "learning_rate": 1.3309745318740599e-05, + "logits/chosen": -3.1623048782348633, + "logits/rejected": -2.9614038467407227, + "logps/chosen": -337.19610595703125, + "logps/rejected": -455.6447448730469, + "loss": 0.9294, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2439370155334473, + "rewards/margins": 1.7098273038864136, + "rewards/rejected": -4.95376443862915, + "step": 1142 + }, + { + "epoch": 0.18, + "learning_rate": 1.3309011878209451e-05, + "logits/chosen": -1.4553910493850708, + "logits/rejected": -2.9709324836730957, + "logps/chosen": -281.46685791015625, + "logps/rejected": -550.3226928710938, + "loss": 6.4896, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.105762481689453, + "rewards/margins": -6.487234115600586, + "rewards/rejected": -1.6185288429260254, + "step": 1143 + }, + { + "epoch": 0.18, + "learning_rate": 1.3308278437678303e-05, + "logits/chosen": -1.9951213598251343, + "logits/rejected": -2.7559356689453125, + "logps/chosen": -238.42352294921875, + "logps/rejected": -238.83084106445312, + "loss": 2.2691, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.551324367523193, + "rewards/margins": 0.8086509704589844, + "rewards/rejected": -5.359975814819336, + "step": 1144 + }, + { + "epoch": 0.18, + "learning_rate": 1.3307544997147155e-05, + "logits/chosen": -2.2071053981781006, + "logits/rejected": -2.8791379928588867, + "logps/chosen": -230.7207489013672, + "logps/rejected": -388.1216125488281, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7284210920333862, + "rewards/margins": 5.405303955078125, + "rewards/rejected": -7.133725166320801, + "step": 1145 + }, + { + "epoch": 0.18, + "learning_rate": 1.3306811556616008e-05, + "logits/chosen": -1.128071665763855, + "logits/rejected": -3.0707204341888428, + "logps/chosen": -99.19427490234375, + "logps/rejected": -342.3149719238281, + "loss": 0.0294, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.960782766342163, + "rewards/margins": 4.288952827453613, + "rewards/rejected": -6.249735355377197, + "step": 1146 + }, + { + "epoch": 0.18, + "learning_rate": 1.330607811608486e-05, + "logits/chosen": -1.7691075801849365, + "logits/rejected": -1.9624160528182983, + "logps/chosen": -149.36831665039062, + "logps/rejected": -256.83319091796875, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6092720031738281, + "rewards/margins": 3.589494466781616, + "rewards/rejected": -5.198766708374023, + "step": 1147 + }, + { + "epoch": 0.18, + "learning_rate": 1.3305344675553712e-05, + "logits/chosen": -2.7721147537231445, + "logits/rejected": -3.037625789642334, + "logps/chosen": -180.97381591796875, + "logps/rejected": -296.47216796875, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9074249267578125, + "rewards/margins": 5.463083267211914, + "rewards/rejected": -7.370508193969727, + "step": 1148 + }, + { + "epoch": 0.18, + "learning_rate": 1.3304611235022564e-05, + "logits/chosen": -1.9268348217010498, + "logits/rejected": -2.9155702590942383, + "logps/chosen": -135.47714233398438, + "logps/rejected": -190.5743865966797, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.83642578125, + "rewards/margins": 3.7688021659851074, + "rewards/rejected": -6.605228424072266, + "step": 1149 + }, + { + "epoch": 0.18, + "learning_rate": 1.3303877794491416e-05, + "logits/chosen": -2.706174373626709, + "logits/rejected": -3.0831713676452637, + "logps/chosen": -493.3779296875, + "logps/rejected": -490.4447326660156, + "loss": 0.0392, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0676302909851074, + "rewards/margins": 6.448376655578613, + "rewards/rejected": -7.516007423400879, + "step": 1150 + }, + { + "epoch": 0.18, + "learning_rate": 1.3303144353960268e-05, + "logits/chosen": -2.670684814453125, + "logits/rejected": -2.544022798538208, + "logps/chosen": -215.2251739501953, + "logps/rejected": -76.875732421875, + "loss": 3.971, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.283257007598877, + "rewards/margins": -1.6834263801574707, + "rewards/rejected": -4.599830627441406, + "step": 1151 + }, + { + "epoch": 0.18, + "learning_rate": 1.330241091342912e-05, + "logits/chosen": -2.53145432472229, + "logits/rejected": -2.864138603210449, + "logps/chosen": -102.50653076171875, + "logps/rejected": -149.36082458496094, + "loss": 2.0377, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.8042120933532715, + "rewards/margins": -0.6878869533538818, + "rewards/rejected": -4.1163249015808105, + "step": 1152 + }, + { + "epoch": 0.18, + "learning_rate": 1.3301677472897971e-05, + "logits/chosen": -1.8020421266555786, + "logits/rejected": -2.8317792415618896, + "logps/chosen": -82.67044830322266, + "logps/rejected": -262.347412109375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8321354389190674, + "rewards/margins": 5.84657096862793, + "rewards/rejected": -7.678706169128418, + "step": 1153 + }, + { + "epoch": 0.18, + "learning_rate": 1.3300944032366823e-05, + "logits/chosen": -1.287672758102417, + "logits/rejected": -1.9132972955703735, + "logps/chosen": -155.5238037109375, + "logps/rejected": -204.7960662841797, + "loss": 1.6967, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.335254669189453, + "rewards/margins": 0.9012235403060913, + "rewards/rejected": -5.236478328704834, + "step": 1154 + }, + { + "epoch": 0.18, + "learning_rate": 1.3300210591835677e-05, + "logits/chosen": -3.130934953689575, + "logits/rejected": -1.8143305778503418, + "logps/chosen": -480.3208312988281, + "logps/rejected": -149.40257263183594, + "loss": 4.1176, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.676509380340576, + "rewards/margins": -4.099045753479004, + "rewards/rejected": -1.5774636268615723, + "step": 1155 + }, + { + "epoch": 0.18, + "learning_rate": 1.3299477151304529e-05, + "logits/chosen": -3.059234380722046, + "logits/rejected": -2.6245646476745605, + "logps/chosen": -187.29656982421875, + "logps/rejected": -156.94735717773438, + "loss": 3.3194, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.3957133293151855, + "rewards/margins": -1.3284920454025269, + "rewards/rejected": -4.067221641540527, + "step": 1156 + }, + { + "epoch": 0.18, + "learning_rate": 1.329874371077338e-05, + "logits/chosen": -1.3879245519638062, + "logits/rejected": -2.7449333667755127, + "logps/chosen": -153.6031494140625, + "logps/rejected": -494.8312683105469, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9072693586349487, + "rewards/margins": 6.328526496887207, + "rewards/rejected": -8.235795974731445, + "step": 1157 + }, + { + "epoch": 0.18, + "learning_rate": 1.3298010270242233e-05, + "logits/chosen": -2.971996545791626, + "logits/rejected": -2.393552541732788, + "logps/chosen": -153.6702880859375, + "logps/rejected": -175.9234161376953, + "loss": 3.7283, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.124358654022217, + "rewards/margins": 0.3300490379333496, + "rewards/rejected": -6.454407691955566, + "step": 1158 + }, + { + "epoch": 0.18, + "learning_rate": 1.3297276829711084e-05, + "logits/chosen": -2.83764386177063, + "logits/rejected": -3.1303534507751465, + "logps/chosen": -339.740478515625, + "logps/rejected": -444.8370056152344, + "loss": 0.0793, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8548873662948608, + "rewards/margins": 2.926287889480591, + "rewards/rejected": -4.781175136566162, + "step": 1159 + }, + { + "epoch": 0.18, + "learning_rate": 1.3296543389179936e-05, + "logits/chosen": -2.813469648361206, + "logits/rejected": -3.0730140209198, + "logps/chosen": -182.60519409179688, + "logps/rejected": -276.759521484375, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6276955604553223, + "rewards/margins": 4.687403202056885, + "rewards/rejected": -6.315098762512207, + "step": 1160 + }, + { + "epoch": 0.18, + "learning_rate": 1.3295809948648788e-05, + "logits/chosen": -2.019423484802246, + "logits/rejected": -3.055027961730957, + "logps/chosen": -250.81167602539062, + "logps/rejected": -553.9008178710938, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8464090824127197, + "rewards/margins": 4.9708075523376465, + "rewards/rejected": -6.817216873168945, + "step": 1161 + }, + { + "epoch": 0.18, + "learning_rate": 1.329507650811764e-05, + "logits/chosen": -2.605034828186035, + "logits/rejected": -3.154275894165039, + "logps/chosen": -21.65131378173828, + "logps/rejected": -133.85000610351562, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1355797052383423, + "rewards/margins": 4.729480743408203, + "rewards/rejected": -5.865060806274414, + "step": 1162 + }, + { + "epoch": 0.18, + "learning_rate": 1.3294343067586492e-05, + "logits/chosen": -1.9986717700958252, + "logits/rejected": -2.854627847671509, + "logps/chosen": -328.4082336425781, + "logps/rejected": -396.7911071777344, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1367385387420654, + "rewards/margins": 5.584322929382324, + "rewards/rejected": -7.721061706542969, + "step": 1163 + }, + { + "epoch": 0.18, + "learning_rate": 1.3293609627055346e-05, + "logits/chosen": -1.8006778955459595, + "logits/rejected": -2.9552407264709473, + "logps/chosen": -65.99253845214844, + "logps/rejected": -376.168212890625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5294348001480103, + "rewards/margins": 6.308052062988281, + "rewards/rejected": -7.83748722076416, + "step": 1164 + }, + { + "epoch": 0.18, + "learning_rate": 1.3292876186524197e-05, + "logits/chosen": -2.894136428833008, + "logits/rejected": -2.9914333820343018, + "logps/chosen": -405.26153564453125, + "logps/rejected": -565.972412109375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.668879747390747, + "rewards/margins": 5.503021240234375, + "rewards/rejected": -7.171900749206543, + "step": 1165 + }, + { + "epoch": 0.18, + "learning_rate": 1.329214274599305e-05, + "logits/chosen": -2.9423828125, + "logits/rejected": -1.4450188875198364, + "logps/chosen": -373.08660888671875, + "logps/rejected": -189.16397094726562, + "loss": 0.0532, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.225947618484497, + "rewards/margins": 4.092886447906494, + "rewards/rejected": -6.31883430480957, + "step": 1166 + }, + { + "epoch": 0.18, + "learning_rate": 1.3291409305461901e-05, + "logits/chosen": -2.0367612838745117, + "logits/rejected": -2.9856033325195312, + "logps/chosen": -129.0386962890625, + "logps/rejected": -326.442138671875, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1839733123779297, + "rewards/margins": 4.1736531257629395, + "rewards/rejected": -6.357626438140869, + "step": 1167 + }, + { + "epoch": 0.18, + "learning_rate": 1.3290675864930753e-05, + "logits/chosen": -2.933471441268921, + "logits/rejected": -1.8702095746994019, + "logps/chosen": -505.9919128417969, + "logps/rejected": -384.4776611328125, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4820060729980469, + "rewards/margins": 5.278878211975098, + "rewards/rejected": -6.7608842849731445, + "step": 1168 + }, + { + "epoch": 0.18, + "learning_rate": 1.3289942424399605e-05, + "logits/chosen": -3.1446385383605957, + "logits/rejected": -2.615739583969116, + "logps/chosen": -176.63742065429688, + "logps/rejected": -80.17174530029297, + "loss": 1.5859, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.947429895401001, + "rewards/margins": 0.19065463542938232, + "rewards/rejected": -3.1380844116210938, + "step": 1169 + }, + { + "epoch": 0.18, + "learning_rate": 1.3289208983868457e-05, + "logits/chosen": -1.81328284740448, + "logits/rejected": -2.6477999687194824, + "logps/chosen": -130.0171661376953, + "logps/rejected": -304.89080810546875, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3814176321029663, + "rewards/margins": 6.3487701416015625, + "rewards/rejected": -7.73018741607666, + "step": 1170 + }, + { + "epoch": 0.18, + "learning_rate": 1.328847554333731e-05, + "logits/chosen": -2.925154685974121, + "logits/rejected": -2.0506465435028076, + "logps/chosen": -617.1651000976562, + "logps/rejected": -338.5081481933594, + "loss": 2.4276, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.009425401687622, + "rewards/margins": 0.820871114730835, + "rewards/rejected": -3.830296516418457, + "step": 1171 + }, + { + "epoch": 0.18, + "learning_rate": 1.3287742102806162e-05, + "logits/chosen": -2.7605488300323486, + "logits/rejected": -2.008667469024658, + "logps/chosen": -267.8194580078125, + "logps/rejected": -227.82733154296875, + "loss": 5.1821, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.996633529663086, + "rewards/margins": -1.849898338317871, + "rewards/rejected": -5.146735191345215, + "step": 1172 + }, + { + "epoch": 0.18, + "learning_rate": 1.3287008662275016e-05, + "logits/chosen": -2.8581807613372803, + "logits/rejected": -3.1118857860565186, + "logps/chosen": -139.25897216796875, + "logps/rejected": -290.5771179199219, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1854126453399658, + "rewards/margins": 7.521426200866699, + "rewards/rejected": -8.706838607788086, + "step": 1173 + }, + { + "epoch": 0.18, + "learning_rate": 1.3286275221743868e-05, + "logits/chosen": -2.3160696029663086, + "logits/rejected": -2.6525614261627197, + "logps/chosen": -71.39929962158203, + "logps/rejected": -127.5505599975586, + "loss": 0.4877, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.207078456878662, + "rewards/margins": 2.5829689502716064, + "rewards/rejected": -4.7900471687316895, + "step": 1174 + }, + { + "epoch": 0.18, + "learning_rate": 1.328554178121272e-05, + "logits/chosen": -2.2569313049316406, + "logits/rejected": -3.040327310562134, + "logps/chosen": -621.198486328125, + "logps/rejected": -511.41790771484375, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.835382103919983, + "rewards/margins": 5.1953535079956055, + "rewards/rejected": -7.030735969543457, + "step": 1175 + }, + { + "epoch": 0.18, + "learning_rate": 1.3284808340681571e-05, + "logits/chosen": -2.717230796813965, + "logits/rejected": -3.2139360904693604, + "logps/chosen": -391.0523986816406, + "logps/rejected": -371.47265625, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3141772747039795, + "rewards/margins": 6.096436500549316, + "rewards/rejected": -8.410614013671875, + "step": 1176 + }, + { + "epoch": 0.18, + "learning_rate": 1.3284074900150423e-05, + "logits/chosen": -2.636509895324707, + "logits/rejected": -3.007002353668213, + "logps/chosen": -788.4247436523438, + "logps/rejected": -693.6912231445312, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3906402587890625, + "rewards/margins": 6.592656135559082, + "rewards/rejected": -9.983296394348145, + "step": 1177 + }, + { + "epoch": 0.18, + "learning_rate": 1.3283341459619275e-05, + "logits/chosen": -1.8453539609909058, + "logits/rejected": -2.8637430667877197, + "logps/chosen": -88.00330352783203, + "logps/rejected": -331.75299072265625, + "loss": 0.0641, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9928553104400635, + "rewards/margins": 3.2663955688476562, + "rewards/rejected": -5.259250640869141, + "step": 1178 + }, + { + "epoch": 0.18, + "learning_rate": 1.3282608019088127e-05, + "logits/chosen": -3.0395779609680176, + "logits/rejected": -2.139599084854126, + "logps/chosen": -856.7080078125, + "logps/rejected": -508.74688720703125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5689346790313721, + "rewards/margins": 6.959185600280762, + "rewards/rejected": -7.528120994567871, + "step": 1179 + }, + { + "epoch": 0.18, + "learning_rate": 1.3281874578556979e-05, + "logits/chosen": -1.828736662864685, + "logits/rejected": -3.1022355556488037, + "logps/chosen": -94.788818359375, + "logps/rejected": -420.1302490234375, + "loss": 0.0823, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3380565643310547, + "rewards/margins": 2.6620824337005615, + "rewards/rejected": -5.000139236450195, + "step": 1180 + }, + { + "epoch": 0.18, + "learning_rate": 1.3281141138025833e-05, + "logits/chosen": -2.575343370437622, + "logits/rejected": -2.6836507320404053, + "logps/chosen": -132.20574951171875, + "logps/rejected": -288.4493408203125, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.276749849319458, + "rewards/margins": 5.605178356170654, + "rewards/rejected": -7.881928443908691, + "step": 1181 + }, + { + "epoch": 0.18, + "learning_rate": 1.3280407697494684e-05, + "logits/chosen": -2.1232733726501465, + "logits/rejected": -3.1125123500823975, + "logps/chosen": -82.55320739746094, + "logps/rejected": -433.1861572265625, + "loss": 0.0766, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9325847625732422, + "rewards/margins": 3.7705750465393066, + "rewards/rejected": -5.703160285949707, + "step": 1182 + }, + { + "epoch": 0.18, + "learning_rate": 1.3279674256963536e-05, + "logits/chosen": -2.978159189224243, + "logits/rejected": -3.1502459049224854, + "logps/chosen": -48.18665313720703, + "logps/rejected": -130.80706787109375, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9952854514122009, + "rewards/margins": 4.848273277282715, + "rewards/rejected": -5.843558311462402, + "step": 1183 + }, + { + "epoch": 0.18, + "learning_rate": 1.3278940816432388e-05, + "logits/chosen": -3.066105842590332, + "logits/rejected": -2.9029276371002197, + "logps/chosen": -134.01522827148438, + "logps/rejected": -178.9698486328125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0099341869354248, + "rewards/margins": 6.692169666290283, + "rewards/rejected": -7.702103614807129, + "step": 1184 + }, + { + "epoch": 0.18, + "learning_rate": 1.327820737590124e-05, + "logits/chosen": -2.647254705429077, + "logits/rejected": -2.819101095199585, + "logps/chosen": -196.60406494140625, + "logps/rejected": -183.08251953125, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6885294914245605, + "rewards/margins": 5.413684844970703, + "rewards/rejected": -7.1022138595581055, + "step": 1185 + }, + { + "epoch": 0.18, + "learning_rate": 1.3277473935370092e-05, + "logits/chosen": -3.1473543643951416, + "logits/rejected": -2.559741735458374, + "logps/chosen": -271.26251220703125, + "logps/rejected": -53.992000579833984, + "loss": 2.8436, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.824476718902588, + "rewards/margins": -2.7470853328704834, + "rewards/rejected": -2.0773913860321045, + "step": 1186 + }, + { + "epoch": 0.18, + "learning_rate": 1.3276740494838944e-05, + "logits/chosen": -0.8452058434486389, + "logits/rejected": -3.099327564239502, + "logps/chosen": -88.58733367919922, + "logps/rejected": -453.618896484375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5477676391601562, + "rewards/margins": 6.8992204666137695, + "rewards/rejected": -8.446988105773926, + "step": 1187 + }, + { + "epoch": 0.18, + "learning_rate": 1.3276007054307796e-05, + "logits/chosen": -1.9182325601577759, + "logits/rejected": -3.049856185913086, + "logps/chosen": -171.75038146972656, + "logps/rejected": -354.30560302734375, + "loss": 0.0384, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5807738304138184, + "rewards/margins": 4.362382888793945, + "rewards/rejected": -6.9431562423706055, + "step": 1188 + }, + { + "epoch": 0.18, + "learning_rate": 1.3275273613776648e-05, + "logits/chosen": -2.1480019092559814, + "logits/rejected": -2.923659086227417, + "logps/chosen": -73.13084411621094, + "logps/rejected": -181.72503662109375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.854493260383606, + "rewards/margins": 6.340611934661865, + "rewards/rejected": -8.195104598999023, + "step": 1189 + }, + { + "epoch": 0.19, + "learning_rate": 1.3274540173245501e-05, + "logits/chosen": -3.0852408409118652, + "logits/rejected": -2.4216246604919434, + "logps/chosen": -400.3575439453125, + "logps/rejected": -319.8631896972656, + "loss": 3.2854, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.918999671936035, + "rewards/margins": -1.8390284776687622, + "rewards/rejected": -3.0799713134765625, + "step": 1190 + }, + { + "epoch": 0.19, + "learning_rate": 1.3273806732714353e-05, + "logits/chosen": -2.479678153991699, + "logits/rejected": -2.8476247787475586, + "logps/chosen": -133.48171997070312, + "logps/rejected": -245.96058654785156, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.369198203086853, + "rewards/margins": 7.149820327758789, + "rewards/rejected": -8.519018173217773, + "step": 1191 + }, + { + "epoch": 0.19, + "learning_rate": 1.3273073292183205e-05, + "logits/chosen": -1.0657423734664917, + "logits/rejected": -2.933600902557373, + "logps/chosen": -84.2044448852539, + "logps/rejected": -383.37628173828125, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3453779220581055, + "rewards/margins": 5.685217380523682, + "rewards/rejected": -7.030595302581787, + "step": 1192 + }, + { + "epoch": 0.19, + "learning_rate": 1.3272339851652057e-05, + "logits/chosen": -1.1469649076461792, + "logits/rejected": -2.7433483600616455, + "logps/chosen": -162.33592224121094, + "logps/rejected": -331.66278076171875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.489461660385132, + "rewards/margins": 5.207231521606445, + "rewards/rejected": -7.696692943572998, + "step": 1193 + }, + { + "epoch": 0.19, + "learning_rate": 1.3271606411120909e-05, + "logits/chosen": -2.9660353660583496, + "logits/rejected": -2.6530113220214844, + "logps/chosen": -595.3057250976562, + "logps/rejected": -517.5379028320312, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8548588752746582, + "rewards/margins": 6.146183013916016, + "rewards/rejected": -8.001042366027832, + "step": 1194 + }, + { + "epoch": 0.19, + "learning_rate": 1.327087297058976e-05, + "logits/chosen": -3.1526222229003906, + "logits/rejected": -3.0468785762786865, + "logps/chosen": -97.52983093261719, + "logps/rejected": -148.61419677734375, + "loss": 1.5677, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1198205947875977, + "rewards/margins": 1.3180567026138306, + "rewards/rejected": -4.437877178192139, + "step": 1195 + }, + { + "epoch": 0.19, + "learning_rate": 1.3270139530058612e-05, + "logits/chosen": -1.9526206254959106, + "logits/rejected": -2.6624624729156494, + "logps/chosen": -402.292236328125, + "logps/rejected": -340.294677734375, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.182389974594116, + "rewards/margins": 4.447620391845703, + "rewards/rejected": -7.630010604858398, + "step": 1196 + }, + { + "epoch": 0.19, + "learning_rate": 1.3269406089527464e-05, + "logits/chosen": -2.9854986667633057, + "logits/rejected": -2.5019164085388184, + "logps/chosen": -636.3123168945312, + "logps/rejected": -379.3663024902344, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35823312401771545, + "rewards/margins": 6.518433094024658, + "rewards/rejected": -6.876666069030762, + "step": 1197 + }, + { + "epoch": 0.19, + "learning_rate": 1.3268672648996316e-05, + "logits/chosen": -2.198910713195801, + "logits/rejected": -1.888319492340088, + "logps/chosen": -860.516845703125, + "logps/rejected": -620.2935791015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20270690321922302, + "rewards/margins": 11.639652252197266, + "rewards/rejected": -11.436944961547852, + "step": 1198 + }, + { + "epoch": 0.19, + "learning_rate": 1.326793920846517e-05, + "logits/chosen": -0.9954180717468262, + "logits/rejected": -2.4668939113616943, + "logps/chosen": -28.942089080810547, + "logps/rejected": -272.01812744140625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2360241413116455, + "rewards/margins": 7.863640785217285, + "rewards/rejected": -9.099664688110352, + "step": 1199 + }, + { + "epoch": 0.19, + "learning_rate": 1.3267205767934022e-05, + "logits/chosen": -2.6622660160064697, + "logits/rejected": -3.0570526123046875, + "logps/chosen": -273.5639953613281, + "logps/rejected": -269.7514953613281, + "loss": 3.1186, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.363307952880859, + "rewards/margins": 0.42063045501708984, + "rewards/rejected": -4.783938407897949, + "step": 1200 + }, + { + "epoch": 0.19, + "learning_rate": 1.3266472327402873e-05, + "logits/chosen": -2.93341064453125, + "logits/rejected": -2.463291883468628, + "logps/chosen": -325.3380432128906, + "logps/rejected": -196.0926971435547, + "loss": 1.8586, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.864859104156494, + "rewards/margins": 2.037722587585449, + "rewards/rejected": -5.902581691741943, + "step": 1201 + }, + { + "epoch": 0.19, + "learning_rate": 1.3265738886871725e-05, + "logits/chosen": -2.450411558151245, + "logits/rejected": -3.254228115081787, + "logps/chosen": -101.7100601196289, + "logps/rejected": -420.91998291015625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2307029962539673, + "rewards/margins": 7.801278114318848, + "rewards/rejected": -9.031980514526367, + "step": 1202 + }, + { + "epoch": 0.19, + "learning_rate": 1.3265005446340577e-05, + "logits/chosen": -2.5378689765930176, + "logits/rejected": -3.0165774822235107, + "logps/chosen": -198.45852661132812, + "logps/rejected": -315.9935607910156, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5559337735176086, + "rewards/margins": 7.656775951385498, + "rewards/rejected": -8.212709426879883, + "step": 1203 + }, + { + "epoch": 0.19, + "learning_rate": 1.3264272005809429e-05, + "logits/chosen": -2.113015651702881, + "logits/rejected": -2.81339430809021, + "logps/chosen": -320.79632568359375, + "logps/rejected": -347.2611389160156, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.178341865539551, + "rewards/margins": 7.761096477508545, + "rewards/rejected": -9.939437866210938, + "step": 1204 + }, + { + "epoch": 0.19, + "learning_rate": 1.3263538565278283e-05, + "logits/chosen": -3.178593158721924, + "logits/rejected": -2.7182962894439697, + "logps/chosen": -300.8979187011719, + "logps/rejected": -310.85650634765625, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9573204517364502, + "rewards/margins": 3.728231191635132, + "rewards/rejected": -5.685551643371582, + "step": 1205 + }, + { + "epoch": 0.19, + "learning_rate": 1.3262805124747135e-05, + "logits/chosen": -2.8378870487213135, + "logits/rejected": -3.2536520957946777, + "logps/chosen": -57.02916717529297, + "logps/rejected": -237.37771606445312, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7381737232208252, + "rewards/margins": 4.649017333984375, + "rewards/rejected": -6.387190818786621, + "step": 1206 + }, + { + "epoch": 0.19, + "learning_rate": 1.3262071684215986e-05, + "logits/chosen": -1.2743651866912842, + "logits/rejected": -3.085637331008911, + "logps/chosen": -178.19534301757812, + "logps/rejected": -450.7021789550781, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.505904197692871, + "rewards/margins": 5.958952903747559, + "rewards/rejected": -8.46485710144043, + "step": 1207 + }, + { + "epoch": 0.19, + "learning_rate": 1.326133824368484e-05, + "logits/chosen": -3.1106925010681152, + "logits/rejected": -2.892221450805664, + "logps/chosen": -73.28529357910156, + "logps/rejected": -201.8563232421875, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2395403385162354, + "rewards/margins": 4.832232475280762, + "rewards/rejected": -6.071772575378418, + "step": 1208 + }, + { + "epoch": 0.19, + "learning_rate": 1.3260604803153692e-05, + "logits/chosen": -2.8208229541778564, + "logits/rejected": -3.1103355884552, + "logps/chosen": -212.982177734375, + "logps/rejected": -306.4441833496094, + "loss": 3.0511, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.512658596038818, + "rewards/margins": -1.587364673614502, + "rewards/rejected": -2.9252941608428955, + "step": 1209 + }, + { + "epoch": 0.19, + "learning_rate": 1.3259871362622544e-05, + "logits/chosen": -2.0959675312042236, + "logits/rejected": -3.1139769554138184, + "logps/chosen": -295.16387939453125, + "logps/rejected": -457.724609375, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7174524068832397, + "rewards/margins": 7.744481086730957, + "rewards/rejected": -8.461933135986328, + "step": 1210 + }, + { + "epoch": 0.19, + "learning_rate": 1.3259137922091396e-05, + "logits/chosen": -2.7500808238983154, + "logits/rejected": -2.829380750656128, + "logps/chosen": -87.52658081054688, + "logps/rejected": -269.69482421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8744325637817383, + "rewards/margins": 8.067285537719727, + "rewards/rejected": -9.941717147827148, + "step": 1211 + }, + { + "epoch": 0.19, + "learning_rate": 1.3258404481560248e-05, + "logits/chosen": -1.593971848487854, + "logits/rejected": -2.841106653213501, + "logps/chosen": -207.40841674804688, + "logps/rejected": -401.9805603027344, + "loss": 0.0671, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4107346534729004, + "rewards/margins": 4.660128593444824, + "rewards/rejected": -7.070863246917725, + "step": 1212 + }, + { + "epoch": 0.19, + "learning_rate": 1.32576710410291e-05, + "logits/chosen": -2.573551654815674, + "logits/rejected": -2.670586347579956, + "logps/chosen": -161.1709442138672, + "logps/rejected": -127.68489074707031, + "loss": 2.647, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.651254653930664, + "rewards/margins": 0.2909879684448242, + "rewards/rejected": -4.942242622375488, + "step": 1213 + }, + { + "epoch": 0.19, + "learning_rate": 1.3256937600497951e-05, + "logits/chosen": -2.481405019760132, + "logits/rejected": -2.6041765213012695, + "logps/chosen": -87.15467071533203, + "logps/rejected": -228.24365234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4541028141975403, + "rewards/margins": 8.816370964050293, + "rewards/rejected": -9.27047348022461, + "step": 1214 + }, + { + "epoch": 0.19, + "learning_rate": 1.3256204159966803e-05, + "logits/chosen": -2.8119423389434814, + "logits/rejected": -2.9875807762145996, + "logps/chosen": -96.98092651367188, + "logps/rejected": -169.37545776367188, + "loss": 0.5971, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7481179237365723, + "rewards/margins": 1.5540878772735596, + "rewards/rejected": -4.302206039428711, + "step": 1215 + }, + { + "epoch": 0.19, + "learning_rate": 1.3255470719435655e-05, + "logits/chosen": -2.77051043510437, + "logits/rejected": -3.1561119556427, + "logps/chosen": -399.5495910644531, + "logps/rejected": -260.5568542480469, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.624777317047119, + "rewards/margins": 5.990685939788818, + "rewards/rejected": -8.615463256835938, + "step": 1216 + }, + { + "epoch": 0.19, + "learning_rate": 1.3254737278904509e-05, + "logits/chosen": -2.6660399436950684, + "logits/rejected": -2.9877266883850098, + "logps/chosen": -32.869659423828125, + "logps/rejected": -192.97276306152344, + "loss": 0.1124, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0614731311798096, + "rewards/margins": 5.016634941101074, + "rewards/rejected": -7.078108787536621, + "step": 1217 + }, + { + "epoch": 0.19, + "learning_rate": 1.325400383837336e-05, + "logits/chosen": -2.8756349086761475, + "logits/rejected": -3.0583839416503906, + "logps/chosen": -364.25421142578125, + "logps/rejected": -303.64373779296875, + "loss": 3.7098, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.534565448760986, + "rewards/margins": -1.79667067527771, + "rewards/rejected": -3.7378945350646973, + "step": 1218 + }, + { + "epoch": 0.19, + "learning_rate": 1.3253270397842212e-05, + "logits/chosen": -3.0877435207366943, + "logits/rejected": -2.900761127471924, + "logps/chosen": -330.55584716796875, + "logps/rejected": -351.8658447265625, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8176796436309814, + "rewards/margins": 4.502893924713135, + "rewards/rejected": -7.320573806762695, + "step": 1219 + }, + { + "epoch": 0.19, + "learning_rate": 1.3252536957311064e-05, + "logits/chosen": -3.0456809997558594, + "logits/rejected": -3.110283374786377, + "logps/chosen": -238.84097290039062, + "logps/rejected": -234.460693359375, + "loss": 1.9632, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.531244993209839, + "rewards/margins": 1.044075608253479, + "rewards/rejected": -4.575320720672607, + "step": 1220 + }, + { + "epoch": 0.19, + "learning_rate": 1.3251803516779916e-05, + "logits/chosen": -2.7467761039733887, + "logits/rejected": -2.740370988845825, + "logps/chosen": -133.22171020507812, + "logps/rejected": -220.46588134765625, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9625221490859985, + "rewards/margins": 5.154932498931885, + "rewards/rejected": -7.117454528808594, + "step": 1221 + }, + { + "epoch": 0.19, + "learning_rate": 1.3251070076248768e-05, + "logits/chosen": -1.9030039310455322, + "logits/rejected": -2.8708596229553223, + "logps/chosen": -149.88104248046875, + "logps/rejected": -182.5226287841797, + "loss": 1.5935, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.121194362640381, + "rewards/margins": 3.1744205951690674, + "rewards/rejected": -5.295614719390869, + "step": 1222 + }, + { + "epoch": 0.19, + "learning_rate": 1.325033663571762e-05, + "logits/chosen": -3.1224091053009033, + "logits/rejected": -1.9006239175796509, + "logps/chosen": -428.8635559082031, + "logps/rejected": -73.8304443359375, + "loss": 8.0439, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.865293502807617, + "rewards/margins": -8.04360580444336, + "rewards/rejected": -0.8216881155967712, + "step": 1223 + }, + { + "epoch": 0.19, + "learning_rate": 1.3249603195186472e-05, + "logits/chosen": -1.1840155124664307, + "logits/rejected": -3.019723892211914, + "logps/chosen": -82.55302429199219, + "logps/rejected": -544.3299560546875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.057020425796509, + "rewards/margins": 7.605386734008789, + "rewards/rejected": -9.662406921386719, + "step": 1224 + }, + { + "epoch": 0.19, + "learning_rate": 1.3248869754655324e-05, + "logits/chosen": -2.4711947441101074, + "logits/rejected": -3.0644004344940186, + "logps/chosen": -46.94925308227539, + "logps/rejected": -200.01388549804688, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7885046601295471, + "rewards/margins": 7.1179656982421875, + "rewards/rejected": -7.90647029876709, + "step": 1225 + }, + { + "epoch": 0.19, + "learning_rate": 1.3248136314124177e-05, + "logits/chosen": -2.611379861831665, + "logits/rejected": -2.747515916824341, + "logps/chosen": -209.69992065429688, + "logps/rejected": -251.67811584472656, + "loss": 0.2075, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8983261585235596, + "rewards/margins": 3.6988894939422607, + "rewards/rejected": -6.59721565246582, + "step": 1226 + }, + { + "epoch": 0.19, + "learning_rate": 1.3247402873593029e-05, + "logits/chosen": -3.1107239723205566, + "logits/rejected": -3.2219455242156982, + "logps/chosen": -166.3751678466797, + "logps/rejected": -331.53466796875, + "loss": 0.2216, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.455362319946289, + "rewards/margins": 2.996450901031494, + "rewards/rejected": -4.451813697814941, + "step": 1227 + }, + { + "epoch": 0.19, + "learning_rate": 1.3246669433061881e-05, + "logits/chosen": -1.8086217641830444, + "logits/rejected": -2.905792713165283, + "logps/chosen": -111.12765502929688, + "logps/rejected": -285.4499816894531, + "loss": 0.0383, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9580045938491821, + "rewards/margins": 4.412420272827148, + "rewards/rejected": -6.370424747467041, + "step": 1228 + }, + { + "epoch": 0.19, + "learning_rate": 1.3245935992530733e-05, + "logits/chosen": -2.4925389289855957, + "logits/rejected": -3.025373697280884, + "logps/chosen": -97.92464447021484, + "logps/rejected": -172.60104370117188, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.451537847518921, + "rewards/margins": 5.769746780395508, + "rewards/rejected": -7.221284866333008, + "step": 1229 + }, + { + "epoch": 0.19, + "learning_rate": 1.3245202551999585e-05, + "logits/chosen": -2.6073689460754395, + "logits/rejected": -2.947756767272949, + "logps/chosen": -255.09315490722656, + "logps/rejected": -276.27520751953125, + "loss": 3.3008, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.6534552574157715, + "rewards/margins": 0.24554920196533203, + "rewards/rejected": -4.8990044593811035, + "step": 1230 + }, + { + "epoch": 0.19, + "learning_rate": 1.3244469111468437e-05, + "logits/chosen": -2.98793363571167, + "logits/rejected": -2.6361632347106934, + "logps/chosen": -209.9313507080078, + "logps/rejected": -263.7270202636719, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9472572803497314, + "rewards/margins": 4.852786064147949, + "rewards/rejected": -6.80004358291626, + "step": 1231 + }, + { + "epoch": 0.19, + "learning_rate": 1.3243735670937288e-05, + "logits/chosen": -2.017841339111328, + "logits/rejected": -2.8814849853515625, + "logps/chosen": -121.12383270263672, + "logps/rejected": -221.08935546875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9567810893058777, + "rewards/margins": 5.735831260681152, + "rewards/rejected": -6.6926116943359375, + "step": 1232 + }, + { + "epoch": 0.19, + "learning_rate": 1.324300223040614e-05, + "logits/chosen": -2.9333856105804443, + "logits/rejected": -3.0384678840637207, + "logps/chosen": -176.88824462890625, + "logps/rejected": -146.62863159179688, + "loss": 3.101, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3039214611053467, + "rewards/margins": 0.14628839492797852, + "rewards/rejected": -3.450209856033325, + "step": 1233 + }, + { + "epoch": 0.19, + "learning_rate": 1.3242268789874992e-05, + "logits/chosen": -2.4975199699401855, + "logits/rejected": -3.125572443008423, + "logps/chosen": -70.6661605834961, + "logps/rejected": -275.45068359375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.599228024482727, + "rewards/margins": 6.881216526031494, + "rewards/rejected": -7.480444431304932, + "step": 1234 + }, + { + "epoch": 0.19, + "learning_rate": 1.3241535349343846e-05, + "logits/chosen": -2.950085163116455, + "logits/rejected": -3.035907506942749, + "logps/chosen": -115.30561828613281, + "logps/rejected": -253.35580444335938, + "loss": 4.0883, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.510742664337158, + "rewards/margins": -0.2540557384490967, + "rewards/rejected": -4.256686687469482, + "step": 1235 + }, + { + "epoch": 0.19, + "learning_rate": 1.3240801908812698e-05, + "logits/chosen": -2.8087992668151855, + "logits/rejected": -1.9812654256820679, + "logps/chosen": -90.7575454711914, + "logps/rejected": -77.32400512695312, + "loss": 3.2365, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5779404640197754, + "rewards/margins": -1.2557175159454346, + "rewards/rejected": -2.322222948074341, + "step": 1236 + }, + { + "epoch": 0.19, + "learning_rate": 1.324006846828155e-05, + "logits/chosen": -2.8813886642456055, + "logits/rejected": -2.6729395389556885, + "logps/chosen": -387.0774230957031, + "logps/rejected": -618.60888671875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5022127628326416, + "rewards/margins": 6.009820461273193, + "rewards/rejected": -7.512033462524414, + "step": 1237 + }, + { + "epoch": 0.19, + "learning_rate": 1.3239335027750401e-05, + "logits/chosen": -3.026294231414795, + "logits/rejected": -2.120612382888794, + "logps/chosen": -504.5993347167969, + "logps/rejected": -390.024169921875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.744250535964966, + "rewards/margins": 6.945825576782227, + "rewards/rejected": -9.690075874328613, + "step": 1238 + }, + { + "epoch": 0.19, + "learning_rate": 1.3238601587219255e-05, + "logits/chosen": -2.6387510299682617, + "logits/rejected": -2.800546884536743, + "logps/chosen": -157.6513671875, + "logps/rejected": -113.99079132080078, + "loss": 1.8457, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6769471168518066, + "rewards/margins": 0.8154610395431519, + "rewards/rejected": -4.492408275604248, + "step": 1239 + }, + { + "epoch": 0.19, + "learning_rate": 1.3237868146688107e-05, + "logits/chosen": -1.5995938777923584, + "logits/rejected": -2.866849422454834, + "logps/chosen": -126.11163330078125, + "logps/rejected": -309.91522216796875, + "loss": 1.1784, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9696590900421143, + "rewards/margins": 3.222951889038086, + "rewards/rejected": -6.192611217498779, + "step": 1240 + }, + { + "epoch": 0.19, + "learning_rate": 1.3237134706156959e-05, + "logits/chosen": -1.8662530183792114, + "logits/rejected": -2.7062206268310547, + "logps/chosen": -238.5645751953125, + "logps/rejected": -387.51690673828125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.496991753578186, + "rewards/margins": 8.29244327545166, + "rewards/rejected": -9.789434432983398, + "step": 1241 + }, + { + "epoch": 0.19, + "learning_rate": 1.323640126562581e-05, + "logits/chosen": -2.067934036254883, + "logits/rejected": -2.9216480255126953, + "logps/chosen": -81.75292205810547, + "logps/rejected": -106.43480682373047, + "loss": 1.0349, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2719788551330566, + "rewards/margins": 2.188784122467041, + "rewards/rejected": -4.460762977600098, + "step": 1242 + }, + { + "epoch": 0.19, + "learning_rate": 1.3235667825094663e-05, + "logits/chosen": -2.394547700881958, + "logits/rejected": -2.9129252433776855, + "logps/chosen": -370.45709228515625, + "logps/rejected": -552.840087890625, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2187881469726562, + "rewards/margins": 6.461775302886963, + "rewards/rejected": -8.680563926696777, + "step": 1243 + }, + { + "epoch": 0.19, + "learning_rate": 1.3234934384563516e-05, + "logits/chosen": -3.210081100463867, + "logits/rejected": -3.2745394706726074, + "logps/chosen": -26.968170166015625, + "logps/rejected": -65.33087158203125, + "loss": 0.153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40501803159713745, + "rewards/margins": 2.8491132259368896, + "rewards/rejected": -3.254131317138672, + "step": 1244 + }, + { + "epoch": 0.19, + "learning_rate": 1.3234200944032368e-05, + "logits/chosen": -2.3114521503448486, + "logits/rejected": -3.166435718536377, + "logps/chosen": -268.154541015625, + "logps/rejected": -411.767822265625, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1600341796875, + "rewards/margins": 4.751152038574219, + "rewards/rejected": -6.911186218261719, + "step": 1245 + }, + { + "epoch": 0.19, + "learning_rate": 1.323346750350122e-05, + "logits/chosen": -2.818769931793213, + "logits/rejected": -2.9499740600585938, + "logps/chosen": -89.80815124511719, + "logps/rejected": -183.12664794921875, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2148792743682861, + "rewards/margins": 3.589878559112549, + "rewards/rejected": -4.804758071899414, + "step": 1246 + }, + { + "epoch": 0.19, + "learning_rate": 1.3232734062970072e-05, + "logits/chosen": -2.827136754989624, + "logits/rejected": -3.193848133087158, + "logps/chosen": -33.2579460144043, + "logps/rejected": -158.62911987304688, + "loss": 0.0583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8957908749580383, + "rewards/margins": 3.7091941833496094, + "rewards/rejected": -4.604985237121582, + "step": 1247 + }, + { + "epoch": 0.19, + "learning_rate": 1.3232000622438924e-05, + "logits/chosen": -2.6507179737091064, + "logits/rejected": -3.0501439571380615, + "logps/chosen": -23.39739227294922, + "logps/rejected": -128.25177001953125, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34186670184135437, + "rewards/margins": 4.267830848693848, + "rewards/rejected": -4.609697341918945, + "step": 1248 + }, + { + "epoch": 0.19, + "learning_rate": 1.3231267181907775e-05, + "logits/chosen": -3.0157618522644043, + "logits/rejected": -2.546449899673462, + "logps/chosen": -352.79193115234375, + "logps/rejected": -362.4595947265625, + "loss": 0.0573, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.72447669506073, + "rewards/margins": 4.630435943603516, + "rewards/rejected": -6.354912757873535, + "step": 1249 + }, + { + "epoch": 0.19, + "learning_rate": 1.3230533741376627e-05, + "logits/chosen": -2.5417568683624268, + "logits/rejected": -2.8393824100494385, + "logps/chosen": -101.34379577636719, + "logps/rejected": -221.65127563476562, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9071428775787354, + "rewards/margins": 5.855553150177002, + "rewards/rejected": -6.762696266174316, + "step": 1250 + }, + { + "epoch": 0.19, + "learning_rate": 1.322980030084548e-05, + "logits/chosen": -2.338465690612793, + "logits/rejected": -3.075028896331787, + "logps/chosen": -508.1215515136719, + "logps/rejected": -540.2188720703125, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0592410564422607, + "rewards/margins": 4.990380764007568, + "rewards/rejected": -6.04962158203125, + "step": 1251 + }, + { + "epoch": 0.19, + "learning_rate": 1.3229066860314331e-05, + "logits/chosen": -0.7335683703422546, + "logits/rejected": -2.9883933067321777, + "logps/chosen": -189.41207885742188, + "logps/rejected": -530.3235473632812, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3363960385322571, + "rewards/margins": 6.464437484741211, + "rewards/rejected": -6.800833702087402, + "step": 1252 + }, + { + "epoch": 0.19, + "learning_rate": 1.3228333419783185e-05, + "logits/chosen": -2.249541997909546, + "logits/rejected": -2.9775078296661377, + "logps/chosen": -132.0654296875, + "logps/rejected": -195.7199249267578, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.332108497619629, + "rewards/margins": 5.015325546264648, + "rewards/rejected": -6.3474345207214355, + "step": 1253 + }, + { + "epoch": 0.2, + "learning_rate": 1.3227599979252037e-05, + "logits/chosen": -2.473818778991699, + "logits/rejected": -3.0133793354034424, + "logps/chosen": -203.29318237304688, + "logps/rejected": -381.89764404296875, + "loss": 3.635, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.319605827331543, + "rewards/margins": -0.07709980010986328, + "rewards/rejected": -4.24250602722168, + "step": 1254 + }, + { + "epoch": 0.2, + "learning_rate": 1.3226866538720888e-05, + "logits/chosen": -2.9614665508270264, + "logits/rejected": -2.1126508712768555, + "logps/chosen": -78.18899536132812, + "logps/rejected": -29.778549194335938, + "loss": 2.0915, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.021446466445923, + "rewards/margins": -1.3922147750854492, + "rewards/rejected": -1.6292316913604736, + "step": 1255 + }, + { + "epoch": 0.2, + "learning_rate": 1.322613309818974e-05, + "logits/chosen": -1.9048672914505005, + "logits/rejected": -2.9810540676116943, + "logps/chosen": -251.61083984375, + "logps/rejected": -393.914794921875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1107078790664673, + "rewards/margins": 6.998202323913574, + "rewards/rejected": -8.108909606933594, + "step": 1256 + }, + { + "epoch": 0.2, + "learning_rate": 1.3225399657658592e-05, + "logits/chosen": -2.3545453548431396, + "logits/rejected": -1.494576096534729, + "logps/chosen": -442.0657958984375, + "logps/rejected": -472.03973388671875, + "loss": 5.6922, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.6793012619018555, + "rewards/margins": -5.685185432434082, + "rewards/rejected": -1.9941163063049316, + "step": 1257 + }, + { + "epoch": 0.2, + "learning_rate": 1.3224666217127444e-05, + "logits/chosen": -2.965635299682617, + "logits/rejected": -2.8769092559814453, + "logps/chosen": -417.3066711425781, + "logps/rejected": -442.2576904296875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0063327550888062, + "rewards/margins": 6.567082405090332, + "rewards/rejected": -7.5734148025512695, + "step": 1258 + }, + { + "epoch": 0.2, + "learning_rate": 1.3223932776596296e-05, + "logits/chosen": -3.175732135772705, + "logits/rejected": -2.9235475063323975, + "logps/chosen": -360.5289001464844, + "logps/rejected": -391.892822265625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8031250238418579, + "rewards/margins": 6.932069301605225, + "rewards/rejected": -7.735194206237793, + "step": 1259 + }, + { + "epoch": 0.2, + "learning_rate": 1.3223199336065148e-05, + "logits/chosen": -1.3529589176177979, + "logits/rejected": -2.6910486221313477, + "logps/chosen": -180.4400634765625, + "logps/rejected": -338.245361328125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19272765517234802, + "rewards/margins": 8.062813758850098, + "rewards/rejected": -8.25554084777832, + "step": 1260 + }, + { + "epoch": 0.2, + "learning_rate": 1.3222465895534e-05, + "logits/chosen": -2.6497671604156494, + "logits/rejected": -1.1680530309677124, + "logps/chosen": -290.4837341308594, + "logps/rejected": -290.2120056152344, + "loss": 4.1139, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.744204998016357, + "rewards/margins": -0.6237072944641113, + "rewards/rejected": -4.120497703552246, + "step": 1261 + }, + { + "epoch": 0.2, + "learning_rate": 1.3221732455002853e-05, + "logits/chosen": -3.1582093238830566, + "logits/rejected": -3.129739284515381, + "logps/chosen": -102.58538818359375, + "logps/rejected": -155.4707489013672, + "loss": 0.0355, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.23614501953125, + "rewards/margins": 3.472769260406494, + "rewards/rejected": -4.708914279937744, + "step": 1262 + }, + { + "epoch": 0.2, + "learning_rate": 1.3220999014471705e-05, + "logits/chosen": -2.433380365371704, + "logits/rejected": -2.7344107627868652, + "logps/chosen": -239.4073944091797, + "logps/rejected": -357.6495666503906, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8367156982421875, + "rewards/margins": 6.81390380859375, + "rewards/rejected": -8.650619506835938, + "step": 1263 + }, + { + "epoch": 0.2, + "learning_rate": 1.3220265573940557e-05, + "logits/chosen": -2.8884098529815674, + "logits/rejected": -2.9827635288238525, + "logps/chosen": -509.62432861328125, + "logps/rejected": -453.0785827636719, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.816673994064331, + "rewards/margins": 7.997931480407715, + "rewards/rejected": -9.814605712890625, + "step": 1264 + }, + { + "epoch": 0.2, + "learning_rate": 1.3219532133409409e-05, + "logits/chosen": -2.7713687419891357, + "logits/rejected": -1.9957504272460938, + "logps/chosen": -422.07476806640625, + "logps/rejected": -345.82122802734375, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.913358211517334, + "rewards/margins": 4.365670204162598, + "rewards/rejected": -7.279028415679932, + "step": 1265 + }, + { + "epoch": 0.2, + "learning_rate": 1.321879869287826e-05, + "logits/chosen": -2.1059560775756836, + "logits/rejected": -2.574568271636963, + "logps/chosen": -158.80184936523438, + "logps/rejected": -336.47589111328125, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7655398845672607, + "rewards/margins": 5.850005149841309, + "rewards/rejected": -7.615545272827148, + "step": 1266 + }, + { + "epoch": 0.2, + "learning_rate": 1.3218065252347113e-05, + "logits/chosen": -2.528277635574341, + "logits/rejected": -3.11691951751709, + "logps/chosen": -153.46363830566406, + "logps/rejected": -123.1452407836914, + "loss": 0.7937, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1424927711486816, + "rewards/margins": 2.3452539443969727, + "rewards/rejected": -4.487746238708496, + "step": 1267 + }, + { + "epoch": 0.2, + "learning_rate": 1.3217331811815965e-05, + "logits/chosen": -2.6377744674682617, + "logits/rejected": -2.5291574001312256, + "logps/chosen": -153.65902709960938, + "logps/rejected": -178.7253875732422, + "loss": 2.2865, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.598367691040039, + "rewards/margins": 0.9526293277740479, + "rewards/rejected": -3.550997018814087, + "step": 1268 + }, + { + "epoch": 0.2, + "learning_rate": 1.3216598371284816e-05, + "logits/chosen": -2.5043251514434814, + "logits/rejected": -3.0734283924102783, + "logps/chosen": -57.03007888793945, + "logps/rejected": -220.5011749267578, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1089041233062744, + "rewards/margins": 4.50857400894165, + "rewards/rejected": -5.617478370666504, + "step": 1269 + }, + { + "epoch": 0.2, + "learning_rate": 1.3215864930753668e-05, + "logits/chosen": -2.6111860275268555, + "logits/rejected": -1.8983118534088135, + "logps/chosen": -150.16055297851562, + "logps/rejected": -276.7946472167969, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.136206030845642, + "rewards/margins": 5.313930988311768, + "rewards/rejected": -6.450137138366699, + "step": 1270 + }, + { + "epoch": 0.2, + "learning_rate": 1.3215131490222522e-05, + "logits/chosen": -3.107213020324707, + "logits/rejected": -2.943903923034668, + "logps/chosen": -318.73236083984375, + "logps/rejected": -107.13484954833984, + "loss": 4.3267, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.168637275695801, + "rewards/margins": -4.1987481117248535, + "rewards/rejected": -1.969888687133789, + "step": 1271 + }, + { + "epoch": 0.2, + "learning_rate": 1.3214398049691374e-05, + "logits/chosen": -1.6328125, + "logits/rejected": -2.775050640106201, + "logps/chosen": -60.78230285644531, + "logps/rejected": -185.20448303222656, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2525627613067627, + "rewards/margins": 4.82788610458374, + "rewards/rejected": -6.080449104309082, + "step": 1272 + }, + { + "epoch": 0.2, + "learning_rate": 1.3213664609160227e-05, + "logits/chosen": -2.299062490463257, + "logits/rejected": -3.1538403034210205, + "logps/chosen": -125.27434539794922, + "logps/rejected": -365.69622802734375, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2052345275878906, + "rewards/margins": 5.756191253662109, + "rewards/rejected": -6.96142578125, + "step": 1273 + }, + { + "epoch": 0.2, + "learning_rate": 1.321293116862908e-05, + "logits/chosen": -2.3245246410369873, + "logits/rejected": -2.9821653366088867, + "logps/chosen": -470.45147705078125, + "logps/rejected": -474.1119689941406, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1234467029571533, + "rewards/margins": 7.1527252197265625, + "rewards/rejected": -9.276172637939453, + "step": 1274 + }, + { + "epoch": 0.2, + "learning_rate": 1.3212197728097931e-05, + "logits/chosen": -2.9873502254486084, + "logits/rejected": -2.6423819065093994, + "logps/chosen": -248.78317260742188, + "logps/rejected": -244.64059448242188, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4286994934082031, + "rewards/margins": 4.821347236633301, + "rewards/rejected": -6.250046730041504, + "step": 1275 + }, + { + "epoch": 0.2, + "learning_rate": 1.3211464287566783e-05, + "logits/chosen": -2.5100162029266357, + "logits/rejected": -3.1397900581359863, + "logps/chosen": -348.53692626953125, + "logps/rejected": -503.67266845703125, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6830154657363892, + "rewards/margins": 7.097566604614258, + "rewards/rejected": -7.780581951141357, + "step": 1276 + }, + { + "epoch": 0.2, + "learning_rate": 1.3210730847035635e-05, + "logits/chosen": -1.6427643299102783, + "logits/rejected": -3.0045278072357178, + "logps/chosen": -185.7904815673828, + "logps/rejected": -169.93966674804688, + "loss": 5.4487, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.349759101867676, + "rewards/margins": -3.325329065322876, + "rewards/rejected": -3.0244300365448, + "step": 1277 + }, + { + "epoch": 0.2, + "learning_rate": 1.3209997406504487e-05, + "logits/chosen": -3.159050226211548, + "logits/rejected": -3.2571098804473877, + "logps/chosen": -244.0185546875, + "logps/rejected": -312.8522644042969, + "loss": 2.2211, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.062273979187012, + "rewards/margins": 2.8466856479644775, + "rewards/rejected": -6.90895938873291, + "step": 1278 + }, + { + "epoch": 0.2, + "learning_rate": 1.320926396597334e-05, + "logits/chosen": -2.8227736949920654, + "logits/rejected": -2.9682962894439697, + "logps/chosen": -222.19760131835938, + "logps/rejected": -326.75604248046875, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9183913469314575, + "rewards/margins": 6.5062761306762695, + "rewards/rejected": -7.4246673583984375, + "step": 1279 + }, + { + "epoch": 0.2, + "learning_rate": 1.3208530525442192e-05, + "logits/chosen": -1.9086530208587646, + "logits/rejected": -2.8618242740631104, + "logps/chosen": -53.740203857421875, + "logps/rejected": -207.3638916015625, + "loss": 0.0454, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0988340377807617, + "rewards/margins": 5.167108535766602, + "rewards/rejected": -7.265942573547363, + "step": 1280 + }, + { + "epoch": 0.2, + "learning_rate": 1.3207797084911044e-05, + "logits/chosen": -2.385608196258545, + "logits/rejected": -3.086110830307007, + "logps/chosen": -144.87599182128906, + "logps/rejected": -413.9961242675781, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.989950656890869, + "rewards/margins": 6.070285320281982, + "rewards/rejected": -9.060235977172852, + "step": 1281 + }, + { + "epoch": 0.2, + "learning_rate": 1.3207063644379896e-05, + "logits/chosen": -2.2909364700317383, + "logits/rejected": -3.1246352195739746, + "logps/chosen": -121.59095001220703, + "logps/rejected": -463.8987121582031, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7874329090118408, + "rewards/margins": 7.6245574951171875, + "rewards/rejected": -8.411991119384766, + "step": 1282 + }, + { + "epoch": 0.2, + "learning_rate": 1.3206330203848748e-05, + "logits/chosen": -3.0896716117858887, + "logits/rejected": -2.317504405975342, + "logps/chosen": -330.57952880859375, + "logps/rejected": -152.602294921875, + "loss": 6.1212, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.34160041809082, + "rewards/margins": -6.118720531463623, + "rewards/rejected": -1.2228797674179077, + "step": 1283 + }, + { + "epoch": 0.2, + "learning_rate": 1.32055967633176e-05, + "logits/chosen": -1.687821388244629, + "logits/rejected": -2.9024815559387207, + "logps/chosen": -170.64761352539062, + "logps/rejected": -278.76885986328125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6653854846954346, + "rewards/margins": 6.921684265136719, + "rewards/rejected": -7.587069511413574, + "step": 1284 + }, + { + "epoch": 0.2, + "learning_rate": 1.3204863322786452e-05, + "logits/chosen": -1.6883970499038696, + "logits/rejected": -3.0333127975463867, + "logps/chosen": -179.28912353515625, + "logps/rejected": -610.7203369140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0527870655059814, + "rewards/margins": 10.774322509765625, + "rewards/rejected": -11.827108383178711, + "step": 1285 + }, + { + "epoch": 0.2, + "learning_rate": 1.3204129882255303e-05, + "logits/chosen": -1.5577325820922852, + "logits/rejected": -2.639458179473877, + "logps/chosen": -92.539794921875, + "logps/rejected": -284.04132080078125, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8050222396850586, + "rewards/margins": 6.074200630187988, + "rewards/rejected": -7.879222869873047, + "step": 1286 + }, + { + "epoch": 0.2, + "learning_rate": 1.3203396441724155e-05, + "logits/chosen": -2.9024267196655273, + "logits/rejected": -3.1233103275299072, + "logps/chosen": -321.9561462402344, + "logps/rejected": -208.99765014648438, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7832927703857422, + "rewards/margins": 6.135766983032227, + "rewards/rejected": -7.919059753417969, + "step": 1287 + }, + { + "epoch": 0.2, + "learning_rate": 1.3202663001193009e-05, + "logits/chosen": -2.9540903568267822, + "logits/rejected": -2.894948720932007, + "logps/chosen": -105.76443481445312, + "logps/rejected": -103.77027893066406, + "loss": 1.18, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.742602586746216, + "rewards/margins": 1.4717187881469727, + "rewards/rejected": -4.214321613311768, + "step": 1288 + }, + { + "epoch": 0.2, + "learning_rate": 1.320192956066186e-05, + "logits/chosen": -1.8774261474609375, + "logits/rejected": -2.916240692138672, + "logps/chosen": -118.45742797851562, + "logps/rejected": -431.94598388671875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5828518271446228, + "rewards/margins": 5.989615440368652, + "rewards/rejected": -6.572466850280762, + "step": 1289 + }, + { + "epoch": 0.2, + "learning_rate": 1.3201196120130713e-05, + "logits/chosen": -2.9494106769561768, + "logits/rejected": -3.043752431869507, + "logps/chosen": -82.78661346435547, + "logps/rejected": -175.315185546875, + "loss": 1.855, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.174891233444214, + "rewards/margins": 1.9832059144973755, + "rewards/rejected": -4.158097267150879, + "step": 1290 + }, + { + "epoch": 0.2, + "learning_rate": 1.3200462679599565e-05, + "logits/chosen": -2.19856858253479, + "logits/rejected": -2.6760621070861816, + "logps/chosen": -123.77609252929688, + "logps/rejected": -218.04769897460938, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9545674324035645, + "rewards/margins": 4.465873718261719, + "rewards/rejected": -6.420441150665283, + "step": 1291 + }, + { + "epoch": 0.2, + "learning_rate": 1.3199729239068416e-05, + "logits/chosen": -2.973853826522827, + "logits/rejected": -3.091320753097534, + "logps/chosen": -217.7905731201172, + "logps/rejected": -221.23377990722656, + "loss": 0.1167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6681580543518066, + "rewards/margins": 4.5773725509643555, + "rewards/rejected": -5.245530605316162, + "step": 1292 + }, + { + "epoch": 0.2, + "learning_rate": 1.3198995798537268e-05, + "logits/chosen": -2.330587148666382, + "logits/rejected": -2.942606210708618, + "logps/chosen": -105.6888198852539, + "logps/rejected": -274.1193542480469, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8323475122451782, + "rewards/margins": 6.1926469802856445, + "rewards/rejected": -7.024994850158691, + "step": 1293 + }, + { + "epoch": 0.2, + "learning_rate": 1.319826235800612e-05, + "logits/chosen": -1.2289594411849976, + "logits/rejected": -2.4887983798980713, + "logps/chosen": -279.14178466796875, + "logps/rejected": -591.3426513671875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5282135009765625, + "rewards/margins": 7.439398765563965, + "rewards/rejected": -9.967612266540527, + "step": 1294 + }, + { + "epoch": 0.2, + "learning_rate": 1.3197528917474972e-05, + "logits/chosen": -1.1940932273864746, + "logits/rejected": -2.5322482585906982, + "logps/chosen": -134.3406219482422, + "logps/rejected": -492.8191223144531, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1479382514953613, + "rewards/margins": 3.8626561164855957, + "rewards/rejected": -6.010594367980957, + "step": 1295 + }, + { + "epoch": 0.2, + "learning_rate": 1.3196795476943824e-05, + "logits/chosen": -3.1590073108673096, + "logits/rejected": -3.0548973083496094, + "logps/chosen": -1120.929443359375, + "logps/rejected": -938.6559448242188, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3061127662658691, + "rewards/margins": 7.6468353271484375, + "rewards/rejected": -8.952948570251465, + "step": 1296 + }, + { + "epoch": 0.2, + "learning_rate": 1.3196062036412678e-05, + "logits/chosen": -2.9070885181427, + "logits/rejected": -2.0504820346832275, + "logps/chosen": -462.9054260253906, + "logps/rejected": -360.5770263671875, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1645210981369019, + "rewards/margins": 4.936504364013672, + "rewards/rejected": -6.101025581359863, + "step": 1297 + }, + { + "epoch": 0.2, + "learning_rate": 1.319532859588153e-05, + "logits/chosen": -1.6039812564849854, + "logits/rejected": -3.0384392738342285, + "logps/chosen": -128.69992065429688, + "logps/rejected": -455.25225830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7982653975486755, + "rewards/margins": 9.803983688354492, + "rewards/rejected": -10.602249145507812, + "step": 1298 + }, + { + "epoch": 0.2, + "learning_rate": 1.3194595155350381e-05, + "logits/chosen": -2.627490758895874, + "logits/rejected": -3.180903196334839, + "logps/chosen": -78.82899475097656, + "logps/rejected": -342.97613525390625, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8626478314399719, + "rewards/margins": 5.450507164001465, + "rewards/rejected": -6.313154697418213, + "step": 1299 + }, + { + "epoch": 0.2, + "learning_rate": 1.3193861714819233e-05, + "logits/chosen": -2.199631690979004, + "logits/rejected": -3.1082205772399902, + "logps/chosen": -284.8060302734375, + "logps/rejected": -290.3570251464844, + "loss": 3.1774, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.051285743713379, + "rewards/margins": -1.0306546688079834, + "rewards/rejected": -4.020630836486816, + "step": 1300 + }, + { + "epoch": 0.2, + "learning_rate": 1.3193128274288085e-05, + "logits/chosen": -1.6521947383880615, + "logits/rejected": -3.136636734008789, + "logps/chosen": -128.00503540039062, + "logps/rejected": -428.3584899902344, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23252487182617188, + "rewards/margins": 6.911327362060547, + "rewards/rejected": -7.143852233886719, + "step": 1301 + }, + { + "epoch": 0.2, + "learning_rate": 1.3192394833756937e-05, + "logits/chosen": -0.9921915531158447, + "logits/rejected": -2.542748212814331, + "logps/chosen": -252.19662475585938, + "logps/rejected": -440.89276123046875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8126609325408936, + "rewards/margins": 6.4927215576171875, + "rewards/rejected": -8.305381774902344, + "step": 1302 + }, + { + "epoch": 0.2, + "learning_rate": 1.3191661393225789e-05, + "logits/chosen": -2.7863051891326904, + "logits/rejected": -3.1648006439208984, + "logps/chosen": -130.86907958984375, + "logps/rejected": -430.98382568359375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1713860034942627, + "rewards/margins": 8.966721534729004, + "rewards/rejected": -10.138107299804688, + "step": 1303 + }, + { + "epoch": 0.2, + "learning_rate": 1.319092795269464e-05, + "logits/chosen": -1.0087628364562988, + "logits/rejected": -3.078558921813965, + "logps/chosen": -127.39938354492188, + "logps/rejected": -330.73284912109375, + "loss": 3.5043, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.638660907745361, + "rewards/margins": 0.256131649017334, + "rewards/rejected": -5.894792556762695, + "step": 1304 + }, + { + "epoch": 0.2, + "learning_rate": 1.3190194512163494e-05, + "logits/chosen": -2.2371714115142822, + "logits/rejected": -2.614546537399292, + "logps/chosen": -169.93203735351562, + "logps/rejected": -322.5294189453125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4915473461151123, + "rewards/margins": 6.328365325927734, + "rewards/rejected": -7.819912910461426, + "step": 1305 + }, + { + "epoch": 0.2, + "learning_rate": 1.3189461071632346e-05, + "logits/chosen": -2.6060469150543213, + "logits/rejected": -2.7621374130249023, + "logps/chosen": -169.89344787597656, + "logps/rejected": -263.39447021484375, + "loss": 3.2777, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.851273059844971, + "rewards/margins": 0.5750327110290527, + "rewards/rejected": -5.426305770874023, + "step": 1306 + }, + { + "epoch": 0.2, + "learning_rate": 1.31887276311012e-05, + "logits/chosen": -3.1043131351470947, + "logits/rejected": -2.04263973236084, + "logps/chosen": -568.3474731445312, + "logps/rejected": -392.3840026855469, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14448091387748718, + "rewards/margins": 6.36990213394165, + "rewards/rejected": -6.514383316040039, + "step": 1307 + }, + { + "epoch": 0.2, + "learning_rate": 1.3187994190570052e-05, + "logits/chosen": -2.5555572509765625, + "logits/rejected": -2.998349189758301, + "logps/chosen": -137.0399169921875, + "logps/rejected": -281.14227294921875, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1684257984161377, + "rewards/margins": 5.1673994064331055, + "rewards/rejected": -6.335825443267822, + "step": 1308 + }, + { + "epoch": 0.2, + "learning_rate": 1.3187260750038903e-05, + "logits/chosen": -2.9486277103424072, + "logits/rejected": -3.3067450523376465, + "logps/chosen": -286.0867004394531, + "logps/rejected": -285.28912353515625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5039823651313782, + "rewards/margins": 6.0397210121154785, + "rewards/rejected": -6.543703079223633, + "step": 1309 + }, + { + "epoch": 0.2, + "learning_rate": 1.3186527309507755e-05, + "logits/chosen": -2.9637744426727295, + "logits/rejected": -2.6231014728546143, + "logps/chosen": -266.01507568359375, + "logps/rejected": -326.4262390136719, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19769534468650818, + "rewards/margins": 6.91031551361084, + "rewards/rejected": -7.108010768890381, + "step": 1310 + }, + { + "epoch": 0.2, + "learning_rate": 1.3185793868976607e-05, + "logits/chosen": -2.882190227508545, + "logits/rejected": -3.0573532581329346, + "logps/chosen": -145.19781494140625, + "logps/rejected": -345.44873046875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4251086711883545, + "rewards/margins": 8.939142227172852, + "rewards/rejected": -10.364250183105469, + "step": 1311 + }, + { + "epoch": 0.2, + "learning_rate": 1.3185060428445459e-05, + "logits/chosen": -0.8369205594062805, + "logits/rejected": -3.0775156021118164, + "logps/chosen": -131.61038208007812, + "logps/rejected": -332.51226806640625, + "loss": 2.2472, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4705443382263184, + "rewards/margins": 3.126034736633301, + "rewards/rejected": -6.596579074859619, + "step": 1312 + }, + { + "epoch": 0.2, + "learning_rate": 1.3184326987914311e-05, + "logits/chosen": -3.117751359939575, + "logits/rejected": -2.1397476196289062, + "logps/chosen": -404.4750061035156, + "logps/rejected": -369.22564697265625, + "loss": 4.0674, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.223265171051025, + "rewards/margins": -1.0289535522460938, + "rewards/rejected": -4.194311618804932, + "step": 1313 + }, + { + "epoch": 0.2, + "learning_rate": 1.3183593547383163e-05, + "logits/chosen": -2.2500457763671875, + "logits/rejected": -2.999776840209961, + "logps/chosen": -65.20289611816406, + "logps/rejected": -221.71331787109375, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2792439460754395, + "rewards/margins": 4.990663528442383, + "rewards/rejected": -6.269907474517822, + "step": 1314 + }, + { + "epoch": 0.2, + "learning_rate": 1.3182860106852016e-05, + "logits/chosen": -2.964872360229492, + "logits/rejected": -2.269742012023926, + "logps/chosen": -245.7291259765625, + "logps/rejected": -290.6856689453125, + "loss": 0.0332, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7661094665527344, + "rewards/margins": 3.447202444076538, + "rewards/rejected": -4.213312149047852, + "step": 1315 + }, + { + "epoch": 0.2, + "learning_rate": 1.3182126666320868e-05, + "logits/chosen": -2.201923370361328, + "logits/rejected": -1.0083069801330566, + "logps/chosen": -1068.0247802734375, + "logps/rejected": -329.5964050292969, + "loss": 0.083, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.492837905883789, + "rewards/margins": 4.977336883544922, + "rewards/rejected": -7.470174789428711, + "step": 1316 + }, + { + "epoch": 0.2, + "learning_rate": 1.318139322578972e-05, + "logits/chosen": -2.5543601512908936, + "logits/rejected": -2.454028367996216, + "logps/chosen": -239.52349853515625, + "logps/rejected": -233.26223754882812, + "loss": 2.7957, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.834449052810669, + "rewards/margins": 0.46593260765075684, + "rewards/rejected": -4.300381660461426, + "step": 1317 + }, + { + "epoch": 0.2, + "learning_rate": 1.3180659785258572e-05, + "logits/chosen": -2.9311439990997314, + "logits/rejected": -2.0022153854370117, + "logps/chosen": -304.6793212890625, + "logps/rejected": -382.28460693359375, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.538308620452881, + "rewards/margins": 5.83254861831665, + "rewards/rejected": -8.370857238769531, + "step": 1318 + }, + { + "epoch": 0.21, + "learning_rate": 1.3179926344727424e-05, + "logits/chosen": -2.779076337814331, + "logits/rejected": -3.236666440963745, + "logps/chosen": -27.517093658447266, + "logps/rejected": -98.0667495727539, + "loss": 0.0503, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2280690670013428, + "rewards/margins": 3.260404348373413, + "rewards/rejected": -4.488473415374756, + "step": 1319 + }, + { + "epoch": 0.21, + "learning_rate": 1.3179192904196276e-05, + "logits/chosen": -2.3875534534454346, + "logits/rejected": -2.805925130844116, + "logps/chosen": -98.70037841796875, + "logps/rejected": -421.4385986328125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2238657474517822, + "rewards/margins": 6.8724212646484375, + "rewards/rejected": -8.096287727355957, + "step": 1320 + }, + { + "epoch": 0.21, + "learning_rate": 1.3178459463665128e-05, + "logits/chosen": -2.958178758621216, + "logits/rejected": -2.3323965072631836, + "logps/chosen": -249.06776428222656, + "logps/rejected": -208.24603271484375, + "loss": 2.6694, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.031737327575684, + "rewards/margins": -0.496673583984375, + "rewards/rejected": -3.5350639820098877, + "step": 1321 + }, + { + "epoch": 0.21, + "learning_rate": 1.317772602313398e-05, + "logits/chosen": -1.7444626092910767, + "logits/rejected": -2.9430837631225586, + "logps/chosen": -46.50828170776367, + "logps/rejected": -390.27337646484375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9377801418304443, + "rewards/margins": 8.676756858825684, + "rewards/rejected": -10.614537239074707, + "step": 1322 + }, + { + "epoch": 0.21, + "learning_rate": 1.3176992582602831e-05, + "logits/chosen": -1.8865041732788086, + "logits/rejected": -3.1412413120269775, + "logps/chosen": -243.07333374023438, + "logps/rejected": -509.21600341796875, + "loss": 0.1605, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.527031898498535, + "rewards/margins": 2.159358501434326, + "rewards/rejected": -4.686389923095703, + "step": 1323 + }, + { + "epoch": 0.21, + "learning_rate": 1.3176259142071685e-05, + "logits/chosen": -3.251105308532715, + "logits/rejected": -3.0305957794189453, + "logps/chosen": -107.38007354736328, + "logps/rejected": -110.10513305664062, + "loss": 1.4697, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4146111011505127, + "rewards/margins": 0.4132348299026489, + "rewards/rejected": -2.827846050262451, + "step": 1324 + }, + { + "epoch": 0.21, + "learning_rate": 1.3175525701540537e-05, + "logits/chosen": -2.498908042907715, + "logits/rejected": -3.1559574604034424, + "logps/chosen": -94.6812973022461, + "logps/rejected": -262.1959228515625, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1423825025558472, + "rewards/margins": 4.278217792510986, + "rewards/rejected": -5.420599937438965, + "step": 1325 + }, + { + "epoch": 0.21, + "learning_rate": 1.3174792261009389e-05, + "logits/chosen": -2.503366231918335, + "logits/rejected": -3.0452537536621094, + "logps/chosen": -175.29078674316406, + "logps/rejected": -185.3195343017578, + "loss": 0.8374, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.407919406890869, + "rewards/margins": 1.6746023893356323, + "rewards/rejected": -5.082521438598633, + "step": 1326 + }, + { + "epoch": 0.21, + "learning_rate": 1.317405882047824e-05, + "logits/chosen": -2.685835838317871, + "logits/rejected": -2.8993985652923584, + "logps/chosen": -206.1977081298828, + "logps/rejected": -370.11187744140625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20983809232711792, + "rewards/margins": 7.20844841003418, + "rewards/rejected": -7.418286323547363, + "step": 1327 + }, + { + "epoch": 0.21, + "learning_rate": 1.3173325379947093e-05, + "logits/chosen": -2.9863178730010986, + "logits/rejected": -3.111603021621704, + "logps/chosen": -66.00183868408203, + "logps/rejected": -141.99293518066406, + "loss": 0.0329, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6028295755386353, + "rewards/margins": 3.7321865558624268, + "rewards/rejected": -4.335016250610352, + "step": 1328 + }, + { + "epoch": 0.21, + "learning_rate": 1.3172591939415944e-05, + "logits/chosen": -2.736518383026123, + "logits/rejected": -2.0699422359466553, + "logps/chosen": -483.2480163574219, + "logps/rejected": -261.6917419433594, + "loss": 3.7067, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9335296154022217, + "rewards/margins": 1.698516845703125, + "rewards/rejected": -5.632046699523926, + "step": 1329 + }, + { + "epoch": 0.21, + "learning_rate": 1.3171858498884796e-05, + "logits/chosen": -2.464740037918091, + "logits/rejected": -2.8389039039611816, + "logps/chosen": -129.83462524414062, + "logps/rejected": -391.887451171875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2992111444473267, + "rewards/margins": 7.073863983154297, + "rewards/rejected": -8.373075485229492, + "step": 1330 + }, + { + "epoch": 0.21, + "learning_rate": 1.3171125058353648e-05, + "logits/chosen": -2.8839776515960693, + "logits/rejected": -3.131930112838745, + "logps/chosen": -121.54606628417969, + "logps/rejected": -184.49314880371094, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8212836980819702, + "rewards/margins": 4.612026214599609, + "rewards/rejected": -5.433309555053711, + "step": 1331 + }, + { + "epoch": 0.21, + "learning_rate": 1.31703916178225e-05, + "logits/chosen": -2.6896812915802, + "logits/rejected": -3.302164077758789, + "logps/chosen": -205.56578063964844, + "logps/rejected": -353.171875, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4060550928115845, + "rewards/margins": 4.6418986320495605, + "rewards/rejected": -6.0479536056518555, + "step": 1332 + }, + { + "epoch": 0.21, + "learning_rate": 1.3169658177291354e-05, + "logits/chosen": -2.809967041015625, + "logits/rejected": -3.043745994567871, + "logps/chosen": -182.760009765625, + "logps/rejected": -371.8674621582031, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7475025653839111, + "rewards/margins": 6.2783050537109375, + "rewards/rejected": -7.0258073806762695, + "step": 1333 + }, + { + "epoch": 0.21, + "learning_rate": 1.3168924736760205e-05, + "logits/chosen": -2.327460527420044, + "logits/rejected": -3.1266725063323975, + "logps/chosen": -47.65025329589844, + "logps/rejected": -278.28076171875, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1614372730255127, + "rewards/margins": 5.290327548980713, + "rewards/rejected": -7.451765060424805, + "step": 1334 + }, + { + "epoch": 0.21, + "learning_rate": 1.3168191296229057e-05, + "logits/chosen": -3.0427119731903076, + "logits/rejected": -2.791555881500244, + "logps/chosen": -239.37213134765625, + "logps/rejected": -232.97662353515625, + "loss": 3.7159, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.803788185119629, + "rewards/margins": -0.5846190452575684, + "rewards/rejected": -4.2191691398620605, + "step": 1335 + }, + { + "epoch": 0.21, + "learning_rate": 1.316745785569791e-05, + "logits/chosen": -1.6221622228622437, + "logits/rejected": -1.984244704246521, + "logps/chosen": -140.63104248046875, + "logps/rejected": -360.46832275390625, + "loss": 0.0314, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2777992486953735, + "rewards/margins": 6.110509872436523, + "rewards/rejected": -7.388309001922607, + "step": 1336 + }, + { + "epoch": 0.21, + "learning_rate": 1.3166724415166761e-05, + "logits/chosen": -2.8944618701934814, + "logits/rejected": -2.6420483589172363, + "logps/chosen": -391.7995910644531, + "logps/rejected": -267.0691833496094, + "loss": 0.328, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3605161905288696, + "rewards/margins": 4.760066032409668, + "rewards/rejected": -6.120582580566406, + "step": 1337 + }, + { + "epoch": 0.21, + "learning_rate": 1.3165990974635613e-05, + "logits/chosen": -3.0006532669067383, + "logits/rejected": -3.017491579055786, + "logps/chosen": -340.9610900878906, + "logps/rejected": -391.3045349121094, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1844773292541504, + "rewards/margins": 5.273225784301758, + "rewards/rejected": -6.457703113555908, + "step": 1338 + }, + { + "epoch": 0.21, + "learning_rate": 1.3165257534104467e-05, + "logits/chosen": -3.075958490371704, + "logits/rejected": -2.6325161457061768, + "logps/chosen": -349.9679260253906, + "logps/rejected": -247.3542938232422, + "loss": 1.8392, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.712655544281006, + "rewards/margins": 1.3326702117919922, + "rewards/rejected": -5.045326232910156, + "step": 1339 + }, + { + "epoch": 0.21, + "learning_rate": 1.3164524093573318e-05, + "logits/chosen": -2.08394193649292, + "logits/rejected": -3.175478219985962, + "logps/chosen": -83.61038970947266, + "logps/rejected": -430.283935546875, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3085005283355713, + "rewards/margins": 4.765789985656738, + "rewards/rejected": -6.0742902755737305, + "step": 1340 + }, + { + "epoch": 0.21, + "learning_rate": 1.316379065304217e-05, + "logits/chosen": -3.0474300384521484, + "logits/rejected": -3.159987688064575, + "logps/chosen": -740.0671997070312, + "logps/rejected": -364.3890380859375, + "loss": 0.0684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28621065616607666, + "rewards/margins": 5.907950401306152, + "rewards/rejected": -6.194161415100098, + "step": 1341 + }, + { + "epoch": 0.21, + "learning_rate": 1.3163057212511024e-05, + "logits/chosen": -1.463545799255371, + "logits/rejected": -2.9970951080322266, + "logps/chosen": -42.23461151123047, + "logps/rejected": -219.29959106445312, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.260890245437622, + "rewards/margins": 5.432249069213867, + "rewards/rejected": -7.69313907623291, + "step": 1342 + }, + { + "epoch": 0.21, + "learning_rate": 1.3162323771979876e-05, + "logits/chosen": -2.631538152694702, + "logits/rejected": -3.269620418548584, + "logps/chosen": -270.24847412109375, + "logps/rejected": -428.82421875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.117088794708252, + "rewards/margins": 7.0118560791015625, + "rewards/rejected": -9.128944396972656, + "step": 1343 + }, + { + "epoch": 0.21, + "learning_rate": 1.3161590331448728e-05, + "logits/chosen": -2.7562036514282227, + "logits/rejected": -3.0063281059265137, + "logps/chosen": -296.5675964355469, + "logps/rejected": -504.7068176269531, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7088046073913574, + "rewards/margins": 8.82507038116455, + "rewards/rejected": -11.53387451171875, + "step": 1344 + }, + { + "epoch": 0.21, + "learning_rate": 1.316085689091758e-05, + "logits/chosen": -2.7103333473205566, + "logits/rejected": -2.8822007179260254, + "logps/chosen": -98.7857437133789, + "logps/rejected": -181.52784729003906, + "loss": 4.3659, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.372536659240723, + "rewards/margins": -2.7711410522460938, + "rewards/rejected": -2.60139536857605, + "step": 1345 + }, + { + "epoch": 0.21, + "learning_rate": 1.3160123450386431e-05, + "logits/chosen": -2.380955219268799, + "logits/rejected": -3.1640284061431885, + "logps/chosen": -542.892822265625, + "logps/rejected": -512.123779296875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4613761901855469, + "rewards/margins": 6.938443660736084, + "rewards/rejected": -8.399820327758789, + "step": 1346 + }, + { + "epoch": 0.21, + "learning_rate": 1.3159390009855283e-05, + "logits/chosen": -3.1790106296539307, + "logits/rejected": -2.6864752769470215, + "logps/chosen": -129.5941162109375, + "logps/rejected": -197.2852325439453, + "loss": 1.8449, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.950226306915283, + "rewards/margins": 0.839302659034729, + "rewards/rejected": -3.7895290851593018, + "step": 1347 + }, + { + "epoch": 0.21, + "learning_rate": 1.3158656569324135e-05, + "logits/chosen": -2.63211989402771, + "logits/rejected": -3.163054943084717, + "logps/chosen": -70.85340118408203, + "logps/rejected": -277.4875793457031, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.196866512298584, + "rewards/margins": 4.605669021606445, + "rewards/rejected": -6.802535533905029, + "step": 1348 + }, + { + "epoch": 0.21, + "learning_rate": 1.3157923128792987e-05, + "logits/chosen": -3.060318946838379, + "logits/rejected": -2.985531806945801, + "logps/chosen": -51.058006286621094, + "logps/rejected": -166.37998962402344, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.111014723777771, + "rewards/margins": 4.8092942237854, + "rewards/rejected": -5.920309066772461, + "step": 1349 + }, + { + "epoch": 0.21, + "learning_rate": 1.3157189688261839e-05, + "logits/chosen": -3.027909517288208, + "logits/rejected": -1.8405886888504028, + "logps/chosen": -210.05410766601562, + "logps/rejected": -74.40363311767578, + "loss": 2.2459, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.953951358795166, + "rewards/margins": -1.867832899093628, + "rewards/rejected": -2.086118459701538, + "step": 1350 + }, + { + "epoch": 0.21, + "learning_rate": 1.3156456247730692e-05, + "logits/chosen": -2.9421098232269287, + "logits/rejected": -3.119645595550537, + "logps/chosen": -114.91695404052734, + "logps/rejected": -301.666259765625, + "loss": 0.0713, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8594772815704346, + "rewards/margins": 4.0999908447265625, + "rewards/rejected": -6.959467887878418, + "step": 1351 + }, + { + "epoch": 0.21, + "learning_rate": 1.3155722807199544e-05, + "logits/chosen": -2.968449115753174, + "logits/rejected": -2.8979973793029785, + "logps/chosen": -208.8977508544922, + "logps/rejected": -292.84710693359375, + "loss": 1.7592, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0545196533203125, + "rewards/margins": 1.205213189125061, + "rewards/rejected": -4.259732723236084, + "step": 1352 + }, + { + "epoch": 0.21, + "learning_rate": 1.3154989366668396e-05, + "logits/chosen": -1.950018286705017, + "logits/rejected": -3.0132436752319336, + "logps/chosen": -244.6234130859375, + "logps/rejected": -380.58636474609375, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6165528297424316, + "rewards/margins": 5.674684524536133, + "rewards/rejected": -7.291237831115723, + "step": 1353 + }, + { + "epoch": 0.21, + "learning_rate": 1.3154255926137248e-05, + "logits/chosen": -2.6964902877807617, + "logits/rejected": -1.9603981971740723, + "logps/chosen": -429.094482421875, + "logps/rejected": -290.9031677246094, + "loss": 2.1773, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.981637716293335, + "rewards/margins": 1.4222121238708496, + "rewards/rejected": -4.403850078582764, + "step": 1354 + }, + { + "epoch": 0.21, + "learning_rate": 1.31535224856061e-05, + "logits/chosen": -2.814542770385742, + "logits/rejected": -3.084716796875, + "logps/chosen": -85.25888061523438, + "logps/rejected": -256.9579162597656, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0765571594238281, + "rewards/margins": 6.081934452056885, + "rewards/rejected": -7.158491611480713, + "step": 1355 + }, + { + "epoch": 0.21, + "learning_rate": 1.3152789045074952e-05, + "logits/chosen": -3.032196283340454, + "logits/rejected": -1.2744191884994507, + "logps/chosen": -353.4836730957031, + "logps/rejected": -137.00701904296875, + "loss": 3.074, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.762427806854248, + "rewards/margins": -0.43038439750671387, + "rewards/rejected": -4.332043647766113, + "step": 1356 + }, + { + "epoch": 0.21, + "learning_rate": 1.3152055604543804e-05, + "logits/chosen": -1.81504225730896, + "logits/rejected": -2.541527509689331, + "logps/chosen": -242.2683868408203, + "logps/rejected": -473.3798828125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9481323957443237, + "rewards/margins": 8.538915634155273, + "rewards/rejected": -9.487049102783203, + "step": 1357 + }, + { + "epoch": 0.21, + "learning_rate": 1.3151322164012656e-05, + "logits/chosen": -2.58329701423645, + "logits/rejected": -3.200042486190796, + "logps/chosen": -83.06881713867188, + "logps/rejected": -339.39642333984375, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9732797145843506, + "rewards/margins": 6.558026313781738, + "rewards/rejected": -7.531306266784668, + "step": 1358 + }, + { + "epoch": 0.21, + "learning_rate": 1.3150588723481508e-05, + "logits/chosen": -1.781672716140747, + "logits/rejected": -3.0378270149230957, + "logps/chosen": -188.42645263671875, + "logps/rejected": -331.60406494140625, + "loss": 1.4703, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5969765186309814, + "rewards/margins": 1.5542399883270264, + "rewards/rejected": -4.151216506958008, + "step": 1359 + }, + { + "epoch": 0.21, + "learning_rate": 1.3149855282950361e-05, + "logits/chosen": -2.1468842029571533, + "logits/rejected": -3.125561237335205, + "logps/chosen": -357.9751892089844, + "logps/rejected": -237.38768005371094, + "loss": 0.1292, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2798492908477783, + "rewards/margins": 2.924556016921997, + "rewards/rejected": -5.204405307769775, + "step": 1360 + }, + { + "epoch": 0.21, + "learning_rate": 1.3149121842419213e-05, + "logits/chosen": -1.0544095039367676, + "logits/rejected": -2.0508084297180176, + "logps/chosen": -293.5851135253906, + "logps/rejected": -540.1995849609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18060913681983948, + "rewards/margins": 8.138612747192383, + "rewards/rejected": -8.319221496582031, + "step": 1361 + }, + { + "epoch": 0.21, + "learning_rate": 1.3148388401888065e-05, + "logits/chosen": -2.7720580101013184, + "logits/rejected": -3.1192264556884766, + "logps/chosen": -33.605934143066406, + "logps/rejected": -144.54373168945312, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3159703016281128, + "rewards/margins": 4.143891334533691, + "rewards/rejected": -5.459861755371094, + "step": 1362 + }, + { + "epoch": 0.21, + "learning_rate": 1.3147654961356917e-05, + "logits/chosen": -2.1677443981170654, + "logits/rejected": -3.177137851715088, + "logps/chosen": -358.02105712890625, + "logps/rejected": -401.03143310546875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0411598682403564, + "rewards/margins": 5.675960063934326, + "rewards/rejected": -7.7171196937561035, + "step": 1363 + }, + { + "epoch": 0.21, + "learning_rate": 1.3146921520825769e-05, + "logits/chosen": -2.5031778812408447, + "logits/rejected": -2.616142749786377, + "logps/chosen": -343.11505126953125, + "logps/rejected": -333.92608642578125, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1953750848770142, + "rewards/margins": 4.8881120681762695, + "rewards/rejected": -6.083487033843994, + "step": 1364 + }, + { + "epoch": 0.21, + "learning_rate": 1.314618808029462e-05, + "logits/chosen": -3.096219539642334, + "logits/rejected": -2.8614585399627686, + "logps/chosen": -464.7314453125, + "logps/rejected": -537.4659423828125, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4827759265899658, + "rewards/margins": 6.580780029296875, + "rewards/rejected": -8.063556671142578, + "step": 1365 + }, + { + "epoch": 0.21, + "learning_rate": 1.3145454639763472e-05, + "logits/chosen": -3.004366159439087, + "logits/rejected": -3.1092166900634766, + "logps/chosen": -506.1003112792969, + "logps/rejected": -404.10333251953125, + "loss": 7.1049, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.90411376953125, + "rewards/margins": -7.103485584259033, + "rewards/rejected": -0.8006283044815063, + "step": 1366 + }, + { + "epoch": 0.21, + "learning_rate": 1.3144721199232324e-05, + "logits/chosen": -3.1180901527404785, + "logits/rejected": -2.744330644607544, + "logps/chosen": -443.834716796875, + "logps/rejected": -435.50396728515625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9782650470733643, + "rewards/margins": 7.225545406341553, + "rewards/rejected": -8.203810691833496, + "step": 1367 + }, + { + "epoch": 0.21, + "learning_rate": 1.3143987758701178e-05, + "logits/chosen": -2.4188802242279053, + "logits/rejected": -2.595486640930176, + "logps/chosen": -213.0806884765625, + "logps/rejected": -211.1971435546875, + "loss": 2.4005, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.875767707824707, + "rewards/margins": 0.45274853706359863, + "rewards/rejected": -4.328516006469727, + "step": 1368 + }, + { + "epoch": 0.21, + "learning_rate": 1.314325431817003e-05, + "logits/chosen": -2.2651748657226562, + "logits/rejected": -2.959925651550293, + "logps/chosen": -293.6156005859375, + "logps/rejected": -497.89874267578125, + "loss": 0.0756, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3784019947052002, + "rewards/margins": 5.3409318923950195, + "rewards/rejected": -6.719333648681641, + "step": 1369 + }, + { + "epoch": 0.21, + "learning_rate": 1.3142520877638882e-05, + "logits/chosen": -3.061415433883667, + "logits/rejected": -3.1728813648223877, + "logps/chosen": -111.29843139648438, + "logps/rejected": -95.00201416015625, + "loss": 2.0028, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.123260021209717, + "rewards/margins": 1.348745346069336, + "rewards/rejected": -4.472005367279053, + "step": 1370 + }, + { + "epoch": 0.21, + "learning_rate": 1.3141787437107733e-05, + "logits/chosen": -3.009697198867798, + "logits/rejected": -3.0007565021514893, + "logps/chosen": -127.20275115966797, + "logps/rejected": -179.53018188476562, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.016592025756836, + "rewards/margins": 5.966353416442871, + "rewards/rejected": -7.982945442199707, + "step": 1371 + }, + { + "epoch": 0.21, + "learning_rate": 1.3141053996576585e-05, + "logits/chosen": -2.882655143737793, + "logits/rejected": -2.970187187194824, + "logps/chosen": -246.26187133789062, + "logps/rejected": -234.17393493652344, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5178321599960327, + "rewards/margins": 4.843034744262695, + "rewards/rejected": -6.360867500305176, + "step": 1372 + }, + { + "epoch": 0.21, + "learning_rate": 1.3140320556045439e-05, + "logits/chosen": -2.3686532974243164, + "logits/rejected": -3.1942241191864014, + "logps/chosen": -218.7418670654297, + "logps/rejected": -260.60150146484375, + "loss": 0.0504, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.284820556640625, + "rewards/margins": 3.598148822784424, + "rewards/rejected": -5.882968902587891, + "step": 1373 + }, + { + "epoch": 0.21, + "learning_rate": 1.313958711551429e-05, + "logits/chosen": -1.9891608953475952, + "logits/rejected": -3.036120891571045, + "logps/chosen": -115.7560043334961, + "logps/rejected": -329.9703674316406, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.624495267868042, + "rewards/margins": 6.37165641784668, + "rewards/rejected": -7.996151924133301, + "step": 1374 + }, + { + "epoch": 0.21, + "learning_rate": 1.3138853674983143e-05, + "logits/chosen": -2.7066144943237305, + "logits/rejected": -2.731628179550171, + "logps/chosen": -279.6635437011719, + "logps/rejected": -319.22900390625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9014511108398438, + "rewards/margins": 6.581942558288574, + "rewards/rejected": -7.483393669128418, + "step": 1375 + }, + { + "epoch": 0.21, + "learning_rate": 1.3138120234451995e-05, + "logits/chosen": -2.907609462738037, + "logits/rejected": -1.4704164266586304, + "logps/chosen": -316.0621032714844, + "logps/rejected": -151.55104064941406, + "loss": 4.3516, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.424898147583008, + "rewards/margins": -4.3358612060546875, + "rewards/rejected": -1.0890365839004517, + "step": 1376 + }, + { + "epoch": 0.21, + "learning_rate": 1.3137386793920848e-05, + "logits/chosen": -2.888336181640625, + "logits/rejected": -3.1714909076690674, + "logps/chosen": -61.87741470336914, + "logps/rejected": -152.177734375, + "loss": 0.2201, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4139448404312134, + "rewards/margins": 2.0515360832214355, + "rewards/rejected": -3.4654808044433594, + "step": 1377 + }, + { + "epoch": 0.21, + "learning_rate": 1.31366533533897e-05, + "logits/chosen": -2.38832426071167, + "logits/rejected": -2.917707920074463, + "logps/chosen": -353.0237121582031, + "logps/rejected": -365.2606201171875, + "loss": 2.5097, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.177166938781738, + "rewards/margins": -0.000682830810546875, + "rewards/rejected": -4.176484107971191, + "step": 1378 + }, + { + "epoch": 0.21, + "learning_rate": 1.3135919912858552e-05, + "logits/chosen": -3.056760311126709, + "logits/rejected": -2.9256796836853027, + "logps/chosen": -148.07627868652344, + "logps/rejected": -189.63388061523438, + "loss": 0.0413, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2521228790283203, + "rewards/margins": 3.1758599281311035, + "rewards/rejected": -5.427982807159424, + "step": 1379 + }, + { + "epoch": 0.21, + "learning_rate": 1.3135186472327404e-05, + "logits/chosen": -3.038771867752075, + "logits/rejected": -3.1271350383758545, + "logps/chosen": -51.50875473022461, + "logps/rejected": -68.6412353515625, + "loss": 0.4274, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.3852813243865967, + "rewards/margins": 1.914680004119873, + "rewards/rejected": -3.299961566925049, + "step": 1380 + }, + { + "epoch": 0.21, + "learning_rate": 1.3134453031796256e-05, + "logits/chosen": -2.7495970726013184, + "logits/rejected": -3.156421661376953, + "logps/chosen": -229.4818878173828, + "logps/rejected": -433.1675720214844, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6140327453613281, + "rewards/margins": 5.416407585144043, + "rewards/rejected": -7.030439853668213, + "step": 1381 + }, + { + "epoch": 0.21, + "learning_rate": 1.3133719591265107e-05, + "logits/chosen": -3.0202572345733643, + "logits/rejected": -3.169438123703003, + "logps/chosen": -583.4524536132812, + "logps/rejected": -382.7564697265625, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6288543939590454, + "rewards/margins": 4.095707893371582, + "rewards/rejected": -5.724562168121338, + "step": 1382 + }, + { + "epoch": 0.22, + "learning_rate": 1.313298615073396e-05, + "logits/chosen": -1.9434380531311035, + "logits/rejected": -2.8133671283721924, + "logps/chosen": -296.65533447265625, + "logps/rejected": -418.5442810058594, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9104270935058594, + "rewards/margins": 6.857777118682861, + "rewards/rejected": -7.7682037353515625, + "step": 1383 + }, + { + "epoch": 0.22, + "learning_rate": 1.3132252710202811e-05, + "logits/chosen": -2.4234843254089355, + "logits/rejected": -3.0899100303649902, + "logps/chosen": -86.93487548828125, + "logps/rejected": -407.5312194824219, + "loss": 0.0403, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0552688837051392, + "rewards/margins": 7.809325695037842, + "rewards/rejected": -8.864594459533691, + "step": 1384 + }, + { + "epoch": 0.22, + "learning_rate": 1.3131519269671663e-05, + "logits/chosen": -1.9990240335464478, + "logits/rejected": -3.0305192470550537, + "logps/chosen": -141.4933624267578, + "logps/rejected": -290.329345703125, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.951748013496399, + "rewards/margins": 3.993804931640625, + "rewards/rejected": -5.945552825927734, + "step": 1385 + }, + { + "epoch": 0.22, + "learning_rate": 1.3130785829140517e-05, + "logits/chosen": -3.1622684001922607, + "logits/rejected": -2.9322009086608887, + "logps/chosen": -276.5384216308594, + "logps/rejected": -263.59954833984375, + "loss": 2.4132, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8652138710021973, + "rewards/margins": 0.25147461891174316, + "rewards/rejected": -3.1166884899139404, + "step": 1386 + }, + { + "epoch": 0.22, + "learning_rate": 1.3130052388609369e-05, + "logits/chosen": -3.034210205078125, + "logits/rejected": -2.264568567276001, + "logps/chosen": -380.1021728515625, + "logps/rejected": -274.3924255371094, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7949037551879883, + "rewards/margins": 4.642617225646973, + "rewards/rejected": -7.437520980834961, + "step": 1387 + }, + { + "epoch": 0.22, + "learning_rate": 1.312931894807822e-05, + "logits/chosen": -0.918928325176239, + "logits/rejected": -3.095348596572876, + "logps/chosen": -44.17955780029297, + "logps/rejected": -497.008056640625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8750154972076416, + "rewards/margins": 5.809771537780762, + "rewards/rejected": -7.684786796569824, + "step": 1388 + }, + { + "epoch": 0.22, + "learning_rate": 1.3128585507547072e-05, + "logits/chosen": -1.958857774734497, + "logits/rejected": -3.050750732421875, + "logps/chosen": -53.691680908203125, + "logps/rejected": -281.38543701171875, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.07613205909729, + "rewards/margins": 4.881072998046875, + "rewards/rejected": -5.957204818725586, + "step": 1389 + }, + { + "epoch": 0.22, + "learning_rate": 1.3127852067015924e-05, + "logits/chosen": -3.03531813621521, + "logits/rejected": -1.9031157493591309, + "logps/chosen": -261.09466552734375, + "logps/rejected": -119.76341247558594, + "loss": 5.0523, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.93300199508667, + "rewards/margins": -5.045809745788574, + "rewards/rejected": -0.8871920108795166, + "step": 1390 + }, + { + "epoch": 0.22, + "learning_rate": 1.3127118626484776e-05, + "logits/chosen": -1.8665053844451904, + "logits/rejected": -3.2057042121887207, + "logps/chosen": -114.95014953613281, + "logps/rejected": -343.20843505859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11099204421043396, + "rewards/margins": 7.809201717376709, + "rewards/rejected": -7.920193672180176, + "step": 1391 + }, + { + "epoch": 0.22, + "learning_rate": 1.3126385185953628e-05, + "logits/chosen": -2.1911604404449463, + "logits/rejected": -3.0150609016418457, + "logps/chosen": -181.18478393554688, + "logps/rejected": -383.69488525390625, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3995857238769531, + "rewards/margins": 5.10291862487793, + "rewards/rejected": -6.502504348754883, + "step": 1392 + }, + { + "epoch": 0.22, + "learning_rate": 1.312565174542248e-05, + "logits/chosen": -3.1551477909088135, + "logits/rejected": -2.8410837650299072, + "logps/chosen": -642.697265625, + "logps/rejected": -663.42333984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.443023681640625, + "rewards/margins": 8.663568496704102, + "rewards/rejected": -10.106592178344727, + "step": 1393 + }, + { + "epoch": 0.22, + "learning_rate": 1.3124918304891332e-05, + "logits/chosen": -1.7230361700057983, + "logits/rejected": -2.7188968658447266, + "logps/chosen": -127.93563842773438, + "logps/rejected": -295.1378479003906, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6706657409667969, + "rewards/margins": 4.3236494064331055, + "rewards/rejected": -5.994315147399902, + "step": 1394 + }, + { + "epoch": 0.22, + "learning_rate": 1.3124184864360185e-05, + "logits/chosen": -2.7729008197784424, + "logits/rejected": -2.981431722640991, + "logps/chosen": -243.5855712890625, + "logps/rejected": -356.2088317871094, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5536081194877625, + "rewards/margins": 6.469601154327393, + "rewards/rejected": -7.023209571838379, + "step": 1395 + }, + { + "epoch": 0.22, + "learning_rate": 1.3123451423829037e-05, + "logits/chosen": -2.4449405670166016, + "logits/rejected": -3.2484328746795654, + "logps/chosen": -215.1477813720703, + "logps/rejected": -291.3846435546875, + "loss": 1.2483, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.271026611328125, + "rewards/margins": 2.984049081802368, + "rewards/rejected": -6.255075454711914, + "step": 1396 + }, + { + "epoch": 0.22, + "learning_rate": 1.3122717983297889e-05, + "logits/chosen": -2.9204964637756348, + "logits/rejected": -3.2034482955932617, + "logps/chosen": -222.9093475341797, + "logps/rejected": -186.5084228515625, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8376693725585938, + "rewards/margins": 4.645340919494629, + "rewards/rejected": -6.483010292053223, + "step": 1397 + }, + { + "epoch": 0.22, + "learning_rate": 1.3121984542766741e-05, + "logits/chosen": -1.9765042066574097, + "logits/rejected": -2.914050817489624, + "logps/chosen": -80.8501205444336, + "logps/rejected": -165.87161254882812, + "loss": 0.0851, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6015286445617676, + "rewards/margins": 3.062422275543213, + "rewards/rejected": -5.6639509201049805, + "step": 1398 + }, + { + "epoch": 0.22, + "learning_rate": 1.3121251102235593e-05, + "logits/chosen": -3.0138368606567383, + "logits/rejected": -2.0071194171905518, + "logps/chosen": -204.74400329589844, + "logps/rejected": -291.72894287109375, + "loss": 2.4277, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4104647636413574, + "rewards/margins": 2.092202663421631, + "rewards/rejected": -5.502667427062988, + "step": 1399 + }, + { + "epoch": 0.22, + "learning_rate": 1.3120517661704445e-05, + "logits/chosen": -2.6216278076171875, + "logits/rejected": -3.0538036823272705, + "logps/chosen": -133.99441528320312, + "logps/rejected": -402.46044921875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6810200214385986, + "rewards/margins": 7.278106689453125, + "rewards/rejected": -8.959126472473145, + "step": 1400 + }, + { + "epoch": 0.22, + "learning_rate": 1.3119784221173297e-05, + "logits/chosen": -2.9962968826293945, + "logits/rejected": -3.069603443145752, + "logps/chosen": -133.47047424316406, + "logps/rejected": -334.4117431640625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5588228702545166, + "rewards/margins": 7.016500473022461, + "rewards/rejected": -9.575323104858398, + "step": 1401 + }, + { + "epoch": 0.22, + "learning_rate": 1.3119050780642148e-05, + "logits/chosen": -2.1138956546783447, + "logits/rejected": -2.6941003799438477, + "logps/chosen": -170.4747314453125, + "logps/rejected": -346.74853515625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1266292333602905, + "rewards/margins": 7.467979431152344, + "rewards/rejected": -8.594609260559082, + "step": 1402 + }, + { + "epoch": 0.22, + "learning_rate": 1.3118317340111e-05, + "logits/chosen": -0.896094560623169, + "logits/rejected": -2.14888334274292, + "logps/chosen": -140.17478942871094, + "logps/rejected": -365.169921875, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.311605215072632, + "rewards/margins": 3.5072319507598877, + "rewards/rejected": -5.8188371658325195, + "step": 1403 + }, + { + "epoch": 0.22, + "learning_rate": 1.3117583899579854e-05, + "logits/chosen": -2.946037530899048, + "logits/rejected": -3.0985662937164307, + "logps/chosen": -105.20256805419922, + "logps/rejected": -376.3970642089844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6606547832489014, + "rewards/margins": 8.902233123779297, + "rewards/rejected": -11.562889099121094, + "step": 1404 + }, + { + "epoch": 0.22, + "learning_rate": 1.3116850459048706e-05, + "logits/chosen": -3.1574196815490723, + "logits/rejected": -1.8304661512374878, + "logps/chosen": -365.08087158203125, + "logps/rejected": -55.72215270996094, + "loss": 3.8001, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.123716831207275, + "rewards/margins": -3.4185538291931152, + "rewards/rejected": -1.7051632404327393, + "step": 1405 + }, + { + "epoch": 0.22, + "learning_rate": 1.3116117018517558e-05, + "logits/chosen": -1.1541590690612793, + "logits/rejected": -2.7690136432647705, + "logps/chosen": -104.07754516601562, + "logps/rejected": -362.022216796875, + "loss": 0.313, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9675402641296387, + "rewards/margins": 4.005998611450195, + "rewards/rejected": -6.973538398742676, + "step": 1406 + }, + { + "epoch": 0.22, + "learning_rate": 1.3115383577986411e-05, + "logits/chosen": -2.8897364139556885, + "logits/rejected": -3.1678833961486816, + "logps/chosen": -185.2681121826172, + "logps/rejected": -318.76165771484375, + "loss": 3.2176, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.874320030212402, + "rewards/margins": -1.1108503341674805, + "rewards/rejected": -3.7634694576263428, + "step": 1407 + }, + { + "epoch": 0.22, + "learning_rate": 1.3114650137455263e-05, + "logits/chosen": -1.9894187450408936, + "logits/rejected": -3.0285730361938477, + "logps/chosen": -124.54434967041016, + "logps/rejected": -297.9515075683594, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8225473165512085, + "rewards/margins": 5.17140007019043, + "rewards/rejected": -6.993947982788086, + "step": 1408 + }, + { + "epoch": 0.22, + "learning_rate": 1.3113916696924115e-05, + "logits/chosen": -2.4800398349761963, + "logits/rejected": -3.2428553104400635, + "logps/chosen": -35.075042724609375, + "logps/rejected": -212.22354125976562, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3811465501785278, + "rewards/margins": 5.226130485534668, + "rewards/rejected": -6.607276916503906, + "step": 1409 + }, + { + "epoch": 0.22, + "learning_rate": 1.3113183256392967e-05, + "logits/chosen": -2.741807699203491, + "logits/rejected": -3.23675799369812, + "logps/chosen": -164.8768310546875, + "logps/rejected": -65.9529800415039, + "loss": 1.6283, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7406210899353027, + "rewards/margins": -0.8255337476730347, + "rewards/rejected": -1.9150874614715576, + "step": 1410 + }, + { + "epoch": 0.22, + "learning_rate": 1.3112449815861819e-05, + "logits/chosen": -1.9075723886489868, + "logits/rejected": -2.9152963161468506, + "logps/chosen": -81.70696258544922, + "logps/rejected": -498.0682678222656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.672145128250122, + "rewards/margins": 10.122282981872559, + "rewards/rejected": -12.794427871704102, + "step": 1411 + }, + { + "epoch": 0.22, + "learning_rate": 1.311171637533067e-05, + "logits/chosen": -2.969893217086792, + "logits/rejected": -3.0162441730499268, + "logps/chosen": -253.9609832763672, + "logps/rejected": -501.2156982421875, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.845416784286499, + "rewards/margins": 6.454113483428955, + "rewards/rejected": -7.299530029296875, + "step": 1412 + }, + { + "epoch": 0.22, + "learning_rate": 1.3110982934799524e-05, + "logits/chosen": -2.3476665019989014, + "logits/rejected": -2.9043126106262207, + "logps/chosen": -118.80589294433594, + "logps/rejected": -214.05349731445312, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8848694562911987, + "rewards/margins": 6.289374351501465, + "rewards/rejected": -8.174243927001953, + "step": 1413 + }, + { + "epoch": 0.22, + "learning_rate": 1.3110249494268376e-05, + "logits/chosen": -2.9221787452697754, + "logits/rejected": -2.4087443351745605, + "logps/chosen": -119.94226837158203, + "logps/rejected": -106.74725341796875, + "loss": 0.3519, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8180317878723145, + "rewards/margins": 2.3416006565093994, + "rewards/rejected": -5.159632205963135, + "step": 1414 + }, + { + "epoch": 0.22, + "learning_rate": 1.3109516053737228e-05, + "logits/chosen": -1.911913514137268, + "logits/rejected": -2.8111817836761475, + "logps/chosen": -200.91709899902344, + "logps/rejected": -408.1972351074219, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.834446907043457, + "rewards/margins": 5.394356727600098, + "rewards/rejected": -8.228803634643555, + "step": 1415 + }, + { + "epoch": 0.22, + "learning_rate": 1.310878261320608e-05, + "logits/chosen": -2.351163387298584, + "logits/rejected": -2.6305994987487793, + "logps/chosen": -393.6665344238281, + "logps/rejected": -396.72418212890625, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1692917346954346, + "rewards/margins": 6.740936756134033, + "rewards/rejected": -8.910228729248047, + "step": 1416 + }, + { + "epoch": 0.22, + "learning_rate": 1.3108049172674932e-05, + "logits/chosen": -2.835808753967285, + "logits/rejected": -3.1360385417938232, + "logps/chosen": -111.42707824707031, + "logps/rejected": -217.16897583007812, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3647136688232422, + "rewards/margins": 6.063462734222412, + "rewards/rejected": -7.428176403045654, + "step": 1417 + }, + { + "epoch": 0.22, + "learning_rate": 1.3107315732143784e-05, + "logits/chosen": -3.027998447418213, + "logits/rejected": -3.21541690826416, + "logps/chosen": -192.72889709472656, + "logps/rejected": -312.89715576171875, + "loss": 0.0173, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4647469520568848, + "rewards/margins": 4.969391822814941, + "rewards/rejected": -6.434138774871826, + "step": 1418 + }, + { + "epoch": 0.22, + "learning_rate": 1.3106582291612635e-05, + "logits/chosen": -2.5157885551452637, + "logits/rejected": -3.066957712173462, + "logps/chosen": -620.5054931640625, + "logps/rejected": -582.6574096679688, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30945438146591187, + "rewards/margins": 5.806753635406494, + "rewards/rejected": -6.116208076477051, + "step": 1419 + }, + { + "epoch": 0.22, + "learning_rate": 1.3105848851081487e-05, + "logits/chosen": -2.7065987586975098, + "logits/rejected": -3.0610477924346924, + "logps/chosen": -157.9814910888672, + "logps/rejected": -274.3741760253906, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5040837526321411, + "rewards/margins": 4.890867233276367, + "rewards/rejected": -6.394950866699219, + "step": 1420 + }, + { + "epoch": 0.22, + "learning_rate": 1.310511541055034e-05, + "logits/chosen": -2.0662875175476074, + "logits/rejected": -2.4099276065826416, + "logps/chosen": -190.3680419921875, + "logps/rejected": -346.3414611816406, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7527717351913452, + "rewards/margins": 8.82953929901123, + "rewards/rejected": -10.582310676574707, + "step": 1421 + }, + { + "epoch": 0.22, + "learning_rate": 1.3104381970019193e-05, + "logits/chosen": -2.5942604541778564, + "logits/rejected": -3.101998805999756, + "logps/chosen": -409.0654296875, + "logps/rejected": -542.4613647460938, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6958985328674316, + "rewards/margins": 6.991246223449707, + "rewards/rejected": -8.687145233154297, + "step": 1422 + }, + { + "epoch": 0.22, + "learning_rate": 1.3103648529488045e-05, + "logits/chosen": -2.2262003421783447, + "logits/rejected": -3.294032573699951, + "logps/chosen": -220.91213989257812, + "logps/rejected": -359.0506896972656, + "loss": 3.793, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.195904731750488, + "rewards/margins": -1.266812801361084, + "rewards/rejected": -4.929091453552246, + "step": 1423 + }, + { + "epoch": 0.22, + "learning_rate": 1.3102915088956897e-05, + "logits/chosen": -1.5663542747497559, + "logits/rejected": -2.225482940673828, + "logps/chosen": -167.1996307373047, + "logps/rejected": -334.776611328125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.606205701828003, + "rewards/margins": 7.7142133712768555, + "rewards/rejected": -9.320419311523438, + "step": 1424 + }, + { + "epoch": 0.22, + "learning_rate": 1.3102181648425748e-05, + "logits/chosen": -3.156303644180298, + "logits/rejected": -3.1536765098571777, + "logps/chosen": -209.8662567138672, + "logps/rejected": -265.0220031738281, + "loss": 3.2394, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.386112689971924, + "rewards/margins": 0.7574594020843506, + "rewards/rejected": -6.1435723304748535, + "step": 1425 + }, + { + "epoch": 0.22, + "learning_rate": 1.31014482078946e-05, + "logits/chosen": -0.789135217666626, + "logits/rejected": -2.767882823944092, + "logps/chosen": -50.84123229980469, + "logps/rejected": -538.7120361328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2289199829101562, + "rewards/margins": 11.099502563476562, + "rewards/rejected": -14.328422546386719, + "step": 1426 + }, + { + "epoch": 0.22, + "learning_rate": 1.3100714767363452e-05, + "logits/chosen": -2.980968713760376, + "logits/rejected": -3.16902756690979, + "logps/chosen": -316.2067565917969, + "logps/rejected": -318.8709411621094, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3658660650253296, + "rewards/margins": 6.3433427810668945, + "rewards/rejected": -7.7092084884643555, + "step": 1427 + }, + { + "epoch": 0.22, + "learning_rate": 1.3099981326832304e-05, + "logits/chosen": -1.8782731294631958, + "logits/rejected": -2.6942522525787354, + "logps/chosen": -168.38308715820312, + "logps/rejected": -371.24237060546875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3053382635116577, + "rewards/margins": 7.335779190063477, + "rewards/rejected": -8.641117095947266, + "step": 1428 + }, + { + "epoch": 0.22, + "learning_rate": 1.3099247886301156e-05, + "logits/chosen": -3.0888848304748535, + "logits/rejected": -2.4578847885131836, + "logps/chosen": -304.7525634765625, + "logps/rejected": -279.1651611328125, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6954898834228516, + "rewards/margins": 4.689956188201904, + "rewards/rejected": -7.385446071624756, + "step": 1429 + }, + { + "epoch": 0.22, + "learning_rate": 1.3098514445770008e-05, + "logits/chosen": -3.1116373538970947, + "logits/rejected": -2.924504041671753, + "logps/chosen": -95.17323303222656, + "logps/rejected": -272.4489440917969, + "loss": 3.161, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.118329048156738, + "rewards/margins": 0.8200535774230957, + "rewards/rejected": -4.938382625579834, + "step": 1430 + }, + { + "epoch": 0.22, + "learning_rate": 1.3097781005238861e-05, + "logits/chosen": -2.741117238998413, + "logits/rejected": -2.880509853363037, + "logps/chosen": -34.04950714111328, + "logps/rejected": -197.3417205810547, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4185837507247925, + "rewards/margins": 5.762228488922119, + "rewards/rejected": -7.180811882019043, + "step": 1431 + }, + { + "epoch": 0.22, + "learning_rate": 1.3097047564707713e-05, + "logits/chosen": -2.158080816268921, + "logits/rejected": -3.0180606842041016, + "logps/chosen": -95.43698120117188, + "logps/rejected": -329.608154296875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.471879482269287, + "rewards/margins": 7.697198867797852, + "rewards/rejected": -10.169078826904297, + "step": 1432 + }, + { + "epoch": 0.22, + "learning_rate": 1.3096314124176565e-05, + "logits/chosen": -2.804677963256836, + "logits/rejected": -3.105344295501709, + "logps/chosen": -169.70361328125, + "logps/rejected": -92.95283508300781, + "loss": 3.2276, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.4366021156311035, + "rewards/margins": -2.286313533782959, + "rewards/rejected": -3.1502888202667236, + "step": 1433 + }, + { + "epoch": 0.22, + "learning_rate": 1.3095580683645417e-05, + "logits/chosen": -3.1625866889953613, + "logits/rejected": -3.1995160579681396, + "logps/chosen": -191.60006713867188, + "logps/rejected": -246.53677368164062, + "loss": 2.1662, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0168392658233643, + "rewards/margins": 1.5272951126098633, + "rewards/rejected": -4.544134616851807, + "step": 1434 + }, + { + "epoch": 0.22, + "learning_rate": 1.3094847243114269e-05, + "logits/chosen": -3.0209856033325195, + "logits/rejected": -1.753057599067688, + "logps/chosen": -307.84173583984375, + "logps/rejected": -222.48895263671875, + "loss": 3.2266, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.899374961853027, + "rewards/margins": -0.8604838848114014, + "rewards/rejected": -5.038890838623047, + "step": 1435 + }, + { + "epoch": 0.22, + "learning_rate": 1.309411380258312e-05, + "logits/chosen": -2.6801998615264893, + "logits/rejected": -3.1677587032318115, + "logps/chosen": -229.648193359375, + "logps/rejected": -351.2321472167969, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8186309337615967, + "rewards/margins": 6.75508975982666, + "rewards/rejected": -9.573720932006836, + "step": 1436 + }, + { + "epoch": 0.22, + "learning_rate": 1.3093380362051973e-05, + "logits/chosen": -2.0296220779418945, + "logits/rejected": -2.9698116779327393, + "logps/chosen": -72.96142578125, + "logps/rejected": -248.59848022460938, + "loss": 0.6651, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.787113904953003, + "rewards/margins": 2.120649814605713, + "rewards/rejected": -4.907763481140137, + "step": 1437 + }, + { + "epoch": 0.22, + "learning_rate": 1.3092646921520825e-05, + "logits/chosen": -2.521507740020752, + "logits/rejected": -3.0361649990081787, + "logps/chosen": -56.28599548339844, + "logps/rejected": -259.165771484375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.377941370010376, + "rewards/margins": 6.076826095581055, + "rewards/rejected": -7.45476770401001, + "step": 1438 + }, + { + "epoch": 0.22, + "learning_rate": 1.3091913480989678e-05, + "logits/chosen": -2.792665719985962, + "logits/rejected": -3.0350453853607178, + "logps/chosen": -394.73773193359375, + "logps/rejected": -441.01287841796875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.288870334625244, + "rewards/margins": 5.202054023742676, + "rewards/rejected": -8.490923881530762, + "step": 1439 + }, + { + "epoch": 0.22, + "learning_rate": 1.309118004045853e-05, + "logits/chosen": -3.033051013946533, + "logits/rejected": -3.2228217124938965, + "logps/chosen": -360.7853698730469, + "logps/rejected": -629.4605102539062, + "loss": 1.6129, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.98063063621521, + "rewards/margins": 2.38710355758667, + "rewards/rejected": -5.367733955383301, + "step": 1440 + }, + { + "epoch": 0.22, + "learning_rate": 1.3090446599927384e-05, + "logits/chosen": -1.4878387451171875, + "logits/rejected": -2.9044244289398193, + "logps/chosen": -85.39448547363281, + "logps/rejected": -664.8059692382812, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6817508935928345, + "rewards/margins": 8.472369194030762, + "rewards/rejected": -10.154119491577148, + "step": 1441 + }, + { + "epoch": 0.22, + "learning_rate": 1.3089713159396235e-05, + "logits/chosen": -2.713022470474243, + "logits/rejected": -2.811073064804077, + "logps/chosen": -93.0377197265625, + "logps/rejected": -305.67999267578125, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0772976875305176, + "rewards/margins": 6.119978427886963, + "rewards/rejected": -8.19727611541748, + "step": 1442 + }, + { + "epoch": 0.22, + "learning_rate": 1.3088979718865087e-05, + "logits/chosen": -2.9907608032226562, + "logits/rejected": -2.751004695892334, + "logps/chosen": -145.87220764160156, + "logps/rejected": -254.10702514648438, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5674171447753906, + "rewards/margins": 5.333803176879883, + "rewards/rejected": -7.901220321655273, + "step": 1443 + }, + { + "epoch": 0.22, + "learning_rate": 1.308824627833394e-05, + "logits/chosen": -2.4962706565856934, + "logits/rejected": -3.051835298538208, + "logps/chosen": -316.2226257324219, + "logps/rejected": -485.5577087402344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7969425916671753, + "rewards/margins": 9.28980827331543, + "rewards/rejected": -11.086750984191895, + "step": 1444 + }, + { + "epoch": 0.22, + "learning_rate": 1.3087512837802791e-05, + "logits/chosen": -1.70440673828125, + "logits/rejected": -2.963822603225708, + "logps/chosen": -98.74929809570312, + "logps/rejected": -269.531005859375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.115525722503662, + "rewards/margins": 6.138178825378418, + "rewards/rejected": -8.253705024719238, + "step": 1445 + }, + { + "epoch": 0.22, + "learning_rate": 1.3086779397271643e-05, + "logits/chosen": -3.0206971168518066, + "logits/rejected": -3.0860795974731445, + "logps/chosen": -190.08380126953125, + "logps/rejected": -257.1956481933594, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8674797415733337, + "rewards/margins": 4.520089626312256, + "rewards/rejected": -5.387569427490234, + "step": 1446 + }, + { + "epoch": 0.23, + "learning_rate": 1.3086045956740495e-05, + "logits/chosen": -3.110577344894409, + "logits/rejected": -3.160747528076172, + "logps/chosen": -196.55322265625, + "logps/rejected": -267.3551025390625, + "loss": 2.144, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4800753593444824, + "rewards/margins": -1.4451532363891602, + "rewards/rejected": -2.0349221229553223, + "step": 1447 + }, + { + "epoch": 0.23, + "learning_rate": 1.3085312516209347e-05, + "logits/chosen": -2.916306734085083, + "logits/rejected": -3.0150344371795654, + "logps/chosen": -95.60076904296875, + "logps/rejected": -165.78814697265625, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2650829553604126, + "rewards/margins": 4.345729351043701, + "rewards/rejected": -5.610812187194824, + "step": 1448 + }, + { + "epoch": 0.23, + "learning_rate": 1.30845790756782e-05, + "logits/chosen": -2.805271625518799, + "logits/rejected": -2.480380058288574, + "logps/chosen": -160.8241729736328, + "logps/rejected": -329.9110107421875, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5418572425842285, + "rewards/margins": 6.128579139709473, + "rewards/rejected": -7.670435905456543, + "step": 1449 + }, + { + "epoch": 0.23, + "learning_rate": 1.3083845635147052e-05, + "logits/chosen": -3.061960220336914, + "logits/rejected": -3.0211918354034424, + "logps/chosen": -151.13186645507812, + "logps/rejected": -422.0678405761719, + "loss": 1.8812, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.29905891418457, + "rewards/margins": 3.994980812072754, + "rewards/rejected": -8.294039726257324, + "step": 1450 + }, + { + "epoch": 0.23, + "learning_rate": 1.3083112194615904e-05, + "logits/chosen": -1.5757476091384888, + "logits/rejected": -2.997756242752075, + "logps/chosen": -147.60194396972656, + "logps/rejected": -229.52322387695312, + "loss": 1.3806, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1240220069885254, + "rewards/margins": 3.1724226474761963, + "rewards/rejected": -6.296444416046143, + "step": 1451 + }, + { + "epoch": 0.23, + "learning_rate": 1.3082378754084756e-05, + "logits/chosen": -1.502289891242981, + "logits/rejected": -2.9709815979003906, + "logps/chosen": -150.75827026367188, + "logps/rejected": -741.7747802734375, + "loss": 1.9654, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6320104598999023, + "rewards/margins": 0.5354580879211426, + "rewards/rejected": -4.167468547821045, + "step": 1452 + }, + { + "epoch": 0.23, + "learning_rate": 1.3081645313553608e-05, + "logits/chosen": -2.1776015758514404, + "logits/rejected": -3.1986451148986816, + "logps/chosen": -180.49583435058594, + "logps/rejected": -325.8043212890625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3183168172836304, + "rewards/margins": 5.675139427185059, + "rewards/rejected": -6.9934563636779785, + "step": 1453 + }, + { + "epoch": 0.23, + "learning_rate": 1.308091187302246e-05, + "logits/chosen": -2.9879567623138428, + "logits/rejected": -2.971719980239868, + "logps/chosen": -305.8205871582031, + "logps/rejected": -353.9740295410156, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.066573143005371, + "rewards/margins": 5.110914707183838, + "rewards/rejected": -7.177488327026367, + "step": 1454 + }, + { + "epoch": 0.23, + "learning_rate": 1.3080178432491312e-05, + "logits/chosen": -3.212083101272583, + "logits/rejected": -2.9754528999328613, + "logps/chosen": -171.02914428710938, + "logps/rejected": -245.9486083984375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3859710693359375, + "rewards/margins": 6.2444562911987305, + "rewards/rejected": -8.630428314208984, + "step": 1455 + }, + { + "epoch": 0.23, + "learning_rate": 1.3079444991960163e-05, + "logits/chosen": -1.5762169361114502, + "logits/rejected": -3.1529428958892822, + "logps/chosen": -36.16151428222656, + "logps/rejected": -380.89715576171875, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8559906482696533, + "rewards/margins": 5.883518218994141, + "rewards/rejected": -7.739508628845215, + "step": 1456 + }, + { + "epoch": 0.23, + "learning_rate": 1.3078711551429015e-05, + "logits/chosen": -2.9244649410247803, + "logits/rejected": -3.079326629638672, + "logps/chosen": -184.03228759765625, + "logps/rejected": -104.6019287109375, + "loss": 5.1388, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.078508377075195, + "rewards/margins": -5.12258243560791, + "rewards/rejected": -0.955925464630127, + "step": 1457 + }, + { + "epoch": 0.23, + "learning_rate": 1.3077978110897869e-05, + "logits/chosen": -3.044957160949707, + "logits/rejected": -2.747427463531494, + "logps/chosen": -586.2091064453125, + "logps/rejected": -676.0589599609375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0481247901916504, + "rewards/margins": 7.912318229675293, + "rewards/rejected": -10.960443496704102, + "step": 1458 + }, + { + "epoch": 0.23, + "learning_rate": 1.307724467036672e-05, + "logits/chosen": -3.2819931507110596, + "logits/rejected": -3.2470197677612305, + "logps/chosen": -129.78338623046875, + "logps/rejected": -64.13848114013672, + "loss": 3.7662, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.76548957824707, + "rewards/margins": -3.7230217456817627, + "rewards/rejected": -1.0424678325653076, + "step": 1459 + }, + { + "epoch": 0.23, + "learning_rate": 1.3076511229835573e-05, + "logits/chosen": -2.5121116638183594, + "logits/rejected": -3.2442739009857178, + "logps/chosen": -88.19206237792969, + "logps/rejected": -298.59869384765625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9870836138725281, + "rewards/margins": 6.605887413024902, + "rewards/rejected": -7.592970848083496, + "step": 1460 + }, + { + "epoch": 0.23, + "learning_rate": 1.3075777789304425e-05, + "logits/chosen": -1.4431304931640625, + "logits/rejected": -2.329444646835327, + "logps/chosen": -170.47036743164062, + "logps/rejected": -260.876953125, + "loss": 2.2882, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.380339622497559, + "rewards/margins": 2.526914358139038, + "rewards/rejected": -6.907253742218018, + "step": 1461 + }, + { + "epoch": 0.23, + "learning_rate": 1.3075044348773276e-05, + "logits/chosen": -2.3925187587738037, + "logits/rejected": -3.202620029449463, + "logps/chosen": -34.14741134643555, + "logps/rejected": -194.80154418945312, + "loss": 0.0918, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9837979078292847, + "rewards/margins": 2.4774231910705566, + "rewards/rejected": -3.4612209796905518, + "step": 1462 + }, + { + "epoch": 0.23, + "learning_rate": 1.3074310908242128e-05, + "logits/chosen": -2.983412027359009, + "logits/rejected": -3.2711260318756104, + "logps/chosen": -64.40203094482422, + "logps/rejected": -219.37655639648438, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8701499700546265, + "rewards/margins": 3.6424102783203125, + "rewards/rejected": -5.5125603675842285, + "step": 1463 + }, + { + "epoch": 0.23, + "learning_rate": 1.307357746771098e-05, + "logits/chosen": -2.79514479637146, + "logits/rejected": -3.067441940307617, + "logps/chosen": -445.9973449707031, + "logps/rejected": -624.4569702148438, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.405402421951294, + "rewards/margins": 7.851347923278809, + "rewards/rejected": -9.256750106811523, + "step": 1464 + }, + { + "epoch": 0.23, + "learning_rate": 1.3072844027179832e-05, + "logits/chosen": -2.7766273021698, + "logits/rejected": -3.1210665702819824, + "logps/chosen": -71.35086059570312, + "logps/rejected": -179.05303955078125, + "loss": 0.2101, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.509158730506897, + "rewards/margins": 3.232480049133301, + "rewards/rejected": -4.741639137268066, + "step": 1465 + }, + { + "epoch": 0.23, + "learning_rate": 1.3072110586648686e-05, + "logits/chosen": -2.164745807647705, + "logits/rejected": -3.000469446182251, + "logps/chosen": -112.50830841064453, + "logps/rejected": -257.4690856933594, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.256923645734787, + "rewards/margins": 6.539398193359375, + "rewards/rejected": -6.796321868896484, + "step": 1466 + }, + { + "epoch": 0.23, + "learning_rate": 1.3071377146117537e-05, + "logits/chosen": -2.4390487670898438, + "logits/rejected": -2.7309155464172363, + "logps/chosen": -158.2626953125, + "logps/rejected": -331.1290588378906, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9491382837295532, + "rewards/margins": 7.640726089477539, + "rewards/rejected": -8.589864730834961, + "step": 1467 + }, + { + "epoch": 0.23, + "learning_rate": 1.307064370558639e-05, + "logits/chosen": -2.559311628341675, + "logits/rejected": -3.1680588722229004, + "logps/chosen": -301.7651062011719, + "logps/rejected": -402.51617431640625, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.463922917842865, + "rewards/margins": 7.422240257263184, + "rewards/rejected": -7.886162757873535, + "step": 1468 + }, + { + "epoch": 0.23, + "learning_rate": 1.3069910265055241e-05, + "logits/chosen": -3.095827341079712, + "logits/rejected": -3.1435048580169678, + "logps/chosen": -106.9912338256836, + "logps/rejected": -343.11041259765625, + "loss": 2.1774, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.027282476425171, + "rewards/margins": -0.3382391929626465, + "rewards/rejected": -2.6890432834625244, + "step": 1469 + }, + { + "epoch": 0.23, + "learning_rate": 1.3069176824524093e-05, + "logits/chosen": -2.9109559059143066, + "logits/rejected": -2.489365577697754, + "logps/chosen": -413.64361572265625, + "logps/rejected": -373.58447265625, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7734665274620056, + "rewards/margins": 6.557973861694336, + "rewards/rejected": -7.3314409255981445, + "step": 1470 + }, + { + "epoch": 0.23, + "learning_rate": 1.3068443383992945e-05, + "logits/chosen": -3.2345058917999268, + "logits/rejected": -3.0660126209259033, + "logps/chosen": -577.2702026367188, + "logps/rejected": -406.28302001953125, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4850921630859375, + "rewards/margins": 5.3822479248046875, + "rewards/rejected": -5.867340087890625, + "step": 1471 + }, + { + "epoch": 0.23, + "learning_rate": 1.3067709943461797e-05, + "logits/chosen": -2.6758148670196533, + "logits/rejected": -3.0739965438842773, + "logps/chosen": -42.01312255859375, + "logps/rejected": -204.70291137695312, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.585848093032837, + "rewards/margins": 5.4698991775512695, + "rewards/rejected": -7.0557475090026855, + "step": 1472 + }, + { + "epoch": 0.23, + "learning_rate": 1.306697650293065e-05, + "logits/chosen": -2.5357918739318848, + "logits/rejected": -3.1189939975738525, + "logps/chosen": -261.4609375, + "logps/rejected": -308.7064208984375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9228515625, + "rewards/margins": 6.263811111450195, + "rewards/rejected": -7.186662673950195, + "step": 1473 + }, + { + "epoch": 0.23, + "learning_rate": 1.3066243062399502e-05, + "logits/chosen": -2.9536750316619873, + "logits/rejected": -2.6079299449920654, + "logps/chosen": -998.5454711914062, + "logps/rejected": -598.8558349609375, + "loss": 0.0517, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1365432739257812, + "rewards/margins": 4.191495895385742, + "rewards/rejected": -6.328039169311523, + "step": 1474 + }, + { + "epoch": 0.23, + "learning_rate": 1.3065509621868356e-05, + "logits/chosen": -3.1078057289123535, + "logits/rejected": -2.966346263885498, + "logps/chosen": -621.9505004882812, + "logps/rejected": -664.7813720703125, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9457030296325684, + "rewards/margins": 3.7259063720703125, + "rewards/rejected": -5.671609878540039, + "step": 1475 + }, + { + "epoch": 0.23, + "learning_rate": 1.3064776181337208e-05, + "logits/chosen": -2.8729939460754395, + "logits/rejected": -3.2819719314575195, + "logps/chosen": -636.3600463867188, + "logps/rejected": -593.963623046875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7601333856582642, + "rewards/margins": 6.215681076049805, + "rewards/rejected": -7.975813865661621, + "step": 1476 + }, + { + "epoch": 0.23, + "learning_rate": 1.306404274080606e-05, + "logits/chosen": -3.165928602218628, + "logits/rejected": -2.093308448791504, + "logps/chosen": -374.65545654296875, + "logps/rejected": -380.91387939453125, + "loss": 3.2882, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.578322887420654, + "rewards/margins": -0.6902475357055664, + "rewards/rejected": -4.888075351715088, + "step": 1477 + }, + { + "epoch": 0.23, + "learning_rate": 1.3063309300274912e-05, + "logits/chosen": -3.119893789291382, + "logits/rejected": -2.2234904766082764, + "logps/chosen": -328.1002197265625, + "logps/rejected": -271.927001953125, + "loss": 3.6912, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.125470161437988, + "rewards/margins": -0.41060614585876465, + "rewards/rejected": -3.7148637771606445, + "step": 1478 + }, + { + "epoch": 0.23, + "learning_rate": 1.3062575859743763e-05, + "logits/chosen": -2.966510057449341, + "logits/rejected": -3.2228734493255615, + "logps/chosen": -77.29157257080078, + "logps/rejected": -210.379150390625, + "loss": 0.0251, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6396179795265198, + "rewards/margins": 3.675070285797119, + "rewards/rejected": -4.314688205718994, + "step": 1479 + }, + { + "epoch": 0.23, + "learning_rate": 1.3061842419212615e-05, + "logits/chosen": -2.433513641357422, + "logits/rejected": -3.0876834392547607, + "logps/chosen": -155.45016479492188, + "logps/rejected": -225.70079040527344, + "loss": 0.0469, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3651223182678223, + "rewards/margins": 3.9797606468200684, + "rewards/rejected": -5.344882965087891, + "step": 1480 + }, + { + "epoch": 0.23, + "learning_rate": 1.3061108978681467e-05, + "logits/chosen": -2.8780624866485596, + "logits/rejected": -3.0694825649261475, + "logps/chosen": -193.8728790283203, + "logps/rejected": -206.2808837890625, + "loss": 0.9474, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8360635042190552, + "rewards/margins": 3.0444016456604004, + "rewards/rejected": -4.880465030670166, + "step": 1481 + }, + { + "epoch": 0.23, + "learning_rate": 1.3060375538150319e-05, + "logits/chosen": -2.471054792404175, + "logits/rejected": -3.270495891571045, + "logps/chosen": -412.6564025878906, + "logps/rejected": -490.3629455566406, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.485631227493286, + "rewards/margins": 7.51693058013916, + "rewards/rejected": -10.002561569213867, + "step": 1482 + }, + { + "epoch": 0.23, + "learning_rate": 1.3059642097619171e-05, + "logits/chosen": -2.4823548793792725, + "logits/rejected": -2.4357895851135254, + "logps/chosen": -410.94140625, + "logps/rejected": -359.10491943359375, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8873084783554077, + "rewards/margins": 5.2406721115112305, + "rewards/rejected": -7.127981185913086, + "step": 1483 + }, + { + "epoch": 0.23, + "learning_rate": 1.3058908657088024e-05, + "logits/chosen": -1.3172577619552612, + "logits/rejected": -2.9451916217803955, + "logps/chosen": -87.41944885253906, + "logps/rejected": -389.09478759765625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4433083534240723, + "rewards/margins": 8.355974197387695, + "rewards/rejected": -10.79928207397461, + "step": 1484 + }, + { + "epoch": 0.23, + "learning_rate": 1.3058175216556876e-05, + "logits/chosen": -2.1563127040863037, + "logits/rejected": -2.305603504180908, + "logps/chosen": -168.80184936523438, + "logps/rejected": -222.08160400390625, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5433971881866455, + "rewards/margins": 4.982593059539795, + "rewards/rejected": -6.525990009307861, + "step": 1485 + }, + { + "epoch": 0.23, + "learning_rate": 1.3057441776025728e-05, + "logits/chosen": -2.7235300540924072, + "logits/rejected": -3.044130325317383, + "logps/chosen": -65.94100952148438, + "logps/rejected": -350.53875732421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0871607065200806, + "rewards/margins": 8.496015548706055, + "rewards/rejected": -9.583176612854004, + "step": 1486 + }, + { + "epoch": 0.23, + "learning_rate": 1.305670833549458e-05, + "logits/chosen": -3.03684663772583, + "logits/rejected": -3.2550718784332275, + "logps/chosen": -95.61178588867188, + "logps/rejected": -307.8956604003906, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.138049840927124, + "rewards/margins": 6.492745399475098, + "rewards/rejected": -7.630795478820801, + "step": 1487 + }, + { + "epoch": 0.23, + "learning_rate": 1.3055974894963432e-05, + "logits/chosen": -2.676830768585205, + "logits/rejected": -3.107870578765869, + "logps/chosen": -269.27978515625, + "logps/rejected": -282.9123840332031, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.381060242652893, + "rewards/margins": 4.574911594390869, + "rewards/rejected": -5.955971717834473, + "step": 1488 + }, + { + "epoch": 0.23, + "learning_rate": 1.3055241454432284e-05, + "logits/chosen": -3.2094342708587646, + "logits/rejected": -2.4726874828338623, + "logps/chosen": -441.4730529785156, + "logps/rejected": -206.3662567138672, + "loss": 0.1464, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.29625403881073, + "rewards/margins": 3.048046112060547, + "rewards/rejected": -4.344300270080566, + "step": 1489 + }, + { + "epoch": 0.23, + "learning_rate": 1.3054508013901136e-05, + "logits/chosen": -2.4168217182159424, + "logits/rejected": -3.0963807106018066, + "logps/chosen": -55.52982711791992, + "logps/rejected": -186.1511993408203, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8860272169113159, + "rewards/margins": 5.910080909729004, + "rewards/rejected": -6.796108245849609, + "step": 1490 + }, + { + "epoch": 0.23, + "learning_rate": 1.3053774573369988e-05, + "logits/chosen": -3.083116054534912, + "logits/rejected": -3.0315020084381104, + "logps/chosen": -239.47084045410156, + "logps/rejected": -157.0926971435547, + "loss": 3.1663, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.204920768737793, + "rewards/margins": 1.3378472328186035, + "rewards/rejected": -5.5427680015563965, + "step": 1491 + }, + { + "epoch": 0.23, + "learning_rate": 1.305304113283884e-05, + "logits/chosen": -2.2129664421081543, + "logits/rejected": -3.2533905506134033, + "logps/chosen": -133.2093505859375, + "logps/rejected": -409.7292785644531, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0874227285385132, + "rewards/margins": 8.172555923461914, + "rewards/rejected": -9.259979248046875, + "step": 1492 + }, + { + "epoch": 0.23, + "learning_rate": 1.3052307692307693e-05, + "logits/chosen": -1.2997125387191772, + "logits/rejected": -2.6812331676483154, + "logps/chosen": -185.14088439941406, + "logps/rejected": -457.41265869140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06213761121034622, + "rewards/margins": 8.346016883850098, + "rewards/rejected": -8.283879280090332, + "step": 1493 + }, + { + "epoch": 0.23, + "learning_rate": 1.3051574251776545e-05, + "logits/chosen": -0.6702027916908264, + "logits/rejected": -2.418294668197632, + "logps/chosen": -201.8647918701172, + "logps/rejected": -830.905517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5305702090263367, + "rewards/margins": 12.78150463104248, + "rewards/rejected": -13.312074661254883, + "step": 1494 + }, + { + "epoch": 0.23, + "learning_rate": 1.3050840811245397e-05, + "logits/chosen": -2.9852137565612793, + "logits/rejected": -1.1164535284042358, + "logps/chosen": -290.5535888671875, + "logps/rejected": -62.591163635253906, + "loss": 3.4547, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.3969621658325195, + "rewards/margins": -3.39142107963562, + "rewards/rejected": -3.0055410861968994, + "step": 1495 + }, + { + "epoch": 0.23, + "learning_rate": 1.3050107370714249e-05, + "logits/chosen": -1.6642602682113647, + "logits/rejected": -2.9443891048431396, + "logps/chosen": -172.9842987060547, + "logps/rejected": -390.30023193359375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5150665640830994, + "rewards/margins": 6.301841735839844, + "rewards/rejected": -6.816908359527588, + "step": 1496 + }, + { + "epoch": 0.23, + "learning_rate": 1.30493739301831e-05, + "logits/chosen": -2.83028507232666, + "logits/rejected": -3.167038917541504, + "logps/chosen": -161.6659698486328, + "logps/rejected": -297.2301025390625, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4452564716339111, + "rewards/margins": 5.368372917175293, + "rewards/rejected": -6.813629150390625, + "step": 1497 + }, + { + "epoch": 0.23, + "learning_rate": 1.3048640489651952e-05, + "logits/chosen": -3.0941109657287598, + "logits/rejected": -2.8910505771636963, + "logps/chosen": -97.6565933227539, + "logps/rejected": -168.46987915039062, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8098485469818115, + "rewards/margins": 4.694833755493164, + "rewards/rejected": -6.504682540893555, + "step": 1498 + }, + { + "epoch": 0.23, + "learning_rate": 1.3047907049120804e-05, + "logits/chosen": -3.0511155128479004, + "logits/rejected": -2.5255672931671143, + "logps/chosen": -334.0687255859375, + "logps/rejected": -324.48016357421875, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2169067859649658, + "rewards/margins": 5.593761920928955, + "rewards/rejected": -6.810668468475342, + "step": 1499 + }, + { + "epoch": 0.23, + "learning_rate": 1.3047173608589656e-05, + "logits/chosen": -2.315544366836548, + "logits/rejected": -2.865217447280884, + "logps/chosen": -158.4910888671875, + "logps/rejected": -293.81866455078125, + "loss": 0.03, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3176770210266113, + "rewards/margins": 7.207392692565918, + "rewards/rejected": -10.525070190429688, + "step": 1500 + }, + { + "epoch": 0.23, + "learning_rate": 1.3046440168058508e-05, + "logits/chosen": -1.2650364637374878, + "logits/rejected": -2.519630193710327, + "logps/chosen": -284.5728759765625, + "logps/rejected": -414.494384765625, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7196991443634033, + "rewards/margins": 5.775094985961914, + "rewards/rejected": -7.494793891906738, + "step": 1501 + }, + { + "epoch": 0.23, + "learning_rate": 1.3045706727527362e-05, + "logits/chosen": -2.3325064182281494, + "logits/rejected": -2.808220148086548, + "logps/chosen": -141.97528076171875, + "logps/rejected": -300.6959228515625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8028625249862671, + "rewards/margins": 6.031627655029297, + "rewards/rejected": -6.834489822387695, + "step": 1502 + }, + { + "epoch": 0.23, + "learning_rate": 1.3044973286996214e-05, + "logits/chosen": -2.725310802459717, + "logits/rejected": -3.1704444885253906, + "logps/chosen": -124.97039031982422, + "logps/rejected": -268.1642761230469, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.107781171798706, + "rewards/margins": 4.841626167297363, + "rewards/rejected": -5.94940710067749, + "step": 1503 + }, + { + "epoch": 0.23, + "learning_rate": 1.3044239846465065e-05, + "logits/chosen": -2.887054920196533, + "logits/rejected": -2.8550381660461426, + "logps/chosen": -228.7226104736328, + "logps/rejected": -121.97879028320312, + "loss": 4.6553, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.498417854309082, + "rewards/margins": -1.877753734588623, + "rewards/rejected": -4.620664119720459, + "step": 1504 + }, + { + "epoch": 0.23, + "learning_rate": 1.3043506405933917e-05, + "logits/chosen": -3.2276690006256104, + "logits/rejected": -3.039249897003174, + "logps/chosen": -404.5706481933594, + "logps/rejected": -383.7308349609375, + "loss": 0.0296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9364679455757141, + "rewards/margins": 4.640948295593262, + "rewards/rejected": -5.57741641998291, + "step": 1505 + }, + { + "epoch": 0.23, + "learning_rate": 1.304277296540277e-05, + "logits/chosen": -1.4717941284179688, + "logits/rejected": -2.909649610519409, + "logps/chosen": -33.88707733154297, + "logps/rejected": -246.33001708984375, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3572252988815308, + "rewards/margins": 5.884076118469238, + "rewards/rejected": -7.2413010597229, + "step": 1506 + }, + { + "epoch": 0.23, + "learning_rate": 1.3042039524871623e-05, + "logits/chosen": -2.6113314628601074, + "logits/rejected": -3.07316517829895, + "logps/chosen": -156.5985107421875, + "logps/rejected": -330.14776611328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1757818460464478, + "rewards/margins": 7.921412467956543, + "rewards/rejected": -9.09719467163086, + "step": 1507 + }, + { + "epoch": 0.23, + "learning_rate": 1.3041306084340475e-05, + "logits/chosen": -2.364210367202759, + "logits/rejected": -2.540168046951294, + "logps/chosen": -162.20230102539062, + "logps/rejected": -387.4364013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.36480712890625, + "rewards/margins": 10.050064086914062, + "rewards/rejected": -11.414871215820312, + "step": 1508 + }, + { + "epoch": 0.23, + "learning_rate": 1.3040572643809327e-05, + "logits/chosen": -2.380314588546753, + "logits/rejected": -3.2134852409362793, + "logps/chosen": -103.79080200195312, + "logps/rejected": -502.5039978027344, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2296278476715088, + "rewards/margins": 7.314006328582764, + "rewards/rejected": -8.543634414672852, + "step": 1509 + }, + { + "epoch": 0.23, + "learning_rate": 1.3039839203278178e-05, + "logits/chosen": -1.9972835779190063, + "logits/rejected": -2.7524447441101074, + "logps/chosen": -76.45973205566406, + "logps/rejected": -224.40736389160156, + "loss": 0.6909, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7724716663360596, + "rewards/margins": 3.783951759338379, + "rewards/rejected": -6.556423664093018, + "step": 1510 + }, + { + "epoch": 0.23, + "learning_rate": 1.3039105762747032e-05, + "logits/chosen": -2.425328493118286, + "logits/rejected": -2.9168472290039062, + "logps/chosen": -277.0873107910156, + "logps/rejected": -444.2005615234375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1521629095077515, + "rewards/margins": 7.976247787475586, + "rewards/rejected": -9.128410339355469, + "step": 1511 + }, + { + "epoch": 0.24, + "learning_rate": 1.3038372322215884e-05, + "logits/chosen": -3.0101397037506104, + "logits/rejected": -3.213245391845703, + "logps/chosen": -73.34490966796875, + "logps/rejected": -158.30783081054688, + "loss": 0.0378, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6440393924713135, + "rewards/margins": 3.2698493003845215, + "rewards/rejected": -4.913888931274414, + "step": 1512 + }, + { + "epoch": 0.24, + "learning_rate": 1.3037638881684736e-05, + "logits/chosen": -2.601040840148926, + "logits/rejected": -2.9474127292633057, + "logps/chosen": -895.332275390625, + "logps/rejected": -757.456298828125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.538560390472412, + "rewards/margins": 6.371099948883057, + "rewards/rejected": -8.909660339355469, + "step": 1513 + }, + { + "epoch": 0.24, + "learning_rate": 1.3036905441153588e-05, + "logits/chosen": -2.6383137702941895, + "logits/rejected": -3.0523946285247803, + "logps/chosen": -165.29725646972656, + "logps/rejected": -239.01258850097656, + "loss": 3.7848, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.634489059448242, + "rewards/margins": -1.4978728294372559, + "rewards/rejected": -4.1366167068481445, + "step": 1514 + }, + { + "epoch": 0.24, + "learning_rate": 1.303617200062244e-05, + "logits/chosen": -1.9860692024230957, + "logits/rejected": -3.0414907932281494, + "logps/chosen": -199.73500061035156, + "logps/rejected": -407.74896240234375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.184044361114502, + "rewards/margins": 9.229076385498047, + "rewards/rejected": -11.413121223449707, + "step": 1515 + }, + { + "epoch": 0.24, + "learning_rate": 1.3035438560091291e-05, + "logits/chosen": -1.7670201063156128, + "logits/rejected": -2.8139700889587402, + "logps/chosen": -139.12301635742188, + "logps/rejected": -454.26953125, + "loss": 3.3649, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.068356513977051, + "rewards/margins": 2.4344542026519775, + "rewards/rejected": -6.502810955047607, + "step": 1516 + }, + { + "epoch": 0.24, + "learning_rate": 1.3034705119560143e-05, + "logits/chosen": -2.315479040145874, + "logits/rejected": -2.9609456062316895, + "logps/chosen": -71.89270782470703, + "logps/rejected": -253.56027221679688, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43170034885406494, + "rewards/margins": 6.769632816314697, + "rewards/rejected": -7.201333045959473, + "step": 1517 + }, + { + "epoch": 0.24, + "learning_rate": 1.3033971679028995e-05, + "logits/chosen": -2.899834632873535, + "logits/rejected": -3.1300718784332275, + "logps/chosen": -472.45648193359375, + "logps/rejected": -499.3389892578125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4253451824188232, + "rewards/margins": 7.69102668762207, + "rewards/rejected": -10.116371154785156, + "step": 1518 + }, + { + "epoch": 0.24, + "learning_rate": 1.3033238238497847e-05, + "logits/chosen": -2.129166841506958, + "logits/rejected": -2.867138147354126, + "logps/chosen": -103.30659484863281, + "logps/rejected": -192.708740234375, + "loss": 2.3448, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7120563983917236, + "rewards/margins": 2.2925357818603516, + "rewards/rejected": -6.004591941833496, + "step": 1519 + }, + { + "epoch": 0.24, + "learning_rate": 1.30325047979667e-05, + "logits/chosen": -2.6584367752075195, + "logits/rejected": -2.8670811653137207, + "logps/chosen": -80.08599853515625, + "logps/rejected": -341.9099426269531, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1090517044067383, + "rewards/margins": 6.511892318725586, + "rewards/rejected": -7.620944023132324, + "step": 1520 + }, + { + "epoch": 0.24, + "learning_rate": 1.3031771357435552e-05, + "logits/chosen": -3.0142710208892822, + "logits/rejected": -2.8636491298675537, + "logps/chosen": -413.94970703125, + "logps/rejected": -429.37066650390625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5078868865966797, + "rewards/margins": 6.966961860656738, + "rewards/rejected": -8.474848747253418, + "step": 1521 + }, + { + "epoch": 0.24, + "learning_rate": 1.3031037916904404e-05, + "logits/chosen": -1.780359148979187, + "logits/rejected": -2.8800606727600098, + "logps/chosen": -73.8353271484375, + "logps/rejected": -256.7236328125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7331922054290771, + "rewards/margins": 6.587639808654785, + "rewards/rejected": -8.320832252502441, + "step": 1522 + }, + { + "epoch": 0.24, + "learning_rate": 1.3030304476373256e-05, + "logits/chosen": -2.8750298023223877, + "logits/rejected": -1.8728200197219849, + "logps/chosen": -346.1729431152344, + "logps/rejected": -373.4283447265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.760232150554657, + "rewards/margins": 9.332584381103516, + "rewards/rejected": -10.092816352844238, + "step": 1523 + }, + { + "epoch": 0.24, + "learning_rate": 1.3029571035842108e-05, + "logits/chosen": -1.9002190828323364, + "logits/rejected": -3.0345382690429688, + "logps/chosen": -250.57904052734375, + "logps/rejected": -384.77728271484375, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.751152515411377, + "rewards/margins": 5.096137046813965, + "rewards/rejected": -9.847289085388184, + "step": 1524 + }, + { + "epoch": 0.24, + "learning_rate": 1.302883759531096e-05, + "logits/chosen": -1.1463650465011597, + "logits/rejected": -1.4223337173461914, + "logps/chosen": -375.682861328125, + "logps/rejected": -182.55947875976562, + "loss": 5.3668, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.73293924331665, + "rewards/margins": -3.171290874481201, + "rewards/rejected": -4.561647891998291, + "step": 1525 + }, + { + "epoch": 0.24, + "learning_rate": 1.3028104154779812e-05, + "logits/chosen": -2.601189374923706, + "logits/rejected": -3.12273907661438, + "logps/chosen": -51.35456466674805, + "logps/rejected": -263.7457580566406, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6157989501953125, + "rewards/margins": 6.270517826080322, + "rewards/rejected": -6.886316776275635, + "step": 1526 + }, + { + "epoch": 0.24, + "learning_rate": 1.3027370714248664e-05, + "logits/chosen": -3.1394989490509033, + "logits/rejected": -3.1566731929779053, + "logps/chosen": -294.70977783203125, + "logps/rejected": -297.195556640625, + "loss": 0.039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.230615258216858, + "rewards/margins": 5.029507637023926, + "rewards/rejected": -6.260122776031494, + "step": 1527 + }, + { + "epoch": 0.24, + "learning_rate": 1.3026637273717516e-05, + "logits/chosen": -3.1825921535491943, + "logits/rejected": -3.263084650039673, + "logps/chosen": -265.5849609375, + "logps/rejected": -308.8111572265625, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0176749229431152, + "rewards/margins": 5.633389472961426, + "rewards/rejected": -6.651064395904541, + "step": 1528 + }, + { + "epoch": 0.24, + "learning_rate": 1.302590383318637e-05, + "logits/chosen": -2.737612247467041, + "logits/rejected": -3.136806011199951, + "logps/chosen": -81.65682983398438, + "logps/rejected": -232.00631713867188, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4082849025726318, + "rewards/margins": 6.952452659606934, + "rewards/rejected": -8.360737800598145, + "step": 1529 + }, + { + "epoch": 0.24, + "learning_rate": 1.3025170392655221e-05, + "logits/chosen": -2.056936025619507, + "logits/rejected": -3.0460398197174072, + "logps/chosen": -405.8752136230469, + "logps/rejected": -534.348876953125, + "loss": 2.4224, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.287777900695801, + "rewards/margins": 1.2127461433410645, + "rewards/rejected": -5.500524044036865, + "step": 1530 + }, + { + "epoch": 0.24, + "learning_rate": 1.3024436952124073e-05, + "logits/chosen": -2.5800364017486572, + "logits/rejected": -2.957305431365967, + "logps/chosen": -139.93374633789062, + "logps/rejected": -263.1213684082031, + "loss": 1.9507, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.192640781402588, + "rewards/margins": 2.681497573852539, + "rewards/rejected": -6.874137878417969, + "step": 1531 + }, + { + "epoch": 0.24, + "learning_rate": 1.3023703511592925e-05, + "logits/chosen": -3.1525325775146484, + "logits/rejected": -3.224161386489868, + "logps/chosen": -329.40435791015625, + "logps/rejected": -385.1143798828125, + "loss": 0.1246, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.652505397796631, + "rewards/margins": 2.6722617149353027, + "rewards/rejected": -5.324767112731934, + "step": 1532 + }, + { + "epoch": 0.24, + "learning_rate": 1.3022970071061777e-05, + "logits/chosen": -2.8418869972229004, + "logits/rejected": -2.866842269897461, + "logps/chosen": -177.25787353515625, + "logps/rejected": -290.12799072265625, + "loss": 0.0367, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8396589756011963, + "rewards/margins": 4.483232498168945, + "rewards/rejected": -6.3228912353515625, + "step": 1533 + }, + { + "epoch": 0.24, + "learning_rate": 1.3022236630530629e-05, + "logits/chosen": -0.4634605646133423, + "logits/rejected": -2.9571595191955566, + "logps/chosen": -88.3292007446289, + "logps/rejected": -744.8597412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0799603462219238, + "rewards/margins": 10.500616073608398, + "rewards/rejected": -11.58057689666748, + "step": 1534 + }, + { + "epoch": 0.24, + "learning_rate": 1.302150318999948e-05, + "logits/chosen": -2.112872838973999, + "logits/rejected": -2.9518251419067383, + "logps/chosen": -349.3072814941406, + "logps/rejected": -371.953369140625, + "loss": 1.5249, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.795078992843628, + "rewards/margins": 3.5431902408599854, + "rewards/rejected": -6.338269233703613, + "step": 1535 + }, + { + "epoch": 0.24, + "learning_rate": 1.3020769749468332e-05, + "logits/chosen": -2.4098753929138184, + "logits/rejected": -3.0849220752716064, + "logps/chosen": -353.3531188964844, + "logps/rejected": -319.79901123046875, + "loss": 0.026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7280571460723877, + "rewards/margins": 3.7260031700134277, + "rewards/rejected": -5.4540605545043945, + "step": 1536 + }, + { + "epoch": 0.24, + "learning_rate": 1.3020036308937184e-05, + "logits/chosen": -2.0330193042755127, + "logits/rejected": -3.0501723289489746, + "logps/chosen": -452.8018798828125, + "logps/rejected": -534.314697265625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5665817260742188, + "rewards/margins": 9.108041763305664, + "rewards/rejected": -9.674623489379883, + "step": 1537 + }, + { + "epoch": 0.24, + "learning_rate": 1.3019302868406038e-05, + "logits/chosen": -3.204730272293091, + "logits/rejected": -3.206768274307251, + "logps/chosen": -213.69444274902344, + "logps/rejected": -124.78410339355469, + "loss": 2.9391, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.897287368774414, + "rewards/margins": -0.609520673751831, + "rewards/rejected": -4.287766933441162, + "step": 1538 + }, + { + "epoch": 0.24, + "learning_rate": 1.301856942787489e-05, + "logits/chosen": -1.4293491840362549, + "logits/rejected": -2.8666813373565674, + "logps/chosen": -152.31515502929688, + "logps/rejected": -475.747314453125, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.076314926147461, + "rewards/margins": 4.839954853057861, + "rewards/rejected": -7.9162702560424805, + "step": 1539 + }, + { + "epoch": 0.24, + "learning_rate": 1.3017835987343742e-05, + "logits/chosen": -1.929736852645874, + "logits/rejected": -2.8753600120544434, + "logps/chosen": -138.56590270996094, + "logps/rejected": -185.89892578125, + "loss": 1.4199, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.551572799682617, + "rewards/margins": 3.2351179122924805, + "rewards/rejected": -5.786690711975098, + "step": 1540 + }, + { + "epoch": 0.24, + "learning_rate": 1.3017102546812593e-05, + "logits/chosen": -1.6894030570983887, + "logits/rejected": -2.7119998931884766, + "logps/chosen": -154.9512939453125, + "logps/rejected": -460.8204345703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39739343523979187, + "rewards/margins": 10.005120277404785, + "rewards/rejected": -10.40251350402832, + "step": 1541 + }, + { + "epoch": 0.24, + "learning_rate": 1.3016369106281447e-05, + "logits/chosen": -2.422511100769043, + "logits/rejected": -2.912632942199707, + "logps/chosen": -164.44039916992188, + "logps/rejected": -435.21429443359375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1960654258728027, + "rewards/margins": 7.847997665405273, + "rewards/rejected": -10.044063568115234, + "step": 1542 + }, + { + "epoch": 0.24, + "learning_rate": 1.3015635665750299e-05, + "logits/chosen": -2.632760763168335, + "logits/rejected": -3.1039912700653076, + "logps/chosen": -32.543174743652344, + "logps/rejected": -166.06307983398438, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5358035564422607, + "rewards/margins": 5.87493896484375, + "rewards/rejected": -7.41074275970459, + "step": 1543 + }, + { + "epoch": 0.24, + "learning_rate": 1.301490222521915e-05, + "logits/chosen": -2.83225154876709, + "logits/rejected": -2.0071394443511963, + "logps/chosen": -648.8469848632812, + "logps/rejected": -265.5538024902344, + "loss": 4.2746, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.1053619384765625, + "rewards/margins": -1.774764060974121, + "rewards/rejected": -3.3305978775024414, + "step": 1544 + }, + { + "epoch": 0.24, + "learning_rate": 1.3014168784688003e-05, + "logits/chosen": -2.9825451374053955, + "logits/rejected": -1.0922908782958984, + "logps/chosen": -426.1790466308594, + "logps/rejected": -120.6883544921875, + "loss": 0.0356, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0805455446243286, + "rewards/margins": 3.3189737796783447, + "rewards/rejected": -4.399519443511963, + "step": 1545 + }, + { + "epoch": 0.24, + "learning_rate": 1.3013435344156854e-05, + "logits/chosen": -3.048784017562866, + "logits/rejected": -2.5839595794677734, + "logps/chosen": -159.72573852539062, + "logps/rejected": -199.67877197265625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1101527214050293, + "rewards/margins": 7.046149253845215, + "rewards/rejected": -9.156301498413086, + "step": 1546 + }, + { + "epoch": 0.24, + "learning_rate": 1.3012701903625708e-05, + "logits/chosen": -3.127288341522217, + "logits/rejected": -2.758934736251831, + "logps/chosen": -146.55938720703125, + "logps/rejected": -237.67591857910156, + "loss": 2.3684, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7090048789978027, + "rewards/margins": -0.7313569784164429, + "rewards/rejected": -2.9776480197906494, + "step": 1547 + }, + { + "epoch": 0.24, + "learning_rate": 1.301196846309456e-05, + "logits/chosen": -1.4438949823379517, + "logits/rejected": -2.666788339614868, + "logps/chosen": -148.37039184570312, + "logps/rejected": -421.63519287109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1944000720977783, + "rewards/margins": 8.519253730773926, + "rewards/rejected": -9.713653564453125, + "step": 1548 + }, + { + "epoch": 0.24, + "learning_rate": 1.3011235022563412e-05, + "logits/chosen": -3.0314888954162598, + "logits/rejected": -1.7497409582138062, + "logps/chosen": -224.73406982421875, + "logps/rejected": -267.84246826171875, + "loss": 4.087, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.655444145202637, + "rewards/margins": 2.5069966316223145, + "rewards/rejected": -8.16244125366211, + "step": 1549 + }, + { + "epoch": 0.24, + "learning_rate": 1.3010501582032264e-05, + "logits/chosen": -2.8733816146850586, + "logits/rejected": -2.9638869762420654, + "logps/chosen": -378.6318664550781, + "logps/rejected": -548.2052001953125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5376328229904175, + "rewards/margins": 6.125242710113525, + "rewards/rejected": -7.662875175476074, + "step": 1550 + }, + { + "epoch": 0.24, + "learning_rate": 1.3009768141501116e-05, + "logits/chosen": -2.8075006008148193, + "logits/rejected": -2.2833030223846436, + "logps/chosen": -413.3851623535156, + "logps/rejected": -356.5880126953125, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.149127244949341, + "rewards/margins": 4.7264909744262695, + "rewards/rejected": -7.875617980957031, + "step": 1551 + }, + { + "epoch": 0.24, + "learning_rate": 1.3009034700969967e-05, + "logits/chosen": -3.0594499111175537, + "logits/rejected": -3.051356315612793, + "logps/chosen": -338.7303771972656, + "logps/rejected": -144.47943115234375, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6238988637924194, + "rewards/margins": 4.288476467132568, + "rewards/rejected": -5.912375450134277, + "step": 1552 + }, + { + "epoch": 0.24, + "learning_rate": 1.300830126043882e-05, + "logits/chosen": -1.211238980293274, + "logits/rejected": -2.13051438331604, + "logps/chosen": -181.0120849609375, + "logps/rejected": -335.730712890625, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8834778070449829, + "rewards/margins": 6.335927963256836, + "rewards/rejected": -7.2194061279296875, + "step": 1553 + }, + { + "epoch": 0.24, + "learning_rate": 1.3007567819907671e-05, + "logits/chosen": -2.929131031036377, + "logits/rejected": -1.6122801303863525, + "logps/chosen": -158.13035583496094, + "logps/rejected": -132.11767578125, + "loss": 0.8279, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4181008338928223, + "rewards/margins": 0.8386533260345459, + "rewards/rejected": -4.256753921508789, + "step": 1554 + }, + { + "epoch": 0.24, + "learning_rate": 1.3006834379376525e-05, + "logits/chosen": -2.2628555297851562, + "logits/rejected": -3.2028472423553467, + "logps/chosen": -225.08181762695312, + "logps/rejected": -359.1900634765625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2296818494796753, + "rewards/margins": 5.966480255126953, + "rewards/rejected": -7.196162223815918, + "step": 1555 + }, + { + "epoch": 0.24, + "learning_rate": 1.3006100938845377e-05, + "logits/chosen": -2.305481195449829, + "logits/rejected": -3.1601881980895996, + "logps/chosen": -161.54458618164062, + "logps/rejected": -329.6889953613281, + "loss": 0.028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5212997198104858, + "rewards/margins": 4.5402936935424805, + "rewards/rejected": -5.061593532562256, + "step": 1556 + }, + { + "epoch": 0.24, + "learning_rate": 1.3005367498314229e-05, + "logits/chosen": -2.962355136871338, + "logits/rejected": -2.085632085800171, + "logps/chosen": -340.7283020019531, + "logps/rejected": -157.44140625, + "loss": 3.7692, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.934867858886719, + "rewards/margins": -1.2034852504730225, + "rewards/rejected": -4.731382846832275, + "step": 1557 + }, + { + "epoch": 0.24, + "learning_rate": 1.300463405778308e-05, + "logits/chosen": -2.6197187900543213, + "logits/rejected": -3.154656410217285, + "logps/chosen": -173.96324157714844, + "logps/rejected": -263.7982482910156, + "loss": 3.7117, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.3240742683410645, + "rewards/margins": -1.451050043106079, + "rewards/rejected": -3.8730239868164062, + "step": 1558 + }, + { + "epoch": 0.24, + "learning_rate": 1.3003900617251932e-05, + "logits/chosen": -2.124783754348755, + "logits/rejected": -3.048841953277588, + "logps/chosen": -129.90579223632812, + "logps/rejected": -367.19866943359375, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1917314529418945, + "rewards/margins": 3.79838228225708, + "rewards/rejected": -6.990113735198975, + "step": 1559 + }, + { + "epoch": 0.24, + "learning_rate": 1.3003167176720784e-05, + "logits/chosen": -2.887653350830078, + "logits/rejected": -1.4759899377822876, + "logps/chosen": -250.91433715820312, + "logps/rejected": -176.4912872314453, + "loss": 4.1412, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.829270839691162, + "rewards/margins": -1.9983808994293213, + "rewards/rejected": -3.83089017868042, + "step": 1560 + }, + { + "epoch": 0.24, + "learning_rate": 1.3002433736189636e-05, + "logits/chosen": -1.8108253479003906, + "logits/rejected": -2.792771100997925, + "logps/chosen": -224.906005859375, + "logps/rejected": -316.9732666015625, + "loss": 0.0872, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.847368597984314, + "rewards/margins": 5.050201416015625, + "rewards/rejected": -5.8975701332092285, + "step": 1561 + }, + { + "epoch": 0.24, + "learning_rate": 1.3001700295658488e-05, + "logits/chosen": -1.7091912031173706, + "logits/rejected": -2.9627933502197266, + "logps/chosen": -41.90547180175781, + "logps/rejected": -255.84188842773438, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2020926475524902, + "rewards/margins": 5.5480122566223145, + "rewards/rejected": -7.750104904174805, + "step": 1562 + }, + { + "epoch": 0.24, + "learning_rate": 1.300096685512734e-05, + "logits/chosen": -1.7638276815414429, + "logits/rejected": -2.520439386367798, + "logps/chosen": -138.2550048828125, + "logps/rejected": -484.56268310546875, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8930400609970093, + "rewards/margins": 5.656124114990234, + "rewards/rejected": -7.549163818359375, + "step": 1563 + }, + { + "epoch": 0.24, + "learning_rate": 1.3000233414596193e-05, + "logits/chosen": -2.4078190326690674, + "logits/rejected": -3.154294967651367, + "logps/chosen": -632.6309814453125, + "logps/rejected": -713.4269409179688, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1500160694122314, + "rewards/margins": 6.4062371253967285, + "rewards/rejected": -7.556253433227539, + "step": 1564 + }, + { + "epoch": 0.24, + "learning_rate": 1.2999499974065045e-05, + "logits/chosen": -3.105863094329834, + "logits/rejected": -3.0552468299865723, + "logps/chosen": -182.52951049804688, + "logps/rejected": -129.15936279296875, + "loss": 0.7542, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.289304494857788, + "rewards/margins": 0.660567045211792, + "rewards/rejected": -3.94987154006958, + "step": 1565 + }, + { + "epoch": 0.24, + "learning_rate": 1.2998766533533897e-05, + "logits/chosen": -3.127276659011841, + "logits/rejected": -2.586858034133911, + "logps/chosen": -120.15787506103516, + "logps/rejected": -168.63372802734375, + "loss": 0.6791, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.209585189819336, + "rewards/margins": 3.5508954524993896, + "rewards/rejected": -5.760480880737305, + "step": 1566 + }, + { + "epoch": 0.24, + "learning_rate": 1.2998033093002749e-05, + "logits/chosen": -1.9250928163528442, + "logits/rejected": -2.942558526992798, + "logps/chosen": -303.5777282714844, + "logps/rejected": -493.95184326171875, + "loss": 0.071, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.470944881439209, + "rewards/margins": 3.1449077129364014, + "rewards/rejected": -5.615852355957031, + "step": 1567 + }, + { + "epoch": 0.24, + "learning_rate": 1.2997299652471601e-05, + "logits/chosen": -2.5582046508789062, + "logits/rejected": -3.1190850734710693, + "logps/chosen": -62.38205337524414, + "logps/rejected": -252.83563232421875, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.295867681503296, + "rewards/margins": 5.564148902893066, + "rewards/rejected": -6.860016822814941, + "step": 1568 + }, + { + "epoch": 0.24, + "learning_rate": 1.2996566211940453e-05, + "logits/chosen": -2.917250871658325, + "logits/rejected": -2.9673855304718018, + "logps/chosen": -281.36541748046875, + "logps/rejected": -96.76728820800781, + "loss": 3.3697, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.6774139404296875, + "rewards/margins": -1.330430507659912, + "rewards/rejected": -4.346983432769775, + "step": 1569 + }, + { + "epoch": 0.24, + "learning_rate": 1.2995832771409305e-05, + "logits/chosen": -1.5742086172103882, + "logits/rejected": -2.8787612915039062, + "logps/chosen": -84.73421478271484, + "logps/rejected": -322.4260559082031, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9303587079048157, + "rewards/margins": 6.986514568328857, + "rewards/rejected": -7.916873455047607, + "step": 1570 + }, + { + "epoch": 0.24, + "learning_rate": 1.2995099330878157e-05, + "logits/chosen": -0.9944756031036377, + "logits/rejected": -2.4085540771484375, + "logps/chosen": -180.90554809570312, + "logps/rejected": -508.45111083984375, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1659135818481445, + "rewards/margins": 6.118728160858154, + "rewards/rejected": -7.284642219543457, + "step": 1571 + }, + { + "epoch": 0.24, + "learning_rate": 1.2994365890347008e-05, + "logits/chosen": -2.244912624359131, + "logits/rejected": -3.0256612300872803, + "logps/chosen": -47.44042205810547, + "logps/rejected": -318.8734130859375, + "loss": 1.3061, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8704893589019775, + "rewards/margins": 2.4079461097717285, + "rewards/rejected": -4.278435707092285, + "step": 1572 + }, + { + "epoch": 0.24, + "learning_rate": 1.2993632449815862e-05, + "logits/chosen": -3.0441110134124756, + "logits/rejected": -3.2441413402557373, + "logps/chosen": -139.31886291503906, + "logps/rejected": -214.3636016845703, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2227184772491455, + "rewards/margins": 4.201900482177734, + "rewards/rejected": -4.424618721008301, + "step": 1573 + }, + { + "epoch": 0.24, + "learning_rate": 1.2992899009284714e-05, + "logits/chosen": -2.8793392181396484, + "logits/rejected": -2.0637366771698, + "logps/chosen": -711.24169921875, + "logps/rejected": -578.1602172851562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5779489278793335, + "rewards/margins": 10.083809852600098, + "rewards/rejected": -10.661758422851562, + "step": 1574 + }, + { + "epoch": 0.24, + "learning_rate": 1.2992165568753566e-05, + "logits/chosen": -1.4032864570617676, + "logits/rejected": -2.92264461517334, + "logps/chosen": -198.62188720703125, + "logps/rejected": -391.83453369140625, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.580928087234497, + "rewards/margins": 5.339357376098633, + "rewards/rejected": -6.920285224914551, + "step": 1575 + }, + { + "epoch": 0.25, + "learning_rate": 1.299143212822242e-05, + "logits/chosen": -3.2634358406066895, + "logits/rejected": -2.5113844871520996, + "logps/chosen": -468.9210510253906, + "logps/rejected": -97.09907531738281, + "loss": 6.3655, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.390759468078613, + "rewards/margins": -6.363643646240234, + "rewards/rejected": -1.027116060256958, + "step": 1576 + }, + { + "epoch": 0.25, + "learning_rate": 1.2990698687691271e-05, + "logits/chosen": -2.327019214630127, + "logits/rejected": -3.033616781234741, + "logps/chosen": -214.55319213867188, + "logps/rejected": -310.35809326171875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.47037461400032043, + "rewards/margins": 6.636151313781738, + "rewards/rejected": -7.106525897979736, + "step": 1577 + }, + { + "epoch": 0.25, + "learning_rate": 1.2989965247160123e-05, + "logits/chosen": -3.075559139251709, + "logits/rejected": -3.241302728652954, + "logps/chosen": -308.62115478515625, + "logps/rejected": -445.8006591796875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3956297039985657, + "rewards/margins": 8.0655517578125, + "rewards/rejected": -7.669921875, + "step": 1578 + }, + { + "epoch": 0.25, + "learning_rate": 1.2989231806628975e-05, + "logits/chosen": -3.067223310470581, + "logits/rejected": -2.84586238861084, + "logps/chosen": -653.7195434570312, + "logps/rejected": -586.6278686523438, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2977814674377441, + "rewards/margins": 5.711575508117676, + "rewards/rejected": -7.009356498718262, + "step": 1579 + }, + { + "epoch": 0.25, + "learning_rate": 1.2988498366097827e-05, + "logits/chosen": -3.075364589691162, + "logits/rejected": -3.1813108921051025, + "logps/chosen": -54.74879455566406, + "logps/rejected": -206.28501892089844, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0044546127319336, + "rewards/margins": 4.676557540893555, + "rewards/rejected": -6.681012153625488, + "step": 1580 + }, + { + "epoch": 0.25, + "learning_rate": 1.2987764925566679e-05, + "logits/chosen": -2.0092060565948486, + "logits/rejected": -2.9778857231140137, + "logps/chosen": -263.3316650390625, + "logps/rejected": -335.5072021484375, + "loss": 0.0341, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4083877801895142, + "rewards/margins": 4.7859206199646, + "rewards/rejected": -6.194308280944824, + "step": 1581 + }, + { + "epoch": 0.25, + "learning_rate": 1.2987031485035532e-05, + "logits/chosen": -2.816190242767334, + "logits/rejected": -3.1109120845794678, + "logps/chosen": -793.8447265625, + "logps/rejected": -578.2686157226562, + "loss": 4.2019, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.609829902648926, + "rewards/margins": -1.1335549354553223, + "rewards/rejected": -4.4762749671936035, + "step": 1582 + }, + { + "epoch": 0.25, + "learning_rate": 1.2986298044504384e-05, + "logits/chosen": -3.3022303581237793, + "logits/rejected": -2.7817916870117188, + "logps/chosen": -651.8297729492188, + "logps/rejected": -405.34716796875, + "loss": 1.089, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6145424842834473, + "rewards/margins": 2.5178213119506836, + "rewards/rejected": -5.132363796234131, + "step": 1583 + }, + { + "epoch": 0.25, + "learning_rate": 1.2985564603973236e-05, + "logits/chosen": -2.332920551300049, + "logits/rejected": -3.203768253326416, + "logps/chosen": -96.5174560546875, + "logps/rejected": -296.9172668457031, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1414722204208374, + "rewards/margins": 3.7801403999328613, + "rewards/rejected": -4.921612739562988, + "step": 1584 + }, + { + "epoch": 0.25, + "learning_rate": 1.2984831163442088e-05, + "logits/chosen": -2.7203006744384766, + "logits/rejected": -2.01841402053833, + "logps/chosen": -245.28582763671875, + "logps/rejected": -351.8878173828125, + "loss": 1.8103, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.1174116134643555, + "rewards/margins": 0.6172140836715698, + "rewards/rejected": -4.734625339508057, + "step": 1585 + }, + { + "epoch": 0.25, + "learning_rate": 1.298409772291094e-05, + "logits/chosen": -3.058277130126953, + "logits/rejected": -3.076939344406128, + "logps/chosen": -119.03519439697266, + "logps/rejected": -162.1461639404297, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6235872507095337, + "rewards/margins": 4.06431770324707, + "rewards/rejected": -5.6879048347473145, + "step": 1586 + }, + { + "epoch": 0.25, + "learning_rate": 1.2983364282379792e-05, + "logits/chosen": -2.5918827056884766, + "logits/rejected": -2.4563021659851074, + "logps/chosen": -125.22711944580078, + "logps/rejected": -227.5716094970703, + "loss": 0.0641, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7937065362930298, + "rewards/margins": 2.950641632080078, + "rewards/rejected": -4.744348526000977, + "step": 1587 + }, + { + "epoch": 0.25, + "learning_rate": 1.2982630841848644e-05, + "logits/chosen": -2.7421844005584717, + "logits/rejected": -3.2192981243133545, + "logps/chosen": -283.0936279296875, + "logps/rejected": -367.1691589355469, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8814018964767456, + "rewards/margins": 7.233492374420166, + "rewards/rejected": -8.11489486694336, + "step": 1588 + }, + { + "epoch": 0.25, + "learning_rate": 1.2981897401317495e-05, + "logits/chosen": -1.9498052597045898, + "logits/rejected": -3.001607894897461, + "logps/chosen": -111.49070739746094, + "logps/rejected": -261.287841796875, + "loss": 0.115, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6378514766693115, + "rewards/margins": 3.280045986175537, + "rewards/rejected": -4.9178972244262695, + "step": 1589 + }, + { + "epoch": 0.25, + "learning_rate": 1.2981163960786347e-05, + "logits/chosen": -2.857847213745117, + "logits/rejected": -3.14390230178833, + "logps/chosen": -74.63179016113281, + "logps/rejected": -232.5367431640625, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.022558569908142, + "rewards/margins": 4.5604095458984375, + "rewards/rejected": -5.582968235015869, + "step": 1590 + }, + { + "epoch": 0.25, + "learning_rate": 1.2980430520255201e-05, + "logits/chosen": -3.1294474601745605, + "logits/rejected": -2.61618709564209, + "logps/chosen": -479.27313232421875, + "logps/rejected": -516.3368530273438, + "loss": 3.9653, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.492842197418213, + "rewards/margins": -1.739490032196045, + "rewards/rejected": -3.753352403640747, + "step": 1591 + }, + { + "epoch": 0.25, + "learning_rate": 1.2979697079724053e-05, + "logits/chosen": -2.8100271224975586, + "logits/rejected": -3.2496252059936523, + "logps/chosen": -42.322486877441406, + "logps/rejected": -213.63690185546875, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6039190292358398, + "rewards/margins": 4.902404308319092, + "rewards/rejected": -6.506323337554932, + "step": 1592 + }, + { + "epoch": 0.25, + "learning_rate": 1.2978963639192905e-05, + "logits/chosen": -3.1181411743164062, + "logits/rejected": -2.8806726932525635, + "logps/chosen": -274.11175537109375, + "logps/rejected": -283.4340515136719, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.859468460083008, + "rewards/margins": 5.043720245361328, + "rewards/rejected": -7.903188705444336, + "step": 1593 + }, + { + "epoch": 0.25, + "learning_rate": 1.2978230198661757e-05, + "logits/chosen": -2.972412347793579, + "logits/rejected": -2.4709038734436035, + "logps/chosen": -641.0259399414062, + "logps/rejected": -610.7152099609375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022792816162109375, + "rewards/margins": 9.061517715454102, + "rewards/rejected": -9.063796997070312, + "step": 1594 + }, + { + "epoch": 0.25, + "learning_rate": 1.2977496758130608e-05, + "logits/chosen": -2.3798296451568604, + "logits/rejected": -2.925283193588257, + "logps/chosen": -100.53239440917969, + "logps/rejected": -236.4599151611328, + "loss": 0.062, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1952552795410156, + "rewards/margins": 4.015576362609863, + "rewards/rejected": -5.210831642150879, + "step": 1595 + }, + { + "epoch": 0.25, + "learning_rate": 1.297676331759946e-05, + "logits/chosen": -2.765017032623291, + "logits/rejected": -3.202059030532837, + "logps/chosen": -131.50686645507812, + "logps/rejected": -253.76483154296875, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.527587890625, + "rewards/margins": 4.542963027954102, + "rewards/rejected": -6.070550918579102, + "step": 1596 + }, + { + "epoch": 0.25, + "learning_rate": 1.2976029877068312e-05, + "logits/chosen": -3.036813259124756, + "logits/rejected": -2.956329584121704, + "logps/chosen": -409.3666076660156, + "logps/rejected": -342.7904052734375, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8107330203056335, + "rewards/margins": 6.569957256317139, + "rewards/rejected": -7.380690097808838, + "step": 1597 + }, + { + "epoch": 0.25, + "learning_rate": 1.2975296436537164e-05, + "logits/chosen": -1.6917117834091187, + "logits/rejected": -2.93296480178833, + "logps/chosen": -431.75640869140625, + "logps/rejected": -390.02496337890625, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.708838701248169, + "rewards/margins": 7.253202438354492, + "rewards/rejected": -7.96204137802124, + "step": 1598 + }, + { + "epoch": 0.25, + "learning_rate": 1.2974562996006016e-05, + "logits/chosen": -3.155090570449829, + "logits/rejected": -2.752706289291382, + "logps/chosen": -134.77737426757812, + "logps/rejected": -163.15615844726562, + "loss": 1.9474, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.281362295150757, + "rewards/margins": -0.42915821075439453, + "rewards/rejected": -2.8522040843963623, + "step": 1599 + }, + { + "epoch": 0.25, + "learning_rate": 1.297382955547487e-05, + "logits/chosen": -2.4613184928894043, + "logits/rejected": -2.6087865829467773, + "logps/chosen": -183.27398681640625, + "logps/rejected": -272.32525634765625, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4893875122070312, + "rewards/margins": 5.357143402099609, + "rewards/rejected": -7.846530914306641, + "step": 1600 + }, + { + "epoch": 0.25, + "learning_rate": 1.2973096114943721e-05, + "logits/chosen": -3.035106658935547, + "logits/rejected": -3.122934103012085, + "logps/chosen": -105.91236114501953, + "logps/rejected": -84.52584075927734, + "loss": 2.7117, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.355539321899414, + "rewards/margins": -0.13073992729187012, + "rewards/rejected": -4.224799156188965, + "step": 1601 + }, + { + "epoch": 0.25, + "learning_rate": 1.2972362674412573e-05, + "logits/chosen": -2.0822534561157227, + "logits/rejected": -3.0947377681732178, + "logps/chosen": -165.86183166503906, + "logps/rejected": -260.34234619140625, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.039429187774658, + "rewards/margins": 5.435279369354248, + "rewards/rejected": -7.474708557128906, + "step": 1602 + }, + { + "epoch": 0.25, + "learning_rate": 1.2971629233881425e-05, + "logits/chosen": -1.5418639183044434, + "logits/rejected": -2.8259904384613037, + "logps/chosen": -84.68683624267578, + "logps/rejected": -324.556396484375, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.66790771484375, + "rewards/margins": 5.574868202209473, + "rewards/rejected": -7.242775917053223, + "step": 1603 + }, + { + "epoch": 0.25, + "learning_rate": 1.2970895793350277e-05, + "logits/chosen": -2.7107744216918945, + "logits/rejected": -3.1524109840393066, + "logps/chosen": -120.42768859863281, + "logps/rejected": -347.82122802734375, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9521589279174805, + "rewards/margins": 5.437718391418457, + "rewards/rejected": -7.3898773193359375, + "step": 1604 + }, + { + "epoch": 0.25, + "learning_rate": 1.2970162352819129e-05, + "logits/chosen": -2.6030848026275635, + "logits/rejected": -3.1767897605895996, + "logps/chosen": -74.807373046875, + "logps/rejected": -209.79946899414062, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5417650938034058, + "rewards/margins": 5.391360282897949, + "rewards/rejected": -6.9331254959106445, + "step": 1605 + }, + { + "epoch": 0.25, + "learning_rate": 1.296942891228798e-05, + "logits/chosen": -1.2265580892562866, + "logits/rejected": -3.1291515827178955, + "logps/chosen": -222.5377197265625, + "logps/rejected": -633.665283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2842909097671509, + "rewards/margins": 10.85844612121582, + "rewards/rejected": -12.142736434936523, + "step": 1606 + }, + { + "epoch": 0.25, + "learning_rate": 1.2968695471756833e-05, + "logits/chosen": -3.0766468048095703, + "logits/rejected": -2.4500949382781982, + "logps/chosen": -596.184326171875, + "logps/rejected": -345.7430419921875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6107021570205688, + "rewards/margins": 7.582357406616211, + "rewards/rejected": -8.193059921264648, + "step": 1607 + }, + { + "epoch": 0.25, + "learning_rate": 1.2967962031225686e-05, + "logits/chosen": -3.2888927459716797, + "logits/rejected": -3.26869535446167, + "logps/chosen": -119.04867553710938, + "logps/rejected": -99.13865661621094, + "loss": 2.9643, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9292898178100586, + "rewards/margins": -0.772160530090332, + "rewards/rejected": -3.1571290493011475, + "step": 1608 + }, + { + "epoch": 0.25, + "learning_rate": 1.2967228590694538e-05, + "logits/chosen": -1.0743592977523804, + "logits/rejected": -2.9095139503479004, + "logps/chosen": -115.04454040527344, + "logps/rejected": -461.8459777832031, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2627136707305908, + "rewards/margins": 7.70402717590332, + "rewards/rejected": -8.966740608215332, + "step": 1609 + }, + { + "epoch": 0.25, + "learning_rate": 1.2966495150163392e-05, + "logits/chosen": -3.0862624645233154, + "logits/rejected": -2.9190292358398438, + "logps/chosen": -392.2469787597656, + "logps/rejected": -394.28204345703125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3744730949401855, + "rewards/margins": 7.036687850952148, + "rewards/rejected": -8.411161422729492, + "step": 1610 + }, + { + "epoch": 0.25, + "learning_rate": 1.2965761709632244e-05, + "logits/chosen": -2.290924549102783, + "logits/rejected": -3.08821439743042, + "logps/chosen": -311.13421630859375, + "logps/rejected": -482.6841125488281, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2550392150878906, + "rewards/margins": 8.178348541259766, + "rewards/rejected": -9.433387756347656, + "step": 1611 + }, + { + "epoch": 0.25, + "learning_rate": 1.2965028269101095e-05, + "logits/chosen": -2.8746066093444824, + "logits/rejected": -3.071134567260742, + "logps/chosen": -257.5058898925781, + "logps/rejected": -347.4111022949219, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.073323965072632, + "rewards/margins": 6.297800064086914, + "rewards/rejected": -9.371124267578125, + "step": 1612 + }, + { + "epoch": 0.25, + "learning_rate": 1.2964294828569947e-05, + "logits/chosen": -2.4755706787109375, + "logits/rejected": -2.851233959197998, + "logps/chosen": -223.03024291992188, + "logps/rejected": -527.2070922851562, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.157864809036255, + "rewards/margins": 7.686309337615967, + "rewards/rejected": -9.8441743850708, + "step": 1613 + }, + { + "epoch": 0.25, + "learning_rate": 1.29635613880388e-05, + "logits/chosen": -3.0496160984039307, + "logits/rejected": -3.160001754760742, + "logps/chosen": -94.02726745605469, + "logps/rejected": -215.14312744140625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0395408868789673, + "rewards/margins": 6.106302738189697, + "rewards/rejected": -7.145843505859375, + "step": 1614 + }, + { + "epoch": 0.25, + "learning_rate": 1.2962827947507651e-05, + "logits/chosen": -2.774348020553589, + "logits/rejected": -3.0872561931610107, + "logps/chosen": -198.62557983398438, + "logps/rejected": -364.71875, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1571569442749023, + "rewards/margins": 5.206256866455078, + "rewards/rejected": -8.36341381072998, + "step": 1615 + }, + { + "epoch": 0.25, + "learning_rate": 1.2962094506976503e-05, + "logits/chosen": -2.823613166809082, + "logits/rejected": -3.1881141662597656, + "logps/chosen": -237.18173217773438, + "logps/rejected": -242.72157287597656, + "loss": 0.9103, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7732326984405518, + "rewards/margins": 2.6023993492126465, + "rewards/rejected": -4.375632286071777, + "step": 1616 + }, + { + "epoch": 0.25, + "learning_rate": 1.2961361066445355e-05, + "logits/chosen": -3.021629810333252, + "logits/rejected": -3.1749839782714844, + "logps/chosen": -98.67668914794922, + "logps/rejected": -172.13870239257812, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3171799182891846, + "rewards/margins": 7.44495964050293, + "rewards/rejected": -8.762139320373535, + "step": 1617 + }, + { + "epoch": 0.25, + "learning_rate": 1.2960627625914208e-05, + "logits/chosen": -3.1348204612731934, + "logits/rejected": -3.014467477798462, + "logps/chosen": -160.26168823242188, + "logps/rejected": -216.4018096923828, + "loss": 0.0628, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8609548807144165, + "rewards/margins": 4.913768768310547, + "rewards/rejected": -6.774724006652832, + "step": 1618 + }, + { + "epoch": 0.25, + "learning_rate": 1.295989418538306e-05, + "logits/chosen": -2.613330841064453, + "logits/rejected": -3.128932237625122, + "logps/chosen": -155.10951232910156, + "logps/rejected": -93.21768188476562, + "loss": 1.8571, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7637557983398438, + "rewards/margins": 0.9418553113937378, + "rewards/rejected": -4.705611228942871, + "step": 1619 + }, + { + "epoch": 0.25, + "learning_rate": 1.2959160744851912e-05, + "logits/chosen": -2.8999321460723877, + "logits/rejected": -2.776729106903076, + "logps/chosen": -184.29931640625, + "logps/rejected": -321.60491943359375, + "loss": 0.0403, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0138580799102783, + "rewards/margins": 4.864117622375488, + "rewards/rejected": -6.8779754638671875, + "step": 1620 + }, + { + "epoch": 0.25, + "learning_rate": 1.2958427304320764e-05, + "logits/chosen": -2.6359598636627197, + "logits/rejected": -3.026242971420288, + "logps/chosen": -216.223876953125, + "logps/rejected": -387.7023620605469, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5426254272460938, + "rewards/margins": 5.07489013671875, + "rewards/rejected": -7.617515563964844, + "step": 1621 + }, + { + "epoch": 0.25, + "learning_rate": 1.2957693863789616e-05, + "logits/chosen": -2.854473352432251, + "logits/rejected": -3.0086071491241455, + "logps/chosen": -117.24238586425781, + "logps/rejected": -132.30194091796875, + "loss": 0.1795, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9966084957122803, + "rewards/margins": 3.7979466915130615, + "rewards/rejected": -5.794555187225342, + "step": 1622 + }, + { + "epoch": 0.25, + "learning_rate": 1.2956960423258468e-05, + "logits/chosen": -2.1699135303497314, + "logits/rejected": -2.9199419021606445, + "logps/chosen": -177.073974609375, + "logps/rejected": -352.22296142578125, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8302459716796875, + "rewards/margins": 5.097228050231934, + "rewards/rejected": -8.927474021911621, + "step": 1623 + }, + { + "epoch": 0.25, + "learning_rate": 1.295622698272732e-05, + "logits/chosen": -2.6230854988098145, + "logits/rejected": -2.70393443107605, + "logps/chosen": -103.67620849609375, + "logps/rejected": -255.32382202148438, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5217136144638062, + "rewards/margins": 7.131189823150635, + "rewards/rejected": -8.65290355682373, + "step": 1624 + }, + { + "epoch": 0.25, + "learning_rate": 1.2955493542196172e-05, + "logits/chosen": -2.355252504348755, + "logits/rejected": -2.7544491291046143, + "logps/chosen": -85.79624938964844, + "logps/rejected": -182.08596801757812, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3101567029953003, + "rewards/margins": 4.83502721786499, + "rewards/rejected": -6.14518404006958, + "step": 1625 + }, + { + "epoch": 0.25, + "learning_rate": 1.2954760101665023e-05, + "logits/chosen": -2.7795968055725098, + "logits/rejected": -3.019658327102661, + "logps/chosen": -388.39788818359375, + "logps/rejected": -323.9476013183594, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.830438256263733, + "rewards/margins": 6.731954574584961, + "rewards/rejected": -8.562393188476562, + "step": 1626 + }, + { + "epoch": 0.25, + "learning_rate": 1.2954026661133877e-05, + "logits/chosen": -2.858311176300049, + "logits/rejected": -2.981739044189453, + "logps/chosen": -139.506103515625, + "logps/rejected": -173.19580078125, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1671292781829834, + "rewards/margins": 5.376099109649658, + "rewards/rejected": -6.543228626251221, + "step": 1627 + }, + { + "epoch": 0.25, + "learning_rate": 1.2953293220602729e-05, + "logits/chosen": -3.034032106399536, + "logits/rejected": -2.759436845779419, + "logps/chosen": -73.35466766357422, + "logps/rejected": -166.3249969482422, + "loss": 1.5729, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1738991737365723, + "rewards/margins": 2.4543373584747314, + "rewards/rejected": -4.628236770629883, + "step": 1628 + }, + { + "epoch": 0.25, + "learning_rate": 1.295255978007158e-05, + "logits/chosen": -3.003147840499878, + "logits/rejected": -2.9899752140045166, + "logps/chosen": -383.3906555175781, + "logps/rejected": -295.6119079589844, + "loss": 0.0554, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5958160161972046, + "rewards/margins": 3.271355390548706, + "rewards/rejected": -4.867171287536621, + "step": 1629 + }, + { + "epoch": 0.25, + "learning_rate": 1.2951826339540433e-05, + "logits/chosen": -3.0526297092437744, + "logits/rejected": -2.1345021724700928, + "logps/chosen": -364.51531982421875, + "logps/rejected": -193.4944610595703, + "loss": 1.1983, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.687176465988159, + "rewards/margins": 2.3118972778320312, + "rewards/rejected": -5.999073505401611, + "step": 1630 + }, + { + "epoch": 0.25, + "learning_rate": 1.2951092899009284e-05, + "logits/chosen": -2.9514548778533936, + "logits/rejected": -2.7320244312286377, + "logps/chosen": -241.77040100097656, + "logps/rejected": -470.5851745605469, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.116267442703247, + "rewards/margins": 10.326397895812988, + "rewards/rejected": -12.442665100097656, + "step": 1631 + }, + { + "epoch": 0.25, + "learning_rate": 1.2950359458478136e-05, + "logits/chosen": -3.227415084838867, + "logits/rejected": -3.0053811073303223, + "logps/chosen": -593.0944213867188, + "logps/rejected": -432.243896484375, + "loss": 3.8364, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.951745986938477, + "rewards/margins": -0.7209334373474121, + "rewards/rejected": -5.230812072753906, + "step": 1632 + }, + { + "epoch": 0.25, + "learning_rate": 1.2949626017946988e-05, + "logits/chosen": -1.7379100322723389, + "logits/rejected": -2.9668993949890137, + "logps/chosen": -173.1220245361328, + "logps/rejected": -311.9110412597656, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4442038536071777, + "rewards/margins": 5.007909774780273, + "rewards/rejected": -7.452113628387451, + "step": 1633 + }, + { + "epoch": 0.25, + "learning_rate": 1.294889257741584e-05, + "logits/chosen": -2.6948869228363037, + "logits/rejected": -2.836427688598633, + "logps/chosen": -631.6475830078125, + "logps/rejected": -493.94110107421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5537323951721191, + "rewards/margins": 9.276226997375488, + "rewards/rejected": -10.829959869384766, + "step": 1634 + }, + { + "epoch": 0.25, + "learning_rate": 1.2948159136884692e-05, + "logits/chosen": -2.77864933013916, + "logits/rejected": -2.894205331802368, + "logps/chosen": -216.5480194091797, + "logps/rejected": -362.74017333984375, + "loss": 4.0673, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.949028015136719, + "rewards/margins": 3.962794303894043, + "rewards/rejected": -9.911822319030762, + "step": 1635 + }, + { + "epoch": 0.25, + "learning_rate": 1.2947425696353546e-05, + "logits/chosen": -3.0831105709075928, + "logits/rejected": -3.0651798248291016, + "logps/chosen": -117.80809783935547, + "logps/rejected": -208.97824096679688, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8864903450012207, + "rewards/margins": 4.637135028839111, + "rewards/rejected": -6.523625373840332, + "step": 1636 + }, + { + "epoch": 0.25, + "learning_rate": 1.2946692255822397e-05, + "logits/chosen": -2.8174071311950684, + "logits/rejected": -1.0748040676116943, + "logps/chosen": -231.36817932128906, + "logps/rejected": -46.068355560302734, + "loss": 2.2376, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.278000354766846, + "rewards/margins": -1.3718291521072388, + "rewards/rejected": -2.9061710834503174, + "step": 1637 + }, + { + "epoch": 0.25, + "learning_rate": 1.294595881529125e-05, + "logits/chosen": -0.9058322310447693, + "logits/rejected": -2.6615428924560547, + "logps/chosen": -94.53894805908203, + "logps/rejected": -416.5125427246094, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9264545440673828, + "rewards/margins": 7.954159736633301, + "rewards/rejected": -8.880614280700684, + "step": 1638 + }, + { + "epoch": 0.25, + "learning_rate": 1.2945225374760101e-05, + "logits/chosen": -1.0149098634719849, + "logits/rejected": -2.980903387069702, + "logps/chosen": -47.01551055908203, + "logps/rejected": -223.35888671875, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8157248497009277, + "rewards/margins": 7.74345588684082, + "rewards/rejected": -9.559181213378906, + "step": 1639 + }, + { + "epoch": 0.26, + "learning_rate": 1.2944491934228953e-05, + "logits/chosen": -2.4728713035583496, + "logits/rejected": -3.0830564498901367, + "logps/chosen": -349.47686767578125, + "logps/rejected": -352.78961181640625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7097885012626648, + "rewards/margins": 8.061134338378906, + "rewards/rejected": -8.770923614501953, + "step": 1640 + }, + { + "epoch": 0.26, + "learning_rate": 1.2943758493697805e-05, + "logits/chosen": -1.3955556154251099, + "logits/rejected": -2.7201850414276123, + "logps/chosen": -77.0319595336914, + "logps/rejected": -368.196044921875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.240277886390686, + "rewards/margins": 8.085698127746582, + "rewards/rejected": -9.32597541809082, + "step": 1641 + }, + { + "epoch": 0.26, + "learning_rate": 1.2943025053166659e-05, + "logits/chosen": -2.5427002906799316, + "logits/rejected": -3.091517448425293, + "logps/chosen": -119.7282943725586, + "logps/rejected": -249.97195434570312, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8045666217803955, + "rewards/margins": 7.354570388793945, + "rewards/rejected": -9.159136772155762, + "step": 1642 + }, + { + "epoch": 0.26, + "learning_rate": 1.294229161263551e-05, + "logits/chosen": -2.688147783279419, + "logits/rejected": -3.060635566711426, + "logps/chosen": -78.94540405273438, + "logps/rejected": -248.40707397460938, + "loss": 0.0507, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6583647727966309, + "rewards/margins": 6.063572883605957, + "rewards/rejected": -7.721937656402588, + "step": 1643 + }, + { + "epoch": 0.26, + "learning_rate": 1.2941558172104362e-05, + "logits/chosen": -2.435166835784912, + "logits/rejected": -3.1847240924835205, + "logps/chosen": -240.8582000732422, + "logps/rejected": -419.44989013671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5598304271697998, + "rewards/margins": 8.592997550964355, + "rewards/rejected": -10.152828216552734, + "step": 1644 + }, + { + "epoch": 0.26, + "learning_rate": 1.2940824731573216e-05, + "logits/chosen": -3.0526628494262695, + "logits/rejected": -2.0207996368408203, + "logps/chosen": -648.1038208007812, + "logps/rejected": -287.6927795410156, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4337754249572754, + "rewards/margins": 4.766458034515381, + "rewards/rejected": -6.200233459472656, + "step": 1645 + }, + { + "epoch": 0.26, + "learning_rate": 1.2940091291042068e-05, + "logits/chosen": -2.8631019592285156, + "logits/rejected": -2.870699882507324, + "logps/chosen": -80.3773193359375, + "logps/rejected": -144.30184936523438, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8416264653205872, + "rewards/margins": 5.702094554901123, + "rewards/rejected": -6.5437211990356445, + "step": 1646 + }, + { + "epoch": 0.26, + "learning_rate": 1.293935785051092e-05, + "logits/chosen": -2.591779947280884, + "logits/rejected": -2.772554397583008, + "logps/chosen": -146.29385375976562, + "logps/rejected": -270.9714660644531, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.156151294708252, + "rewards/margins": 7.7194976806640625, + "rewards/rejected": -8.875648498535156, + "step": 1647 + }, + { + "epoch": 0.26, + "learning_rate": 1.2938624409979771e-05, + "logits/chosen": -2.2171995639801025, + "logits/rejected": -3.090196371078491, + "logps/chosen": -543.36083984375, + "logps/rejected": -409.66552734375, + "loss": 3.5725, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.376924514770508, + "rewards/margins": 0.8464300632476807, + "rewards/rejected": -5.223354816436768, + "step": 1648 + }, + { + "epoch": 0.26, + "learning_rate": 1.2937890969448623e-05, + "logits/chosen": -3.1403515338897705, + "logits/rejected": -3.1570825576782227, + "logps/chosen": -817.9135131835938, + "logps/rejected": -292.3902587890625, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21420899033546448, + "rewards/margins": 5.024390697479248, + "rewards/rejected": -4.810181617736816, + "step": 1649 + }, + { + "epoch": 0.26, + "learning_rate": 1.2937157528917475e-05, + "logits/chosen": -1.8580032587051392, + "logits/rejected": -2.767815351486206, + "logps/chosen": -133.306396484375, + "logps/rejected": -249.55555725097656, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2058355808258057, + "rewards/margins": 6.704695701599121, + "rewards/rejected": -8.910531044006348, + "step": 1650 + }, + { + "epoch": 0.26, + "learning_rate": 1.2936424088386327e-05, + "logits/chosen": -2.6143057346343994, + "logits/rejected": -2.975771427154541, + "logps/chosen": -225.74053955078125, + "logps/rejected": -436.9425048828125, + "loss": 0.0581, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6726051568984985, + "rewards/margins": 4.810554027557373, + "rewards/rejected": -6.483159065246582, + "step": 1651 + }, + { + "epoch": 0.26, + "learning_rate": 1.2935690647855179e-05, + "logits/chosen": -2.8693554401397705, + "logits/rejected": -1.930208683013916, + "logps/chosen": -358.1766052246094, + "logps/rejected": -298.668212890625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0996620655059814, + "rewards/margins": 6.1143975257873535, + "rewards/rejected": -7.214059352874756, + "step": 1652 + }, + { + "epoch": 0.26, + "learning_rate": 1.2934957207324033e-05, + "logits/chosen": -1.5519170761108398, + "logits/rejected": -2.7246222496032715, + "logps/chosen": -88.95718383789062, + "logps/rejected": -288.4468688964844, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3377737998962402, + "rewards/margins": 6.100088119506836, + "rewards/rejected": -8.437861442565918, + "step": 1653 + }, + { + "epoch": 0.26, + "learning_rate": 1.2934223766792884e-05, + "logits/chosen": -3.0495729446411133, + "logits/rejected": -2.924987316131592, + "logps/chosen": -197.81385803222656, + "logps/rejected": -299.22216796875, + "loss": 1.4973, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.660052537918091, + "rewards/margins": 3.1870574951171875, + "rewards/rejected": -5.847110271453857, + "step": 1654 + }, + { + "epoch": 0.26, + "learning_rate": 1.2933490326261736e-05, + "logits/chosen": -3.1889472007751465, + "logits/rejected": -2.9766554832458496, + "logps/chosen": -182.7150421142578, + "logps/rejected": -266.593994140625, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0686595439910889, + "rewards/margins": 5.619204521179199, + "rewards/rejected": -6.687863826751709, + "step": 1655 + }, + { + "epoch": 0.26, + "learning_rate": 1.2932756885730588e-05, + "logits/chosen": -3.038519859313965, + "logits/rejected": -3.178882360458374, + "logps/chosen": -367.51507568359375, + "logps/rejected": -120.94261169433594, + "loss": 7.4759, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.334699630737305, + "rewards/margins": -7.475164890289307, + "rewards/rejected": -1.8595348596572876, + "step": 1656 + }, + { + "epoch": 0.26, + "learning_rate": 1.293202344519944e-05, + "logits/chosen": -2.9016716480255127, + "logits/rejected": -1.8057477474212646, + "logps/chosen": -293.68841552734375, + "logps/rejected": -308.7217712402344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7017048001289368, + "rewards/margins": 9.779678344726562, + "rewards/rejected": -10.48138427734375, + "step": 1657 + }, + { + "epoch": 0.26, + "learning_rate": 1.2931290004668292e-05, + "logits/chosen": -2.9720652103424072, + "logits/rejected": -2.634678602218628, + "logps/chosen": -363.1925354003906, + "logps/rejected": -470.593505859375, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8996903896331787, + "rewards/margins": 5.1706085205078125, + "rewards/rejected": -8.07029914855957, + "step": 1658 + }, + { + "epoch": 0.26, + "learning_rate": 1.2930556564137144e-05, + "logits/chosen": -3.0950355529785156, + "logits/rejected": -2.885662794113159, + "logps/chosen": -120.83248901367188, + "logps/rejected": -125.97537231445312, + "loss": 1.5518, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1789307594299316, + "rewards/margins": 0.5006091594696045, + "rewards/rejected": -3.679539918899536, + "step": 1659 + }, + { + "epoch": 0.26, + "learning_rate": 1.2929823123605996e-05, + "logits/chosen": -2.013274908065796, + "logits/rejected": -3.067347526550293, + "logps/chosen": -66.38789367675781, + "logps/rejected": -195.1881866455078, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8341248035430908, + "rewards/margins": 5.430300712585449, + "rewards/rejected": -7.264425754547119, + "step": 1660 + }, + { + "epoch": 0.26, + "learning_rate": 1.2929089683074848e-05, + "logits/chosen": -3.0656940937042236, + "logits/rejected": -1.9348671436309814, + "logps/chosen": -256.2768859863281, + "logps/rejected": -59.110755920410156, + "loss": 2.1557, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2098233699798584, + "rewards/margins": -0.5128812789916992, + "rewards/rejected": -2.696942090988159, + "step": 1661 + }, + { + "epoch": 0.26, + "learning_rate": 1.2928356242543701e-05, + "logits/chosen": -1.9099401235580444, + "logits/rejected": -2.8685762882232666, + "logps/chosen": -104.0081558227539, + "logps/rejected": -283.4488525390625, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3090457916259766, + "rewards/margins": 5.40305233001709, + "rewards/rejected": -7.712098121643066, + "step": 1662 + }, + { + "epoch": 0.26, + "learning_rate": 1.2927622802012553e-05, + "logits/chosen": -2.4237585067749023, + "logits/rejected": -3.0841593742370605, + "logps/chosen": -126.15776062011719, + "logps/rejected": -352.1717529296875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.976831078529358, + "rewards/margins": 8.238466262817383, + "rewards/rejected": -10.21529769897461, + "step": 1663 + }, + { + "epoch": 0.26, + "learning_rate": 1.2926889361481405e-05, + "logits/chosen": -0.8522888422012329, + "logits/rejected": -2.9562880992889404, + "logps/chosen": -47.97432327270508, + "logps/rejected": -162.65185546875, + "loss": 1.0642, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7710342407226562, + "rewards/margins": 1.1829966306686401, + "rewards/rejected": -4.954030513763428, + "step": 1664 + }, + { + "epoch": 0.26, + "learning_rate": 1.2926155920950257e-05, + "logits/chosen": -1.6481516361236572, + "logits/rejected": -3.1016805171966553, + "logps/chosen": -417.1290283203125, + "logps/rejected": -690.9302368164062, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.392669677734375, + "rewards/margins": 8.015706062316895, + "rewards/rejected": -10.40837574005127, + "step": 1665 + }, + { + "epoch": 0.26, + "learning_rate": 1.2925422480419109e-05, + "logits/chosen": -2.7288031578063965, + "logits/rejected": -3.1542465686798096, + "logps/chosen": -79.6275634765625, + "logps/rejected": -240.30657958984375, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3394176959991455, + "rewards/margins": 5.266617298126221, + "rewards/rejected": -6.606035232543945, + "step": 1666 + }, + { + "epoch": 0.26, + "learning_rate": 1.292468903988796e-05, + "logits/chosen": -2.7600996494293213, + "logits/rejected": -2.117156505584717, + "logps/chosen": -282.82916259765625, + "logps/rejected": -216.91506958007812, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3964828252792358, + "rewards/margins": 6.119716644287109, + "rewards/rejected": -7.516199111938477, + "step": 1667 + }, + { + "epoch": 0.26, + "learning_rate": 1.2923955599356812e-05, + "logits/chosen": -2.33048677444458, + "logits/rejected": -3.16214919090271, + "logps/chosen": -324.00384521484375, + "logps/rejected": -712.3416137695312, + "loss": 4.9118, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.872824192047119, + "rewards/margins": -1.7356600761413574, + "rewards/rejected": -4.137164115905762, + "step": 1668 + }, + { + "epoch": 0.26, + "learning_rate": 1.2923222158825664e-05, + "logits/chosen": -2.6069631576538086, + "logits/rejected": -3.2410855293273926, + "logps/chosen": -170.95933532714844, + "logps/rejected": -350.9439697265625, + "loss": 0.029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.813863754272461, + "rewards/margins": 3.53342866897583, + "rewards/rejected": -5.347292423248291, + "step": 1669 + }, + { + "epoch": 0.26, + "learning_rate": 1.2922488718294516e-05, + "logits/chosen": -2.9790568351745605, + "logits/rejected": -2.042387008666992, + "logps/chosen": -570.167236328125, + "logps/rejected": -431.6396484375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2571601867675781, + "rewards/margins": 8.387940406799316, + "rewards/rejected": -9.645100593566895, + "step": 1670 + }, + { + "epoch": 0.26, + "learning_rate": 1.292175527776337e-05, + "logits/chosen": -2.6948511600494385, + "logits/rejected": -3.0047452449798584, + "logps/chosen": -177.77865600585938, + "logps/rejected": -299.88177490234375, + "loss": 2.177, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.392451524734497, + "rewards/margins": 2.9126029014587402, + "rewards/rejected": -6.305054187774658, + "step": 1671 + }, + { + "epoch": 0.26, + "learning_rate": 1.2921021837232222e-05, + "logits/chosen": -2.9564449787139893, + "logits/rejected": -3.1194586753845215, + "logps/chosen": -361.0837097167969, + "logps/rejected": -439.44720458984375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9502986669540405, + "rewards/margins": 7.234568119049072, + "rewards/rejected": -9.184866905212402, + "step": 1672 + }, + { + "epoch": 0.26, + "learning_rate": 1.2920288396701074e-05, + "logits/chosen": -3.0242886543273926, + "logits/rejected": -1.7752012014389038, + "logps/chosen": -567.67333984375, + "logps/rejected": -371.5313720703125, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.002864122390747, + "rewards/margins": 5.294353008270264, + "rewards/rejected": -7.297216892242432, + "step": 1673 + }, + { + "epoch": 0.26, + "learning_rate": 1.2919554956169925e-05, + "logits/chosen": -2.9217286109924316, + "logits/rejected": -2.372610330581665, + "logps/chosen": -230.47357177734375, + "logps/rejected": -180.021240234375, + "loss": 3.4216, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.505043029785156, + "rewards/margins": -0.57700514793396, + "rewards/rejected": -4.928037643432617, + "step": 1674 + }, + { + "epoch": 0.26, + "learning_rate": 1.2918821515638777e-05, + "logits/chosen": -2.201193332672119, + "logits/rejected": -2.82525897026062, + "logps/chosen": -208.17250061035156, + "logps/rejected": -323.439697265625, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7730355262756348, + "rewards/margins": 5.61392879486084, + "rewards/rejected": -7.386964797973633, + "step": 1675 + }, + { + "epoch": 0.26, + "learning_rate": 1.2918088075107631e-05, + "logits/chosen": -2.849841594696045, + "logits/rejected": -2.839982509613037, + "logps/chosen": -150.87779235839844, + "logps/rejected": -154.72532653808594, + "loss": 0.0666, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.888848066329956, + "rewards/margins": 2.703601360321045, + "rewards/rejected": -5.592449188232422, + "step": 1676 + }, + { + "epoch": 0.26, + "learning_rate": 1.2917354634576483e-05, + "logits/chosen": -3.1403369903564453, + "logits/rejected": -3.087592363357544, + "logps/chosen": -148.1610870361328, + "logps/rejected": -141.07107543945312, + "loss": 1.3937, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3092455863952637, + "rewards/margins": 0.014155030250549316, + "rewards/rejected": -3.3234004974365234, + "step": 1677 + }, + { + "epoch": 0.26, + "learning_rate": 1.2916621194045335e-05, + "logits/chosen": -2.257777690887451, + "logits/rejected": -3.1146178245544434, + "logps/chosen": -40.23499298095703, + "logps/rejected": -258.90167236328125, + "loss": 0.0335, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2405452728271484, + "rewards/margins": 5.861865043640137, + "rewards/rejected": -7.102410316467285, + "step": 1678 + }, + { + "epoch": 0.26, + "learning_rate": 1.2915887753514186e-05, + "logits/chosen": -1.6988282203674316, + "logits/rejected": -3.133638381958008, + "logps/chosen": -402.08502197265625, + "logps/rejected": -609.1068725585938, + "loss": 3.4275, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.676595211029053, + "rewards/margins": 0.9965035915374756, + "rewards/rejected": -6.673098564147949, + "step": 1679 + }, + { + "epoch": 0.26, + "learning_rate": 1.291515431298304e-05, + "logits/chosen": -2.958155393600464, + "logits/rejected": -2.7981951236724854, + "logps/chosen": -92.967529296875, + "logps/rejected": -210.83892822265625, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9423236846923828, + "rewards/margins": 5.976041316986084, + "rewards/rejected": -7.918365001678467, + "step": 1680 + }, + { + "epoch": 0.26, + "learning_rate": 1.2914420872451892e-05, + "logits/chosen": -2.60184645652771, + "logits/rejected": -3.0995335578918457, + "logps/chosen": -143.0528564453125, + "logps/rejected": -237.95907592773438, + "loss": 0.0658, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.719872236251831, + "rewards/margins": 3.972691059112549, + "rewards/rejected": -5.692563533782959, + "step": 1681 + }, + { + "epoch": 0.26, + "learning_rate": 1.2913687431920744e-05, + "logits/chosen": -3.3098602294921875, + "logits/rejected": -2.9712607860565186, + "logps/chosen": -141.88943481445312, + "logps/rejected": -111.66483306884766, + "loss": 1.9734, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9893436431884766, + "rewards/margins": 0.2754800319671631, + "rewards/rejected": -3.2648236751556396, + "step": 1682 + }, + { + "epoch": 0.26, + "learning_rate": 1.2912953991389596e-05, + "logits/chosen": -2.857210874557495, + "logits/rejected": -3.1660194396972656, + "logps/chosen": -38.01893997192383, + "logps/rejected": -194.64227294921875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.164489984512329, + "rewards/margins": 6.988773345947266, + "rewards/rejected": -9.153263092041016, + "step": 1683 + }, + { + "epoch": 0.26, + "learning_rate": 1.2912220550858448e-05, + "logits/chosen": -2.8307056427001953, + "logits/rejected": -3.1998801231384277, + "logps/chosen": -414.617919921875, + "logps/rejected": -317.8533630371094, + "loss": 0.0342, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2185547351837158, + "rewards/margins": 3.5138397216796875, + "rewards/rejected": -4.732394218444824, + "step": 1684 + }, + { + "epoch": 0.26, + "learning_rate": 1.29114871103273e-05, + "logits/chosen": -1.9389183521270752, + "logits/rejected": -3.1324377059936523, + "logps/chosen": -61.338134765625, + "logps/rejected": -203.46905517578125, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4067649841308594, + "rewards/margins": 3.716397285461426, + "rewards/rejected": -7.123162269592285, + "step": 1685 + }, + { + "epoch": 0.26, + "learning_rate": 1.2910753669796151e-05, + "logits/chosen": -2.7492189407348633, + "logits/rejected": -3.0486178398132324, + "logps/chosen": -299.63043212890625, + "logps/rejected": -331.96746826171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45736008882522583, + "rewards/margins": 8.877386093139648, + "rewards/rejected": -9.334746360778809, + "step": 1686 + }, + { + "epoch": 0.26, + "learning_rate": 1.2910020229265003e-05, + "logits/chosen": -3.0532453060150146, + "logits/rejected": -2.4344656467437744, + "logps/chosen": -388.14581298828125, + "logps/rejected": -329.0827331542969, + "loss": 0.036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5255324840545654, + "rewards/margins": 4.614577293395996, + "rewards/rejected": -7.140110015869141, + "step": 1687 + }, + { + "epoch": 0.26, + "learning_rate": 1.2909286788733855e-05, + "logits/chosen": -2.2095999717712402, + "logits/rejected": -3.1572940349578857, + "logps/chosen": -80.31268310546875, + "logps/rejected": -249.86550903320312, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7362757921218872, + "rewards/margins": 4.118974208831787, + "rewards/rejected": -4.855249881744385, + "step": 1688 + }, + { + "epoch": 0.26, + "learning_rate": 1.2908553348202709e-05, + "logits/chosen": -2.9747273921966553, + "logits/rejected": -2.7426130771636963, + "logps/chosen": -121.19073486328125, + "logps/rejected": -254.8685302734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0040957927703857, + "rewards/margins": 7.90885066986084, + "rewards/rejected": -9.912946701049805, + "step": 1689 + }, + { + "epoch": 0.26, + "learning_rate": 1.290781990767156e-05, + "logits/chosen": -2.518301486968994, + "logits/rejected": -2.197990655899048, + "logps/chosen": -183.5870361328125, + "logps/rejected": -513.7688598632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.998673439025879, + "rewards/margins": 10.205507278442383, + "rewards/rejected": -12.204181671142578, + "step": 1690 + }, + { + "epoch": 0.26, + "learning_rate": 1.2907086467140412e-05, + "logits/chosen": -1.7766879796981812, + "logits/rejected": -2.891092300415039, + "logps/chosen": -83.85246276855469, + "logps/rejected": -282.4352111816406, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.004812240600586, + "rewards/margins": 6.925055503845215, + "rewards/rejected": -8.929868698120117, + "step": 1691 + }, + { + "epoch": 0.26, + "learning_rate": 1.2906353026609264e-05, + "logits/chosen": -2.662044048309326, + "logits/rejected": -3.077348470687866, + "logps/chosen": -321.0146789550781, + "logps/rejected": -358.15216064453125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.479372501373291, + "rewards/margins": 7.207491874694824, + "rewards/rejected": -8.686864852905273, + "step": 1692 + }, + { + "epoch": 0.26, + "learning_rate": 1.2905619586078116e-05, + "logits/chosen": -2.4986157417297363, + "logits/rejected": -3.0435023307800293, + "logps/chosen": -71.78379821777344, + "logps/rejected": -308.8909912109375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8648167848587036, + "rewards/margins": 6.452056884765625, + "rewards/rejected": -8.316873550415039, + "step": 1693 + }, + { + "epoch": 0.26, + "learning_rate": 1.2904886145546968e-05, + "logits/chosen": -1.5527087450027466, + "logits/rejected": -2.9996306896209717, + "logps/chosen": -100.34651184082031, + "logps/rejected": -315.62078857421875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.14097261428833, + "rewards/margins": 6.71201753616333, + "rewards/rejected": -9.85299015045166, + "step": 1694 + }, + { + "epoch": 0.26, + "learning_rate": 1.290415270501582e-05, + "logits/chosen": -2.886476755142212, + "logits/rejected": -3.1329171657562256, + "logps/chosen": -99.78581237792969, + "logps/rejected": -358.07794189453125, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.440486192703247, + "rewards/margins": 9.365974426269531, + "rewards/rejected": -10.806461334228516, + "step": 1695 + }, + { + "epoch": 0.26, + "learning_rate": 1.2903419264484672e-05, + "logits/chosen": -3.1234750747680664, + "logits/rejected": -2.995788097381592, + "logps/chosen": -141.57498168945312, + "logps/rejected": -144.021484375, + "loss": 0.0877, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3323416709899902, + "rewards/margins": 3.9662551879882812, + "rewards/rejected": -6.29859733581543, + "step": 1696 + }, + { + "epoch": 0.26, + "learning_rate": 1.2902685823953524e-05, + "logits/chosen": -2.8074212074279785, + "logits/rejected": -3.0520870685577393, + "logps/chosen": -276.7577209472656, + "logps/rejected": -235.78993225097656, + "loss": 1.1872, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5819809436798096, + "rewards/margins": 2.0891387462615967, + "rewards/rejected": -4.671119689941406, + "step": 1697 + }, + { + "epoch": 0.26, + "learning_rate": 1.2901952383422377e-05, + "logits/chosen": -2.0746073722839355, + "logits/rejected": -3.1305735111236572, + "logps/chosen": -92.62783813476562, + "logps/rejected": -237.652099609375, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0282511711120605, + "rewards/margins": 5.411614418029785, + "rewards/rejected": -7.4398651123046875, + "step": 1698 + }, + { + "epoch": 0.26, + "learning_rate": 1.2901218942891229e-05, + "logits/chosen": -1.8395886421203613, + "logits/rejected": -2.89400053024292, + "logps/chosen": -144.2740936279297, + "logps/rejected": -340.6048583984375, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.933387279510498, + "rewards/margins": 5.638920783996582, + "rewards/rejected": -7.572307586669922, + "step": 1699 + }, + { + "epoch": 0.26, + "learning_rate": 1.2900485502360081e-05, + "logits/chosen": -2.9622995853424072, + "logits/rejected": -2.9708523750305176, + "logps/chosen": -247.0394287109375, + "logps/rejected": -230.240966796875, + "loss": 3.8977, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.644681453704834, + "rewards/margins": -0.43123507499694824, + "rewards/rejected": -5.213446140289307, + "step": 1700 + }, + { + "epoch": 0.26, + "learning_rate": 1.2899752061828933e-05, + "logits/chosen": -0.7848163843154907, + "logits/rejected": -1.9530150890350342, + "logps/chosen": -202.1880645751953, + "logps/rejected": -499.5576171875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9487518072128296, + "rewards/margins": 7.692694187164307, + "rewards/rejected": -8.641446113586426, + "step": 1701 + }, + { + "epoch": 0.26, + "learning_rate": 1.2899018621297785e-05, + "logits/chosen": -2.934159755706787, + "logits/rejected": -3.1927080154418945, + "logps/chosen": -336.09539794921875, + "logps/rejected": -530.55322265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8934357166290283, + "rewards/margins": 8.627184867858887, + "rewards/rejected": -9.520620346069336, + "step": 1702 + }, + { + "epoch": 0.26, + "learning_rate": 1.2898285180766637e-05, + "logits/chosen": -3.1435024738311768, + "logits/rejected": -1.226336121559143, + "logps/chosen": -294.4231872558594, + "logps/rejected": -165.4266357421875, + "loss": 2.2091, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.112604141235352, + "rewards/margins": 1.0566954612731934, + "rewards/rejected": -5.169299602508545, + "step": 1703 + }, + { + "epoch": 0.27, + "learning_rate": 1.2897551740235489e-05, + "logits/chosen": -2.7216434478759766, + "logits/rejected": -2.982497215270996, + "logps/chosen": -73.87466430664062, + "logps/rejected": -306.55657958984375, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42256927490234375, + "rewards/margins": 5.8514404296875, + "rewards/rejected": -6.274009704589844, + "step": 1704 + }, + { + "epoch": 0.27, + "learning_rate": 1.289681829970434e-05, + "logits/chosen": -2.9484686851501465, + "logits/rejected": -2.026707887649536, + "logps/chosen": -253.41571044921875, + "logps/rejected": -141.12525939941406, + "loss": 3.2528, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.3529486656188965, + "rewards/margins": -0.6668281555175781, + "rewards/rejected": -4.686120510101318, + "step": 1705 + }, + { + "epoch": 0.27, + "learning_rate": 1.2896084859173192e-05, + "logits/chosen": -2.801259994506836, + "logits/rejected": -2.873478412628174, + "logps/chosen": -98.256591796875, + "logps/rejected": -189.3087615966797, + "loss": 0.143, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7265945672988892, + "rewards/margins": 3.1133522987365723, + "rewards/rejected": -4.839946746826172, + "step": 1706 + }, + { + "epoch": 0.27, + "learning_rate": 1.2895351418642046e-05, + "logits/chosen": -2.7554755210876465, + "logits/rejected": -3.2494847774505615, + "logps/chosen": -81.01815795898438, + "logps/rejected": -200.21189880371094, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1547777652740479, + "rewards/margins": 5.091995716094971, + "rewards/rejected": -6.246773719787598, + "step": 1707 + }, + { + "epoch": 0.27, + "learning_rate": 1.2894617978110898e-05, + "logits/chosen": -1.7760303020477295, + "logits/rejected": -3.1352224349975586, + "logps/chosen": -86.14612579345703, + "logps/rejected": -391.73614501953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1751657724380493, + "rewards/margins": 9.489376068115234, + "rewards/rejected": -10.664542198181152, + "step": 1708 + }, + { + "epoch": 0.27, + "learning_rate": 1.289388453757975e-05, + "logits/chosen": -2.9301884174346924, + "logits/rejected": -2.848104238510132, + "logps/chosen": -160.6539306640625, + "logps/rejected": -320.44073486328125, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2300574779510498, + "rewards/margins": 4.079651832580566, + "rewards/rejected": -5.309709072113037, + "step": 1709 + }, + { + "epoch": 0.27, + "learning_rate": 1.2893151097048603e-05, + "logits/chosen": -3.0165717601776123, + "logits/rejected": -1.639313817024231, + "logps/chosen": -147.6546630859375, + "logps/rejected": -251.0624542236328, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3247032165527344, + "rewards/margins": 6.622500419616699, + "rewards/rejected": -7.947203636169434, + "step": 1710 + }, + { + "epoch": 0.27, + "learning_rate": 1.2892417656517455e-05, + "logits/chosen": -2.4669551849365234, + "logits/rejected": -3.179689407348633, + "logps/chosen": -58.47232437133789, + "logps/rejected": -329.82659912109375, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.684167742729187, + "rewards/margins": 5.079426288604736, + "rewards/rejected": -5.763594150543213, + "step": 1711 + }, + { + "epoch": 0.27, + "learning_rate": 1.2891684215986307e-05, + "logits/chosen": -1.5206630229949951, + "logits/rejected": -3.0646157264709473, + "logps/chosen": -153.047119140625, + "logps/rejected": -419.9098815917969, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3956246376037598, + "rewards/margins": 6.515584945678711, + "rewards/rejected": -8.911209106445312, + "step": 1712 + }, + { + "epoch": 0.27, + "learning_rate": 1.2890950775455159e-05, + "logits/chosen": -3.0899877548217773, + "logits/rejected": -3.149044990539551, + "logps/chosen": -430.5491638183594, + "logps/rejected": -473.72003173828125, + "loss": 1.4964, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2916100025177, + "rewards/margins": 1.648871660232544, + "rewards/rejected": -4.940481662750244, + "step": 1713 + }, + { + "epoch": 0.27, + "learning_rate": 1.289021733492401e-05, + "logits/chosen": -3.013124465942383, + "logits/rejected": -3.084876298904419, + "logps/chosen": -109.67918395996094, + "logps/rejected": -224.962158203125, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0096462965011597, + "rewards/margins": 5.606771469116211, + "rewards/rejected": -6.61641788482666, + "step": 1714 + }, + { + "epoch": 0.27, + "learning_rate": 1.2889483894392863e-05, + "logits/chosen": -1.9036622047424316, + "logits/rejected": -3.0111773014068604, + "logps/chosen": -286.8687744140625, + "logps/rejected": -313.02667236328125, + "loss": 0.2982, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0372116565704346, + "rewards/margins": 4.159685134887695, + "rewards/rejected": -5.196896553039551, + "step": 1715 + }, + { + "epoch": 0.27, + "learning_rate": 1.2888750453861716e-05, + "logits/chosen": -2.998716354370117, + "logits/rejected": -1.8128442764282227, + "logps/chosen": -309.33868408203125, + "logps/rejected": -308.2528076171875, + "loss": 2.041, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.993396520614624, + "rewards/margins": 2.7249903678894043, + "rewards/rejected": -6.718387126922607, + "step": 1716 + }, + { + "epoch": 0.27, + "learning_rate": 1.2888017013330568e-05, + "logits/chosen": -2.5644102096557617, + "logits/rejected": -2.7673799991607666, + "logps/chosen": -159.65818786621094, + "logps/rejected": -248.69143676757812, + "loss": 0.4013, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.875661015510559, + "rewards/margins": 3.6858460903167725, + "rewards/rejected": -5.561507225036621, + "step": 1717 + }, + { + "epoch": 0.27, + "learning_rate": 1.288728357279942e-05, + "logits/chosen": -2.9778411388397217, + "logits/rejected": -3.2863047122955322, + "logps/chosen": -407.3641052246094, + "logps/rejected": -383.2507629394531, + "loss": 0.0465, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43108004331588745, + "rewards/margins": 6.860905170440674, + "rewards/rejected": -7.291985034942627, + "step": 1718 + }, + { + "epoch": 0.27, + "learning_rate": 1.2886550132268272e-05, + "logits/chosen": -3.0382750034332275, + "logits/rejected": -2.531019687652588, + "logps/chosen": -224.30575561523438, + "logps/rejected": -324.777587890625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4193778038024902, + "rewards/margins": 7.235872268676758, + "rewards/rejected": -9.655250549316406, + "step": 1719 + }, + { + "epoch": 0.27, + "learning_rate": 1.2885816691737124e-05, + "logits/chosen": -2.891836643218994, + "logits/rejected": -3.1383070945739746, + "logps/chosen": -294.1624755859375, + "logps/rejected": -320.7052001953125, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4612858295440674, + "rewards/margins": 6.692443370819092, + "rewards/rejected": -8.153729438781738, + "step": 1720 + }, + { + "epoch": 0.27, + "learning_rate": 1.2885083251205976e-05, + "logits/chosen": -2.2239878177642822, + "logits/rejected": -2.9883041381835938, + "logps/chosen": -114.01658630371094, + "logps/rejected": -232.39056396484375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7578606605529785, + "rewards/margins": 6.212223052978516, + "rewards/rejected": -7.970083236694336, + "step": 1721 + }, + { + "epoch": 0.27, + "learning_rate": 1.2884349810674827e-05, + "logits/chosen": -1.582854151725769, + "logits/rejected": -3.042952060699463, + "logps/chosen": -50.4227294921875, + "logps/rejected": -248.25094604492188, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0456864833831787, + "rewards/margins": 5.12331485748291, + "rewards/rejected": -7.16900110244751, + "step": 1722 + }, + { + "epoch": 0.27, + "learning_rate": 1.288361637014368e-05, + "logits/chosen": -2.2386159896850586, + "logits/rejected": -3.21012806892395, + "logps/chosen": -103.39783477783203, + "logps/rejected": -370.3272705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0474705696105957, + "rewards/margins": 9.131040573120117, + "rewards/rejected": -11.178510665893555, + "step": 1723 + }, + { + "epoch": 0.27, + "learning_rate": 1.2882882929612531e-05, + "logits/chosen": -1.9773818254470825, + "logits/rejected": -3.304244041442871, + "logps/chosen": -106.67622375488281, + "logps/rejected": -440.350341796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8708240985870361, + "rewards/margins": 8.396673202514648, + "rewards/rejected": -9.267497062683105, + "step": 1724 + }, + { + "epoch": 0.27, + "learning_rate": 1.2882149489081385e-05, + "logits/chosen": -2.5626747608184814, + "logits/rejected": -3.0869295597076416, + "logps/chosen": -250.83453369140625, + "logps/rejected": -242.29600524902344, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8583672046661377, + "rewards/margins": 7.688883304595947, + "rewards/rejected": -8.547250747680664, + "step": 1725 + }, + { + "epoch": 0.27, + "learning_rate": 1.2881416048550237e-05, + "logits/chosen": -2.997684955596924, + "logits/rejected": -3.173707962036133, + "logps/chosen": -33.42212677001953, + "logps/rejected": -139.8806610107422, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.02363657951355, + "rewards/margins": 5.08974552154541, + "rewards/rejected": -7.113381862640381, + "step": 1726 + }, + { + "epoch": 0.27, + "learning_rate": 1.2880682608019089e-05, + "logits/chosen": -2.9666666984558105, + "logits/rejected": -2.769463300704956, + "logps/chosen": -223.1208953857422, + "logps/rejected": -303.27764892578125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2432525157928467, + "rewards/margins": 6.031594753265381, + "rewards/rejected": -7.274847030639648, + "step": 1727 + }, + { + "epoch": 0.27, + "learning_rate": 1.287994916748794e-05, + "logits/chosen": -2.6059014797210693, + "logits/rejected": -2.8862738609313965, + "logps/chosen": -93.79817199707031, + "logps/rejected": -288.68817138671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7894387245178223, + "rewards/margins": 8.338199615478516, + "rewards/rejected": -11.127638816833496, + "step": 1728 + }, + { + "epoch": 0.27, + "learning_rate": 1.2879215726956792e-05, + "logits/chosen": -1.8371679782867432, + "logits/rejected": -2.7719898223876953, + "logps/chosen": -51.718299865722656, + "logps/rejected": -176.50991821289062, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4651987552642822, + "rewards/margins": 5.847428321838379, + "rewards/rejected": -7.312626838684082, + "step": 1729 + }, + { + "epoch": 0.27, + "learning_rate": 1.2878482286425644e-05, + "logits/chosen": -2.894550085067749, + "logits/rejected": -2.057680368423462, + "logps/chosen": -238.96212768554688, + "logps/rejected": -295.3304443359375, + "loss": 4.8176, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.803452491760254, + "rewards/margins": 0.7693042755126953, + "rewards/rejected": -7.572756767272949, + "step": 1730 + }, + { + "epoch": 0.27, + "learning_rate": 1.2877748845894496e-05, + "logits/chosen": -3.031156063079834, + "logits/rejected": -2.2807281017303467, + "logps/chosen": -396.96270751953125, + "logps/rejected": -263.508056640625, + "loss": 2.8198, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.4480791091918945, + "rewards/margins": 0.7018296718597412, + "rewards/rejected": -5.149908542633057, + "step": 1731 + }, + { + "epoch": 0.27, + "learning_rate": 1.2877015405363348e-05, + "logits/chosen": -2.4724018573760986, + "logits/rejected": -3.1497175693511963, + "logps/chosen": -226.323486328125, + "logps/rejected": -309.32904052734375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9425718784332275, + "rewards/margins": 7.0429840087890625, + "rewards/rejected": -9.985555648803711, + "step": 1732 + }, + { + "epoch": 0.27, + "learning_rate": 1.28762819648322e-05, + "logits/chosen": -2.9956161975860596, + "logits/rejected": -3.004317283630371, + "logps/chosen": -89.05337524414062, + "logps/rejected": -311.0072021484375, + "loss": 2.58, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.1048359870910645, + "rewards/margins": 4.108939170837402, + "rewards/rejected": -8.213774681091309, + "step": 1733 + }, + { + "epoch": 0.27, + "learning_rate": 1.2875548524301053e-05, + "logits/chosen": -1.9438142776489258, + "logits/rejected": -3.245218276977539, + "logps/chosen": -73.8039779663086, + "logps/rejected": -356.66937255859375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7939146757125854, + "rewards/margins": 8.535064697265625, + "rewards/rejected": -10.3289794921875, + "step": 1734 + }, + { + "epoch": 0.27, + "learning_rate": 1.2874815083769905e-05, + "logits/chosen": -3.0712499618530273, + "logits/rejected": -2.6796061992645264, + "logps/chosen": -207.75186157226562, + "logps/rejected": -174.28866577148438, + "loss": 0.4725, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.48649263381958, + "rewards/margins": 3.018376350402832, + "rewards/rejected": -4.504868984222412, + "step": 1735 + }, + { + "epoch": 0.27, + "learning_rate": 1.2874081643238757e-05, + "logits/chosen": -3.199608087539673, + "logits/rejected": -3.259051561355591, + "logps/chosen": -225.14378356933594, + "logps/rejected": -469.4273986816406, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6605628728866577, + "rewards/margins": 8.892282485961914, + "rewards/rejected": -10.552845001220703, + "step": 1736 + }, + { + "epoch": 0.27, + "learning_rate": 1.2873348202707609e-05, + "logits/chosen": -3.0199389457702637, + "logits/rejected": -3.043449878692627, + "logps/chosen": -486.9475402832031, + "logps/rejected": -225.03578186035156, + "loss": 3.8317, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.592472553253174, + "rewards/margins": 0.27248573303222656, + "rewards/rejected": -5.8649582862854, + "step": 1737 + }, + { + "epoch": 0.27, + "learning_rate": 1.2872614762176461e-05, + "logits/chosen": -2.6278014183044434, + "logits/rejected": -3.149693727493286, + "logps/chosen": -254.22698974609375, + "logps/rejected": -405.09686279296875, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.088634490966797, + "rewards/margins": 4.637209415435791, + "rewards/rejected": -6.725843906402588, + "step": 1738 + }, + { + "epoch": 0.27, + "learning_rate": 1.2871881321645313e-05, + "logits/chosen": -3.326528310775757, + "logits/rejected": -2.4089436531066895, + "logps/chosen": -162.58534240722656, + "logps/rejected": -231.924560546875, + "loss": 2.5688, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.5583977699279785, + "rewards/margins": 0.4365055561065674, + "rewards/rejected": -5.994903087615967, + "step": 1739 + }, + { + "epoch": 0.27, + "learning_rate": 1.2871147881114165e-05, + "logits/chosen": -3.1294608116149902, + "logits/rejected": -2.7081120014190674, + "logps/chosen": -695.73095703125, + "logps/rejected": -525.6571655273438, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8780536651611328, + "rewards/margins": 7.678956031799316, + "rewards/rejected": -9.55700969696045, + "step": 1740 + }, + { + "epoch": 0.27, + "learning_rate": 1.2870414440583017e-05, + "logits/chosen": -3.2442381381988525, + "logits/rejected": -3.014961004257202, + "logps/chosen": -224.6820068359375, + "logps/rejected": -161.53311157226562, + "loss": 2.9983, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.444067478179932, + "rewards/margins": 0.12148618698120117, + "rewards/rejected": -4.565553188323975, + "step": 1741 + }, + { + "epoch": 0.27, + "learning_rate": 1.286968100005187e-05, + "logits/chosen": -1.3272665739059448, + "logits/rejected": -2.9015097618103027, + "logps/chosen": -65.5229721069336, + "logps/rejected": -348.9518127441406, + "loss": 0.1478, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7037713527679443, + "rewards/margins": 7.065649032592773, + "rewards/rejected": -9.769420623779297, + "step": 1742 + }, + { + "epoch": 0.27, + "learning_rate": 1.2868947559520722e-05, + "logits/chosen": -2.9420297145843506, + "logits/rejected": -2.2644286155700684, + "logps/chosen": -167.49078369140625, + "logps/rejected": -61.2523193359375, + "loss": 6.976, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.315959930419922, + "rewards/margins": -6.975011825561523, + "rewards/rejected": -1.3409479856491089, + "step": 1743 + }, + { + "epoch": 0.27, + "learning_rate": 1.2868214118989576e-05, + "logits/chosen": -1.4081171751022339, + "logits/rejected": -2.525123357772827, + "logps/chosen": -77.27430725097656, + "logps/rejected": -282.02886962890625, + "loss": 0.0216, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7758874893188477, + "rewards/margins": 6.662336349487305, + "rewards/rejected": -8.438223838806152, + "step": 1744 + }, + { + "epoch": 0.27, + "learning_rate": 1.2867480678458427e-05, + "logits/chosen": -2.6698408126831055, + "logits/rejected": -3.2057552337646484, + "logps/chosen": -189.95909118652344, + "logps/rejected": -338.1729431152344, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0763702392578125, + "rewards/margins": 5.597970962524414, + "rewards/rejected": -7.674341201782227, + "step": 1745 + }, + { + "epoch": 0.27, + "learning_rate": 1.286674723792728e-05, + "logits/chosen": -3.1059560775756836, + "logits/rejected": -2.968878746032715, + "logps/chosen": -267.386962890625, + "logps/rejected": -204.85626220703125, + "loss": 1.6618, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.304366111755371, + "rewards/margins": 0.5702846050262451, + "rewards/rejected": -5.874650478363037, + "step": 1746 + }, + { + "epoch": 0.27, + "learning_rate": 1.2866013797396131e-05, + "logits/chosen": -2.311762571334839, + "logits/rejected": -3.234062910079956, + "logps/chosen": -181.99098205566406, + "logps/rejected": -471.090087890625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6847970485687256, + "rewards/margins": 6.792534828186035, + "rewards/rejected": -8.47733211517334, + "step": 1747 + }, + { + "epoch": 0.27, + "learning_rate": 1.2865280356864983e-05, + "logits/chosen": -2.835460901260376, + "logits/rejected": -3.3123950958251953, + "logps/chosen": -40.203033447265625, + "logps/rejected": -276.7745361328125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0690962076187134, + "rewards/margins": 6.944588661193848, + "rewards/rejected": -8.01368522644043, + "step": 1748 + }, + { + "epoch": 0.27, + "learning_rate": 1.2864546916333835e-05, + "logits/chosen": -1.2257966995239258, + "logits/rejected": -2.408289909362793, + "logps/chosen": -72.77056884765625, + "logps/rejected": -311.7651672363281, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0636508464813232, + "rewards/margins": 5.387250900268555, + "rewards/rejected": -7.450901985168457, + "step": 1749 + }, + { + "epoch": 0.27, + "learning_rate": 1.2863813475802687e-05, + "logits/chosen": -3.1330549716949463, + "logits/rejected": -2.9894981384277344, + "logps/chosen": -573.835205078125, + "logps/rejected": -403.6316223144531, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9463882446289062, + "rewards/margins": 4.114688873291016, + "rewards/rejected": -7.061077117919922, + "step": 1750 + }, + { + "epoch": 0.27, + "learning_rate": 1.286308003527154e-05, + "logits/chosen": -2.619157552719116, + "logits/rejected": -2.2032697200775146, + "logps/chosen": -705.4845581054688, + "logps/rejected": -460.39166259765625, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1460793018341064, + "rewards/margins": 5.92082405090332, + "rewards/rejected": -8.066903114318848, + "step": 1751 + }, + { + "epoch": 0.27, + "learning_rate": 1.2862346594740392e-05, + "logits/chosen": -3.18837308883667, + "logits/rejected": -2.1714391708374023, + "logps/chosen": -395.52056884765625, + "logps/rejected": -189.33834838867188, + "loss": 5.0645, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.372296333312988, + "rewards/margins": -5.057111740112305, + "rewards/rejected": -2.3151843547821045, + "step": 1752 + }, + { + "epoch": 0.27, + "learning_rate": 1.2861613154209244e-05, + "logits/chosen": -3.049381732940674, + "logits/rejected": -3.257699966430664, + "logps/chosen": -348.3522033691406, + "logps/rejected": -392.56134033203125, + "loss": 3.937, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.859958648681641, + "rewards/margins": -0.6433463096618652, + "rewards/rejected": -4.216612815856934, + "step": 1753 + }, + { + "epoch": 0.27, + "learning_rate": 1.2860879713678096e-05, + "logits/chosen": -3.1685125827789307, + "logits/rejected": -1.9853707551956177, + "logps/chosen": -239.12353515625, + "logps/rejected": -136.574951171875, + "loss": 2.9541, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.303256988525391, + "rewards/margins": -1.9509096145629883, + "rewards/rejected": -2.3523476123809814, + "step": 1754 + }, + { + "epoch": 0.27, + "learning_rate": 1.2860146273146948e-05, + "logits/chosen": -3.217604398727417, + "logits/rejected": -3.2646639347076416, + "logps/chosen": -52.056949615478516, + "logps/rejected": -131.53204345703125, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.503387451171875, + "rewards/margins": 4.002628326416016, + "rewards/rejected": -6.506015300750732, + "step": 1755 + }, + { + "epoch": 0.27, + "learning_rate": 1.28594128326158e-05, + "logits/chosen": -3.2991578578948975, + "logits/rejected": -3.3218915462493896, + "logps/chosen": -147.6522979736328, + "logps/rejected": -213.85955810546875, + "loss": 0.2844, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9545907378196716, + "rewards/margins": 2.6302175521850586, + "rewards/rejected": -3.584808588027954, + "step": 1756 + }, + { + "epoch": 0.27, + "learning_rate": 1.2858679392084652e-05, + "logits/chosen": -2.807819366455078, + "logits/rejected": -3.25527024269104, + "logps/chosen": -217.7781982421875, + "logps/rejected": -311.31036376953125, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0291903018951416, + "rewards/margins": 3.745610237121582, + "rewards/rejected": -5.7748003005981445, + "step": 1757 + }, + { + "epoch": 0.27, + "learning_rate": 1.2857945951553504e-05, + "logits/chosen": -2.5867860317230225, + "logits/rejected": -3.128901481628418, + "logps/chosen": -108.91421508789062, + "logps/rejected": -130.59609985351562, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4007980823516846, + "rewards/margins": 4.461393356323242, + "rewards/rejected": -5.862191677093506, + "step": 1758 + }, + { + "epoch": 0.27, + "learning_rate": 1.2857212511022355e-05, + "logits/chosen": -2.8997371196746826, + "logits/rejected": -3.157144069671631, + "logps/chosen": -467.72894287109375, + "logps/rejected": -626.6317749023438, + "loss": 0.1363, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6568779945373535, + "rewards/margins": 6.019358158111572, + "rewards/rejected": -7.676236152648926, + "step": 1759 + }, + { + "epoch": 0.27, + "learning_rate": 1.2856479070491209e-05, + "logits/chosen": -3.174811363220215, + "logits/rejected": -2.8179585933685303, + "logps/chosen": -248.22438049316406, + "logps/rejected": -202.65682983398438, + "loss": 0.2212, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.732442855834961, + "rewards/margins": 3.9198451042175293, + "rewards/rejected": -6.652288436889648, + "step": 1760 + }, + { + "epoch": 0.27, + "learning_rate": 1.2855745629960061e-05, + "logits/chosen": -2.2332777976989746, + "logits/rejected": -3.1337780952453613, + "logps/chosen": -101.30244445800781, + "logps/rejected": -375.71826171875, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0775482654571533, + "rewards/margins": 6.32342529296875, + "rewards/rejected": -8.400973320007324, + "step": 1761 + }, + { + "epoch": 0.27, + "learning_rate": 1.2855012189428913e-05, + "logits/chosen": -2.6277401447296143, + "logits/rejected": -3.102825880050659, + "logps/chosen": -86.30432891845703, + "logps/rejected": -368.25408935546875, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.173738479614258, + "rewards/margins": 7.587030410766602, + "rewards/rejected": -9.76076889038086, + "step": 1762 + }, + { + "epoch": 0.27, + "learning_rate": 1.2854278748897765e-05, + "logits/chosen": -1.7758578062057495, + "logits/rejected": -2.969381332397461, + "logps/chosen": -215.74057006835938, + "logps/rejected": -263.48797607421875, + "loss": 1.3681, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.6684465408325195, + "rewards/margins": 0.6751182079315186, + "rewards/rejected": -5.343564510345459, + "step": 1763 + }, + { + "epoch": 0.27, + "learning_rate": 1.2853545308366616e-05, + "logits/chosen": -2.2693421840667725, + "logits/rejected": -2.742729902267456, + "logps/chosen": -252.12747192382812, + "logps/rejected": -432.0397644042969, + "loss": 1.9661, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.920823097229004, + "rewards/margins": 3.991299629211426, + "rewards/rejected": -7.91212272644043, + "step": 1764 + }, + { + "epoch": 0.27, + "learning_rate": 1.2852811867835468e-05, + "logits/chosen": -3.0845580101013184, + "logits/rejected": -3.1206295490264893, + "logps/chosen": -511.9972839355469, + "logps/rejected": -244.19363403320312, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13816708326339722, + "rewards/margins": 6.699690818786621, + "rewards/rejected": -6.561524391174316, + "step": 1765 + }, + { + "epoch": 0.27, + "learning_rate": 1.285207842730432e-05, + "logits/chosen": -2.9433746337890625, + "logits/rejected": -3.1962549686431885, + "logps/chosen": -79.84135437011719, + "logps/rejected": -109.59378051757812, + "loss": 0.7927, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6649577617645264, + "rewards/margins": 1.4132492542266846, + "rewards/rejected": -4.078207015991211, + "step": 1766 + }, + { + "epoch": 0.27, + "learning_rate": 1.2851344986773172e-05, + "logits/chosen": -2.2325563430786133, + "logits/rejected": -3.121159076690674, + "logps/chosen": -141.30172729492188, + "logps/rejected": -444.27105712890625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.108703374862671, + "rewards/margins": 8.773289680480957, + "rewards/rejected": -10.881993293762207, + "step": 1767 + }, + { + "epoch": 0.27, + "learning_rate": 1.2850611546242024e-05, + "logits/chosen": -2.606393814086914, + "logits/rejected": -3.083083391189575, + "logps/chosen": -239.53077697753906, + "logps/rejected": -446.53790283203125, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8248336911201477, + "rewards/margins": 4.935417175292969, + "rewards/rejected": -5.760251045227051, + "step": 1768 + }, + { + "epoch": 0.28, + "learning_rate": 1.2849878105710878e-05, + "logits/chosen": -1.9793962240219116, + "logits/rejected": -2.3602168560028076, + "logps/chosen": -197.30966186523438, + "logps/rejected": -270.90850830078125, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6673215627670288, + "rewards/margins": 5.482858657836914, + "rewards/rejected": -7.150179862976074, + "step": 1769 + }, + { + "epoch": 0.28, + "learning_rate": 1.284914466517973e-05, + "logits/chosen": -2.149200916290283, + "logits/rejected": -2.676795482635498, + "logps/chosen": -168.32135009765625, + "logps/rejected": -265.92816162109375, + "loss": 3.8658, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.809687852859497, + "rewards/margins": 1.2139711380004883, + "rewards/rejected": -5.023658752441406, + "step": 1770 + }, + { + "epoch": 0.28, + "learning_rate": 1.2848411224648581e-05, + "logits/chosen": -3.2068135738372803, + "logits/rejected": -2.5115842819213867, + "logps/chosen": -666.8547973632812, + "logps/rejected": -404.9388427734375, + "loss": 3.3579, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.009739875793457, + "rewards/margins": -1.9152971506118774, + "rewards/rejected": -3.094442844390869, + "step": 1771 + }, + { + "epoch": 0.28, + "learning_rate": 1.2847677784117433e-05, + "logits/chosen": -1.9760687351226807, + "logits/rejected": -1.0895516872406006, + "logps/chosen": -511.4745788574219, + "logps/rejected": -282.11480712890625, + "loss": 2.6584, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.1860198974609375, + "rewards/margins": 0.44707345962524414, + "rewards/rejected": -4.633093357086182, + "step": 1772 + }, + { + "epoch": 0.28, + "learning_rate": 1.2846944343586285e-05, + "logits/chosen": -3.108262062072754, + "logits/rejected": -3.370722532272339, + "logps/chosen": -284.66748046875, + "logps/rejected": -297.31915283203125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4577285647392273, + "rewards/margins": 6.709875583648682, + "rewards/rejected": -7.167604446411133, + "step": 1773 + }, + { + "epoch": 0.28, + "learning_rate": 1.2846210903055137e-05, + "logits/chosen": -3.1270830631256104, + "logits/rejected": -3.106668710708618, + "logps/chosen": -132.52365112304688, + "logps/rejected": -245.98211669921875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5911685824394226, + "rewards/margins": 6.254613876342773, + "rewards/rejected": -6.845782279968262, + "step": 1774 + }, + { + "epoch": 0.28, + "learning_rate": 1.2845477462523989e-05, + "logits/chosen": -3.1162002086639404, + "logits/rejected": -3.0533559322357178, + "logps/chosen": -192.30450439453125, + "logps/rejected": -290.9996337890625, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3817344903945923, + "rewards/margins": 5.327671527862549, + "rewards/rejected": -6.709405899047852, + "step": 1775 + }, + { + "epoch": 0.28, + "learning_rate": 1.2844744021992842e-05, + "logits/chosen": -2.8380305767059326, + "logits/rejected": -3.210789442062378, + "logps/chosen": -267.76629638671875, + "logps/rejected": -310.0561218261719, + "loss": 1.9567, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.012979507446289, + "rewards/margins": -0.7440640926361084, + "rewards/rejected": -2.2689156532287598, + "step": 1776 + }, + { + "epoch": 0.28, + "learning_rate": 1.2844010581461694e-05, + "logits/chosen": -0.9047219753265381, + "logits/rejected": -2.8133883476257324, + "logps/chosen": -169.63143920898438, + "logps/rejected": -407.37237548828125, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9574272632598877, + "rewards/margins": 5.41937780380249, + "rewards/rejected": -6.376805305480957, + "step": 1777 + }, + { + "epoch": 0.28, + "learning_rate": 1.2843277140930548e-05, + "logits/chosen": -3.0550289154052734, + "logits/rejected": -2.088836431503296, + "logps/chosen": -175.18211364746094, + "logps/rejected": -224.71957397460938, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4254753589630127, + "rewards/margins": 7.104125499725342, + "rewards/rejected": -9.529600143432617, + "step": 1778 + }, + { + "epoch": 0.28, + "learning_rate": 1.28425437003994e-05, + "logits/chosen": -2.417698860168457, + "logits/rejected": -3.4374074935913086, + "logps/chosen": -15.590947151184082, + "logps/rejected": -235.1661376953125, + "loss": 0.0606, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7449772953987122, + "rewards/margins": 3.507847785949707, + "rewards/rejected": -4.252824783325195, + "step": 1779 + }, + { + "epoch": 0.28, + "learning_rate": 1.2841810259868252e-05, + "logits/chosen": -2.412161111831665, + "logits/rejected": -3.0382328033447266, + "logps/chosen": -104.74842834472656, + "logps/rejected": -446.0494689941406, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5048424005508423, + "rewards/margins": 8.575551986694336, + "rewards/rejected": -9.08039379119873, + "step": 1780 + }, + { + "epoch": 0.28, + "learning_rate": 1.2841076819337103e-05, + "logits/chosen": -2.068532943725586, + "logits/rejected": -3.035104990005493, + "logps/chosen": -107.9484634399414, + "logps/rejected": -305.1412048339844, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.580432415008545, + "rewards/margins": 5.347443103790283, + "rewards/rejected": -6.927875518798828, + "step": 1781 + }, + { + "epoch": 0.28, + "learning_rate": 1.2840343378805955e-05, + "logits/chosen": -1.1717480421066284, + "logits/rejected": -3.1081271171569824, + "logps/chosen": -54.25889205932617, + "logps/rejected": -447.8295593261719, + "loss": 0.0375, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2511167526245117, + "rewards/margins": 4.816913604736328, + "rewards/rejected": -7.06803035736084, + "step": 1782 + }, + { + "epoch": 0.28, + "learning_rate": 1.2839609938274807e-05, + "logits/chosen": -3.0537309646606445, + "logits/rejected": -2.2461860179901123, + "logps/chosen": -473.99908447265625, + "logps/rejected": -339.7933349609375, + "loss": 5.6395, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.239622592926025, + "rewards/margins": -1.4235076904296875, + "rewards/rejected": -5.816114902496338, + "step": 1783 + }, + { + "epoch": 0.28, + "learning_rate": 1.2838876497743659e-05, + "logits/chosen": -2.4040989875793457, + "logits/rejected": -3.072089195251465, + "logps/chosen": -673.9818115234375, + "logps/rejected": -680.3111572265625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1913803219795227, + "rewards/margins": 6.288928508758545, + "rewards/rejected": -6.480308532714844, + "step": 1784 + }, + { + "epoch": 0.28, + "learning_rate": 1.2838143057212511e-05, + "logits/chosen": -1.0363975763320923, + "logits/rejected": -3.241382122039795, + "logps/chosen": -173.61044311523438, + "logps/rejected": -483.5224609375, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.555158257484436, + "rewards/margins": 5.420435905456543, + "rewards/rejected": -6.9755940437316895, + "step": 1785 + }, + { + "epoch": 0.28, + "learning_rate": 1.2837409616681363e-05, + "logits/chosen": -2.702603578567505, + "logits/rejected": -3.2142627239227295, + "logps/chosen": -166.59542846679688, + "logps/rejected": -243.80535888671875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6459434628486633, + "rewards/margins": 6.644988059997559, + "rewards/rejected": -7.290931701660156, + "step": 1786 + }, + { + "epoch": 0.28, + "learning_rate": 1.2836676176150216e-05, + "logits/chosen": -2.1524202823638916, + "logits/rejected": -3.221506118774414, + "logps/chosen": -30.901187896728516, + "logps/rejected": -267.35516357421875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4922890663146973, + "rewards/margins": 6.548583984375, + "rewards/rejected": -8.040873527526855, + "step": 1787 + }, + { + "epoch": 0.28, + "learning_rate": 1.2835942735619068e-05, + "logits/chosen": -1.7809419631958008, + "logits/rejected": -1.7375121116638184, + "logps/chosen": -383.247314453125, + "logps/rejected": -395.68023681640625, + "loss": 0.0309, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4008491635322571, + "rewards/margins": 3.5041730403900146, + "rewards/rejected": -3.905022144317627, + "step": 1788 + }, + { + "epoch": 0.28, + "learning_rate": 1.283520929508792e-05, + "logits/chosen": -1.8668224811553955, + "logits/rejected": -2.985600709915161, + "logps/chosen": -167.940185546875, + "logps/rejected": -262.3358459472656, + "loss": 0.5712, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7681536674499512, + "rewards/margins": 3.170346260070801, + "rewards/rejected": -4.938499927520752, + "step": 1789 + }, + { + "epoch": 0.28, + "learning_rate": 1.2834475854556772e-05, + "logits/chosen": -3.087693691253662, + "logits/rejected": -2.934438467025757, + "logps/chosen": -380.8500671386719, + "logps/rejected": -384.65960693359375, + "loss": 0.0486, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.074272871017456, + "rewards/margins": 4.285091400146484, + "rewards/rejected": -5.3593645095825195, + "step": 1790 + }, + { + "epoch": 0.28, + "learning_rate": 1.2833742414025624e-05, + "logits/chosen": -2.7768783569335938, + "logits/rejected": -3.208310842514038, + "logps/chosen": -50.45873260498047, + "logps/rejected": -135.52947998046875, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.277452826499939, + "rewards/margins": 3.869940757751465, + "rewards/rejected": -5.147393226623535, + "step": 1791 + }, + { + "epoch": 0.28, + "learning_rate": 1.2833008973494476e-05, + "logits/chosen": -3.2307825088500977, + "logits/rejected": -2.4857449531555176, + "logps/chosen": -284.2166442871094, + "logps/rejected": -286.9476013183594, + "loss": 2.7943, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.099028587341309, + "rewards/margins": 0.3040497303009033, + "rewards/rejected": -4.403078556060791, + "step": 1792 + }, + { + "epoch": 0.28, + "learning_rate": 1.2832275532963328e-05, + "logits/chosen": -2.8849377632141113, + "logits/rejected": -3.355485677719116, + "logps/chosen": -44.22598648071289, + "logps/rejected": -160.51902770996094, + "loss": 0.1105, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.442472219467163, + "rewards/margins": 3.533637762069702, + "rewards/rejected": -4.976109981536865, + "step": 1793 + }, + { + "epoch": 0.28, + "learning_rate": 1.283154209243218e-05, + "logits/chosen": -2.7053709030151367, + "logits/rejected": -3.140796422958374, + "logps/chosen": -173.06411743164062, + "logps/rejected": -161.70492553710938, + "loss": 2.8039, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.116830825805664, + "rewards/margins": 0.3334507942199707, + "rewards/rejected": -4.450281620025635, + "step": 1794 + }, + { + "epoch": 0.28, + "learning_rate": 1.2830808651901031e-05, + "logits/chosen": -2.9538838863372803, + "logits/rejected": -3.1777944564819336, + "logps/chosen": -160.003662109375, + "logps/rejected": -298.49859619140625, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9821338653564453, + "rewards/margins": 4.730071067810059, + "rewards/rejected": -5.712204933166504, + "step": 1795 + }, + { + "epoch": 0.28, + "learning_rate": 1.2830075211369885e-05, + "logits/chosen": -2.8452811241149902, + "logits/rejected": -3.242685317993164, + "logps/chosen": -55.13575744628906, + "logps/rejected": -188.80633544921875, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5201306343078613, + "rewards/margins": 5.540108680725098, + "rewards/rejected": -7.060239315032959, + "step": 1796 + }, + { + "epoch": 0.28, + "learning_rate": 1.2829341770838737e-05, + "logits/chosen": -1.7006497383117676, + "logits/rejected": -3.2148830890655518, + "logps/chosen": -126.05839538574219, + "logps/rejected": -454.1382141113281, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.192009687423706, + "rewards/margins": 5.647604942321777, + "rewards/rejected": -6.8396148681640625, + "step": 1797 + }, + { + "epoch": 0.28, + "learning_rate": 1.2828608330307589e-05, + "logits/chosen": -2.752816677093506, + "logits/rejected": -1.5881489515304565, + "logps/chosen": -207.855712890625, + "logps/rejected": -158.89129638671875, + "loss": 2.5854, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8764171600341797, + "rewards/margins": -1.4428269863128662, + "rewards/rejected": -2.4335899353027344, + "step": 1798 + }, + { + "epoch": 0.28, + "learning_rate": 1.282787488977644e-05, + "logits/chosen": -3.0911478996276855, + "logits/rejected": -1.1576597690582275, + "logps/chosen": -331.9650573730469, + "logps/rejected": -235.51275634765625, + "loss": 3.6399, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.323494911193848, + "rewards/margins": -1.1098213195800781, + "rewards/rejected": -4.2136735916137695, + "step": 1799 + }, + { + "epoch": 0.28, + "learning_rate": 1.2827141449245293e-05, + "logits/chosen": -3.2493486404418945, + "logits/rejected": -2.0761232376098633, + "logps/chosen": -205.2618408203125, + "logps/rejected": -116.54342651367188, + "loss": 3.161, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.021657466888428, + "rewards/margins": -1.2545921802520752, + "rewards/rejected": -2.7670652866363525, + "step": 1800 + }, + { + "epoch": 0.28, + "learning_rate": 1.2826408008714144e-05, + "logits/chosen": -3.2060091495513916, + "logits/rejected": -2.9170548915863037, + "logps/chosen": -113.90064239501953, + "logps/rejected": -82.17436218261719, + "loss": 1.6409, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.943399667739868, + "rewards/margins": -0.9812276363372803, + "rewards/rejected": -1.962172031402588, + "step": 1801 + }, + { + "epoch": 0.28, + "learning_rate": 1.2825674568182996e-05, + "logits/chosen": -3.023159980773926, + "logits/rejected": -2.8771417140960693, + "logps/chosen": -90.4446029663086, + "logps/rejected": -297.23919677734375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6757619976997375, + "rewards/margins": 5.8478312492370605, + "rewards/rejected": -6.523592948913574, + "step": 1802 + }, + { + "epoch": 0.28, + "learning_rate": 1.2824941127651848e-05, + "logits/chosen": -3.2220191955566406, + "logits/rejected": -3.0359737873077393, + "logps/chosen": -344.7170104980469, + "logps/rejected": -337.70538330078125, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6672869920730591, + "rewards/margins": 5.4006147384643555, + "rewards/rejected": -6.067901611328125, + "step": 1803 + }, + { + "epoch": 0.28, + "learning_rate": 1.28242076871207e-05, + "logits/chosen": -3.3069992065429688, + "logits/rejected": -2.460909843444824, + "logps/chosen": -413.7650146484375, + "logps/rejected": -217.89547729492188, + "loss": 1.8598, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6870217323303223, + "rewards/margins": 1.8852410316467285, + "rewards/rejected": -4.572262763977051, + "step": 1804 + }, + { + "epoch": 0.28, + "learning_rate": 1.2823474246589554e-05, + "logits/chosen": -1.3696306943893433, + "logits/rejected": -3.129272937774658, + "logps/chosen": -274.4346923828125, + "logps/rejected": -341.5229187011719, + "loss": 1.2323, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.416459798812866, + "rewards/margins": 3.5648140907287598, + "rewards/rejected": -6.981273651123047, + "step": 1805 + }, + { + "epoch": 0.28, + "learning_rate": 1.2822740806058406e-05, + "logits/chosen": -3.1534149646759033, + "logits/rejected": -2.840569496154785, + "logps/chosen": -127.16896057128906, + "logps/rejected": -325.07525634765625, + "loss": 0.0906, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3829411268234253, + "rewards/margins": 5.514822006225586, + "rewards/rejected": -6.897763252258301, + "step": 1806 + }, + { + "epoch": 0.28, + "learning_rate": 1.2822007365527257e-05, + "logits/chosen": -3.3297414779663086, + "logits/rejected": -3.4037744998931885, + "logps/chosen": -20.75732421875, + "logps/rejected": -149.44436645507812, + "loss": 0.0409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5774022340774536, + "rewards/margins": 4.642109394073486, + "rewards/rejected": -5.219511985778809, + "step": 1807 + }, + { + "epoch": 0.28, + "learning_rate": 1.282127392499611e-05, + "logits/chosen": -2.8139805793762207, + "logits/rejected": -3.3361053466796875, + "logps/chosen": -140.4596405029297, + "logps/rejected": -299.9068298339844, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6408401727676392, + "rewards/margins": 4.7662200927734375, + "rewards/rejected": -6.407060146331787, + "step": 1808 + }, + { + "epoch": 0.28, + "learning_rate": 1.2820540484464961e-05, + "logits/chosen": -2.7857398986816406, + "logits/rejected": -2.993685245513916, + "logps/chosen": -76.74435424804688, + "logps/rejected": -161.8767852783203, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5071117877960205, + "rewards/margins": 3.7630648612976074, + "rewards/rejected": -6.270176887512207, + "step": 1809 + }, + { + "epoch": 0.28, + "learning_rate": 1.2819807043933815e-05, + "logits/chosen": -2.1194052696228027, + "logits/rejected": -2.6733670234680176, + "logps/chosen": -422.21014404296875, + "logps/rejected": -496.3027038574219, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7820103168487549, + "rewards/margins": 5.072934150695801, + "rewards/rejected": -6.854944705963135, + "step": 1810 + }, + { + "epoch": 0.28, + "learning_rate": 1.2819073603402667e-05, + "logits/chosen": -3.1944663524627686, + "logits/rejected": -2.6574947834014893, + "logps/chosen": -166.73574829101562, + "logps/rejected": -124.98303985595703, + "loss": 0.0709, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0054329633712769, + "rewards/margins": 2.647722005844116, + "rewards/rejected": -3.6531548500061035, + "step": 1811 + }, + { + "epoch": 0.28, + "learning_rate": 1.2818340162871518e-05, + "logits/chosen": -3.1352908611297607, + "logits/rejected": -3.1795341968536377, + "logps/chosen": -158.9008331298828, + "logps/rejected": -216.3037872314453, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4475715160369873, + "rewards/margins": 4.374183177947998, + "rewards/rejected": -5.821754455566406, + "step": 1812 + }, + { + "epoch": 0.28, + "learning_rate": 1.281760672234037e-05, + "logits/chosen": -3.2048544883728027, + "logits/rejected": -2.8461852073669434, + "logps/chosen": -584.3857421875, + "logps/rejected": -590.6602783203125, + "loss": 3.9815, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.8037428855896, + "rewards/margins": -1.6003494262695312, + "rewards/rejected": -3.2033936977386475, + "step": 1813 + }, + { + "epoch": 0.28, + "learning_rate": 1.2816873281809224e-05, + "logits/chosen": -2.9205267429351807, + "logits/rejected": -3.129345417022705, + "logps/chosen": -340.4290771484375, + "logps/rejected": -236.82296752929688, + "loss": 1.236, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3927001953125, + "rewards/margins": 3.0345020294189453, + "rewards/rejected": -6.427202224731445, + "step": 1814 + }, + { + "epoch": 0.28, + "learning_rate": 1.2816139841278076e-05, + "logits/chosen": -3.111971378326416, + "logits/rejected": -3.1845383644104004, + "logps/chosen": -50.16571044921875, + "logps/rejected": -152.80340576171875, + "loss": 0.0828, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8881453275680542, + "rewards/margins": 2.717163562774658, + "rewards/rejected": -4.605308532714844, + "step": 1815 + }, + { + "epoch": 0.28, + "learning_rate": 1.2815406400746928e-05, + "logits/chosen": -0.9743863940238953, + "logits/rejected": -3.270712375640869, + "logps/chosen": -44.57789611816406, + "logps/rejected": -459.97296142578125, + "loss": 0.4927, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4496614933013916, + "rewards/margins": 4.349340438842773, + "rewards/rejected": -7.799002170562744, + "step": 1816 + }, + { + "epoch": 0.28, + "learning_rate": 1.281467296021578e-05, + "logits/chosen": -2.75447940826416, + "logits/rejected": -3.2344655990600586, + "logps/chosen": -140.71363830566406, + "logps/rejected": -168.57598876953125, + "loss": 1.5093, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.251084566116333, + "rewards/margins": 2.3152668476104736, + "rewards/rejected": -4.566351413726807, + "step": 1817 + }, + { + "epoch": 0.28, + "learning_rate": 1.2813939519684631e-05, + "logits/chosen": -3.209744691848755, + "logits/rejected": -3.2755472660064697, + "logps/chosen": -198.92294311523438, + "logps/rejected": -261.6875, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5028452277183533, + "rewards/margins": 6.785063743591309, + "rewards/rejected": -7.287909030914307, + "step": 1818 + }, + { + "epoch": 0.28, + "learning_rate": 1.2813206079153483e-05, + "logits/chosen": -3.3258092403411865, + "logits/rejected": -2.719846487045288, + "logps/chosen": -1051.2047119140625, + "logps/rejected": -740.9287109375, + "loss": 1.1059, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1372392177581787, + "rewards/margins": 1.2796567678451538, + "rewards/rejected": -3.416896104812622, + "step": 1819 + }, + { + "epoch": 0.28, + "learning_rate": 1.2812472638622335e-05, + "logits/chosen": -2.637460470199585, + "logits/rejected": -3.346015691757202, + "logps/chosen": -372.9476318359375, + "logps/rejected": -552.625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1251777410507202, + "rewards/margins": 7.107863426208496, + "rewards/rejected": -8.233041763305664, + "step": 1820 + }, + { + "epoch": 0.28, + "learning_rate": 1.2811739198091187e-05, + "logits/chosen": -3.217261791229248, + "logits/rejected": -2.992469072341919, + "logps/chosen": -137.156982421875, + "logps/rejected": -264.8900451660156, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8392952084541321, + "rewards/margins": 4.441283702850342, + "rewards/rejected": -5.28057861328125, + "step": 1821 + }, + { + "epoch": 0.28, + "learning_rate": 1.2811005757560039e-05, + "logits/chosen": -2.2885358333587646, + "logits/rejected": -3.2041115760803223, + "logps/chosen": -164.88055419921875, + "logps/rejected": -138.156005859375, + "loss": 0.6851, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1985466480255127, + "rewards/margins": 1.0720689296722412, + "rewards/rejected": -4.270615577697754, + "step": 1822 + }, + { + "epoch": 0.28, + "learning_rate": 1.2810272317028893e-05, + "logits/chosen": -3.3004605770111084, + "logits/rejected": -3.3871419429779053, + "logps/chosen": -464.1951599121094, + "logps/rejected": -381.98822021484375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3581567704677582, + "rewards/margins": 6.828045845031738, + "rewards/rejected": -7.186202049255371, + "step": 1823 + }, + { + "epoch": 0.28, + "learning_rate": 1.2809538876497744e-05, + "logits/chosen": -1.643289566040039, + "logits/rejected": -3.3159804344177246, + "logps/chosen": -92.4921646118164, + "logps/rejected": -397.36810302734375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6600668430328369, + "rewards/margins": 6.57783317565918, + "rewards/rejected": -7.2378997802734375, + "step": 1824 + }, + { + "epoch": 0.28, + "learning_rate": 1.2808805435966596e-05, + "logits/chosen": -1.848119854927063, + "logits/rejected": -2.9973719120025635, + "logps/chosen": -211.86373901367188, + "logps/rejected": -234.47763061523438, + "loss": 2.1247, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8286638259887695, + "rewards/margins": -1.2026008367538452, + "rewards/rejected": -2.6260628700256348, + "step": 1825 + }, + { + "epoch": 0.28, + "learning_rate": 1.2808071995435448e-05, + "logits/chosen": -2.815483331680298, + "logits/rejected": -3.2440218925476074, + "logps/chosen": -117.20335388183594, + "logps/rejected": -238.71389770507812, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6638062000274658, + "rewards/margins": 4.864935874938965, + "rewards/rejected": -6.52874231338501, + "step": 1826 + }, + { + "epoch": 0.28, + "learning_rate": 1.28073385549043e-05, + "logits/chosen": -2.058588981628418, + "logits/rejected": -3.220076322555542, + "logps/chosen": -111.88264465332031, + "logps/rejected": -403.8302001953125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3279350996017456, + "rewards/margins": 5.704204559326172, + "rewards/rejected": -7.032139778137207, + "step": 1827 + }, + { + "epoch": 0.28, + "learning_rate": 1.2806605114373152e-05, + "logits/chosen": -2.965790033340454, + "logits/rejected": -3.206202268600464, + "logps/chosen": -227.22640991210938, + "logps/rejected": -223.14178466796875, + "loss": 1.4011, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7951653003692627, + "rewards/margins": 1.9316482543945312, + "rewards/rejected": -3.726813554763794, + "step": 1828 + }, + { + "epoch": 0.28, + "learning_rate": 1.2805871673842004e-05, + "logits/chosen": -2.7232449054718018, + "logits/rejected": -3.110405445098877, + "logps/chosen": -616.76318359375, + "logps/rejected": -730.8452758789062, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21670076251029968, + "rewards/margins": 9.496650695800781, + "rewards/rejected": -9.279950141906738, + "step": 1829 + }, + { + "epoch": 0.28, + "learning_rate": 1.2805138233310856e-05, + "logits/chosen": -2.991640090942383, + "logits/rejected": -3.165311336517334, + "logps/chosen": -85.52388000488281, + "logps/rejected": -132.33880615234375, + "loss": 0.0988, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.359959602355957, + "rewards/margins": 3.6408872604370117, + "rewards/rejected": -6.000846862792969, + "step": 1830 + }, + { + "epoch": 0.28, + "learning_rate": 1.2804404792779708e-05, + "logits/chosen": -1.4547975063323975, + "logits/rejected": -3.0683674812316895, + "logps/chosen": -194.56619262695312, + "logps/rejected": -682.3951416015625, + "loss": 2.6202, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2727408409118652, + "rewards/margins": -0.5153517723083496, + "rewards/rejected": -2.7573890686035156, + "step": 1831 + }, + { + "epoch": 0.28, + "learning_rate": 1.2803671352248561e-05, + "logits/chosen": -1.5667756795883179, + "logits/rejected": -3.064634084701538, + "logps/chosen": -141.92662048339844, + "logps/rejected": -332.648193359375, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0843250751495361, + "rewards/margins": 4.7017059326171875, + "rewards/rejected": -5.7860307693481445, + "step": 1832 + }, + { + "epoch": 0.29, + "learning_rate": 1.2802937911717413e-05, + "logits/chosen": -2.9766440391540527, + "logits/rejected": -2.8070883750915527, + "logps/chosen": -253.55572509765625, + "logps/rejected": -298.3114013671875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6405082941055298, + "rewards/margins": 6.9355316162109375, + "rewards/rejected": -8.576040267944336, + "step": 1833 + }, + { + "epoch": 0.29, + "learning_rate": 1.2802204471186265e-05, + "logits/chosen": -3.2727370262145996, + "logits/rejected": -3.1518192291259766, + "logps/chosen": -755.36572265625, + "logps/rejected": -419.08489990234375, + "loss": 0.1063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2080658674240112, + "rewards/margins": 3.614144802093506, + "rewards/rejected": -4.822210788726807, + "step": 1834 + }, + { + "epoch": 0.29, + "learning_rate": 1.2801471030655117e-05, + "logits/chosen": -2.40429425239563, + "logits/rejected": -3.1934609413146973, + "logps/chosen": -411.2744140625, + "logps/rejected": -563.233642578125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7522170543670654, + "rewards/margins": 5.883934020996094, + "rewards/rejected": -6.636151313781738, + "step": 1835 + }, + { + "epoch": 0.29, + "learning_rate": 1.2800737590123969e-05, + "logits/chosen": -3.23677659034729, + "logits/rejected": -3.294480085372925, + "logps/chosen": -72.79766082763672, + "logps/rejected": -96.15347290039062, + "loss": 0.0573, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2238637208938599, + "rewards/margins": 3.294741630554199, + "rewards/rejected": -4.5186052322387695, + "step": 1836 + }, + { + "epoch": 0.29, + "learning_rate": 1.280000414959282e-05, + "logits/chosen": -3.271965742111206, + "logits/rejected": -2.9494528770446777, + "logps/chosen": -219.1995849609375, + "logps/rejected": -137.5806427001953, + "loss": 0.065, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1533408164978027, + "rewards/margins": 3.2476377487182617, + "rewards/rejected": -5.4009785652160645, + "step": 1837 + }, + { + "epoch": 0.29, + "learning_rate": 1.2799270709061672e-05, + "logits/chosen": -3.079389810562134, + "logits/rejected": -2.132075071334839, + "logps/chosen": -329.9804382324219, + "logps/rejected": -168.76962280273438, + "loss": 3.805, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.051876068115234, + "rewards/margins": -3.77847957611084, + "rewards/rejected": -2.2733962535858154, + "step": 1838 + }, + { + "epoch": 0.29, + "learning_rate": 1.2798537268530524e-05, + "logits/chosen": -2.740666389465332, + "logits/rejected": -2.9966323375701904, + "logps/chosen": -214.315673828125, + "logps/rejected": -532.05712890625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4161797761917114, + "rewards/margins": 6.945069313049316, + "rewards/rejected": -8.361248970031738, + "step": 1839 + }, + { + "epoch": 0.29, + "learning_rate": 1.2797803827999378e-05, + "logits/chosen": -2.8340108394622803, + "logits/rejected": -3.229522705078125, + "logps/chosen": -46.09861755371094, + "logps/rejected": -175.14471435546875, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0502612590789795, + "rewards/margins": 4.345313549041748, + "rewards/rejected": -5.395575046539307, + "step": 1840 + }, + { + "epoch": 0.29, + "learning_rate": 1.279707038746823e-05, + "logits/chosen": -1.680633544921875, + "logits/rejected": -3.1740972995758057, + "logps/chosen": -69.71659851074219, + "logps/rejected": -225.28271484375, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0507941246032715, + "rewards/margins": 5.211341857910156, + "rewards/rejected": -6.262136459350586, + "step": 1841 + }, + { + "epoch": 0.29, + "learning_rate": 1.2796336946937082e-05, + "logits/chosen": -2.045638084411621, + "logits/rejected": -3.3004562854766846, + "logps/chosen": -67.22112274169922, + "logps/rejected": -232.83984375, + "loss": 0.2608, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.225625991821289, + "rewards/margins": 3.3823587894439697, + "rewards/rejected": -5.607985019683838, + "step": 1842 + }, + { + "epoch": 0.29, + "learning_rate": 1.2795603506405934e-05, + "logits/chosen": -3.0932576656341553, + "logits/rejected": -2.050096273422241, + "logps/chosen": -316.6959228515625, + "logps/rejected": -264.85540771484375, + "loss": 0.6134, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.507934808731079, + "rewards/margins": 0.4798200726509094, + "rewards/rejected": -1.9877548217773438, + "step": 1843 + }, + { + "epoch": 0.29, + "learning_rate": 1.2794870065874787e-05, + "logits/chosen": -3.210963249206543, + "logits/rejected": -2.955256223678589, + "logps/chosen": -103.38981628417969, + "logps/rejected": -269.7119445800781, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8819687366485596, + "rewards/margins": 4.182783126831055, + "rewards/rejected": -7.064751625061035, + "step": 1844 + }, + { + "epoch": 0.29, + "learning_rate": 1.2794136625343639e-05, + "logits/chosen": -3.080404043197632, + "logits/rejected": -3.2299487590789795, + "logps/chosen": -275.5811767578125, + "logps/rejected": -371.4937744140625, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3761298656463623, + "rewards/margins": 5.053529739379883, + "rewards/rejected": -6.429659843444824, + "step": 1845 + }, + { + "epoch": 0.29, + "learning_rate": 1.279340318481249e-05, + "logits/chosen": -3.2200441360473633, + "logits/rejected": -3.23395037651062, + "logps/chosen": -434.6484069824219, + "logps/rejected": -426.845458984375, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2830123901367188, + "rewards/margins": 4.508275508880615, + "rewards/rejected": -6.791287422180176, + "step": 1846 + }, + { + "epoch": 0.29, + "learning_rate": 1.2792669744281343e-05, + "logits/chosen": -3.1824545860290527, + "logits/rejected": -2.200641632080078, + "logps/chosen": -132.0766143798828, + "logps/rejected": -150.21548461914062, + "loss": 3.9075, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.80317497253418, + "rewards/margins": 1.066725730895996, + "rewards/rejected": -6.869901180267334, + "step": 1847 + }, + { + "epoch": 0.29, + "learning_rate": 1.2791936303750195e-05, + "logits/chosen": -2.3803939819335938, + "logits/rejected": -3.240511655807495, + "logps/chosen": -237.99319458007812, + "logps/rejected": -444.963134765625, + "loss": 2.6377, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.681090354919434, + "rewards/margins": 1.4269843101501465, + "rewards/rejected": -6.108075141906738, + "step": 1848 + }, + { + "epoch": 0.29, + "learning_rate": 1.2791202863219048e-05, + "logits/chosen": -1.6060140132904053, + "logits/rejected": -3.1169204711914062, + "logps/chosen": -161.94947814941406, + "logps/rejected": -282.59112548828125, + "loss": 0.089, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.185504198074341, + "rewards/margins": 2.375775098800659, + "rewards/rejected": -5.561279296875, + "step": 1849 + }, + { + "epoch": 0.29, + "learning_rate": 1.27904694226879e-05, + "logits/chosen": -1.3692981004714966, + "logits/rejected": -2.9022247791290283, + "logps/chosen": -110.61053466796875, + "logps/rejected": -508.7847595214844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8949753046035767, + "rewards/margins": 9.580854415893555, + "rewards/rejected": -11.475830078125, + "step": 1850 + }, + { + "epoch": 0.29, + "learning_rate": 1.2789735982156752e-05, + "logits/chosen": -1.076568365097046, + "logits/rejected": -3.1989364624023438, + "logps/chosen": -80.09379577636719, + "logps/rejected": -513.108642578125, + "loss": 0.1482, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.221919298171997, + "rewards/margins": 4.638417720794678, + "rewards/rejected": -6.860337257385254, + "step": 1851 + }, + { + "epoch": 0.29, + "learning_rate": 1.2789002541625604e-05, + "logits/chosen": -1.945347785949707, + "logits/rejected": -3.166231632232666, + "logps/chosen": -39.046993255615234, + "logps/rejected": -117.54300689697266, + "loss": 0.8434, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6344614028930664, + "rewards/margins": 0.19402337074279785, + "rewards/rejected": -2.8284847736358643, + "step": 1852 + }, + { + "epoch": 0.29, + "learning_rate": 1.2788269101094456e-05, + "logits/chosen": -2.636486530303955, + "logits/rejected": -3.0832271575927734, + "logps/chosen": -101.39524841308594, + "logps/rejected": -270.0260009765625, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0680413246154785, + "rewards/margins": 7.105144500732422, + "rewards/rejected": -9.173186302185059, + "step": 1853 + }, + { + "epoch": 0.29, + "learning_rate": 1.2787535660563308e-05, + "logits/chosen": -1.062286376953125, + "logits/rejected": -2.9443612098693848, + "logps/chosen": -73.97364044189453, + "logps/rejected": -430.36968994140625, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3899919986724854, + "rewards/margins": 7.858294486999512, + "rewards/rejected": -9.248286247253418, + "step": 1854 + }, + { + "epoch": 0.29, + "learning_rate": 1.278680222003216e-05, + "logits/chosen": -2.1206233501434326, + "logits/rejected": -3.0326192378997803, + "logps/chosen": -213.4577178955078, + "logps/rejected": -418.71136474609375, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9212350845336914, + "rewards/margins": 6.58280086517334, + "rewards/rejected": -9.504035949707031, + "step": 1855 + }, + { + "epoch": 0.29, + "learning_rate": 1.2786068779501011e-05, + "logits/chosen": -3.2507073879241943, + "logits/rejected": -2.118234395980835, + "logps/chosen": -201.37062072753906, + "logps/rejected": -124.86396789550781, + "loss": 2.3299, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.097687244415283, + "rewards/margins": 0.06878256797790527, + "rewards/rejected": -4.166470050811768, + "step": 1856 + }, + { + "epoch": 0.29, + "learning_rate": 1.2785335338969863e-05, + "logits/chosen": -2.982753276824951, + "logits/rejected": -3.3155202865600586, + "logps/chosen": -212.54290771484375, + "logps/rejected": -415.4791564941406, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7263458371162415, + "rewards/margins": 6.0466413497924805, + "rewards/rejected": -6.772987365722656, + "step": 1857 + }, + { + "epoch": 0.29, + "learning_rate": 1.2784601898438717e-05, + "logits/chosen": -3.3976094722747803, + "logits/rejected": -3.103027820587158, + "logps/chosen": -129.04803466796875, + "logps/rejected": -158.69667053222656, + "loss": 3.4067, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.749587059020996, + "rewards/margins": -1.5851773023605347, + "rewards/rejected": -3.164409637451172, + "step": 1858 + }, + { + "epoch": 0.29, + "learning_rate": 1.2783868457907569e-05, + "logits/chosen": -2.5504677295684814, + "logits/rejected": -3.1124844551086426, + "logps/chosen": -121.10545349121094, + "logps/rejected": -280.33868408203125, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6402183771133423, + "rewards/margins": 5.652581214904785, + "rewards/rejected": -6.292799472808838, + "step": 1859 + }, + { + "epoch": 0.29, + "learning_rate": 1.278313501737642e-05, + "logits/chosen": -3.2929439544677734, + "logits/rejected": -2.4651002883911133, + "logps/chosen": -223.31288146972656, + "logps/rejected": -59.023033142089844, + "loss": 2.6765, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.259547472000122, + "rewards/margins": -2.1658716201782227, + "rewards/rejected": -1.0936758518218994, + "step": 1860 + }, + { + "epoch": 0.29, + "learning_rate": 1.2782401576845272e-05, + "logits/chosen": -3.303358554840088, + "logits/rejected": -3.168386936187744, + "logps/chosen": -378.3905029296875, + "logps/rejected": -363.89410400390625, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6202667355537415, + "rewards/margins": 5.282522678375244, + "rewards/rejected": -5.902789115905762, + "step": 1861 + }, + { + "epoch": 0.29, + "learning_rate": 1.2781668136314124e-05, + "logits/chosen": -3.3992743492126465, + "logits/rejected": -3.3852880001068115, + "logps/chosen": -73.35162353515625, + "logps/rejected": -142.59225463867188, + "loss": 1.2848, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.494255542755127, + "rewards/margins": 0.8640223741531372, + "rewards/rejected": -3.3582777976989746, + "step": 1862 + }, + { + "epoch": 0.29, + "learning_rate": 1.2780934695782976e-05, + "logits/chosen": -2.8931219577789307, + "logits/rejected": -3.1355156898498535, + "logps/chosen": -93.46647644042969, + "logps/rejected": -243.14157104492188, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9921700954437256, + "rewards/margins": 3.7119808197021484, + "rewards/rejected": -5.704151153564453, + "step": 1863 + }, + { + "epoch": 0.29, + "learning_rate": 1.2780201255251828e-05, + "logits/chosen": -2.052617073059082, + "logits/rejected": -2.463672161102295, + "logps/chosen": -209.17274475097656, + "logps/rejected": -221.91668701171875, + "loss": 1.1322, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0118966102600098, + "rewards/margins": 2.2351765632629395, + "rewards/rejected": -5.247073173522949, + "step": 1864 + }, + { + "epoch": 0.29, + "learning_rate": 1.277946781472068e-05, + "logits/chosen": -3.1631107330322266, + "logits/rejected": -3.160689353942871, + "logps/chosen": -72.39909362792969, + "logps/rejected": -81.28079223632812, + "loss": 0.8461, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4578680992126465, + "rewards/margins": 0.0013273954391479492, + "rewards/rejected": -3.459195375442505, + "step": 1865 + }, + { + "epoch": 0.29, + "learning_rate": 1.2778734374189532e-05, + "logits/chosen": -2.3819029331207275, + "logits/rejected": -2.9797277450561523, + "logps/chosen": -256.4208679199219, + "logps/rejected": -455.02764892578125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8411781191825867, + "rewards/margins": 7.157634735107422, + "rewards/rejected": -7.998813152313232, + "step": 1866 + }, + { + "epoch": 0.29, + "learning_rate": 1.2778000933658385e-05, + "logits/chosen": -3.0507702827453613, + "logits/rejected": -2.5360348224639893, + "logps/chosen": -288.51336669921875, + "logps/rejected": -403.7006530761719, + "loss": 4.3464, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.431353569030762, + "rewards/margins": -0.5856812000274658, + "rewards/rejected": -6.845671653747559, + "step": 1867 + }, + { + "epoch": 0.29, + "learning_rate": 1.2777267493127237e-05, + "logits/chosen": -2.27962589263916, + "logits/rejected": -3.0858850479125977, + "logps/chosen": -31.138992309570312, + "logps/rejected": -354.7008056640625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6064211130142212, + "rewards/margins": 8.309904098510742, + "rewards/rejected": -8.916325569152832, + "step": 1868 + }, + { + "epoch": 0.29, + "learning_rate": 1.2776534052596089e-05, + "logits/chosen": -2.482440233230591, + "logits/rejected": -3.094209909439087, + "logps/chosen": -432.0139465332031, + "logps/rejected": -467.72625732421875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.137525200843811, + "rewards/margins": 6.887818336486816, + "rewards/rejected": -8.025343894958496, + "step": 1869 + }, + { + "epoch": 0.29, + "learning_rate": 1.2775800612064941e-05, + "logits/chosen": -3.2224411964416504, + "logits/rejected": -3.1352756023406982, + "logps/chosen": -160.28182983398438, + "logps/rejected": -346.7171936035156, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9766337871551514, + "rewards/margins": 7.1398515701293945, + "rewards/rejected": -9.116485595703125, + "step": 1870 + }, + { + "epoch": 0.29, + "learning_rate": 1.2775067171533793e-05, + "logits/chosen": -3.2444815635681152, + "logits/rejected": -3.241149425506592, + "logps/chosen": -341.0895080566406, + "logps/rejected": -469.81231689453125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4104793071746826, + "rewards/margins": 7.213104248046875, + "rewards/rejected": -8.623583793640137, + "step": 1871 + }, + { + "epoch": 0.29, + "learning_rate": 1.2774333731002645e-05, + "logits/chosen": -2.608383893966675, + "logits/rejected": -2.906388998031616, + "logps/chosen": -123.6597671508789, + "logps/rejected": -252.2599639892578, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6935569643974304, + "rewards/margins": 4.8182783126831055, + "rewards/rejected": -5.511835098266602, + "step": 1872 + }, + { + "epoch": 0.29, + "learning_rate": 1.2773600290471497e-05, + "logits/chosen": -3.2112386226654053, + "logits/rejected": -3.155416488647461, + "logps/chosen": -371.8338623046875, + "logps/rejected": -324.7637939453125, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4687341451644897, + "rewards/margins": 4.275985240936279, + "rewards/rejected": -5.744719505310059, + "step": 1873 + }, + { + "epoch": 0.29, + "learning_rate": 1.2772866849940349e-05, + "logits/chosen": -3.256004810333252, + "logits/rejected": -3.4611217975616455, + "logps/chosen": -22.094310760498047, + "logps/rejected": -187.4193115234375, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9126532077789307, + "rewards/margins": 6.201109886169434, + "rewards/rejected": -7.113762855529785, + "step": 1874 + }, + { + "epoch": 0.29, + "learning_rate": 1.27721334094092e-05, + "logits/chosen": -2.5577456951141357, + "logits/rejected": -3.2374634742736816, + "logps/chosen": -111.6998291015625, + "logps/rejected": -233.71328735351562, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6186916828155518, + "rewards/margins": 5.657170295715332, + "rewards/rejected": -6.275862216949463, + "step": 1875 + }, + { + "epoch": 0.29, + "learning_rate": 1.2771399968878054e-05, + "logits/chosen": -3.0733015537261963, + "logits/rejected": -2.9049854278564453, + "logps/chosen": -768.161865234375, + "logps/rejected": -650.2877197265625, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.179046630859375, + "rewards/margins": 7.256060600280762, + "rewards/rejected": -8.435107231140137, + "step": 1876 + }, + { + "epoch": 0.29, + "learning_rate": 1.2770666528346906e-05, + "logits/chosen": -2.160987138748169, + "logits/rejected": -2.9060299396514893, + "logps/chosen": -234.126953125, + "logps/rejected": -457.07989501953125, + "loss": 0.0906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02730254828929901, + "rewards/margins": 4.334362983703613, + "rewards/rejected": -4.36166524887085, + "step": 1877 + }, + { + "epoch": 0.29, + "learning_rate": 1.276993308781576e-05, + "logits/chosen": -2.1754562854766846, + "logits/rejected": -2.4678304195404053, + "logps/chosen": -294.09857177734375, + "logps/rejected": -315.33795166015625, + "loss": 2.7881, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.7483673095703125, + "rewards/margins": 0.2517073154449463, + "rewards/rejected": -5.000074863433838, + "step": 1878 + }, + { + "epoch": 0.29, + "learning_rate": 1.2769199647284611e-05, + "logits/chosen": -3.036447763442993, + "logits/rejected": -3.085369825363159, + "logps/chosen": -73.85055541992188, + "logps/rejected": -184.0275115966797, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6464052200317383, + "rewards/margins": 5.7738037109375, + "rewards/rejected": -7.420208930969238, + "step": 1879 + }, + { + "epoch": 0.29, + "learning_rate": 1.2768466206753463e-05, + "logits/chosen": -2.0285677909851074, + "logits/rejected": -3.16046404838562, + "logps/chosen": -255.49822998046875, + "logps/rejected": -369.57501220703125, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9820457696914673, + "rewards/margins": 5.272704124450684, + "rewards/rejected": -6.2547502517700195, + "step": 1880 + }, + { + "epoch": 0.29, + "learning_rate": 1.2767732766222315e-05, + "logits/chosen": -3.126344680786133, + "logits/rejected": -2.3892822265625, + "logps/chosen": -274.2802734375, + "logps/rejected": -245.07423400878906, + "loss": 0.1537, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7344679832458496, + "rewards/margins": 3.7668185234069824, + "rewards/rejected": -6.501286506652832, + "step": 1881 + }, + { + "epoch": 0.29, + "learning_rate": 1.2766999325691167e-05, + "logits/chosen": -3.0546767711639404, + "logits/rejected": -2.8465161323547363, + "logps/chosen": -178.92823791503906, + "logps/rejected": -167.77389526367188, + "loss": 2.3877, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.102633476257324, + "rewards/margins": -2.1793570518493652, + "rewards/rejected": -1.923276662826538, + "step": 1882 + }, + { + "epoch": 0.29, + "learning_rate": 1.2766265885160019e-05, + "logits/chosen": -2.9377853870391846, + "logits/rejected": -2.6141459941864014, + "logps/chosen": -180.77102661132812, + "logps/rejected": -241.41819763183594, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.219989776611328, + "rewards/margins": 6.243216514587402, + "rewards/rejected": -8.46320629119873, + "step": 1883 + }, + { + "epoch": 0.29, + "learning_rate": 1.276553244462887e-05, + "logits/chosen": -1.6965126991271973, + "logits/rejected": -3.006354808807373, + "logps/chosen": -45.00571060180664, + "logps/rejected": -294.5094299316406, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3635636568069458, + "rewards/margins": 4.78350830078125, + "rewards/rejected": -6.147071838378906, + "step": 1884 + }, + { + "epoch": 0.29, + "learning_rate": 1.2764799004097724e-05, + "logits/chosen": -1.9741312265396118, + "logits/rejected": -3.1440398693084717, + "logps/chosen": -78.62166595458984, + "logps/rejected": -226.86187744140625, + "loss": 0.0535, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.758115530014038, + "rewards/margins": 3.715968132019043, + "rewards/rejected": -5.47408390045166, + "step": 1885 + }, + { + "epoch": 0.29, + "learning_rate": 1.2764065563566576e-05, + "logits/chosen": -2.7218708992004395, + "logits/rejected": -3.3106935024261475, + "logps/chosen": -252.21340942382812, + "logps/rejected": -380.5523681640625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0526633262634277, + "rewards/margins": 6.388734817504883, + "rewards/rejected": -7.4413981437683105, + "step": 1886 + }, + { + "epoch": 0.29, + "learning_rate": 1.2763332123035428e-05, + "logits/chosen": -2.1779983043670654, + "logits/rejected": -3.1295013427734375, + "logps/chosen": -132.79013061523438, + "logps/rejected": -282.0060119628906, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3411285877227783, + "rewards/margins": 5.9708147048950195, + "rewards/rejected": -7.311943054199219, + "step": 1887 + }, + { + "epoch": 0.29, + "learning_rate": 1.276259868250428e-05, + "logits/chosen": -2.7142751216888428, + "logits/rejected": -3.0996181964874268, + "logps/chosen": -163.60015869140625, + "logps/rejected": -437.074462890625, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9045951962471008, + "rewards/margins": 8.718310356140137, + "rewards/rejected": -9.622905731201172, + "step": 1888 + }, + { + "epoch": 0.29, + "learning_rate": 1.2761865241973132e-05, + "logits/chosen": -2.129444122314453, + "logits/rejected": -2.798922300338745, + "logps/chosen": -186.83047485351562, + "logps/rejected": -253.77511596679688, + "loss": 0.7269, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3295645713806152, + "rewards/margins": 2.840527057647705, + "rewards/rejected": -5.17009162902832, + "step": 1889 + }, + { + "epoch": 0.29, + "learning_rate": 1.2761131801441984e-05, + "logits/chosen": -2.7248032093048096, + "logits/rejected": -3.215576648712158, + "logps/chosen": -185.61863708496094, + "logps/rejected": -208.53506469726562, + "loss": 3.1059, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.326307773590088, + "rewards/margins": -0.5251684188842773, + "rewards/rejected": -4.8011393547058105, + "step": 1890 + }, + { + "epoch": 0.29, + "learning_rate": 1.2760398360910836e-05, + "logits/chosen": -1.1781541109085083, + "logits/rejected": -2.9040610790252686, + "logps/chosen": -25.644121170043945, + "logps/rejected": -306.8587646484375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.231959581375122, + "rewards/margins": 6.277222633361816, + "rewards/rejected": -7.509181976318359, + "step": 1891 + }, + { + "epoch": 0.29, + "learning_rate": 1.2759664920379687e-05, + "logits/chosen": -1.9485410451889038, + "logits/rejected": -2.7672135829925537, + "logps/chosen": -138.8368377685547, + "logps/rejected": -227.5139923095703, + "loss": 2.1922, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.928077220916748, + "rewards/margins": 2.4920363426208496, + "rewards/rejected": -7.420113563537598, + "step": 1892 + }, + { + "epoch": 0.29, + "learning_rate": 1.275893147984854e-05, + "logits/chosen": -2.2171285152435303, + "logits/rejected": -2.6703367233276367, + "logps/chosen": -191.72544860839844, + "logps/rejected": -240.7936248779297, + "loss": 2.4388, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6222805976867676, + "rewards/margins": -1.6450939178466797, + "rewards/rejected": -1.977186679840088, + "step": 1893 + }, + { + "epoch": 0.29, + "learning_rate": 1.2758198039317393e-05, + "logits/chosen": -2.575350284576416, + "logits/rejected": -3.1535661220550537, + "logps/chosen": -338.10400390625, + "logps/rejected": -365.324462890625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4097000062465668, + "rewards/margins": 5.826254844665527, + "rewards/rejected": -6.235955238342285, + "step": 1894 + }, + { + "epoch": 0.29, + "learning_rate": 1.2757464598786245e-05, + "logits/chosen": -2.182445764541626, + "logits/rejected": -3.349111557006836, + "logps/chosen": -151.23049926757812, + "logps/rejected": -346.23590087890625, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.790878176689148, + "rewards/margins": 5.6542558670043945, + "rewards/rejected": -6.445134162902832, + "step": 1895 + }, + { + "epoch": 0.29, + "learning_rate": 1.2756731158255097e-05, + "logits/chosen": -1.810758113861084, + "logits/rejected": -2.993278741836548, + "logps/chosen": -137.45693969726562, + "logps/rejected": -488.5948486328125, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0481719970703125, + "rewards/margins": 7.682346343994141, + "rewards/rejected": -8.730518341064453, + "step": 1896 + }, + { + "epoch": 0.3, + "learning_rate": 1.2755997717723948e-05, + "logits/chosen": -2.622343063354492, + "logits/rejected": -3.0753023624420166, + "logps/chosen": -113.1982421875, + "logps/rejected": -322.974609375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0976492166519165, + "rewards/margins": 6.692541122436523, + "rewards/rejected": -7.79019021987915, + "step": 1897 + }, + { + "epoch": 0.3, + "learning_rate": 1.27552642771928e-05, + "logits/chosen": -1.9911988973617554, + "logits/rejected": -2.7331080436706543, + "logps/chosen": -180.12637329101562, + "logps/rejected": -489.3421630859375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28240659832954407, + "rewards/margins": 6.867439270019531, + "rewards/rejected": -7.149846076965332, + "step": 1898 + }, + { + "epoch": 0.3, + "learning_rate": 1.2754530836661652e-05, + "logits/chosen": -2.7989449501037598, + "logits/rejected": -3.04630446434021, + "logps/chosen": -171.15194702148438, + "logps/rejected": -333.0019226074219, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7130268216133118, + "rewards/margins": 8.623287200927734, + "rewards/rejected": -9.33631420135498, + "step": 1899 + }, + { + "epoch": 0.3, + "learning_rate": 1.2753797396130504e-05, + "logits/chosen": -3.0170834064483643, + "logits/rejected": -1.6960984468460083, + "logps/chosen": -311.1956787109375, + "logps/rejected": -231.76675415039062, + "loss": 2.8604, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.227655410766602, + "rewards/margins": 0.7265920639038086, + "rewards/rejected": -5.95424747467041, + "step": 1900 + }, + { + "epoch": 0.3, + "learning_rate": 1.2753063955599356e-05, + "logits/chosen": -1.7709153890609741, + "logits/rejected": -3.007349967956543, + "logps/chosen": -87.8845443725586, + "logps/rejected": -243.75241088867188, + "loss": 0.0505, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.549272298812866, + "rewards/margins": 2.960726499557495, + "rewards/rejected": -5.509998798370361, + "step": 1901 + }, + { + "epoch": 0.3, + "learning_rate": 1.2752330515068208e-05, + "logits/chosen": -1.7109955549240112, + "logits/rejected": -3.162358045578003, + "logps/chosen": -117.72268676757812, + "logps/rejected": -508.68402099609375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.02494215965271, + "rewards/margins": 7.147652626037598, + "rewards/rejected": -9.172595024108887, + "step": 1902 + }, + { + "epoch": 0.3, + "learning_rate": 1.2751597074537061e-05, + "logits/chosen": -2.335099935531616, + "logits/rejected": -2.963423728942871, + "logps/chosen": -113.9915542602539, + "logps/rejected": -327.5580139160156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.713775634765625, + "rewards/margins": 8.269861221313477, + "rewards/rejected": -9.983636856079102, + "step": 1903 + }, + { + "epoch": 0.3, + "learning_rate": 1.2750863634005913e-05, + "logits/chosen": -3.0797953605651855, + "logits/rejected": -2.7502856254577637, + "logps/chosen": -121.66130065917969, + "logps/rejected": -293.51593017578125, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9581308364868164, + "rewards/margins": 5.052709102630615, + "rewards/rejected": -6.010839939117432, + "step": 1904 + }, + { + "epoch": 0.3, + "learning_rate": 1.2750130193474765e-05, + "logits/chosen": -3.1922872066497803, + "logits/rejected": -2.274487018585205, + "logps/chosen": -355.14813232421875, + "logps/rejected": -260.8606262207031, + "loss": 5.7432, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.703037738800049, + "rewards/margins": -5.7378363609313965, + "rewards/rejected": 0.03479880094528198, + "step": 1905 + }, + { + "epoch": 0.3, + "learning_rate": 1.2749396752943617e-05, + "logits/chosen": -2.47445011138916, + "logits/rejected": -2.8940765857696533, + "logps/chosen": -209.93235778808594, + "logps/rejected": -194.96829223632812, + "loss": 1.8196, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3908474445343018, + "rewards/margins": 0.648362398147583, + "rewards/rejected": -4.039209842681885, + "step": 1906 + }, + { + "epoch": 0.3, + "learning_rate": 1.2748663312412469e-05, + "logits/chosen": -2.993727684020996, + "logits/rejected": -2.1670634746551514, + "logps/chosen": -239.96339416503906, + "logps/rejected": -121.28551483154297, + "loss": 2.6188, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3305954933166504, + "rewards/margins": 1.7290353775024414, + "rewards/rejected": -5.059630870819092, + "step": 1907 + }, + { + "epoch": 0.3, + "learning_rate": 1.274792987188132e-05, + "logits/chosen": -3.014751434326172, + "logits/rejected": -2.362276792526245, + "logps/chosen": -117.4466323852539, + "logps/rejected": -158.793212890625, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5622737407684326, + "rewards/margins": 6.610895156860352, + "rewards/rejected": -7.173169136047363, + "step": 1908 + }, + { + "epoch": 0.3, + "learning_rate": 1.2747196431350173e-05, + "logits/chosen": -2.90612530708313, + "logits/rejected": -2.6120386123657227, + "logps/chosen": -321.72552490234375, + "logps/rejected": -357.5145263671875, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5832587480545044, + "rewards/margins": 5.35365104675293, + "rewards/rejected": -6.9369096755981445, + "step": 1909 + }, + { + "epoch": 0.3, + "learning_rate": 1.2746462990819025e-05, + "logits/chosen": -3.1901447772979736, + "logits/rejected": -2.3284099102020264, + "logps/chosen": -234.9208984375, + "logps/rejected": -202.3387451171875, + "loss": 2.0666, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.658510208129883, + "rewards/margins": 1.4781162738800049, + "rewards/rejected": -6.136626243591309, + "step": 1910 + }, + { + "epoch": 0.3, + "learning_rate": 1.2745729550287878e-05, + "logits/chosen": -3.1980621814727783, + "logits/rejected": -3.24861216545105, + "logps/chosen": -87.4852066040039, + "logps/rejected": -154.81011962890625, + "loss": 0.028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1398921012878418, + "rewards/margins": 3.6508240699768066, + "rewards/rejected": -4.790716171264648, + "step": 1911 + }, + { + "epoch": 0.3, + "learning_rate": 1.2744996109756732e-05, + "logits/chosen": -2.6365795135498047, + "logits/rejected": -3.31341290473938, + "logps/chosen": -510.3233642578125, + "logps/rejected": -1206.577880859375, + "loss": 2.2439, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7367653846740723, + "rewards/margins": 2.3598439693450928, + "rewards/rejected": -6.096609592437744, + "step": 1912 + }, + { + "epoch": 0.3, + "learning_rate": 1.2744262669225584e-05, + "logits/chosen": -2.1097776889801025, + "logits/rejected": -3.0535390377044678, + "logps/chosen": -77.31069946289062, + "logps/rejected": -344.37823486328125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1819933652877808, + "rewards/margins": 6.932014465332031, + "rewards/rejected": -8.114007949829102, + "step": 1913 + }, + { + "epoch": 0.3, + "learning_rate": 1.2743529228694435e-05, + "logits/chosen": -2.596632480621338, + "logits/rejected": -2.9893722534179688, + "logps/chosen": -311.5046691894531, + "logps/rejected": -406.5333557128906, + "loss": 0.1455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5854864120483398, + "rewards/margins": 3.016831398010254, + "rewards/rejected": -3.6023178100585938, + "step": 1914 + }, + { + "epoch": 0.3, + "learning_rate": 1.2742795788163287e-05, + "logits/chosen": -3.0532498359680176, + "logits/rejected": -2.992544412612915, + "logps/chosen": -302.7030334472656, + "logps/rejected": -484.48748779296875, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3325159549713135, + "rewards/margins": 5.853519439697266, + "rewards/rejected": -7.18603515625, + "step": 1915 + }, + { + "epoch": 0.3, + "learning_rate": 1.274206234763214e-05, + "logits/chosen": -3.1110289096832275, + "logits/rejected": -2.9260125160217285, + "logps/chosen": -180.67404174804688, + "logps/rejected": -129.5473175048828, + "loss": 1.1795, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.683061122894287, + "rewards/margins": 0.6512352228164673, + "rewards/rejected": -3.334296464920044, + "step": 1916 + }, + { + "epoch": 0.3, + "learning_rate": 1.2741328907100991e-05, + "logits/chosen": -3.2440881729125977, + "logits/rejected": -2.766861915588379, + "logps/chosen": -679.2296142578125, + "logps/rejected": -641.845703125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1871261596679688, + "rewards/margins": 8.905242919921875, + "rewards/rejected": -10.092369079589844, + "step": 1917 + }, + { + "epoch": 0.3, + "learning_rate": 1.2740595466569843e-05, + "logits/chosen": -2.719398260116577, + "logits/rejected": -3.156703233718872, + "logps/chosen": -372.6186828613281, + "logps/rejected": -533.5167236328125, + "loss": 0.1409, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5071911811828613, + "rewards/margins": 4.502715110778809, + "rewards/rejected": -6.009905815124512, + "step": 1918 + }, + { + "epoch": 0.3, + "learning_rate": 1.2739862026038695e-05, + "logits/chosen": -1.7039766311645508, + "logits/rejected": -3.002758264541626, + "logps/chosen": -152.30653381347656, + "logps/rejected": -590.3674926757812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9341694116592407, + "rewards/margins": 12.027685165405273, + "rewards/rejected": -13.961854934692383, + "step": 1919 + }, + { + "epoch": 0.3, + "learning_rate": 1.2739128585507547e-05, + "logits/chosen": -2.1460366249084473, + "logits/rejected": -2.9756336212158203, + "logps/chosen": -347.0442199707031, + "logps/rejected": -276.6570739746094, + "loss": 1.4663, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.0034546852111816, + "rewards/margins": 1.6741609573364258, + "rewards/rejected": -3.6776154041290283, + "step": 1920 + }, + { + "epoch": 0.3, + "learning_rate": 1.27383951449764e-05, + "logits/chosen": -2.8877429962158203, + "logits/rejected": -3.3089959621429443, + "logps/chosen": -573.0952758789062, + "logps/rejected": -461.77850341796875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2354099452495575, + "rewards/margins": 6.692660331726074, + "rewards/rejected": -6.928070068359375, + "step": 1921 + }, + { + "epoch": 0.3, + "learning_rate": 1.2737661704445252e-05, + "logits/chosen": -2.218223810195923, + "logits/rejected": -3.2640202045440674, + "logps/chosen": -183.21914672851562, + "logps/rejected": -416.5417175292969, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0815749168395996, + "rewards/margins": 6.408022403717041, + "rewards/rejected": -9.48959732055664, + "step": 1922 + }, + { + "epoch": 0.3, + "learning_rate": 1.2736928263914104e-05, + "logits/chosen": -2.790388822555542, + "logits/rejected": -3.257737874984741, + "logps/chosen": -179.79348754882812, + "logps/rejected": -301.082275390625, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7924141883850098, + "rewards/margins": 4.391735076904297, + "rewards/rejected": -6.184149265289307, + "step": 1923 + }, + { + "epoch": 0.3, + "learning_rate": 1.2736194823382956e-05, + "logits/chosen": -3.2345285415649414, + "logits/rejected": -3.1573903560638428, + "logps/chosen": -82.73130798339844, + "logps/rejected": -174.91932678222656, + "loss": 1.3067, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6265716552734375, + "rewards/margins": 1.3217520713806152, + "rewards/rejected": -3.9483237266540527, + "step": 1924 + }, + { + "epoch": 0.3, + "learning_rate": 1.2735461382851808e-05, + "logits/chosen": -2.579310178756714, + "logits/rejected": -2.7732551097869873, + "logps/chosen": -249.59274291992188, + "logps/rejected": -312.09063720703125, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3156242370605469, + "rewards/margins": 4.303831100463867, + "rewards/rejected": -5.619455337524414, + "step": 1925 + }, + { + "epoch": 0.3, + "learning_rate": 1.273472794232066e-05, + "logits/chosen": -2.978945016860962, + "logits/rejected": -1.3223607540130615, + "logps/chosen": -266.8606262207031, + "logps/rejected": -150.92459106445312, + "loss": 3.3136, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.447924613952637, + "rewards/margins": -1.918434977531433, + "rewards/rejected": -2.529489755630493, + "step": 1926 + }, + { + "epoch": 0.3, + "learning_rate": 1.2733994501789512e-05, + "logits/chosen": -3.25956654548645, + "logits/rejected": -3.310076951980591, + "logps/chosen": -97.79985046386719, + "logps/rejected": -172.9566650390625, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.124752163887024, + "rewards/margins": 5.5083417892456055, + "rewards/rejected": -6.633094310760498, + "step": 1927 + }, + { + "epoch": 0.3, + "learning_rate": 1.2733261061258363e-05, + "logits/chosen": -2.8741073608398438, + "logits/rejected": -3.178345203399658, + "logps/chosen": -158.90625, + "logps/rejected": -320.1613464355469, + "loss": 0.3692, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7065377235412598, + "rewards/margins": 2.5714714527130127, + "rewards/rejected": -4.278009414672852, + "step": 1928 + }, + { + "epoch": 0.3, + "learning_rate": 1.2732527620727217e-05, + "logits/chosen": -1.1164155006408691, + "logits/rejected": -2.895185947418213, + "logps/chosen": -58.23388671875, + "logps/rejected": -147.14736938476562, + "loss": 1.7366, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1514012813568115, + "rewards/margins": 0.9954724311828613, + "rewards/rejected": -4.146873474121094, + "step": 1929 + }, + { + "epoch": 0.3, + "learning_rate": 1.2731794180196069e-05, + "logits/chosen": -1.1697489023208618, + "logits/rejected": -3.0443713665008545, + "logps/chosen": -72.94789123535156, + "logps/rejected": -360.78533935546875, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.619614601135254, + "rewards/margins": 4.191086769104004, + "rewards/rejected": -5.810701370239258, + "step": 1930 + }, + { + "epoch": 0.3, + "learning_rate": 1.273106073966492e-05, + "logits/chosen": -3.1325595378875732, + "logits/rejected": -2.663228988647461, + "logps/chosen": -185.4744873046875, + "logps/rejected": -184.18557739257812, + "loss": 2.582, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.37655782699585, + "rewards/margins": 0.41509342193603516, + "rewards/rejected": -4.791651725769043, + "step": 1931 + }, + { + "epoch": 0.3, + "learning_rate": 1.2730327299133773e-05, + "logits/chosen": -2.225116491317749, + "logits/rejected": -3.1709206104278564, + "logps/chosen": -494.93035888671875, + "logps/rejected": -754.635498046875, + "loss": 0.0354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40560227632522583, + "rewards/margins": 5.115859031677246, + "rewards/rejected": -4.710257053375244, + "step": 1932 + }, + { + "epoch": 0.3, + "learning_rate": 1.2729593858602625e-05, + "logits/chosen": -1.6654162406921387, + "logits/rejected": -3.0265071392059326, + "logps/chosen": -191.9092559814453, + "logps/rejected": -396.23577880859375, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9681236743927002, + "rewards/margins": 4.516003131866455, + "rewards/rejected": -5.484126567840576, + "step": 1933 + }, + { + "epoch": 0.3, + "learning_rate": 1.2728860418071476e-05, + "logits/chosen": -1.8333446979522705, + "logits/rejected": -3.0208640098571777, + "logps/chosen": -107.85592651367188, + "logps/rejected": -499.16851806640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.72910737991333, + "rewards/margins": 9.386970520019531, + "rewards/rejected": -11.116077423095703, + "step": 1934 + }, + { + "epoch": 0.3, + "learning_rate": 1.2728126977540328e-05, + "logits/chosen": -1.692178726196289, + "logits/rejected": -3.160637855529785, + "logps/chosen": -53.099884033203125, + "logps/rejected": -225.93798828125, + "loss": 0.0705, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.412679672241211, + "rewards/margins": 3.9403605461120605, + "rewards/rejected": -5.35304069519043, + "step": 1935 + }, + { + "epoch": 0.3, + "learning_rate": 1.272739353700918e-05, + "logits/chosen": -3.2419700622558594, + "logits/rejected": -1.9888721704483032, + "logps/chosen": -257.5398254394531, + "logps/rejected": -134.42970275878906, + "loss": 0.1715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8160774111747742, + "rewards/margins": 3.2045578956604004, + "rewards/rejected": -4.02063512802124, + "step": 1936 + }, + { + "epoch": 0.3, + "learning_rate": 1.2726660096478032e-05, + "logits/chosen": -2.6238315105438232, + "logits/rejected": -3.0425684452056885, + "logps/chosen": -41.93648910522461, + "logps/rejected": -105.63851165771484, + "loss": 0.0427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7303082942962646, + "rewards/margins": 3.946812629699707, + "rewards/rejected": -4.677121162414551, + "step": 1937 + }, + { + "epoch": 0.3, + "learning_rate": 1.2725926655946886e-05, + "logits/chosen": -2.1854143142700195, + "logits/rejected": -2.9458863735198975, + "logps/chosen": -397.74713134765625, + "logps/rejected": -581.7261962890625, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.595819115638733, + "rewards/margins": 4.8889617919921875, + "rewards/rejected": -6.484780788421631, + "step": 1938 + }, + { + "epoch": 0.3, + "learning_rate": 1.2725193215415738e-05, + "logits/chosen": -3.2786269187927246, + "logits/rejected": -2.3823771476745605, + "logps/chosen": -270.89935302734375, + "logps/rejected": -84.02336883544922, + "loss": 3.0423, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.016077518463135, + "rewards/margins": -2.992809295654297, + "rewards/rejected": -1.023268222808838, + "step": 1939 + }, + { + "epoch": 0.3, + "learning_rate": 1.272445977488459e-05, + "logits/chosen": -1.7568094730377197, + "logits/rejected": -2.79945707321167, + "logps/chosen": -178.3621368408203, + "logps/rejected": -236.48715209960938, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7793426513671875, + "rewards/margins": 5.376391887664795, + "rewards/rejected": -6.155734539031982, + "step": 1940 + }, + { + "epoch": 0.3, + "learning_rate": 1.2723726334353441e-05, + "logits/chosen": -1.7920461893081665, + "logits/rejected": -3.157824754714966, + "logps/chosen": -72.83695220947266, + "logps/rejected": -367.65155029296875, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9600727558135986, + "rewards/margins": 6.14077091217041, + "rewards/rejected": -7.10084342956543, + "step": 1941 + }, + { + "epoch": 0.3, + "learning_rate": 1.2722992893822293e-05, + "logits/chosen": -1.4638147354125977, + "logits/rejected": -2.865168571472168, + "logps/chosen": -199.73069763183594, + "logps/rejected": -506.2091064453125, + "loss": 2.8007, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.445127487182617, + "rewards/margins": 1.1095757484436035, + "rewards/rejected": -5.5547027587890625, + "step": 1942 + }, + { + "epoch": 0.3, + "learning_rate": 1.2722259453291145e-05, + "logits/chosen": -2.736464262008667, + "logits/rejected": -3.343398094177246, + "logps/chosen": -57.263099670410156, + "logps/rejected": -189.45413208007812, + "loss": 0.0742, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2114405632019043, + "rewards/margins": 2.6260409355163574, + "rewards/rejected": -3.8374814987182617, + "step": 1943 + }, + { + "epoch": 0.3, + "learning_rate": 1.2721526012759997e-05, + "logits/chosen": -3.1479954719543457, + "logits/rejected": -0.9792830348014832, + "logps/chosen": -572.176025390625, + "logps/rejected": -146.9940948486328, + "loss": 1.7558, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.220968008041382, + "rewards/margins": 0.505353569984436, + "rewards/rejected": -3.7263216972351074, + "step": 1944 + }, + { + "epoch": 0.3, + "learning_rate": 1.272079257222885e-05, + "logits/chosen": -2.477781057357788, + "logits/rejected": -3.119492292404175, + "logps/chosen": -173.62881469726562, + "logps/rejected": -415.7715759277344, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9849331378936768, + "rewards/margins": 7.1660919189453125, + "rewards/rejected": -9.15102481842041, + "step": 1945 + }, + { + "epoch": 0.3, + "learning_rate": 1.2720059131697702e-05, + "logits/chosen": -3.077802896499634, + "logits/rejected": -2.249516487121582, + "logps/chosen": -210.2900848388672, + "logps/rejected": -340.1246337890625, + "loss": 2.8852, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.407086133956909, + "rewards/margins": 2.121307611465454, + "rewards/rejected": -5.528393745422363, + "step": 1946 + }, + { + "epoch": 0.3, + "learning_rate": 1.2719325691166556e-05, + "logits/chosen": -3.201256036758423, + "logits/rejected": -3.2595913410186768, + "logps/chosen": -181.07220458984375, + "logps/rejected": -271.14056396484375, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.248151421546936, + "rewards/margins": 5.261378288269043, + "rewards/rejected": -6.509530067443848, + "step": 1947 + }, + { + "epoch": 0.3, + "learning_rate": 1.2718592250635408e-05, + "logits/chosen": -2.898707866668701, + "logits/rejected": -2.693424701690674, + "logps/chosen": -265.4091491699219, + "logps/rejected": -336.59808349609375, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2268314361572266, + "rewards/margins": 4.853877544403076, + "rewards/rejected": -8.080709457397461, + "step": 1948 + }, + { + "epoch": 0.3, + "learning_rate": 1.271785881010426e-05, + "logits/chosen": -3.0278117656707764, + "logits/rejected": -3.228473424911499, + "logps/chosen": -257.5982971191406, + "logps/rejected": -314.752197265625, + "loss": 0.0984, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6186695098876953, + "rewards/margins": 2.51600980758667, + "rewards/rejected": -4.134679317474365, + "step": 1949 + }, + { + "epoch": 0.3, + "learning_rate": 1.2717125369573112e-05, + "logits/chosen": -2.643516778945923, + "logits/rejected": -3.1602697372436523, + "logps/chosen": -85.60305786132812, + "logps/rejected": -273.2576904296875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1162575483322144, + "rewards/margins": 6.609018802642822, + "rewards/rejected": -7.725276470184326, + "step": 1950 + }, + { + "epoch": 0.3, + "learning_rate": 1.2716391929041963e-05, + "logits/chosen": -2.938617706298828, + "logits/rejected": -3.1481740474700928, + "logps/chosen": -357.40130615234375, + "logps/rejected": -406.12872314453125, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8781477212905884, + "rewards/margins": 4.365584373474121, + "rewards/rejected": -6.243732452392578, + "step": 1951 + }, + { + "epoch": 0.3, + "learning_rate": 1.2715658488510815e-05, + "logits/chosen": -2.180706739425659, + "logits/rejected": -2.257262945175171, + "logps/chosen": -487.186279296875, + "logps/rejected": -441.87347412109375, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3492717742919922, + "rewards/margins": 5.580499172210693, + "rewards/rejected": -6.9297709465026855, + "step": 1952 + }, + { + "epoch": 0.3, + "learning_rate": 1.2714925047979667e-05, + "logits/chosen": -2.4854190349578857, + "logits/rejected": -3.2531349658966064, + "logps/chosen": -467.3943786621094, + "logps/rejected": -450.79571533203125, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5696884393692017, + "rewards/margins": 4.736720085144043, + "rewards/rejected": -5.306408882141113, + "step": 1953 + }, + { + "epoch": 0.3, + "learning_rate": 1.2714191607448519e-05, + "logits/chosen": -2.294127941131592, + "logits/rejected": -3.2706899642944336, + "logps/chosen": -74.8086166381836, + "logps/rejected": -148.98146057128906, + "loss": 1.9217, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3002777099609375, + "rewards/margins": 0.866387128829956, + "rewards/rejected": -3.1666648387908936, + "step": 1954 + }, + { + "epoch": 0.3, + "learning_rate": 1.2713458166917371e-05, + "logits/chosen": -0.8327382802963257, + "logits/rejected": -2.9988021850585938, + "logps/chosen": -38.126983642578125, + "logps/rejected": -361.46099853515625, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1041309833526611, + "rewards/margins": 5.432646751403809, + "rewards/rejected": -6.536777973175049, + "step": 1955 + }, + { + "epoch": 0.3, + "learning_rate": 1.2712724726386225e-05, + "logits/chosen": -3.046076536178589, + "logits/rejected": -3.14996600151062, + "logps/chosen": -379.1412658691406, + "logps/rejected": -322.95709228515625, + "loss": 0.0273, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43818703293800354, + "rewards/margins": 3.8930585384368896, + "rewards/rejected": -4.331245422363281, + "step": 1956 + }, + { + "epoch": 0.3, + "learning_rate": 1.2711991285855076e-05, + "logits/chosen": -1.4878785610198975, + "logits/rejected": -2.353121042251587, + "logps/chosen": -140.67066955566406, + "logps/rejected": -359.7408752441406, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1174705028533936, + "rewards/margins": 6.87717342376709, + "rewards/rejected": -7.9946441650390625, + "step": 1957 + }, + { + "epoch": 0.3, + "learning_rate": 1.2711257845323928e-05, + "logits/chosen": -2.6167073249816895, + "logits/rejected": -3.0919933319091797, + "logps/chosen": -181.22055053710938, + "logps/rejected": -490.03204345703125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5249030590057373, + "rewards/margins": 8.803248405456543, + "rewards/rejected": -10.32815170288086, + "step": 1958 + }, + { + "epoch": 0.3, + "learning_rate": 1.271052440479278e-05, + "logits/chosen": -1.5961449146270752, + "logits/rejected": -3.061497926712036, + "logps/chosen": -56.14308166503906, + "logps/rejected": -352.1688232421875, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2034153938293457, + "rewards/margins": 6.469733715057373, + "rewards/rejected": -7.673149108886719, + "step": 1959 + }, + { + "epoch": 0.3, + "learning_rate": 1.2709790964261632e-05, + "logits/chosen": -3.2993557453155518, + "logits/rejected": -2.9244894981384277, + "logps/chosen": -185.36700439453125, + "logps/rejected": -163.68727111816406, + "loss": 1.295, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2008228302001953, + "rewards/margins": 1.9325703382492065, + "rewards/rejected": -4.133393287658691, + "step": 1960 + }, + { + "epoch": 0.3, + "learning_rate": 1.2709057523730484e-05, + "logits/chosen": -1.4247850179672241, + "logits/rejected": -3.1308326721191406, + "logps/chosen": -244.97036743164062, + "logps/rejected": -385.5699157714844, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2011855840682983, + "rewards/margins": 6.0452399253845215, + "rewards/rejected": -7.246425628662109, + "step": 1961 + }, + { + "epoch": 0.31, + "learning_rate": 1.2708324083199336e-05, + "logits/chosen": -3.2876815795898438, + "logits/rejected": -3.025510549545288, + "logps/chosen": -256.28790283203125, + "logps/rejected": -242.1424560546875, + "loss": 2.9636, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3179919719696045, + "rewards/margins": -0.3067626953125, + "rewards/rejected": -3.0112292766571045, + "step": 1962 + }, + { + "epoch": 0.31, + "learning_rate": 1.2707590642668188e-05, + "logits/chosen": -3.2408370971679688, + "logits/rejected": -3.13926362991333, + "logps/chosen": -270.75531005859375, + "logps/rejected": -158.98495483398438, + "loss": 1.7534, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1842041015625, + "rewards/margins": -0.27169203758239746, + "rewards/rejected": -2.9125120639801025, + "step": 1963 + }, + { + "epoch": 0.31, + "learning_rate": 1.270685720213704e-05, + "logits/chosen": -1.854577660560608, + "logits/rejected": -3.035132646560669, + "logps/chosen": -126.15387725830078, + "logps/rejected": -220.24082946777344, + "loss": 0.7457, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7523958683013916, + "rewards/margins": 1.289236068725586, + "rewards/rejected": -4.041631698608398, + "step": 1964 + }, + { + "epoch": 0.31, + "learning_rate": 1.2706123761605893e-05, + "logits/chosen": -3.0964765548706055, + "logits/rejected": -3.194896697998047, + "logps/chosen": -80.12701416015625, + "logps/rejected": -194.12435913085938, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.394454836845398, + "rewards/margins": 4.687054634094238, + "rewards/rejected": -6.081509590148926, + "step": 1965 + }, + { + "epoch": 0.31, + "learning_rate": 1.2705390321074745e-05, + "logits/chosen": -3.043616533279419, + "logits/rejected": -2.918105363845825, + "logps/chosen": -653.7625122070312, + "logps/rejected": -509.813720703125, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7653156518936157, + "rewards/margins": 4.90699577331543, + "rewards/rejected": -6.672311305999756, + "step": 1966 + }, + { + "epoch": 0.31, + "learning_rate": 1.2704656880543597e-05, + "logits/chosen": -1.6745318174362183, + "logits/rejected": -2.9570634365081787, + "logps/chosen": -115.7116470336914, + "logps/rejected": -337.41485595703125, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3083384037017822, + "rewards/margins": 4.101616382598877, + "rewards/rejected": -5.409955024719238, + "step": 1967 + }, + { + "epoch": 0.31, + "learning_rate": 1.2703923440012449e-05, + "logits/chosen": -2.126779794692993, + "logits/rejected": -3.1042051315307617, + "logps/chosen": -62.0784912109375, + "logps/rejected": -262.6535339355469, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.222533702850342, + "rewards/margins": 5.199671268463135, + "rewards/rejected": -7.422204971313477, + "step": 1968 + }, + { + "epoch": 0.31, + "learning_rate": 1.27031899994813e-05, + "logits/chosen": -3.079101324081421, + "logits/rejected": -1.7000857591629028, + "logps/chosen": -798.9976806640625, + "logps/rejected": -355.19671630859375, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9058868885040283, + "rewards/margins": 4.96562385559082, + "rewards/rejected": -5.8715105056762695, + "step": 1969 + }, + { + "epoch": 0.31, + "learning_rate": 1.2702456558950153e-05, + "logits/chosen": -2.965465784072876, + "logits/rejected": -3.1677889823913574, + "logps/chosen": -120.58226013183594, + "logps/rejected": -291.4734802246094, + "loss": 0.1863, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3925529718399048, + "rewards/margins": 3.2563059329986572, + "rewards/rejected": -4.648859024047852, + "step": 1970 + }, + { + "epoch": 0.31, + "learning_rate": 1.2701723118419004e-05, + "logits/chosen": -3.174258232116699, + "logits/rejected": -2.7677042484283447, + "logps/chosen": -476.2430419921875, + "logps/rejected": -494.5754699707031, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0262020826339722, + "rewards/margins": 5.388777732849121, + "rewards/rejected": -6.414979934692383, + "step": 1971 + }, + { + "epoch": 0.31, + "learning_rate": 1.2700989677887856e-05, + "logits/chosen": -2.4017140865325928, + "logits/rejected": -3.384683132171631, + "logps/chosen": -337.07421875, + "logps/rejected": -393.8434753417969, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1054048538208008, + "rewards/margins": 5.049482822418213, + "rewards/rejected": -6.154887676239014, + "step": 1972 + }, + { + "epoch": 0.31, + "learning_rate": 1.2700256237356708e-05, + "logits/chosen": -2.72141695022583, + "logits/rejected": -3.038071632385254, + "logps/chosen": -601.4791259765625, + "logps/rejected": -536.0370483398438, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8215690851211548, + "rewards/margins": 6.803324222564697, + "rewards/rejected": -7.6248931884765625, + "step": 1973 + }, + { + "epoch": 0.31, + "learning_rate": 1.2699522796825562e-05, + "logits/chosen": -1.8927526473999023, + "logits/rejected": -2.973376512527466, + "logps/chosen": -94.7728271484375, + "logps/rejected": -285.7079772949219, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9279732704162598, + "rewards/margins": 6.304250717163086, + "rewards/rejected": -8.232223510742188, + "step": 1974 + }, + { + "epoch": 0.31, + "learning_rate": 1.2698789356294414e-05, + "logits/chosen": -2.552461862564087, + "logits/rejected": -3.185837745666504, + "logps/chosen": -60.132450103759766, + "logps/rejected": -283.431884765625, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.826145052909851, + "rewards/margins": 5.169243812561035, + "rewards/rejected": -6.995388984680176, + "step": 1975 + }, + { + "epoch": 0.31, + "learning_rate": 1.2698055915763266e-05, + "logits/chosen": -3.076385259628296, + "logits/rejected": -3.257277011871338, + "logps/chosen": -415.090576171875, + "logps/rejected": -593.147705078125, + "loss": 3.9737, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.885831356048584, + "rewards/margins": -1.7590446472167969, + "rewards/rejected": -3.126786708831787, + "step": 1976 + }, + { + "epoch": 0.31, + "learning_rate": 1.2697322475232117e-05, + "logits/chosen": -3.077754020690918, + "logits/rejected": -2.9935672283172607, + "logps/chosen": -459.7006530761719, + "logps/rejected": -431.2452392578125, + "loss": 2.521, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1693246364593506, + "rewards/margins": -0.3110237121582031, + "rewards/rejected": -2.8583009243011475, + "step": 1977 + }, + { + "epoch": 0.31, + "learning_rate": 1.269658903470097e-05, + "logits/chosen": -1.9226384162902832, + "logits/rejected": -2.8515782356262207, + "logps/chosen": -200.07095336914062, + "logps/rejected": -469.9429931640625, + "loss": 0.039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7512238025665283, + "rewards/margins": 4.680604934692383, + "rewards/rejected": -6.43182897567749, + "step": 1978 + }, + { + "epoch": 0.31, + "learning_rate": 1.2695855594169823e-05, + "logits/chosen": -3.088106155395508, + "logits/rejected": -1.7972674369812012, + "logps/chosen": -378.07293701171875, + "logps/rejected": -307.933837890625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6424057483673096, + "rewards/margins": 6.435541152954102, + "rewards/rejected": -8.077946662902832, + "step": 1979 + }, + { + "epoch": 0.31, + "learning_rate": 1.2695122153638675e-05, + "logits/chosen": -1.7119184732437134, + "logits/rejected": -3.1817047595977783, + "logps/chosen": -228.8963623046875, + "logps/rejected": -573.63671875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2788283824920654, + "rewards/margins": 7.980236053466797, + "rewards/rejected": -9.259063720703125, + "step": 1980 + }, + { + "epoch": 0.31, + "learning_rate": 1.2694388713107527e-05, + "logits/chosen": -2.636261463165283, + "logits/rejected": -3.153135061264038, + "logps/chosen": -202.62527465820312, + "logps/rejected": -247.90419006347656, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2704452574253082, + "rewards/margins": 5.676351070404053, + "rewards/rejected": -5.946796417236328, + "step": 1981 + }, + { + "epoch": 0.31, + "learning_rate": 1.2693655272576378e-05, + "logits/chosen": -1.689858317375183, + "logits/rejected": -3.186115026473999, + "logps/chosen": -40.965179443359375, + "logps/rejected": -306.88482666015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7072619199752808, + "rewards/margins": 8.696817398071289, + "rewards/rejected": -9.40407943725586, + "step": 1982 + }, + { + "epoch": 0.31, + "learning_rate": 1.2692921832045232e-05, + "logits/chosen": -3.1106619834899902, + "logits/rejected": -2.2524518966674805, + "logps/chosen": -225.2075653076172, + "logps/rejected": -201.25711059570312, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.096012830734253, + "rewards/margins": 5.1148552894592285, + "rewards/rejected": -6.210867881774902, + "step": 1983 + }, + { + "epoch": 0.31, + "learning_rate": 1.2692188391514084e-05, + "logits/chosen": -3.0550098419189453, + "logits/rejected": -2.8628203868865967, + "logps/chosen": -437.5274658203125, + "logps/rejected": -471.3411560058594, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7005119323730469, + "rewards/margins": 6.536657333374023, + "rewards/rejected": -7.23716926574707, + "step": 1984 + }, + { + "epoch": 0.31, + "learning_rate": 1.2691454950982936e-05, + "logits/chosen": -3.066694736480713, + "logits/rejected": -3.0042965412139893, + "logps/chosen": -533.5729370117188, + "logps/rejected": -502.4361572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0842911005020142, + "rewards/margins": 9.715984344482422, + "rewards/rejected": -10.800274848937988, + "step": 1985 + }, + { + "epoch": 0.31, + "learning_rate": 1.2690721510451788e-05, + "logits/chosen": -2.149393081665039, + "logits/rejected": -2.974886655807495, + "logps/chosen": -243.0892333984375, + "logps/rejected": -436.114013671875, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6388801336288452, + "rewards/margins": 6.016155242919922, + "rewards/rejected": -6.655035495758057, + "step": 1986 + }, + { + "epoch": 0.31, + "learning_rate": 1.268998806992064e-05, + "logits/chosen": -2.5636796951293945, + "logits/rejected": -3.2434961795806885, + "logps/chosen": -133.38145446777344, + "logps/rejected": -384.7789306640625, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9852290153503418, + "rewards/margins": 3.9039111137390137, + "rewards/rejected": -5.8891401290893555, + "step": 1987 + }, + { + "epoch": 0.31, + "learning_rate": 1.2689254629389491e-05, + "logits/chosen": -2.843031167984009, + "logits/rejected": -3.2625057697296143, + "logps/chosen": -343.2156066894531, + "logps/rejected": -359.70587158203125, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.017089605331421, + "rewards/margins": 4.675178527832031, + "rewards/rejected": -6.692268371582031, + "step": 1988 + }, + { + "epoch": 0.31, + "learning_rate": 1.2688521188858343e-05, + "logits/chosen": -2.7379672527313232, + "logits/rejected": -3.1307454109191895, + "logps/chosen": -459.127685546875, + "logps/rejected": -496.783447265625, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2571334838867188, + "rewards/margins": 5.6442999839782715, + "rewards/rejected": -6.901432991027832, + "step": 1989 + }, + { + "epoch": 0.31, + "learning_rate": 1.2687787748327195e-05, + "logits/chosen": -3.127418279647827, + "logits/rejected": -3.1866097450256348, + "logps/chosen": -115.22846984863281, + "logps/rejected": -318.7306823730469, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.180267572402954, + "rewards/margins": 4.89525842666626, + "rewards/rejected": -7.075526237487793, + "step": 1990 + }, + { + "epoch": 0.31, + "learning_rate": 1.2687054307796047e-05, + "logits/chosen": -2.4789459705352783, + "logits/rejected": -3.02986478805542, + "logps/chosen": -614.6204223632812, + "logps/rejected": -797.3856201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4794068336486816, + "rewards/margins": 10.536439895629883, + "rewards/rejected": -12.015846252441406, + "step": 1991 + }, + { + "epoch": 0.31, + "learning_rate": 1.26863208672649e-05, + "logits/chosen": -2.9991860389709473, + "logits/rejected": -1.875759482383728, + "logps/chosen": -603.1965942382812, + "logps/rejected": -290.6835021972656, + "loss": 2.0362, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.3104963302612305, + "rewards/margins": -0.05512499809265137, + "rewards/rejected": -5.255371570587158, + "step": 1992 + }, + { + "epoch": 0.31, + "learning_rate": 1.2685587426733753e-05, + "logits/chosen": -3.0127460956573486, + "logits/rejected": -3.1394968032836914, + "logps/chosen": -104.60066223144531, + "logps/rejected": -286.09063720703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4293050765991211, + "rewards/margins": 8.554786682128906, + "rewards/rejected": -8.984092712402344, + "step": 1993 + }, + { + "epoch": 0.31, + "learning_rate": 1.2684853986202604e-05, + "logits/chosen": -2.6808319091796875, + "logits/rejected": -2.8754138946533203, + "logps/chosen": -330.2372131347656, + "logps/rejected": -556.950439453125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6672933101654053, + "rewards/margins": 5.965839862823486, + "rewards/rejected": -7.6331329345703125, + "step": 1994 + }, + { + "epoch": 0.31, + "learning_rate": 1.2684120545671456e-05, + "logits/chosen": -1.356908917427063, + "logits/rejected": -3.050992727279663, + "logps/chosen": -79.32408142089844, + "logps/rejected": -251.30624389648438, + "loss": 0.0644, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3819329738616943, + "rewards/margins": 4.2789106369018555, + "rewards/rejected": -6.660843849182129, + "step": 1995 + }, + { + "epoch": 0.31, + "learning_rate": 1.2683387105140308e-05, + "logits/chosen": -2.3820302486419678, + "logits/rejected": -3.138446569442749, + "logps/chosen": -208.70030212402344, + "logps/rejected": -346.61260986328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0003604888916015625, + "rewards/margins": 9.651134490966797, + "rewards/rejected": -9.651494979858398, + "step": 1996 + }, + { + "epoch": 0.31, + "learning_rate": 1.268265366460916e-05, + "logits/chosen": -3.0150339603424072, + "logits/rejected": -3.080225944519043, + "logps/chosen": -49.656944274902344, + "logps/rejected": -192.5098114013672, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4579904079437256, + "rewards/margins": 6.885379791259766, + "rewards/rejected": -8.34337043762207, + "step": 1997 + }, + { + "epoch": 0.31, + "learning_rate": 1.2681920224078012e-05, + "logits/chosen": -2.542968511581421, + "logits/rejected": -3.236478090286255, + "logps/chosen": -31.41241455078125, + "logps/rejected": -230.678466796875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2633752822875977, + "rewards/margins": 7.0903472900390625, + "rewards/rejected": -9.35372257232666, + "step": 1998 + }, + { + "epoch": 0.31, + "learning_rate": 1.2681186783546864e-05, + "logits/chosen": -3.0694754123687744, + "logits/rejected": -3.105778217315674, + "logps/chosen": -596.3888549804688, + "logps/rejected": -820.0621337890625, + "loss": 1.9031, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.31439208984375, + "rewards/margins": 1.3104959726333618, + "rewards/rejected": -5.624887943267822, + "step": 1999 + }, + { + "epoch": 0.31, + "learning_rate": 1.2680453343015716e-05, + "logits/chosen": -2.8810994625091553, + "logits/rejected": -3.1451921463012695, + "logps/chosen": -52.80677795410156, + "logps/rejected": -184.4357147216797, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8220434188842773, + "rewards/margins": 5.639578819274902, + "rewards/rejected": -8.46162223815918, + "step": 2000 + }, + { + "epoch": 0.31, + "learning_rate": 1.267971990248457e-05, + "logits/chosen": -2.660527229309082, + "logits/rejected": -3.128012180328369, + "logps/chosen": -214.748046875, + "logps/rejected": -119.46924591064453, + "loss": 3.1428, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.655256748199463, + "rewards/margins": -0.8782334327697754, + "rewards/rejected": -3.7770233154296875, + "step": 2001 + }, + { + "epoch": 0.31, + "learning_rate": 1.2678986461953421e-05, + "logits/chosen": -2.50106143951416, + "logits/rejected": -3.052295446395874, + "logps/chosen": -291.3909912109375, + "logps/rejected": -271.9609069824219, + "loss": 3.5161, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.218643665313721, + "rewards/margins": 1.2464609146118164, + "rewards/rejected": -6.465104579925537, + "step": 2002 + }, + { + "epoch": 0.31, + "learning_rate": 1.2678253021422273e-05, + "logits/chosen": -3.171638250350952, + "logits/rejected": -3.1716086864471436, + "logps/chosen": -104.32722473144531, + "logps/rejected": -404.0408935546875, + "loss": 0.7075, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1406962871551514, + "rewards/margins": 5.791453838348389, + "rewards/rejected": -7.932150363922119, + "step": 2003 + }, + { + "epoch": 0.31, + "learning_rate": 1.2677519580891125e-05, + "logits/chosen": -2.3448545932769775, + "logits/rejected": -3.0522191524505615, + "logps/chosen": -292.1325378417969, + "logps/rejected": -564.5125122070312, + "loss": 4.2683, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.649521827697754, + "rewards/margins": -0.775672197341919, + "rewards/rejected": -3.873849630355835, + "step": 2004 + }, + { + "epoch": 0.31, + "learning_rate": 1.2676786140359977e-05, + "logits/chosen": -2.0716569423675537, + "logits/rejected": -3.1123034954071045, + "logps/chosen": -213.77499389648438, + "logps/rejected": -439.67901611328125, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.180513381958008, + "rewards/margins": 5.614904403686523, + "rewards/rejected": -7.795417785644531, + "step": 2005 + }, + { + "epoch": 0.31, + "learning_rate": 1.2676052699828829e-05, + "logits/chosen": -1.607966423034668, + "logits/rejected": -3.1810030937194824, + "logps/chosen": -406.0499572753906, + "logps/rejected": -574.9641723632812, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2512741088867188, + "rewards/margins": 4.76865816116333, + "rewards/rejected": -6.019932746887207, + "step": 2006 + }, + { + "epoch": 0.31, + "learning_rate": 1.267531925929768e-05, + "logits/chosen": -2.502656936645508, + "logits/rejected": -3.16013503074646, + "logps/chosen": -380.9285583496094, + "logps/rejected": -450.50677490234375, + "loss": 0.8566, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.638451099395752, + "rewards/margins": 1.4802579879760742, + "rewards/rejected": -4.118709087371826, + "step": 2007 + }, + { + "epoch": 0.31, + "learning_rate": 1.2674585818766532e-05, + "logits/chosen": -3.020303964614868, + "logits/rejected": -2.976778745651245, + "logps/chosen": -95.58686065673828, + "logps/rejected": -168.4641876220703, + "loss": 1.0279, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0385754108428955, + "rewards/margins": 2.668644428253174, + "rewards/rejected": -5.707220077514648, + "step": 2008 + }, + { + "epoch": 0.31, + "learning_rate": 1.2673852378235384e-05, + "logits/chosen": -1.2444506883621216, + "logits/rejected": -2.8518121242523193, + "logps/chosen": -49.36595916748047, + "logps/rejected": -171.57135009765625, + "loss": 0.4851, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.583710193634033, + "rewards/margins": 2.3368875980377197, + "rewards/rejected": -4.920597553253174, + "step": 2009 + }, + { + "epoch": 0.31, + "learning_rate": 1.2673118937704238e-05, + "logits/chosen": -2.7175800800323486, + "logits/rejected": -3.029716730117798, + "logps/chosen": -425.6129150390625, + "logps/rejected": -512.0904541015625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9423618316650391, + "rewards/margins": 6.491050720214844, + "rewards/rejected": -7.433412551879883, + "step": 2010 + }, + { + "epoch": 0.31, + "learning_rate": 1.267238549717309e-05, + "logits/chosen": -2.830599069595337, + "logits/rejected": -3.1316819190979004, + "logps/chosen": -89.97566223144531, + "logps/rejected": -165.05946350097656, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9692679643630981, + "rewards/margins": 4.865754127502441, + "rewards/rejected": -5.83502197265625, + "step": 2011 + }, + { + "epoch": 0.31, + "learning_rate": 1.2671652056641942e-05, + "logits/chosen": -3.169447422027588, + "logits/rejected": -2.712773323059082, + "logps/chosen": -541.605712890625, + "logps/rejected": -413.01641845703125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6832029819488525, + "rewards/margins": 7.367838382720947, + "rewards/rejected": -9.051041603088379, + "step": 2012 + }, + { + "epoch": 0.31, + "learning_rate": 1.2670918616110795e-05, + "logits/chosen": -1.6882717609405518, + "logits/rejected": -3.157623767852783, + "logps/chosen": -65.35049438476562, + "logps/rejected": -429.8438415527344, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2871170043945312, + "rewards/margins": 10.050027847290039, + "rewards/rejected": -11.33714485168457, + "step": 2013 + }, + { + "epoch": 0.31, + "learning_rate": 1.2670185175579647e-05, + "logits/chosen": -2.31241512298584, + "logits/rejected": -2.1717000007629395, + "logps/chosen": -218.40768432617188, + "logps/rejected": -372.988037109375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3947845697402954, + "rewards/margins": 6.3497772216796875, + "rewards/rejected": -7.744562149047852, + "step": 2014 + }, + { + "epoch": 0.31, + "learning_rate": 1.2669451735048499e-05, + "logits/chosen": -2.934473752975464, + "logits/rejected": -2.804300308227539, + "logps/chosen": -107.6076431274414, + "logps/rejected": -210.40744018554688, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4422907829284668, + "rewards/margins": 4.682391166687012, + "rewards/rejected": -6.1246819496154785, + "step": 2015 + }, + { + "epoch": 0.31, + "learning_rate": 1.266871829451735e-05, + "logits/chosen": -1.8059000968933105, + "logits/rejected": -2.6535146236419678, + "logps/chosen": -144.91542053222656, + "logps/rejected": -327.63720703125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.310998558998108, + "rewards/margins": 6.9745025634765625, + "rewards/rejected": -8.285501480102539, + "step": 2016 + }, + { + "epoch": 0.31, + "learning_rate": 1.2667984853986203e-05, + "logits/chosen": -2.5550589561462402, + "logits/rejected": -3.1305158138275146, + "logps/chosen": -62.035789489746094, + "logps/rejected": -245.49386596679688, + "loss": 0.0528, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4274885654449463, + "rewards/margins": 4.193974018096924, + "rewards/rejected": -6.621462821960449, + "step": 2017 + }, + { + "epoch": 0.31, + "learning_rate": 1.2667251413455055e-05, + "logits/chosen": -2.842966079711914, + "logits/rejected": -2.537942886352539, + "logps/chosen": -622.9681396484375, + "logps/rejected": -542.4041137695312, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6880645751953125, + "rewards/margins": 6.1158905029296875, + "rewards/rejected": -8.803955078125, + "step": 2018 + }, + { + "epoch": 0.31, + "learning_rate": 1.2666517972923908e-05, + "logits/chosen": -2.032867908477783, + "logits/rejected": -2.9090447425842285, + "logps/chosen": -239.9436798095703, + "logps/rejected": -215.40963745117188, + "loss": 2.3466, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.7935791015625, + "rewards/margins": 1.3631768226623535, + "rewards/rejected": -6.1567559242248535, + "step": 2019 + }, + { + "epoch": 0.31, + "learning_rate": 1.266578453239276e-05, + "logits/chosen": -3.2042346000671387, + "logits/rejected": -3.2703115940093994, + "logps/chosen": -51.83594512939453, + "logps/rejected": -120.70368194580078, + "loss": 0.0572, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.777742862701416, + "rewards/margins": 3.2866263389587402, + "rewards/rejected": -6.064369201660156, + "step": 2020 + }, + { + "epoch": 0.31, + "learning_rate": 1.2665051091861612e-05, + "logits/chosen": -2.999363660812378, + "logits/rejected": -1.1503138542175293, + "logps/chosen": -627.9689331054688, + "logps/rejected": -359.4339599609375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.817445755004883, + "rewards/margins": 6.041288375854492, + "rewards/rejected": -8.858734130859375, + "step": 2021 + }, + { + "epoch": 0.31, + "learning_rate": 1.2664317651330464e-05, + "logits/chosen": -2.05525803565979, + "logits/rejected": -2.9405806064605713, + "logps/chosen": -338.52728271484375, + "logps/rejected": -1171.0728759765625, + "loss": 4.0346, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.951359510421753, + "rewards/margins": 3.542921543121338, + "rewards/rejected": -7.494280815124512, + "step": 2022 + }, + { + "epoch": 0.31, + "learning_rate": 1.2663584210799316e-05, + "logits/chosen": -2.4675304889678955, + "logits/rejected": -2.9364190101623535, + "logps/chosen": -138.71115112304688, + "logps/rejected": -194.19296264648438, + "loss": 2.6019, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.0780839920043945, + "rewards/margins": -1.7569156885147095, + "rewards/rejected": -2.3211681842803955, + "step": 2023 + }, + { + "epoch": 0.31, + "learning_rate": 1.2662850770268168e-05, + "logits/chosen": -2.2884068489074707, + "logits/rejected": -2.941200017929077, + "logps/chosen": -133.73077392578125, + "logps/rejected": -269.1563415527344, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.191175103187561, + "rewards/margins": 5.553666114807129, + "rewards/rejected": -6.744840621948242, + "step": 2024 + }, + { + "epoch": 0.31, + "learning_rate": 1.266211732973702e-05, + "logits/chosen": -1.5110701322555542, + "logits/rejected": -2.971778154373169, + "logps/chosen": -68.79476928710938, + "logps/rejected": -217.03273010253906, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7267249822616577, + "rewards/margins": 3.9004111289978027, + "rewards/rejected": -5.62713623046875, + "step": 2025 + }, + { + "epoch": 0.32, + "learning_rate": 1.2661383889205871e-05, + "logits/chosen": -1.852502703666687, + "logits/rejected": -3.000899314880371, + "logps/chosen": -89.68928527832031, + "logps/rejected": -262.56829833984375, + "loss": 0.0292, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5200048685073853, + "rewards/margins": 4.182756423950195, + "rewards/rejected": -5.702761173248291, + "step": 2026 + }, + { + "epoch": 0.32, + "learning_rate": 1.2660650448674725e-05, + "logits/chosen": -3.2982468605041504, + "logits/rejected": -3.3777668476104736, + "logps/chosen": -80.11809539794922, + "logps/rejected": -138.85659790039062, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3134924173355103, + "rewards/margins": 3.9823222160339355, + "rewards/rejected": -5.295814514160156, + "step": 2027 + }, + { + "epoch": 0.32, + "learning_rate": 1.2659917008143577e-05, + "logits/chosen": -3.095722198486328, + "logits/rejected": -3.0775914192199707, + "logps/chosen": -16.707897186279297, + "logps/rejected": -201.41207885742188, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7072082161903381, + "rewards/margins": 6.417211532592773, + "rewards/rejected": -7.124420166015625, + "step": 2028 + }, + { + "epoch": 0.32, + "learning_rate": 1.2659183567612429e-05, + "logits/chosen": -3.0529820919036865, + "logits/rejected": -2.4168126583099365, + "logps/chosen": -393.7309875488281, + "logps/rejected": -206.0228271484375, + "loss": 6.3995, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.848676681518555, + "rewards/margins": -5.3411688804626465, + "rewards/rejected": -3.50750732421875, + "step": 2029 + }, + { + "epoch": 0.32, + "learning_rate": 1.265845012708128e-05, + "logits/chosen": -3.242905378341675, + "logits/rejected": -3.2233822345733643, + "logps/chosen": -157.38470458984375, + "logps/rejected": -193.25308227539062, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16668128967285156, + "rewards/margins": 3.8955087661743164, + "rewards/rejected": -4.062190055847168, + "step": 2030 + }, + { + "epoch": 0.32, + "learning_rate": 1.2657716686550132e-05, + "logits/chosen": -1.7123663425445557, + "logits/rejected": -3.128544569015503, + "logps/chosen": -42.19666290283203, + "logps/rejected": -287.91326904296875, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1064131259918213, + "rewards/margins": 5.4493279457092285, + "rewards/rejected": -6.555741310119629, + "step": 2031 + }, + { + "epoch": 0.32, + "learning_rate": 1.2656983246018984e-05, + "logits/chosen": -1.1628103256225586, + "logits/rejected": -2.6668615341186523, + "logps/chosen": -79.21302795410156, + "logps/rejected": -406.2086181640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9807031750679016, + "rewards/margins": 8.403321266174316, + "rewards/rejected": -9.384024620056152, + "step": 2032 + }, + { + "epoch": 0.32, + "learning_rate": 1.2656249805487836e-05, + "logits/chosen": -2.847839593887329, + "logits/rejected": -2.8945202827453613, + "logps/chosen": -163.3870849609375, + "logps/rejected": -237.02554321289062, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6131813526153564, + "rewards/margins": 5.912250518798828, + "rewards/rejected": -6.5254316329956055, + "step": 2033 + }, + { + "epoch": 0.32, + "learning_rate": 1.2655516364956688e-05, + "logits/chosen": -2.8788249492645264, + "logits/rejected": -3.0638394355773926, + "logps/chosen": -58.94973373413086, + "logps/rejected": -303.88739013671875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.514305830001831, + "rewards/margins": 8.107709884643555, + "rewards/rejected": -9.622014999389648, + "step": 2034 + }, + { + "epoch": 0.32, + "learning_rate": 1.265478292442554e-05, + "logits/chosen": -2.8155763149261475, + "logits/rejected": -3.0180137157440186, + "logps/chosen": -116.96004486083984, + "logps/rejected": -214.9161376953125, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0641639232635498, + "rewards/margins": 5.082040786743164, + "rewards/rejected": -6.146204948425293, + "step": 2035 + }, + { + "epoch": 0.32, + "learning_rate": 1.2654049483894393e-05, + "logits/chosen": -1.1805466413497925, + "logits/rejected": -3.0416345596313477, + "logps/chosen": -41.04610061645508, + "logps/rejected": -325.20721435546875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2924540042877197, + "rewards/margins": 6.2683796882629395, + "rewards/rejected": -7.560833930969238, + "step": 2036 + }, + { + "epoch": 0.32, + "learning_rate": 1.2653316043363245e-05, + "logits/chosen": -2.982243537902832, + "logits/rejected": -1.2215639352798462, + "logps/chosen": -307.5320129394531, + "logps/rejected": -183.99032592773438, + "loss": 2.1776, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6309046745300293, + "rewards/margins": 2.9248504638671875, + "rewards/rejected": -6.555755138397217, + "step": 2037 + }, + { + "epoch": 0.32, + "learning_rate": 1.2652582602832097e-05, + "logits/chosen": -3.076158285140991, + "logits/rejected": -3.2399280071258545, + "logps/chosen": -104.50147247314453, + "logps/rejected": -190.74044799804688, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.156363844871521, + "rewards/margins": 5.377098083496094, + "rewards/rejected": -6.533462047576904, + "step": 2038 + }, + { + "epoch": 0.32, + "learning_rate": 1.2651849162300949e-05, + "logits/chosen": -3.0564403533935547, + "logits/rejected": -2.8647279739379883, + "logps/chosen": -288.48046875, + "logps/rejected": -598.0230712890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.125065416097641, + "rewards/margins": 10.179454803466797, + "rewards/rejected": -10.304519653320312, + "step": 2039 + }, + { + "epoch": 0.32, + "learning_rate": 1.2651115721769801e-05, + "logits/chosen": -1.8296011686325073, + "logits/rejected": -2.971863269805908, + "logps/chosen": -59.99810791015625, + "logps/rejected": -162.1103057861328, + "loss": 0.0292, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5660936832427979, + "rewards/margins": 4.458672523498535, + "rewards/rejected": -6.024766445159912, + "step": 2040 + }, + { + "epoch": 0.32, + "learning_rate": 1.2650382281238653e-05, + "logits/chosen": -2.0015881061553955, + "logits/rejected": -2.8882715702056885, + "logps/chosen": -78.11251831054688, + "logps/rejected": -184.2905731201172, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7029485702514648, + "rewards/margins": 5.539794921875, + "rewards/rejected": -7.242743492126465, + "step": 2041 + }, + { + "epoch": 0.32, + "learning_rate": 1.2649648840707505e-05, + "logits/chosen": -1.6101555824279785, + "logits/rejected": -3.0266010761260986, + "logps/chosen": -97.3070297241211, + "logps/rejected": -257.440673828125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5181110501289368, + "rewards/margins": 6.432633399963379, + "rewards/rejected": -6.95074462890625, + "step": 2042 + }, + { + "epoch": 0.32, + "learning_rate": 1.2648915400176357e-05, + "logits/chosen": -3.036367893218994, + "logits/rejected": -1.8022139072418213, + "logps/chosen": -450.0939636230469, + "logps/rejected": -206.68470764160156, + "loss": 1.808, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9470643997192383, + "rewards/margins": 1.4260231256484985, + "rewards/rejected": -5.373087406158447, + "step": 2043 + }, + { + "epoch": 0.32, + "learning_rate": 1.2648181959645208e-05, + "logits/chosen": -2.9597790241241455, + "logits/rejected": -2.4614672660827637, + "logps/chosen": -181.8888702392578, + "logps/rejected": -197.02606201171875, + "loss": 0.1592, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.692249298095703, + "rewards/margins": 3.890920639038086, + "rewards/rejected": -6.583169937133789, + "step": 2044 + }, + { + "epoch": 0.32, + "learning_rate": 1.2647448519114062e-05, + "logits/chosen": -2.31221342086792, + "logits/rejected": -3.0324556827545166, + "logps/chosen": -325.82025146484375, + "logps/rejected": -335.86505126953125, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7590774297714233, + "rewards/margins": 6.513886451721191, + "rewards/rejected": -7.272964000701904, + "step": 2045 + }, + { + "epoch": 0.32, + "learning_rate": 1.2646715078582914e-05, + "logits/chosen": -2.3983101844787598, + "logits/rejected": -2.940304756164551, + "logps/chosen": -278.5105895996094, + "logps/rejected": -420.3704833984375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5066077709197998, + "rewards/margins": 8.06529426574707, + "rewards/rejected": -9.571901321411133, + "step": 2046 + }, + { + "epoch": 0.32, + "learning_rate": 1.2645981638051767e-05, + "logits/chosen": -3.247666597366333, + "logits/rejected": -2.935980796813965, + "logps/chosen": -353.6795654296875, + "logps/rejected": -412.79217529296875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7827516794204712, + "rewards/margins": 6.696500301361084, + "rewards/rejected": -8.479251861572266, + "step": 2047 + }, + { + "epoch": 0.32, + "learning_rate": 1.264524819752062e-05, + "logits/chosen": -1.9801316261291504, + "logits/rejected": -3.000535488128662, + "logps/chosen": -69.61041259765625, + "logps/rejected": -318.29034423828125, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.285488486289978, + "rewards/margins": 7.375457763671875, + "rewards/rejected": -7.660945892333984, + "step": 2048 + }, + { + "epoch": 0.32, + "learning_rate": 1.2644514756989471e-05, + "logits/chosen": -2.755370855331421, + "logits/rejected": -3.1375129222869873, + "logps/chosen": -135.3562774658203, + "logps/rejected": -248.48988342285156, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0196104049682617, + "rewards/margins": 6.135400772094727, + "rewards/rejected": -7.155011177062988, + "step": 2049 + }, + { + "epoch": 0.32, + "learning_rate": 1.2643781316458323e-05, + "logits/chosen": -1.3949570655822754, + "logits/rejected": -2.931241512298584, + "logps/chosen": -55.30059051513672, + "logps/rejected": -420.431396484375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5542445182800293, + "rewards/margins": 7.720561981201172, + "rewards/rejected": -9.27480697631836, + "step": 2050 + }, + { + "epoch": 0.32, + "learning_rate": 1.2643047875927175e-05, + "logits/chosen": -1.7443405389785767, + "logits/rejected": -3.064894676208496, + "logps/chosen": -163.42550659179688, + "logps/rejected": -240.9758758544922, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8544530868530273, + "rewards/margins": 8.001046180725098, + "rewards/rejected": -9.855499267578125, + "step": 2051 + }, + { + "epoch": 0.32, + "learning_rate": 1.2642314435396027e-05, + "logits/chosen": -2.323542356491089, + "logits/rejected": -2.9852960109710693, + "logps/chosen": -84.5120620727539, + "logps/rejected": -475.0355224609375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9018215537071228, + "rewards/margins": 7.859165191650391, + "rewards/rejected": -8.760986328125, + "step": 2052 + }, + { + "epoch": 0.32, + "learning_rate": 1.2641580994864879e-05, + "logits/chosen": -3.1622817516326904, + "logits/rejected": -3.017941951751709, + "logps/chosen": -375.3328552246094, + "logps/rejected": -276.74273681640625, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9433865547180176, + "rewards/margins": 5.158315658569336, + "rewards/rejected": -8.101702690124512, + "step": 2053 + }, + { + "epoch": 0.32, + "learning_rate": 1.2640847554333732e-05, + "logits/chosen": -3.1858222484588623, + "logits/rejected": -3.0513992309570312, + "logps/chosen": -451.2255859375, + "logps/rejected": -471.1556396484375, + "loss": 0.0353, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1998133659362793, + "rewards/margins": 5.573412895202637, + "rewards/rejected": -6.773226261138916, + "step": 2054 + }, + { + "epoch": 0.32, + "learning_rate": 1.2640114113802584e-05, + "logits/chosen": -2.604997158050537, + "logits/rejected": -3.068401575088501, + "logps/chosen": -532.5982666015625, + "logps/rejected": -319.815185546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1061174869537354, + "rewards/margins": 8.140369415283203, + "rewards/rejected": -9.24648666381836, + "step": 2055 + }, + { + "epoch": 0.32, + "learning_rate": 1.2639380673271436e-05, + "logits/chosen": -2.249074697494507, + "logits/rejected": -2.5588464736938477, + "logps/chosen": -73.03591918945312, + "logps/rejected": -214.13400268554688, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6194448471069336, + "rewards/margins": 7.214907646179199, + "rewards/rejected": -8.834352493286133, + "step": 2056 + }, + { + "epoch": 0.32, + "learning_rate": 1.2638647232740288e-05, + "logits/chosen": -2.1558194160461426, + "logits/rejected": -3.043551445007324, + "logps/chosen": -71.79432678222656, + "logps/rejected": -203.77023315429688, + "loss": 1.9549, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.266845703125, + "rewards/margins": 1.365684986114502, + "rewards/rejected": -5.632530212402344, + "step": 2057 + }, + { + "epoch": 0.32, + "learning_rate": 1.263791379220914e-05, + "logits/chosen": -3.1040709018707275, + "logits/rejected": -2.8766894340515137, + "logps/chosen": -1082.1416015625, + "logps/rejected": -603.5318603515625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4153534173965454, + "rewards/margins": 9.434237480163574, + "rewards/rejected": -9.849591255187988, + "step": 2058 + }, + { + "epoch": 0.32, + "learning_rate": 1.2637180351677992e-05, + "logits/chosen": -2.9783244132995605, + "logits/rejected": -3.1810431480407715, + "logps/chosen": -448.4597473144531, + "logps/rejected": -714.7003784179688, + "loss": 3.2275, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.747110366821289, + "rewards/margins": -1.296280026435852, + "rewards/rejected": -4.450829982757568, + "step": 2059 + }, + { + "epoch": 0.32, + "learning_rate": 1.2636446911146844e-05, + "logits/chosen": -2.949974298477173, + "logits/rejected": -3.08263897895813, + "logps/chosen": -59.71472930908203, + "logps/rejected": -274.29449462890625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.44437986612319946, + "rewards/margins": 7.961849212646484, + "rewards/rejected": -8.406229019165039, + "step": 2060 + }, + { + "epoch": 0.32, + "learning_rate": 1.2635713470615695e-05, + "logits/chosen": -3.1118290424346924, + "logits/rejected": -2.5290310382843018, + "logps/chosen": -602.0343627929688, + "logps/rejected": -414.19390869140625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1409592628479004, + "rewards/margins": 6.751626968383789, + "rewards/rejected": -8.892585754394531, + "step": 2061 + }, + { + "epoch": 0.32, + "learning_rate": 1.2634980030084547e-05, + "logits/chosen": -1.9547690153121948, + "logits/rejected": -2.8203790187835693, + "logps/chosen": -178.73333740234375, + "logps/rejected": -440.7987060546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1655166149139404, + "rewards/margins": 10.029324531555176, + "rewards/rejected": -12.194841384887695, + "step": 2062 + }, + { + "epoch": 0.32, + "learning_rate": 1.2634246589553401e-05, + "logits/chosen": -2.649260997772217, + "logits/rejected": -2.8727903366088867, + "logps/chosen": -233.29067993164062, + "logps/rejected": -349.71771240234375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6893337965011597, + "rewards/margins": 6.689204216003418, + "rewards/rejected": -8.378538131713867, + "step": 2063 + }, + { + "epoch": 0.32, + "learning_rate": 1.2633513149022253e-05, + "logits/chosen": -2.2101569175720215, + "logits/rejected": -3.0481390953063965, + "logps/chosen": -477.854248046875, + "logps/rejected": -602.5028076171875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3926506042480469, + "rewards/margins": 6.901148796081543, + "rewards/rejected": -8.29379940032959, + "step": 2064 + }, + { + "epoch": 0.32, + "learning_rate": 1.2632779708491105e-05, + "logits/chosen": -1.7939749956130981, + "logits/rejected": -2.848541498184204, + "logps/chosen": -169.2283172607422, + "logps/rejected": -430.3927307128906, + "loss": 2.1027, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4469330310821533, + "rewards/margins": 3.296173095703125, + "rewards/rejected": -5.743105888366699, + "step": 2065 + }, + { + "epoch": 0.32, + "learning_rate": 1.2632046267959957e-05, + "logits/chosen": -2.7062783241271973, + "logits/rejected": -2.974276065826416, + "logps/chosen": -165.85946655273438, + "logps/rejected": -421.2909851074219, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.023040771484375, + "rewards/margins": 8.728975296020508, + "rewards/rejected": -9.752016067504883, + "step": 2066 + }, + { + "epoch": 0.32, + "learning_rate": 1.2631312827428808e-05, + "logits/chosen": -1.5949130058288574, + "logits/rejected": -2.896381378173828, + "logps/chosen": -120.28456115722656, + "logps/rejected": -430.24298095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3523755073547363, + "rewards/margins": 12.413095474243164, + "rewards/rejected": -13.765470504760742, + "step": 2067 + }, + { + "epoch": 0.32, + "learning_rate": 1.263057938689766e-05, + "logits/chosen": -3.0092825889587402, + "logits/rejected": -2.8032193183898926, + "logps/chosen": -402.2737731933594, + "logps/rejected": -452.07110595703125, + "loss": 3.8651, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.737522125244141, + "rewards/margins": -3.817230701446533, + "rewards/rejected": -0.9202916622161865, + "step": 2068 + }, + { + "epoch": 0.32, + "learning_rate": 1.2629845946366512e-05, + "logits/chosen": -2.6122007369995117, + "logits/rejected": -3.1416728496551514, + "logps/chosen": -66.22074890136719, + "logps/rejected": -190.55625915527344, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5971139669418335, + "rewards/margins": 5.132013320922852, + "rewards/rejected": -6.729126930236816, + "step": 2069 + }, + { + "epoch": 0.32, + "learning_rate": 1.2629112505835364e-05, + "logits/chosen": -2.964332103729248, + "logits/rejected": -2.0702407360076904, + "logps/chosen": -470.7053527832031, + "logps/rejected": -391.4856872558594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9201096296310425, + "rewards/margins": 10.773263931274414, + "rewards/rejected": -9.853155136108398, + "step": 2070 + }, + { + "epoch": 0.32, + "learning_rate": 1.2628379065304216e-05, + "logits/chosen": -2.734306573867798, + "logits/rejected": -3.0959911346435547, + "logps/chosen": -76.14509582519531, + "logps/rejected": -101.87982177734375, + "loss": 0.0467, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5459517240524292, + "rewards/margins": 3.6190059185028076, + "rewards/rejected": -5.164957523345947, + "step": 2071 + }, + { + "epoch": 0.32, + "learning_rate": 1.262764562477307e-05, + "logits/chosen": -1.6941642761230469, + "logits/rejected": -2.876142740249634, + "logps/chosen": -170.29977416992188, + "logps/rejected": -451.81829833984375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6805362701416016, + "rewards/margins": 10.249479293823242, + "rewards/rejected": -11.930015563964844, + "step": 2072 + }, + { + "epoch": 0.32, + "learning_rate": 1.2626912184241921e-05, + "logits/chosen": -2.600140333175659, + "logits/rejected": -3.2466628551483154, + "logps/chosen": -146.7415008544922, + "logps/rejected": -230.72010803222656, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5814205408096313, + "rewards/margins": 5.600857734680176, + "rewards/rejected": -7.182278633117676, + "step": 2073 + }, + { + "epoch": 0.32, + "learning_rate": 1.2626178743710773e-05, + "logits/chosen": -2.9572107791900635, + "logits/rejected": -2.102748155593872, + "logps/chosen": -406.3233337402344, + "logps/rejected": -401.98486328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3523203134536743, + "rewards/margins": 10.021181106567383, + "rewards/rejected": -11.373501777648926, + "step": 2074 + }, + { + "epoch": 0.32, + "learning_rate": 1.2625445303179625e-05, + "logits/chosen": -2.8629395961761475, + "logits/rejected": -3.1161859035491943, + "logps/chosen": -448.72918701171875, + "logps/rejected": -474.1888122558594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.855590283870697, + "rewards/margins": 11.624298095703125, + "rewards/rejected": -12.479888916015625, + "step": 2075 + }, + { + "epoch": 0.32, + "learning_rate": 1.2624711862648477e-05, + "logits/chosen": -2.6268410682678223, + "logits/rejected": -3.0496695041656494, + "logps/chosen": -272.208984375, + "logps/rejected": -323.9091796875, + "loss": 4.4644, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.422486782073975, + "rewards/margins": 1.6401362419128418, + "rewards/rejected": -7.062623023986816, + "step": 2076 + }, + { + "epoch": 0.32, + "learning_rate": 1.2623978422117329e-05, + "logits/chosen": -2.087148666381836, + "logits/rejected": -2.9946606159210205, + "logps/chosen": -160.10760498046875, + "logps/rejected": -352.6966857910156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7128403186798096, + "rewards/margins": 9.767694473266602, + "rewards/rejected": -10.480534553527832, + "step": 2077 + }, + { + "epoch": 0.32, + "learning_rate": 1.262324498158618e-05, + "logits/chosen": -1.399320363998413, + "logits/rejected": -2.4798665046691895, + "logps/chosen": -472.0203857421875, + "logps/rejected": -424.4266357421875, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1500686407089233, + "rewards/margins": 7.278809547424316, + "rewards/rejected": -8.428877830505371, + "step": 2078 + }, + { + "epoch": 0.32, + "learning_rate": 1.2622511541055034e-05, + "logits/chosen": -1.617496371269226, + "logits/rejected": -2.6539604663848877, + "logps/chosen": -250.5802459716797, + "logps/rejected": -513.127197265625, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.832270860671997, + "rewards/margins": 6.986330032348633, + "rewards/rejected": -8.81860065460205, + "step": 2079 + }, + { + "epoch": 0.32, + "learning_rate": 1.2621778100523886e-05, + "logits/chosen": -3.062459707260132, + "logits/rejected": -1.6561808586120605, + "logps/chosen": -331.0827941894531, + "logps/rejected": -358.2298583984375, + "loss": 2.6117, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.328619480133057, + "rewards/margins": 1.022310733795166, + "rewards/rejected": -6.350930213928223, + "step": 2080 + }, + { + "epoch": 0.32, + "learning_rate": 1.262104465999274e-05, + "logits/chosen": -2.2273850440979004, + "logits/rejected": -2.753296375274658, + "logps/chosen": -98.83797454833984, + "logps/rejected": -178.78526306152344, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3398890495300293, + "rewards/margins": 4.839448928833008, + "rewards/rejected": -6.179338455200195, + "step": 2081 + }, + { + "epoch": 0.32, + "learning_rate": 1.2620311219461592e-05, + "logits/chosen": -2.890143871307373, + "logits/rejected": -2.763887643814087, + "logps/chosen": -119.12413787841797, + "logps/rejected": -228.43740844726562, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8931514620780945, + "rewards/margins": 5.947568893432617, + "rewards/rejected": -6.840720176696777, + "step": 2082 + }, + { + "epoch": 0.32, + "learning_rate": 1.2619577778930444e-05, + "logits/chosen": -3.041492223739624, + "logits/rejected": -2.2482340335845947, + "logps/chosen": -380.04144287109375, + "logps/rejected": -422.6121826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7053260803222656, + "rewards/margins": 10.784744262695312, + "rewards/rejected": -12.490070343017578, + "step": 2083 + }, + { + "epoch": 0.32, + "learning_rate": 1.2618844338399295e-05, + "logits/chosen": -1.397222876548767, + "logits/rejected": -2.7797863483428955, + "logps/chosen": -94.63227844238281, + "logps/rejected": -478.995849609375, + "loss": 0.1524, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2769861221313477, + "rewards/margins": 7.760868072509766, + "rewards/rejected": -10.037854194641113, + "step": 2084 + }, + { + "epoch": 0.32, + "learning_rate": 1.2618110897868147e-05, + "logits/chosen": -2.032658576965332, + "logits/rejected": -1.3042268753051758, + "logps/chosen": -432.5093688964844, + "logps/rejected": -246.78346252441406, + "loss": 7.3471, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.750480651855469, + "rewards/margins": -7.346245765686035, + "rewards/rejected": -1.4042351245880127, + "step": 2085 + }, + { + "epoch": 0.32, + "learning_rate": 1.2617377457337e-05, + "logits/chosen": -2.001128911972046, + "logits/rejected": -2.866323471069336, + "logps/chosen": -104.5621109008789, + "logps/rejected": -204.24404907226562, + "loss": 0.5648, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6938903331756592, + "rewards/margins": 4.317955493927002, + "rewards/rejected": -6.01184606552124, + "step": 2086 + }, + { + "epoch": 0.32, + "learning_rate": 1.2616644016805851e-05, + "logits/chosen": -2.8290040493011475, + "logits/rejected": -1.9478243589401245, + "logps/chosen": -514.55712890625, + "logps/rejected": -470.9564514160156, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.696019172668457, + "rewards/margins": 6.488331317901611, + "rewards/rejected": -9.184350967407227, + "step": 2087 + }, + { + "epoch": 0.32, + "learning_rate": 1.2615910576274703e-05, + "logits/chosen": -2.7410879135131836, + "logits/rejected": -3.12977933883667, + "logps/chosen": -23.471372604370117, + "logps/rejected": -424.3612060546875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9696725010871887, + "rewards/margins": 10.507835388183594, + "rewards/rejected": -11.477508544921875, + "step": 2088 + }, + { + "epoch": 0.32, + "learning_rate": 1.2615177135743555e-05, + "logits/chosen": -2.054438591003418, + "logits/rejected": -3.050095319747925, + "logps/chosen": -123.89891052246094, + "logps/rejected": -189.58106994628906, + "loss": 2.7574, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3694522380828857, + "rewards/margins": 1.482316255569458, + "rewards/rejected": -4.851768493652344, + "step": 2089 + }, + { + "epoch": 0.33, + "learning_rate": 1.2614443695212408e-05, + "logits/chosen": -2.0168397426605225, + "logits/rejected": -2.9922516345977783, + "logps/chosen": -132.47998046875, + "logps/rejected": -279.0705871582031, + "loss": 3.3444, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.023449897766113, + "rewards/margins": 1.4703388214111328, + "rewards/rejected": -5.493788719177246, + "step": 2090 + }, + { + "epoch": 0.33, + "learning_rate": 1.261371025468126e-05, + "logits/chosen": -2.864715576171875, + "logits/rejected": -3.086587429046631, + "logps/chosen": -339.37677001953125, + "logps/rejected": -394.6535949707031, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35544511675834656, + "rewards/margins": 6.091242790222168, + "rewards/rejected": -6.446688175201416, + "step": 2091 + }, + { + "epoch": 0.33, + "learning_rate": 1.2612976814150112e-05, + "logits/chosen": -3.247657537460327, + "logits/rejected": -3.2680306434631348, + "logps/chosen": -43.71765899658203, + "logps/rejected": -147.23257446289062, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8140700459480286, + "rewards/margins": 5.805333137512207, + "rewards/rejected": -6.61940336227417, + "step": 2092 + }, + { + "epoch": 0.33, + "learning_rate": 1.2612243373618964e-05, + "logits/chosen": -2.226789712905884, + "logits/rejected": -3.094026803970337, + "logps/chosen": -125.76490783691406, + "logps/rejected": -301.9359130859375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2269117832183838, + "rewards/margins": 6.202404022216797, + "rewards/rejected": -7.429315567016602, + "step": 2093 + }, + { + "epoch": 0.33, + "learning_rate": 1.2611509933087816e-05, + "logits/chosen": -1.6903454065322876, + "logits/rejected": -2.7977542877197266, + "logps/chosen": -311.8047180175781, + "logps/rejected": -712.2354736328125, + "loss": 3.4602, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5714170932769775, + "rewards/margins": 0.232710599899292, + "rewards/rejected": -3.8041276931762695, + "step": 2094 + }, + { + "epoch": 0.33, + "learning_rate": 1.2610776492556668e-05, + "logits/chosen": -1.8213926553726196, + "logits/rejected": -3.0129551887512207, + "logps/chosen": -167.72299194335938, + "logps/rejected": -271.8531494140625, + "loss": 0.0235, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.222726821899414, + "rewards/margins": 4.619621753692627, + "rewards/rejected": -7.842348575592041, + "step": 2095 + }, + { + "epoch": 0.33, + "learning_rate": 1.261004305202552e-05, + "logits/chosen": -3.0854058265686035, + "logits/rejected": -2.6515889167785645, + "logps/chosen": -456.4865417480469, + "logps/rejected": -462.455322265625, + "loss": 6.3731, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.028162956237793, + "rewards/margins": -6.365274429321289, + "rewards/rejected": -1.6628892421722412, + "step": 2096 + }, + { + "epoch": 0.33, + "learning_rate": 1.2609309611494372e-05, + "logits/chosen": -2.4028799533843994, + "logits/rejected": -2.8736929893493652, + "logps/chosen": -208.1085205078125, + "logps/rejected": -202.88778686523438, + "loss": 0.0942, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0362014770507812, + "rewards/margins": 4.2862749099731445, + "rewards/rejected": -6.322476387023926, + "step": 2097 + }, + { + "epoch": 0.33, + "learning_rate": 1.2608576170963223e-05, + "logits/chosen": -2.996797800064087, + "logits/rejected": -2.4056828022003174, + "logps/chosen": -358.487548828125, + "logps/rejected": -426.5921936035156, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6993931531906128, + "rewards/margins": 7.641808986663818, + "rewards/rejected": -9.341201782226562, + "step": 2098 + }, + { + "epoch": 0.33, + "learning_rate": 1.2607842730432077e-05, + "logits/chosen": -2.5894412994384766, + "logits/rejected": -3.0977654457092285, + "logps/chosen": -224.0930633544922, + "logps/rejected": -365.98248291015625, + "loss": 0.2717, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0226550102233887, + "rewards/margins": 2.945869207382202, + "rewards/rejected": -3.968524217605591, + "step": 2099 + }, + { + "epoch": 0.33, + "learning_rate": 1.2607109289900929e-05, + "logits/chosen": -2.4562270641326904, + "logits/rejected": -3.1161582469940186, + "logps/chosen": -213.03118896484375, + "logps/rejected": -361.3857421875, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0555763244628906, + "rewards/margins": 5.52152156829834, + "rewards/rejected": -7.5770978927612305, + "step": 2100 + }, + { + "epoch": 0.33, + "learning_rate": 1.260637584936978e-05, + "logits/chosen": -2.39959979057312, + "logits/rejected": -2.835341215133667, + "logps/chosen": -192.25161743164062, + "logps/rejected": -439.266845703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8516197204589844, + "rewards/margins": 10.408721923828125, + "rewards/rejected": -12.26034164428711, + "step": 2101 + }, + { + "epoch": 0.33, + "learning_rate": 1.2605642408838633e-05, + "logits/chosen": -1.9862719774246216, + "logits/rejected": -2.9019410610198975, + "logps/chosen": -141.26150512695312, + "logps/rejected": -375.0516662597656, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3689953088760376, + "rewards/margins": 7.460977554321289, + "rewards/rejected": -8.829973220825195, + "step": 2102 + }, + { + "epoch": 0.33, + "learning_rate": 1.2604908968307485e-05, + "logits/chosen": -3.0401480197906494, + "logits/rejected": -2.2017292976379395, + "logps/chosen": -277.1697692871094, + "logps/rejected": -213.61172485351562, + "loss": 4.7667, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.262786388397217, + "rewards/margins": -2.860248327255249, + "rewards/rejected": -3.402538299560547, + "step": 2103 + }, + { + "epoch": 0.33, + "learning_rate": 1.2604175527776336e-05, + "logits/chosen": -2.4511477947235107, + "logits/rejected": -3.1275627613067627, + "logps/chosen": -130.47967529296875, + "logps/rejected": -518.793212890625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8726813793182373, + "rewards/margins": 7.512365341186523, + "rewards/rejected": -9.385046005249023, + "step": 2104 + }, + { + "epoch": 0.33, + "learning_rate": 1.2603442087245188e-05, + "logits/chosen": -2.887500047683716, + "logits/rejected": -3.080489158630371, + "logps/chosen": -34.793548583984375, + "logps/rejected": -134.70509338378906, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.129598379135132, + "rewards/margins": 4.132354736328125, + "rewards/rejected": -6.261953353881836, + "step": 2105 + }, + { + "epoch": 0.33, + "learning_rate": 1.260270864671404e-05, + "logits/chosen": -2.5424840450286865, + "logits/rejected": -3.0741612911224365, + "logps/chosen": -408.1010437011719, + "logps/rejected": -605.2979736328125, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3469605445861816, + "rewards/margins": 4.2801513671875, + "rewards/rejected": -6.627111911773682, + "step": 2106 + }, + { + "epoch": 0.33, + "learning_rate": 1.2601975206182892e-05, + "logits/chosen": -3.0797200202941895, + "logits/rejected": -2.4756340980529785, + "logps/chosen": -137.01458740234375, + "logps/rejected": -132.74427795410156, + "loss": 0.5218, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.858346939086914, + "rewards/margins": 3.450134038925171, + "rewards/rejected": -6.308481216430664, + "step": 2107 + }, + { + "epoch": 0.33, + "learning_rate": 1.2601241765651746e-05, + "logits/chosen": -2.1241209506988525, + "logits/rejected": -3.181284189224243, + "logps/chosen": -154.75784301757812, + "logps/rejected": -351.81298828125, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5790172815322876, + "rewards/margins": 7.345769882202148, + "rewards/rejected": -7.924787521362305, + "step": 2108 + }, + { + "epoch": 0.33, + "learning_rate": 1.2600508325120598e-05, + "logits/chosen": -1.9561928510665894, + "logits/rejected": -2.7332775592803955, + "logps/chosen": -204.45211791992188, + "logps/rejected": -346.812744140625, + "loss": 1.3156, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.087336778640747, + "rewards/margins": 4.5216569900512695, + "rewards/rejected": -7.6089935302734375, + "step": 2109 + }, + { + "epoch": 0.33, + "learning_rate": 1.259977488458945e-05, + "logits/chosen": -1.4734925031661987, + "logits/rejected": -2.3885574340820312, + "logps/chosen": -85.07923126220703, + "logps/rejected": -357.4921875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4624896049499512, + "rewards/margins": 7.117555618286133, + "rewards/rejected": -8.580045700073242, + "step": 2110 + }, + { + "epoch": 0.33, + "learning_rate": 1.2599041444058301e-05, + "logits/chosen": -2.4957072734832764, + "logits/rejected": -2.823503255844116, + "logps/chosen": -335.5438537597656, + "logps/rejected": -398.69268798828125, + "loss": 1.91, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.003609657287598, + "rewards/margins": 2.560980796813965, + "rewards/rejected": -6.5645904541015625, + "step": 2111 + }, + { + "epoch": 0.33, + "learning_rate": 1.2598308003527153e-05, + "logits/chosen": -2.5441389083862305, + "logits/rejected": -2.633495807647705, + "logps/chosen": -181.76544189453125, + "logps/rejected": -253.4664306640625, + "loss": 2.5912, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.494004964828491, + "rewards/margins": 1.020594596862793, + "rewards/rejected": -4.514599800109863, + "step": 2112 + }, + { + "epoch": 0.33, + "learning_rate": 1.2597574562996007e-05, + "logits/chosen": -3.1409690380096436, + "logits/rejected": -3.12422776222229, + "logps/chosen": -198.29425048828125, + "logps/rejected": -191.49986267089844, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8977409601211548, + "rewards/margins": 5.257420063018799, + "rewards/rejected": -7.155160903930664, + "step": 2113 + }, + { + "epoch": 0.33, + "learning_rate": 1.2596841122464859e-05, + "logits/chosen": -2.9076197147369385, + "logits/rejected": -2.2560293674468994, + "logps/chosen": -126.89889526367188, + "logps/rejected": -199.42799377441406, + "loss": 0.3491, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5450350046157837, + "rewards/margins": 3.9873409271240234, + "rewards/rejected": -5.532375812530518, + "step": 2114 + }, + { + "epoch": 0.33, + "learning_rate": 1.259610768193371e-05, + "logits/chosen": -3.114879608154297, + "logits/rejected": -3.2327425479888916, + "logps/chosen": -137.51531982421875, + "logps/rejected": -84.51335906982422, + "loss": 1.4428, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9867961406707764, + "rewards/margins": 0.09981667995452881, + "rewards/rejected": -3.0866127014160156, + "step": 2115 + }, + { + "epoch": 0.33, + "learning_rate": 1.2595374241402564e-05, + "logits/chosen": -2.7634928226470947, + "logits/rejected": -2.959883689880371, + "logps/chosen": -111.15457153320312, + "logps/rejected": -104.85560607910156, + "loss": 1.2186, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8877131938934326, + "rewards/margins": 0.7565138339996338, + "rewards/rejected": -2.6442270278930664, + "step": 2116 + }, + { + "epoch": 0.33, + "learning_rate": 1.2594640800871416e-05, + "logits/chosen": -3.0766284465789795, + "logits/rejected": -2.8900163173675537, + "logps/chosen": -149.78213500976562, + "logps/rejected": -272.3292236328125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9687844514846802, + "rewards/margins": 6.944943428039551, + "rewards/rejected": -7.913727760314941, + "step": 2117 + }, + { + "epoch": 0.33, + "learning_rate": 1.2593907360340268e-05, + "logits/chosen": -2.9428491592407227, + "logits/rejected": -3.007122278213501, + "logps/chosen": -129.87551879882812, + "logps/rejected": -147.28338623046875, + "loss": 1.6426, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.976457357406616, + "rewards/margins": 1.1205501556396484, + "rewards/rejected": -4.0970072746276855, + "step": 2118 + }, + { + "epoch": 0.33, + "learning_rate": 1.259317391980912e-05, + "logits/chosen": -3.078942060470581, + "logits/rejected": -3.0547139644622803, + "logps/chosen": -256.0951232910156, + "logps/rejected": -239.60403442382812, + "loss": 0.0416, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.152451276779175, + "rewards/margins": 3.2197694778442383, + "rewards/rejected": -5.372220993041992, + "step": 2119 + }, + { + "epoch": 0.33, + "learning_rate": 1.2592440479277972e-05, + "logits/chosen": -2.8723490238189697, + "logits/rejected": -2.432415246963501, + "logps/chosen": -177.38681030273438, + "logps/rejected": -181.45257568359375, + "loss": 2.4553, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.418678283691406, + "rewards/margins": -0.39827728271484375, + "rewards/rejected": -4.020400524139404, + "step": 2120 + }, + { + "epoch": 0.33, + "learning_rate": 1.2591707038746823e-05, + "logits/chosen": -2.174631118774414, + "logits/rejected": -3.0694522857666016, + "logps/chosen": -102.74787139892578, + "logps/rejected": -508.0491943359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3823254108428955, + "rewards/margins": 8.300307273864746, + "rewards/rejected": -9.682632446289062, + "step": 2121 + }, + { + "epoch": 0.33, + "learning_rate": 1.2590973598215675e-05, + "logits/chosen": -2.0381715297698975, + "logits/rejected": -2.8148281574249268, + "logps/chosen": -255.3858184814453, + "logps/rejected": -323.1330261230469, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7199611663818359, + "rewards/margins": 4.278539657592773, + "rewards/rejected": -4.998500823974609, + "step": 2122 + }, + { + "epoch": 0.33, + "learning_rate": 1.2590240157684527e-05, + "logits/chosen": -1.3343664407730103, + "logits/rejected": -2.7878432273864746, + "logps/chosen": -92.15653991699219, + "logps/rejected": -414.59307861328125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.006325125694275, + "rewards/margins": 8.69445514678955, + "rewards/rejected": -9.700779914855957, + "step": 2123 + }, + { + "epoch": 0.33, + "learning_rate": 1.2589506717153379e-05, + "logits/chosen": -2.0688276290893555, + "logits/rejected": -2.4154486656188965, + "logps/chosen": -221.402099609375, + "logps/rejected": -289.6919860839844, + "loss": 1.6542, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.60155725479126, + "rewards/margins": 0.5941576957702637, + "rewards/rejected": -5.195714950561523, + "step": 2124 + }, + { + "epoch": 0.33, + "learning_rate": 1.2588773276622233e-05, + "logits/chosen": -2.841174602508545, + "logits/rejected": -2.998426914215088, + "logps/chosen": -153.05548095703125, + "logps/rejected": -144.0005645751953, + "loss": 0.7338, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1506881713867188, + "rewards/margins": 2.606584072113037, + "rewards/rejected": -4.757272720336914, + "step": 2125 + }, + { + "epoch": 0.33, + "learning_rate": 1.2588039836091085e-05, + "logits/chosen": -2.72363543510437, + "logits/rejected": -3.036423444747925, + "logps/chosen": -121.1520767211914, + "logps/rejected": -255.5114288330078, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8785362243652344, + "rewards/margins": 4.62493896484375, + "rewards/rejected": -5.503475189208984, + "step": 2126 + }, + { + "epoch": 0.33, + "learning_rate": 1.2587306395559936e-05, + "logits/chosen": -2.653574228286743, + "logits/rejected": -3.330436944961548, + "logps/chosen": -248.30902099609375, + "logps/rejected": -277.9868469238281, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1632133722305298, + "rewards/margins": 4.789306640625, + "rewards/rejected": -5.952520370483398, + "step": 2127 + }, + { + "epoch": 0.33, + "learning_rate": 1.2586572955028788e-05, + "logits/chosen": -2.661287307739258, + "logits/rejected": -2.3144004344940186, + "logps/chosen": -125.72267150878906, + "logps/rejected": -273.5508117675781, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5819816589355469, + "rewards/margins": 7.576864719390869, + "rewards/rejected": -9.158845901489258, + "step": 2128 + }, + { + "epoch": 0.33, + "learning_rate": 1.258583951449764e-05, + "logits/chosen": -3.1461069583892822, + "logits/rejected": -2.08530592918396, + "logps/chosen": -970.2606201171875, + "logps/rejected": -577.8623657226562, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.38760530948638916, + "rewards/margins": 4.8819169998168945, + "rewards/rejected": -5.269522190093994, + "step": 2129 + }, + { + "epoch": 0.33, + "learning_rate": 1.2585106073966492e-05, + "logits/chosen": -2.7590389251708984, + "logits/rejected": -3.083613634109497, + "logps/chosen": -235.05044555664062, + "logps/rejected": -401.08868408203125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.160367965698242, + "rewards/margins": 7.388777732849121, + "rewards/rejected": -9.549145698547363, + "step": 2130 + }, + { + "epoch": 0.33, + "learning_rate": 1.2584372633435344e-05, + "logits/chosen": -2.983335256576538, + "logits/rejected": -3.279242753982544, + "logps/chosen": -30.91067123413086, + "logps/rejected": -319.646728515625, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7512621879577637, + "rewards/margins": 5.112147331237793, + "rewards/rejected": -6.863409519195557, + "step": 2131 + }, + { + "epoch": 0.33, + "learning_rate": 1.2583639192904196e-05, + "logits/chosen": -2.9897329807281494, + "logits/rejected": -3.202972650527954, + "logps/chosen": -130.93446350097656, + "logps/rejected": -452.78106689453125, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.453836441040039, + "rewards/margins": 8.673251152038574, + "rewards/rejected": -10.127087593078613, + "step": 2132 + }, + { + "epoch": 0.33, + "learning_rate": 1.2582905752373048e-05, + "logits/chosen": -2.813365936279297, + "logits/rejected": -3.051856279373169, + "logps/chosen": -103.27509307861328, + "logps/rejected": -222.98348999023438, + "loss": 0.0537, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6336345672607422, + "rewards/margins": 5.389177322387695, + "rewards/rejected": -7.0228118896484375, + "step": 2133 + }, + { + "epoch": 0.33, + "learning_rate": 1.2582172311841901e-05, + "logits/chosen": -2.1531403064727783, + "logits/rejected": -3.004986047744751, + "logps/chosen": -189.26695251464844, + "logps/rejected": -285.2099304199219, + "loss": 2.3119, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.834747791290283, + "rewards/margins": 1.2089953422546387, + "rewards/rejected": -5.043743133544922, + "step": 2134 + }, + { + "epoch": 0.33, + "learning_rate": 1.2581438871310753e-05, + "logits/chosen": -2.9829952716827393, + "logits/rejected": -1.9971110820770264, + "logps/chosen": -398.57257080078125, + "logps/rejected": -314.3623962402344, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2960220575332642, + "rewards/margins": 9.31370735168457, + "rewards/rejected": -10.609729766845703, + "step": 2135 + }, + { + "epoch": 0.33, + "learning_rate": 1.2580705430779605e-05, + "logits/chosen": -2.8790171146392822, + "logits/rejected": -3.0724034309387207, + "logps/chosen": -151.85842895507812, + "logps/rejected": -195.50631713867188, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3712749481201172, + "rewards/margins": 4.572968482971191, + "rewards/rejected": -5.944243907928467, + "step": 2136 + }, + { + "epoch": 0.33, + "learning_rate": 1.2579971990248457e-05, + "logits/chosen": -2.341766119003296, + "logits/rejected": -3.007387161254883, + "logps/chosen": -47.578041076660156, + "logps/rejected": -161.42124938964844, + "loss": 0.36, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.704556465148926, + "rewards/margins": 2.0908212661743164, + "rewards/rejected": -4.795377731323242, + "step": 2137 + }, + { + "epoch": 0.33, + "learning_rate": 1.2579238549717309e-05, + "logits/chosen": -2.6981112957000732, + "logits/rejected": -3.030700206756592, + "logps/chosen": -80.42324829101562, + "logps/rejected": -212.91000366210938, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0895044803619385, + "rewards/margins": 4.537986755371094, + "rewards/rejected": -5.627490997314453, + "step": 2138 + }, + { + "epoch": 0.33, + "learning_rate": 1.257850510918616e-05, + "logits/chosen": -2.5760817527770996, + "logits/rejected": -2.9670112133026123, + "logps/chosen": -822.713134765625, + "logps/rejected": -928.75048828125, + "loss": 0.0517, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7292633056640625, + "rewards/margins": 5.818853855133057, + "rewards/rejected": -7.548116683959961, + "step": 2139 + }, + { + "epoch": 0.33, + "learning_rate": 1.2577771668655013e-05, + "logits/chosen": -2.0344598293304443, + "logits/rejected": -3.102159023284912, + "logps/chosen": -190.27088928222656, + "logps/rejected": -336.14459228515625, + "loss": 0.1941, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.823477268218994, + "rewards/margins": 2.074326992034912, + "rewards/rejected": -4.897804260253906, + "step": 2140 + }, + { + "epoch": 0.33, + "learning_rate": 1.2577038228123864e-05, + "logits/chosen": -2.6569738388061523, + "logits/rejected": -2.9508016109466553, + "logps/chosen": -451.46209716796875, + "logps/rejected": -516.02587890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5988319516181946, + "rewards/margins": 9.635920524597168, + "rewards/rejected": -10.234752655029297, + "step": 2141 + }, + { + "epoch": 0.33, + "learning_rate": 1.2576304787592716e-05, + "logits/chosen": -1.297816514968872, + "logits/rejected": -2.86263370513916, + "logps/chosen": -158.41107177734375, + "logps/rejected": -429.1419372558594, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8706554174423218, + "rewards/margins": 7.727349281311035, + "rewards/rejected": -8.598004341125488, + "step": 2142 + }, + { + "epoch": 0.33, + "learning_rate": 1.257557134706157e-05, + "logits/chosen": -2.742433547973633, + "logits/rejected": -3.0866525173187256, + "logps/chosen": -130.18833923339844, + "logps/rejected": -240.8647918701172, + "loss": 0.1893, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0618804693222046, + "rewards/margins": 3.5678436756134033, + "rewards/rejected": -4.629724025726318, + "step": 2143 + }, + { + "epoch": 0.33, + "learning_rate": 1.2574837906530422e-05, + "logits/chosen": -3.0553183555603027, + "logits/rejected": -1.6675615310668945, + "logps/chosen": -719.6357421875, + "logps/rejected": -331.9369201660156, + "loss": 0.1151, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6456971168518066, + "rewards/margins": 3.022150993347168, + "rewards/rejected": -4.667848110198975, + "step": 2144 + }, + { + "epoch": 0.33, + "learning_rate": 1.2574104465999274e-05, + "logits/chosen": -1.773557186126709, + "logits/rejected": -3.12564754486084, + "logps/chosen": -83.541748046875, + "logps/rejected": -415.2528076171875, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8885965347290039, + "rewards/margins": 4.646634578704834, + "rewards/rejected": -5.535231113433838, + "step": 2145 + }, + { + "epoch": 0.33, + "learning_rate": 1.2573371025468125e-05, + "logits/chosen": -2.2647135257720947, + "logits/rejected": -2.841836452484131, + "logps/chosen": -144.189697265625, + "logps/rejected": -324.80303955078125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6721802949905396, + "rewards/margins": 7.1524763107299805, + "rewards/rejected": -8.82465648651123, + "step": 2146 + }, + { + "epoch": 0.33, + "learning_rate": 1.2572637584936979e-05, + "logits/chosen": -1.4167956113815308, + "logits/rejected": -2.9456934928894043, + "logps/chosen": -165.783935546875, + "logps/rejected": -341.65777587890625, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4996193647384644, + "rewards/margins": 6.796077728271484, + "rewards/rejected": -8.295697212219238, + "step": 2147 + }, + { + "epoch": 0.33, + "learning_rate": 1.2571904144405831e-05, + "logits/chosen": -1.3694912195205688, + "logits/rejected": -2.720583200454712, + "logps/chosen": -190.9272918701172, + "logps/rejected": -520.58447265625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.173365831375122, + "rewards/margins": 7.393302917480469, + "rewards/rejected": -10.566668510437012, + "step": 2148 + }, + { + "epoch": 0.33, + "learning_rate": 1.2571170703874683e-05, + "logits/chosen": -2.6868739128112793, + "logits/rejected": -3.1564300060272217, + "logps/chosen": -40.41489028930664, + "logps/rejected": -214.2688446044922, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.066756248474121, + "rewards/margins": 6.120924472808838, + "rewards/rejected": -8.187681198120117, + "step": 2149 + }, + { + "epoch": 0.33, + "learning_rate": 1.2570437263343535e-05, + "logits/chosen": -1.8889507055282593, + "logits/rejected": -3.02111554145813, + "logps/chosen": -116.86891174316406, + "logps/rejected": -395.98272705078125, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7573199272155762, + "rewards/margins": 5.742413520812988, + "rewards/rejected": -7.499732971191406, + "step": 2150 + }, + { + "epoch": 0.33, + "learning_rate": 1.2569703822812387e-05, + "logits/chosen": -2.5654289722442627, + "logits/rejected": -2.9123823642730713, + "logps/chosen": -149.1343994140625, + "logps/rejected": -173.15530395507812, + "loss": 1.4987, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.445897340774536, + "rewards/margins": 1.9429380893707275, + "rewards/rejected": -4.388835430145264, + "step": 2151 + }, + { + "epoch": 0.33, + "learning_rate": 1.256897038228124e-05, + "logits/chosen": -3.0629630088806152, + "logits/rejected": -1.7142091989517212, + "logps/chosen": -637.7110595703125, + "logps/rejected": -358.21978759765625, + "loss": 1.5258, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.3962523937225342, + "rewards/margins": 2.020406484603882, + "rewards/rejected": -3.416658878326416, + "step": 2152 + }, + { + "epoch": 0.33, + "learning_rate": 1.2568236941750092e-05, + "logits/chosen": -1.4549437761306763, + "logits/rejected": -2.8861992359161377, + "logps/chosen": -235.3186798095703, + "logps/rejected": -340.5070495605469, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4660265445709229, + "rewards/margins": 4.153180122375488, + "rewards/rejected": -5.61920690536499, + "step": 2153 + }, + { + "epoch": 0.33, + "learning_rate": 1.2567503501218944e-05, + "logits/chosen": -2.0663185119628906, + "logits/rejected": -3.153794050216675, + "logps/chosen": -78.23241424560547, + "logps/rejected": -461.7203369140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19565047323703766, + "rewards/margins": 8.791254043579102, + "rewards/rejected": -8.986905097961426, + "step": 2154 + }, + { + "epoch": 0.34, + "learning_rate": 1.2566770060687796e-05, + "logits/chosen": -2.5020272731781006, + "logits/rejected": -2.9736697673797607, + "logps/chosen": -499.29132080078125, + "logps/rejected": -174.0969696044922, + "loss": 3.9166, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.3884358406066895, + "rewards/margins": -1.3346238136291504, + "rewards/rejected": -4.053812026977539, + "step": 2155 + }, + { + "epoch": 0.34, + "learning_rate": 1.2566036620156648e-05, + "logits/chosen": -3.112227439880371, + "logits/rejected": -2.959721326828003, + "logps/chosen": -206.9820098876953, + "logps/rejected": -315.644775390625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1318291425704956, + "rewards/margins": 6.404803276062012, + "rewards/rejected": -7.536632537841797, + "step": 2156 + }, + { + "epoch": 0.34, + "learning_rate": 1.25653031796255e-05, + "logits/chosen": -2.743983268737793, + "logits/rejected": -3.0104410648345947, + "logps/chosen": -156.90792846679688, + "logps/rejected": -323.08349609375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9363441467285156, + "rewards/margins": 6.580060958862305, + "rewards/rejected": -7.51640510559082, + "step": 2157 + }, + { + "epoch": 0.34, + "learning_rate": 1.2564569739094351e-05, + "logits/chosen": -3.061277151107788, + "logits/rejected": -3.1421446800231934, + "logps/chosen": -245.01419067382812, + "logps/rejected": -333.38946533203125, + "loss": 4.0601, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.665533542633057, + "rewards/margins": -1.772324562072754, + "rewards/rejected": -2.893209218978882, + "step": 2158 + }, + { + "epoch": 0.34, + "learning_rate": 1.2563836298563203e-05, + "logits/chosen": -1.4893120527267456, + "logits/rejected": -1.791043758392334, + "logps/chosen": -290.5501708984375, + "logps/rejected": -379.65753173828125, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1583313941955566, + "rewards/margins": 4.941114902496338, + "rewards/rejected": -7.0994462966918945, + "step": 2159 + }, + { + "epoch": 0.34, + "learning_rate": 1.2563102858032055e-05, + "logits/chosen": -2.9282846450805664, + "logits/rejected": -3.0233101844787598, + "logps/chosen": -177.65052795410156, + "logps/rejected": -208.46954345703125, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6247849464416504, + "rewards/margins": 5.197247505187988, + "rewards/rejected": -6.8220319747924805, + "step": 2160 + }, + { + "epoch": 0.34, + "learning_rate": 1.2562369417500909e-05, + "logits/chosen": -1.8405061960220337, + "logits/rejected": -2.687127113342285, + "logps/chosen": -99.130859375, + "logps/rejected": -338.82708740234375, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.518244743347168, + "rewards/margins": 5.863970756530762, + "rewards/rejected": -9.38221549987793, + "step": 2161 + }, + { + "epoch": 0.34, + "learning_rate": 1.256163597696976e-05, + "logits/chosen": -2.2151472568511963, + "logits/rejected": -3.012383222579956, + "logps/chosen": -239.86354064941406, + "logps/rejected": -267.2817687988281, + "loss": 2.7923, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.511023044586182, + "rewards/margins": 0.8876075744628906, + "rewards/rejected": -5.398630619049072, + "step": 2162 + }, + { + "epoch": 0.34, + "learning_rate": 1.2560902536438612e-05, + "logits/chosen": -2.438563823699951, + "logits/rejected": -2.7195756435394287, + "logps/chosen": -69.18827819824219, + "logps/rejected": -298.63116455078125, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.224257230758667, + "rewards/margins": 7.5670881271362305, + "rewards/rejected": -9.791345596313477, + "step": 2163 + }, + { + "epoch": 0.34, + "learning_rate": 1.2560169095907464e-05, + "logits/chosen": -1.3732346296310425, + "logits/rejected": -3.02091646194458, + "logps/chosen": -95.06193542480469, + "logps/rejected": -342.25665283203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45931628346443176, + "rewards/margins": 9.71817398071289, + "rewards/rejected": -10.177490234375, + "step": 2164 + }, + { + "epoch": 0.34, + "learning_rate": 1.2559435655376316e-05, + "logits/chosen": -2.492384195327759, + "logits/rejected": -2.8772125244140625, + "logps/chosen": -153.89620971679688, + "logps/rejected": -188.8213653564453, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0843963623046875, + "rewards/margins": 5.356333255767822, + "rewards/rejected": -6.44072961807251, + "step": 2165 + }, + { + "epoch": 0.34, + "learning_rate": 1.2558702214845168e-05, + "logits/chosen": -2.0565967559814453, + "logits/rejected": -3.1562600135803223, + "logps/chosen": -115.49100494384766, + "logps/rejected": -279.6904602050781, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6693334579467773, + "rewards/margins": 7.023195266723633, + "rewards/rejected": -7.69252872467041, + "step": 2166 + }, + { + "epoch": 0.34, + "learning_rate": 1.255796877431402e-05, + "logits/chosen": -2.5419857501983643, + "logits/rejected": -2.8893516063690186, + "logps/chosen": -106.92361450195312, + "logps/rejected": -284.14752197265625, + "loss": 1.5357, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.995021343231201, + "rewards/margins": 4.413006782531738, + "rewards/rejected": -8.408028602600098, + "step": 2167 + }, + { + "epoch": 0.34, + "learning_rate": 1.2557235333782872e-05, + "logits/chosen": -2.2344346046447754, + "logits/rejected": -3.0302274227142334, + "logps/chosen": -327.42852783203125, + "logps/rejected": -354.5819091796875, + "loss": 1.1576, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8504204750061035, + "rewards/margins": 2.0524215698242188, + "rewards/rejected": -5.902841567993164, + "step": 2168 + }, + { + "epoch": 0.34, + "learning_rate": 1.2556501893251724e-05, + "logits/chosen": -3.1351637840270996, + "logits/rejected": -2.0905213356018066, + "logps/chosen": -267.81915283203125, + "logps/rejected": -151.98353576660156, + "loss": 1.2518, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8376994132995605, + "rewards/margins": 2.0851738452911377, + "rewards/rejected": -4.922873497009277, + "step": 2169 + }, + { + "epoch": 0.34, + "learning_rate": 1.2555768452720577e-05, + "logits/chosen": -2.6507389545440674, + "logits/rejected": -3.072082281112671, + "logps/chosen": -158.38885498046875, + "logps/rejected": -215.28636169433594, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6220531463623047, + "rewards/margins": 5.96539306640625, + "rewards/rejected": -6.587446212768555, + "step": 2170 + }, + { + "epoch": 0.34, + "learning_rate": 1.255503501218943e-05, + "logits/chosen": -2.479444980621338, + "logits/rejected": -2.9429893493652344, + "logps/chosen": -170.54632568359375, + "logps/rejected": -281.2415771484375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5075981616973877, + "rewards/margins": 6.061119079589844, + "rewards/rejected": -7.568717002868652, + "step": 2171 + }, + { + "epoch": 0.34, + "learning_rate": 1.2554301571658281e-05, + "logits/chosen": -2.2141366004943848, + "logits/rejected": -3.0687129497528076, + "logps/chosen": -342.32501220703125, + "logps/rejected": -464.8936767578125, + "loss": 0.0321, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3724892139434814, + "rewards/margins": 5.099659442901611, + "rewards/rejected": -6.472148418426514, + "step": 2172 + }, + { + "epoch": 0.34, + "learning_rate": 1.2553568131127133e-05, + "logits/chosen": -2.133397340774536, + "logits/rejected": -2.37485671043396, + "logps/chosen": -260.0294494628906, + "logps/rejected": -616.9420166015625, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0432441234588623, + "rewards/margins": 6.8024773597717285, + "rewards/rejected": -8.845721244812012, + "step": 2173 + }, + { + "epoch": 0.34, + "learning_rate": 1.2552834690595985e-05, + "logits/chosen": -3.1599464416503906, + "logits/rejected": -2.862661123275757, + "logps/chosen": -169.488525390625, + "logps/rejected": -277.45269775390625, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2364410161972046, + "rewards/margins": 5.928301811218262, + "rewards/rejected": -7.164742469787598, + "step": 2174 + }, + { + "epoch": 0.34, + "learning_rate": 1.2552101250064837e-05, + "logits/chosen": -1.8167043924331665, + "logits/rejected": -2.970890522003174, + "logps/chosen": -275.904296875, + "logps/rejected": -471.9620056152344, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0885810852050781, + "rewards/margins": 7.432038307189941, + "rewards/rejected": -8.52061939239502, + "step": 2175 + }, + { + "epoch": 0.34, + "learning_rate": 1.2551367809533689e-05, + "logits/chosen": -2.197178840637207, + "logits/rejected": -2.969578981399536, + "logps/chosen": -235.62338256835938, + "logps/rejected": -266.8356628417969, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.164738416671753, + "rewards/margins": 6.050141334533691, + "rewards/rejected": -7.214879989624023, + "step": 2176 + }, + { + "epoch": 0.34, + "learning_rate": 1.255063436900254e-05, + "logits/chosen": -2.374762535095215, + "logits/rejected": -3.0670483112335205, + "logps/chosen": -153.05810546875, + "logps/rejected": -276.69183349609375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7003998160362244, + "rewards/margins": 5.979187965393066, + "rewards/rejected": -6.679587364196777, + "step": 2177 + }, + { + "epoch": 0.34, + "learning_rate": 1.2549900928471392e-05, + "logits/chosen": -3.0299277305603027, + "logits/rejected": -2.7108020782470703, + "logps/chosen": -544.68212890625, + "logps/rejected": -525.7410278320312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7522728443145752, + "rewards/margins": 8.955737113952637, + "rewards/rejected": -9.708009719848633, + "step": 2178 + }, + { + "epoch": 0.34, + "learning_rate": 1.2549167487940246e-05, + "logits/chosen": -2.896341562271118, + "logits/rejected": -3.1528217792510986, + "logps/chosen": -365.87286376953125, + "logps/rejected": -491.2135009765625, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09093326330184937, + "rewards/margins": 5.146783351898193, + "rewards/rejected": -5.055850028991699, + "step": 2179 + }, + { + "epoch": 0.34, + "learning_rate": 1.2548434047409098e-05, + "logits/chosen": -2.64390230178833, + "logits/rejected": -2.63911509513855, + "logps/chosen": -73.42752075195312, + "logps/rejected": -186.10617065429688, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.25119686126709, + "rewards/margins": 6.353652000427246, + "rewards/rejected": -8.604848861694336, + "step": 2180 + }, + { + "epoch": 0.34, + "learning_rate": 1.2547700606877951e-05, + "logits/chosen": -2.975609540939331, + "logits/rejected": -1.5610796213150024, + "logps/chosen": -482.0316162109375, + "logps/rejected": -292.4322509765625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2662270069122314, + "rewards/margins": 6.093630313873291, + "rewards/rejected": -7.359857559204102, + "step": 2181 + }, + { + "epoch": 0.34, + "learning_rate": 1.2546967166346803e-05, + "logits/chosen": -1.2912734746932983, + "logits/rejected": -3.1013243198394775, + "logps/chosen": -116.14619445800781, + "logps/rejected": -390.0306701660156, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.227603554725647, + "rewards/margins": 4.3047261238098145, + "rewards/rejected": -5.532329559326172, + "step": 2182 + }, + { + "epoch": 0.34, + "learning_rate": 1.2546233725815655e-05, + "logits/chosen": -3.0044262409210205, + "logits/rejected": -2.4459903240203857, + "logps/chosen": -318.44293212890625, + "logps/rejected": -259.57220458984375, + "loss": 3.7321, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.056763648986816, + "rewards/margins": -1.5775353908538818, + "rewards/rejected": -3.4792282581329346, + "step": 2183 + }, + { + "epoch": 0.34, + "learning_rate": 1.2545500285284507e-05, + "logits/chosen": -2.307788610458374, + "logits/rejected": -2.913019895553589, + "logps/chosen": -266.6955871582031, + "logps/rejected": -299.04473876953125, + "loss": 0.3834, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2054970264434814, + "rewards/margins": 4.6854047775268555, + "rewards/rejected": -7.890901565551758, + "step": 2184 + }, + { + "epoch": 0.34, + "learning_rate": 1.2544766844753359e-05, + "logits/chosen": -3.0426390171051025, + "logits/rejected": -2.6894173622131348, + "logps/chosen": -77.66322326660156, + "logps/rejected": -201.22088623046875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8673710227012634, + "rewards/margins": 7.744731903076172, + "rewards/rejected": -8.612102508544922, + "step": 2185 + }, + { + "epoch": 0.34, + "learning_rate": 1.254403340422221e-05, + "logits/chosen": -2.1108310222625732, + "logits/rejected": -3.0255684852600098, + "logps/chosen": -277.8777770996094, + "logps/rejected": -304.4989013671875, + "loss": 0.0379, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5617189407348633, + "rewards/margins": 3.6590113639831543, + "rewards/rejected": -6.220730304718018, + "step": 2186 + }, + { + "epoch": 0.34, + "learning_rate": 1.2543299963691063e-05, + "logits/chosen": -3.051081418991089, + "logits/rejected": -2.7828657627105713, + "logps/chosen": -433.295166015625, + "logps/rejected": -441.8471984863281, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5746917724609375, + "rewards/margins": 6.209920883178711, + "rewards/rejected": -6.784612655639648, + "step": 2187 + }, + { + "epoch": 0.34, + "learning_rate": 1.2542566523159916e-05, + "logits/chosen": -3.067981481552124, + "logits/rejected": -2.5570061206817627, + "logps/chosen": -286.22369384765625, + "logps/rejected": -290.1673278808594, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.719754934310913, + "rewards/margins": 4.750999450683594, + "rewards/rejected": -8.470754623413086, + "step": 2188 + }, + { + "epoch": 0.34, + "learning_rate": 1.2541833082628768e-05, + "logits/chosen": -2.593552589416504, + "logits/rejected": -2.4963130950927734, + "logps/chosen": -325.71002197265625, + "logps/rejected": -430.6966857910156, + "loss": 2.994, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.920328617095947, + "rewards/margins": 1.805694818496704, + "rewards/rejected": -6.726023197174072, + "step": 2189 + }, + { + "epoch": 0.34, + "learning_rate": 1.254109964209762e-05, + "logits/chosen": -1.9296602010726929, + "logits/rejected": -2.8792200088500977, + "logps/chosen": -165.90606689453125, + "logps/rejected": -289.89239501953125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1560258865356445, + "rewards/margins": 5.812198162078857, + "rewards/rejected": -7.968223571777344, + "step": 2190 + }, + { + "epoch": 0.34, + "learning_rate": 1.2540366201566472e-05, + "logits/chosen": -2.925462245941162, + "logits/rejected": -2.7415130138397217, + "logps/chosen": -417.9354553222656, + "logps/rejected": -414.71051025390625, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5764694213867188, + "rewards/margins": 4.020403861999512, + "rewards/rejected": -6.5968732833862305, + "step": 2191 + }, + { + "epoch": 0.34, + "learning_rate": 1.2539632761035324e-05, + "logits/chosen": -2.663240909576416, + "logits/rejected": -3.06648325920105, + "logps/chosen": -92.39877319335938, + "logps/rejected": -524.3847045898438, + "loss": 0.112, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.883558750152588, + "rewards/margins": 2.5025315284729004, + "rewards/rejected": -4.386090278625488, + "step": 2192 + }, + { + "epoch": 0.34, + "learning_rate": 1.2538899320504176e-05, + "logits/chosen": -2.6951961517333984, + "logits/rejected": -2.6879420280456543, + "logps/chosen": -228.45193481445312, + "logps/rejected": -341.1800231933594, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.906665563583374, + "rewards/margins": 6.25480318069458, + "rewards/rejected": -8.161468505859375, + "step": 2193 + }, + { + "epoch": 0.34, + "learning_rate": 1.2538165879973027e-05, + "logits/chosen": -2.7506117820739746, + "logits/rejected": -2.8403513431549072, + "logps/chosen": -444.4834899902344, + "logps/rejected": -500.8419494628906, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04605025798082352, + "rewards/margins": 7.80942440032959, + "rewards/rejected": -7.855474948883057, + "step": 2194 + }, + { + "epoch": 0.34, + "learning_rate": 1.253743243944188e-05, + "logits/chosen": -2.9578685760498047, + "logits/rejected": -2.5855088233947754, + "logps/chosen": -321.76080322265625, + "logps/rejected": -336.6800537109375, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6242126226425171, + "rewards/margins": 5.96279764175415, + "rewards/rejected": -6.587010383605957, + "step": 2195 + }, + { + "epoch": 0.34, + "learning_rate": 1.2536698998910731e-05, + "logits/chosen": -0.9648891091346741, + "logits/rejected": -2.8445980548858643, + "logps/chosen": -189.42677307128906, + "logps/rejected": -475.56634521484375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8655363917350769, + "rewards/margins": 7.6240925788879395, + "rewards/rejected": -8.489629745483398, + "step": 2196 + }, + { + "epoch": 0.34, + "learning_rate": 1.2535965558379585e-05, + "logits/chosen": -2.462778091430664, + "logits/rejected": -2.5301613807678223, + "logps/chosen": -210.81085205078125, + "logps/rejected": -191.83743286132812, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1831696033477783, + "rewards/margins": 6.8295488357543945, + "rewards/rejected": -8.012718200683594, + "step": 2197 + }, + { + "epoch": 0.34, + "learning_rate": 1.2535232117848437e-05, + "logits/chosen": -2.750670909881592, + "logits/rejected": -2.033416748046875, + "logps/chosen": -294.9457702636719, + "logps/rejected": -361.51080322265625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2607440948486328, + "rewards/margins": 8.842820167541504, + "rewards/rejected": -10.103564262390137, + "step": 2198 + }, + { + "epoch": 0.34, + "learning_rate": 1.2534498677317289e-05, + "logits/chosen": -3.236638307571411, + "logits/rejected": -2.955458641052246, + "logps/chosen": -155.38668823242188, + "logps/rejected": -306.97845458984375, + "loss": 1.9175, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.343998908996582, + "rewards/margins": 3.3413825035095215, + "rewards/rejected": -6.6853814125061035, + "step": 2199 + }, + { + "epoch": 0.34, + "learning_rate": 1.253376523678614e-05, + "logits/chosen": -2.5696864128112793, + "logits/rejected": -2.9716062545776367, + "logps/chosen": -352.58941650390625, + "logps/rejected": -391.75079345703125, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1648838520050049, + "rewards/margins": 7.605537414550781, + "rewards/rejected": -8.770421028137207, + "step": 2200 + }, + { + "epoch": 0.34, + "learning_rate": 1.2533031796254992e-05, + "logits/chosen": -2.820996046066284, + "logits/rejected": -3.024423122406006, + "logps/chosen": -43.328880310058594, + "logps/rejected": -97.32742309570312, + "loss": 0.0744, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9705073833465576, + "rewards/margins": 2.576697826385498, + "rewards/rejected": -4.547204971313477, + "step": 2201 + }, + { + "epoch": 0.34, + "learning_rate": 1.2532298355723844e-05, + "logits/chosen": -2.924907922744751, + "logits/rejected": -2.2558655738830566, + "logps/chosen": -326.23248291015625, + "logps/rejected": -192.67640686035156, + "loss": 3.0559, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.3893723487854, + "rewards/margins": -0.8339757919311523, + "rewards/rejected": -3.555396556854248, + "step": 2202 + }, + { + "epoch": 0.34, + "learning_rate": 1.2531564915192696e-05, + "logits/chosen": -1.252475380897522, + "logits/rejected": -2.4136204719543457, + "logps/chosen": -100.5460205078125, + "logps/rejected": -245.0442352294922, + "loss": 1.8157, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.234154224395752, + "rewards/margins": 2.275406837463379, + "rewards/rejected": -5.509561061859131, + "step": 2203 + }, + { + "epoch": 0.34, + "learning_rate": 1.2530831474661548e-05, + "logits/chosen": -2.434969902038574, + "logits/rejected": -3.08004093170166, + "logps/chosen": -249.83535766601562, + "logps/rejected": -340.67132568359375, + "loss": 0.043, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1188879013061523, + "rewards/margins": 4.324077606201172, + "rewards/rejected": -6.442965507507324, + "step": 2204 + }, + { + "epoch": 0.34, + "learning_rate": 1.25300980341304e-05, + "logits/chosen": -2.399203300476074, + "logits/rejected": -2.887416124343872, + "logps/chosen": -216.31161499023438, + "logps/rejected": -396.24658203125, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3063944578170776, + "rewards/margins": 6.178615570068359, + "rewards/rejected": -7.485010147094727, + "step": 2205 + }, + { + "epoch": 0.34, + "learning_rate": 1.2529364593599253e-05, + "logits/chosen": -2.6326022148132324, + "logits/rejected": -2.3443124294281006, + "logps/chosen": -298.9158020019531, + "logps/rejected": -369.95477294921875, + "loss": 2.8017, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.521754264831543, + "rewards/margins": -0.43295741081237793, + "rewards/rejected": -5.088797092437744, + "step": 2206 + }, + { + "epoch": 0.34, + "learning_rate": 1.2528631153068105e-05, + "logits/chosen": -2.704374074935913, + "logits/rejected": -2.9605677127838135, + "logps/chosen": -222.86383056640625, + "logps/rejected": -299.82159423828125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7957649230957031, + "rewards/margins": 7.356297492980957, + "rewards/rejected": -8.15206241607666, + "step": 2207 + }, + { + "epoch": 0.34, + "learning_rate": 1.2527897712536957e-05, + "logits/chosen": -0.8324291110038757, + "logits/rejected": -3.025627374649048, + "logps/chosen": -39.01555633544922, + "logps/rejected": -407.01434326171875, + "loss": 0.0329, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.096436619758606, + "rewards/margins": 10.678966522216797, + "rewards/rejected": -11.775403022766113, + "step": 2208 + }, + { + "epoch": 0.34, + "learning_rate": 1.2527164272005809e-05, + "logits/chosen": -1.4833571910858154, + "logits/rejected": -3.001638174057007, + "logps/chosen": -515.684814453125, + "logps/rejected": -657.9964599609375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24989622831344604, + "rewards/margins": 9.903448104858398, + "rewards/rejected": -10.153345108032227, + "step": 2209 + }, + { + "epoch": 0.34, + "learning_rate": 1.2526430831474661e-05, + "logits/chosen": -2.208519458770752, + "logits/rejected": -3.0960283279418945, + "logps/chosen": -148.1131591796875, + "logps/rejected": -407.84661865234375, + "loss": 2.4554, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.565726280212402, + "rewards/margins": -0.6791703701019287, + "rewards/rejected": -3.8865556716918945, + "step": 2210 + }, + { + "epoch": 0.34, + "learning_rate": 1.2525697390943513e-05, + "logits/chosen": -1.5866178274154663, + "logits/rejected": -2.6915628910064697, + "logps/chosen": -73.70411682128906, + "logps/rejected": -327.2744140625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2007297277450562, + "rewards/margins": 6.870987415313721, + "rewards/rejected": -8.071717262268066, + "step": 2211 + }, + { + "epoch": 0.34, + "learning_rate": 1.2524963950412365e-05, + "logits/chosen": -1.290708303451538, + "logits/rejected": -2.7523670196533203, + "logps/chosen": -165.70831298828125, + "logps/rejected": -482.4482727050781, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.171881914138794, + "rewards/margins": 8.726215362548828, + "rewards/rejected": -10.89809799194336, + "step": 2212 + }, + { + "epoch": 0.34, + "learning_rate": 1.2524230509881218e-05, + "logits/chosen": -1.5792869329452515, + "logits/rejected": -2.8055472373962402, + "logps/chosen": -122.82768249511719, + "logps/rejected": -296.8164367675781, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.383168935775757, + "rewards/margins": 5.19072151184082, + "rewards/rejected": -7.573890686035156, + "step": 2213 + }, + { + "epoch": 0.34, + "learning_rate": 1.252349706935007e-05, + "logits/chosen": -3.304450750350952, + "logits/rejected": -2.923727035522461, + "logps/chosen": -354.1412353515625, + "logps/rejected": -146.66079711914062, + "loss": 3.6658, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.068326473236084, + "rewards/margins": -0.3510119915008545, + "rewards/rejected": -4.71731424331665, + "step": 2214 + }, + { + "epoch": 0.34, + "learning_rate": 1.2522763628818924e-05, + "logits/chosen": -3.004624366760254, + "logits/rejected": -2.6649818420410156, + "logps/chosen": -242.63983154296875, + "logps/rejected": -266.2480773925781, + "loss": 3.01, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8432915210723877, + "rewards/margins": -0.10151052474975586, + "rewards/rejected": -3.741780996322632, + "step": 2215 + }, + { + "epoch": 0.34, + "learning_rate": 1.2522030188287776e-05, + "logits/chosen": -2.2726833820343018, + "logits/rejected": -3.0711004734039307, + "logps/chosen": -132.85972595214844, + "logps/rejected": -332.8501892089844, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7618774771690369, + "rewards/margins": 6.392432689666748, + "rewards/rejected": -7.15431022644043, + "step": 2216 + }, + { + "epoch": 0.34, + "learning_rate": 1.2521296747756627e-05, + "logits/chosen": -2.680114984512329, + "logits/rejected": -3.181593179702759, + "logps/chosen": -327.19036865234375, + "logps/rejected": -329.2346496582031, + "loss": 3.1601, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.025374412536621, + "rewards/margins": 0.47800278663635254, + "rewards/rejected": -4.5033769607543945, + "step": 2217 + }, + { + "epoch": 0.34, + "learning_rate": 1.252056330722548e-05, + "logits/chosen": -3.037463665008545, + "logits/rejected": -2.5117666721343994, + "logps/chosen": -447.62677001953125, + "logps/rejected": -425.24908447265625, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9870975017547607, + "rewards/margins": 5.482822895050049, + "rewards/rejected": -6.4699201583862305, + "step": 2218 + }, + { + "epoch": 0.35, + "learning_rate": 1.2519829866694331e-05, + "logits/chosen": -2.114304542541504, + "logits/rejected": -3.138962984085083, + "logps/chosen": -71.2953872680664, + "logps/rejected": -281.12408447265625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7289514541625977, + "rewards/margins": 6.838147163391113, + "rewards/rejected": -7.567098617553711, + "step": 2219 + }, + { + "epoch": 0.35, + "learning_rate": 1.2519096426163183e-05, + "logits/chosen": -2.7166836261749268, + "logits/rejected": -3.041064739227295, + "logps/chosen": -196.58242797851562, + "logps/rejected": -372.7613830566406, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4876937866210938, + "rewards/margins": 5.200238227844238, + "rewards/rejected": -6.687932014465332, + "step": 2220 + }, + { + "epoch": 0.35, + "learning_rate": 1.2518362985632035e-05, + "logits/chosen": -2.946995735168457, + "logits/rejected": -2.793592691421509, + "logps/chosen": -114.48348999023438, + "logps/rejected": -295.0128173828125, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1720932722091675, + "rewards/margins": 7.303854942321777, + "rewards/rejected": -8.475948333740234, + "step": 2221 + }, + { + "epoch": 0.35, + "learning_rate": 1.2517629545100887e-05, + "logits/chosen": -2.1012582778930664, + "logits/rejected": -3.032191514968872, + "logps/chosen": -193.21466064453125, + "logps/rejected": -223.56700134277344, + "loss": 2.2412, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1929895877838135, + "rewards/margins": 0.5146658420562744, + "rewards/rejected": -3.707655429840088, + "step": 2222 + }, + { + "epoch": 0.35, + "learning_rate": 1.251689610456974e-05, + "logits/chosen": -2.677908420562744, + "logits/rejected": -3.0864906311035156, + "logps/chosen": -69.12849426269531, + "logps/rejected": -337.7149658203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4094986021518707, + "rewards/margins": 11.133390426635742, + "rewards/rejected": -11.542888641357422, + "step": 2223 + }, + { + "epoch": 0.35, + "learning_rate": 1.2516162664038592e-05, + "logits/chosen": -2.468757152557373, + "logits/rejected": -0.8400607109069824, + "logps/chosen": -368.1441345214844, + "logps/rejected": -219.2407989501953, + "loss": 4.4812, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.139339923858643, + "rewards/margins": -2.108262300491333, + "rewards/rejected": -3.0310776233673096, + "step": 2224 + }, + { + "epoch": 0.35, + "learning_rate": 1.2515429223507444e-05, + "logits/chosen": -3.1167044639587402, + "logits/rejected": -2.3936681747436523, + "logps/chosen": -949.1934814453125, + "logps/rejected": -556.059814453125, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3602447509765625, + "rewards/margins": 4.725286960601807, + "rewards/rejected": -6.085531711578369, + "step": 2225 + }, + { + "epoch": 0.35, + "learning_rate": 1.2514695782976296e-05, + "logits/chosen": -2.4951281547546387, + "logits/rejected": -3.0383481979370117, + "logps/chosen": -169.97157287597656, + "logps/rejected": -115.31893920898438, + "loss": 3.3386, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9757981300354004, + "rewards/margins": -0.17324471473693848, + "rewards/rejected": -3.802553415298462, + "step": 2226 + }, + { + "epoch": 0.35, + "learning_rate": 1.2513962342445148e-05, + "logits/chosen": -2.898399591445923, + "logits/rejected": -3.0382728576660156, + "logps/chosen": -59.5867805480957, + "logps/rejected": -231.3982391357422, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.718071460723877, + "rewards/margins": 5.227655410766602, + "rewards/rejected": -6.9457268714904785, + "step": 2227 + }, + { + "epoch": 0.35, + "learning_rate": 1.2513228901914e-05, + "logits/chosen": -1.677778959274292, + "logits/rejected": -3.111464262008667, + "logps/chosen": -258.0661926269531, + "logps/rejected": -732.8023681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2138572931289673, + "rewards/margins": 12.806745529174805, + "rewards/rejected": -14.020602226257324, + "step": 2228 + }, + { + "epoch": 0.35, + "learning_rate": 1.2512495461382852e-05, + "logits/chosen": -2.8056440353393555, + "logits/rejected": -2.870046615600586, + "logps/chosen": -51.426719665527344, + "logps/rejected": -198.10589599609375, + "loss": 0.0253, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7109475135803223, + "rewards/margins": 3.871650457382202, + "rewards/rejected": -6.582597732543945, + "step": 2229 + }, + { + "epoch": 0.35, + "learning_rate": 1.2511762020851704e-05, + "logits/chosen": -2.2125041484832764, + "logits/rejected": -3.0675463676452637, + "logps/chosen": -466.23516845703125, + "logps/rejected": -519.750732421875, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4110405445098877, + "rewards/margins": 5.358576774597168, + "rewards/rejected": -6.769617557525635, + "step": 2230 + }, + { + "epoch": 0.35, + "learning_rate": 1.2511028580320555e-05, + "logits/chosen": -3.0289721488952637, + "logits/rejected": -2.5159733295440674, + "logps/chosen": -324.0318908691406, + "logps/rejected": -306.0363464355469, + "loss": 2.6953, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.738996744155884, + "rewards/margins": 0.7914202213287354, + "rewards/rejected": -4.530416965484619, + "step": 2231 + }, + { + "epoch": 0.35, + "learning_rate": 1.2510295139789409e-05, + "logits/chosen": -3.260200023651123, + "logits/rejected": -3.234483003616333, + "logps/chosen": -111.27113342285156, + "logps/rejected": -143.31561279296875, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5218077301979065, + "rewards/margins": 4.905984878540039, + "rewards/rejected": -5.427792549133301, + "step": 2232 + }, + { + "epoch": 0.35, + "learning_rate": 1.2509561699258261e-05, + "logits/chosen": -2.8061954975128174, + "logits/rejected": -3.081157684326172, + "logps/chosen": -684.4973754882812, + "logps/rejected": -682.0804443359375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8702659606933594, + "rewards/margins": 6.946866989135742, + "rewards/rejected": -7.817132949829102, + "step": 2233 + }, + { + "epoch": 0.35, + "learning_rate": 1.2508828258727113e-05, + "logits/chosen": -2.2465457916259766, + "logits/rejected": -2.499242067337036, + "logps/chosen": -485.2380065917969, + "logps/rejected": -425.3617248535156, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0756120681762695, + "rewards/margins": 7.544032096862793, + "rewards/rejected": -9.619644165039062, + "step": 2234 + }, + { + "epoch": 0.35, + "learning_rate": 1.2508094818195965e-05, + "logits/chosen": -3.0907833576202393, + "logits/rejected": -3.2325732707977295, + "logps/chosen": -49.30876541137695, + "logps/rejected": -130.95458984375, + "loss": 0.098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9396175146102905, + "rewards/margins": 2.6504955291748047, + "rewards/rejected": -4.590112686157227, + "step": 2235 + }, + { + "epoch": 0.35, + "learning_rate": 1.2507361377664817e-05, + "logits/chosen": -2.660158157348633, + "logits/rejected": -3.233550786972046, + "logps/chosen": -221.09689331054688, + "logps/rejected": -303.8627014160156, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.907580554485321, + "rewards/margins": 8.104230880737305, + "rewards/rejected": -7.196650505065918, + "step": 2236 + }, + { + "epoch": 0.35, + "learning_rate": 1.2506627937133668e-05, + "logits/chosen": -2.0126585960388184, + "logits/rejected": -3.03316068649292, + "logps/chosen": -81.29557037353516, + "logps/rejected": -240.1904754638672, + "loss": 0.0315, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8559763431549072, + "rewards/margins": 4.205012321472168, + "rewards/rejected": -6.060988426208496, + "step": 2237 + }, + { + "epoch": 0.35, + "learning_rate": 1.250589449660252e-05, + "logits/chosen": -3.1668128967285156, + "logits/rejected": -2.4142560958862305, + "logps/chosen": -405.0176086425781, + "logps/rejected": -178.10638427734375, + "loss": 6.1514, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.8913092613220215, + "rewards/margins": -6.130312919616699, + "rewards/rejected": -1.7609965801239014, + "step": 2238 + }, + { + "epoch": 0.35, + "learning_rate": 1.2505161056071372e-05, + "logits/chosen": -2.852778434753418, + "logits/rejected": -3.019719362258911, + "logps/chosen": -70.03699493408203, + "logps/rejected": -201.74078369140625, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8969275951385498, + "rewards/margins": 5.700563907623291, + "rewards/rejected": -7.59749174118042, + "step": 2239 + }, + { + "epoch": 0.35, + "learning_rate": 1.2504427615540224e-05, + "logits/chosen": -1.9072898626327515, + "logits/rejected": -2.591116428375244, + "logps/chosen": -226.01771545410156, + "logps/rejected": -242.08729553222656, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8649311065673828, + "rewards/margins": 5.975314617156982, + "rewards/rejected": -6.840245723724365, + "step": 2240 + }, + { + "epoch": 0.35, + "learning_rate": 1.2503694175009078e-05, + "logits/chosen": -2.621673345565796, + "logits/rejected": -3.1701266765594482, + "logps/chosen": -266.1114501953125, + "logps/rejected": -156.68167114257812, + "loss": 0.1209, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6297545433044434, + "rewards/margins": 2.052608013153076, + "rewards/rejected": -3.6823623180389404, + "step": 2241 + }, + { + "epoch": 0.35, + "learning_rate": 1.250296073447793e-05, + "logits/chosen": -1.5063995122909546, + "logits/rejected": -2.8925492763519287, + "logps/chosen": -154.63551330566406, + "logps/rejected": -261.05267333984375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8970695734024048, + "rewards/margins": 7.486894607543945, + "rewards/rejected": -8.383964538574219, + "step": 2242 + }, + { + "epoch": 0.35, + "learning_rate": 1.2502227293946781e-05, + "logits/chosen": -3.0313289165496826, + "logits/rejected": -3.2260043621063232, + "logps/chosen": -61.29955291748047, + "logps/rejected": -267.03460693359375, + "loss": 0.0296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32066380977630615, + "rewards/margins": 4.682043075561523, + "rewards/rejected": -5.002707004547119, + "step": 2243 + }, + { + "epoch": 0.35, + "learning_rate": 1.2501493853415633e-05, + "logits/chosen": -2.8205995559692383, + "logits/rejected": -2.9594905376434326, + "logps/chosen": -156.51419067382812, + "logps/rejected": -242.3108673095703, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2683041095733643, + "rewards/margins": 7.627311706542969, + "rewards/rejected": -9.895615577697754, + "step": 2244 + }, + { + "epoch": 0.35, + "learning_rate": 1.2500760412884485e-05, + "logits/chosen": -2.5297770500183105, + "logits/rejected": -2.920607566833496, + "logps/chosen": -367.86419677734375, + "logps/rejected": -314.4084777832031, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33490753173828125, + "rewards/margins": 4.920544624328613, + "rewards/rejected": -5.2554521560668945, + "step": 2245 + }, + { + "epoch": 0.35, + "learning_rate": 1.2500026972353337e-05, + "logits/chosen": -2.2735986709594727, + "logits/rejected": -3.039377212524414, + "logps/chosen": -335.8077392578125, + "logps/rejected": -371.72088623046875, + "loss": 1.9407, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.384289741516113, + "rewards/margins": 1.3918125629425049, + "rewards/rejected": -5.776102066040039, + "step": 2246 + }, + { + "epoch": 0.35, + "learning_rate": 1.249929353182219e-05, + "logits/chosen": -2.824394702911377, + "logits/rejected": -2.863966464996338, + "logps/chosen": -306.9164123535156, + "logps/rejected": -491.8128662109375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8525566458702087, + "rewards/margins": 10.30235481262207, + "rewards/rejected": -11.154911994934082, + "step": 2247 + }, + { + "epoch": 0.35, + "learning_rate": 1.2498560091291042e-05, + "logits/chosen": -2.7564480304718018, + "logits/rejected": -3.2718260288238525, + "logps/chosen": -123.62982177734375, + "logps/rejected": -372.5573425292969, + "loss": 0.0392, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.690879762172699, + "rewards/margins": 4.669536113739014, + "rewards/rejected": -5.360415935516357, + "step": 2248 + }, + { + "epoch": 0.35, + "learning_rate": 1.2497826650759894e-05, + "logits/chosen": -2.713834285736084, + "logits/rejected": -3.1415855884552, + "logps/chosen": -102.86155700683594, + "logps/rejected": -452.3813781738281, + "loss": 0.0369, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.470081090927124, + "rewards/margins": 7.979066848754883, + "rewards/rejected": -10.449148178100586, + "step": 2249 + }, + { + "epoch": 0.35, + "learning_rate": 1.2497093210228748e-05, + "logits/chosen": -2.423039674758911, + "logits/rejected": -3.03680419921875, + "logps/chosen": -387.91607666015625, + "logps/rejected": -525.7229614257812, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8043079376220703, + "rewards/margins": 5.060691833496094, + "rewards/rejected": -5.864999771118164, + "step": 2250 + }, + { + "epoch": 0.35, + "learning_rate": 1.24963597696976e-05, + "logits/chosen": -2.7374985218048096, + "logits/rejected": -3.1477198600769043, + "logps/chosen": -129.93524169921875, + "logps/rejected": -174.34512329101562, + "loss": 3.2422, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.033562183380127, + "rewards/margins": -0.5326638221740723, + "rewards/rejected": -3.500898599624634, + "step": 2251 + }, + { + "epoch": 0.35, + "learning_rate": 1.2495626329166452e-05, + "logits/chosen": -2.610255718231201, + "logits/rejected": -3.2712059020996094, + "logps/chosen": -120.41946411132812, + "logps/rejected": -274.5404052734375, + "loss": 1.8517, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.505117654800415, + "rewards/margins": 2.216702938079834, + "rewards/rejected": -4.72182035446167, + "step": 2252 + }, + { + "epoch": 0.35, + "learning_rate": 1.2494892888635304e-05, + "logits/chosen": -2.660172462463379, + "logits/rejected": -2.89945125579834, + "logps/chosen": -90.50059509277344, + "logps/rejected": -156.82650756835938, + "loss": 0.038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.003517985343933, + "rewards/margins": 3.589505195617676, + "rewards/rejected": -4.593023300170898, + "step": 2253 + }, + { + "epoch": 0.35, + "learning_rate": 1.2494159448104155e-05, + "logits/chosen": -1.466801404953003, + "logits/rejected": -2.7729175090789795, + "logps/chosen": -58.70976257324219, + "logps/rejected": -367.63653564453125, + "loss": 0.0273, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5997942686080933, + "rewards/margins": 6.785854816436768, + "rewards/rejected": -8.385648727416992, + "step": 2254 + }, + { + "epoch": 0.35, + "learning_rate": 1.2493426007573007e-05, + "logits/chosen": -2.474809169769287, + "logits/rejected": -2.892528772354126, + "logps/chosen": -178.61471557617188, + "logps/rejected": -521.4627685546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5756797790527344, + "rewards/margins": 9.458097457885742, + "rewards/rejected": -10.033777236938477, + "step": 2255 + }, + { + "epoch": 0.35, + "learning_rate": 1.249269256704186e-05, + "logits/chosen": -1.4721119403839111, + "logits/rejected": -2.762444496154785, + "logps/chosen": -92.24131774902344, + "logps/rejected": -382.2872314453125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.904366374015808, + "rewards/margins": 9.492255210876465, + "rewards/rejected": -11.396621704101562, + "step": 2256 + }, + { + "epoch": 0.35, + "learning_rate": 1.2491959126510711e-05, + "logits/chosen": -3.047696828842163, + "logits/rejected": -2.1818394660949707, + "logps/chosen": -534.299072265625, + "logps/rejected": -241.44204711914062, + "loss": 1.0867, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1260712146759033, + "rewards/margins": 2.4624319076538086, + "rewards/rejected": -5.588503360748291, + "step": 2257 + }, + { + "epoch": 0.35, + "learning_rate": 1.2491225685979563e-05, + "logits/chosen": -3.219615936279297, + "logits/rejected": -3.217832326889038, + "logps/chosen": -317.83660888671875, + "logps/rejected": -171.64938354492188, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10264718532562256, + "rewards/margins": 6.214985370635986, + "rewards/rejected": -6.112338066101074, + "step": 2258 + }, + { + "epoch": 0.35, + "learning_rate": 1.2490492245448417e-05, + "logits/chosen": -2.983663320541382, + "logits/rejected": -1.275945782661438, + "logps/chosen": -251.376220703125, + "logps/rejected": -147.80111694335938, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.41216242313385, + "rewards/margins": 5.490598678588867, + "rewards/rejected": -6.902761459350586, + "step": 2259 + }, + { + "epoch": 0.35, + "learning_rate": 1.2489758804917268e-05, + "logits/chosen": -1.2115650177001953, + "logits/rejected": -2.841566801071167, + "logps/chosen": -28.1575870513916, + "logps/rejected": -428.1305236816406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4586678743362427, + "rewards/margins": 10.878670692443848, + "rewards/rejected": -11.3373384475708, + "step": 2260 + }, + { + "epoch": 0.35, + "learning_rate": 1.248902536438612e-05, + "logits/chosen": -3.0376672744750977, + "logits/rejected": -2.3227436542510986, + "logps/chosen": -237.9045867919922, + "logps/rejected": -296.0609130859375, + "loss": 0.1253, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.822131395339966, + "rewards/margins": 3.1679329872131348, + "rewards/rejected": -5.99006462097168, + "step": 2261 + }, + { + "epoch": 0.35, + "learning_rate": 1.2488291923854972e-05, + "logits/chosen": -2.6085946559906006, + "logits/rejected": -3.095789909362793, + "logps/chosen": -293.16998291015625, + "logps/rejected": -480.28759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07439880073070526, + "rewards/margins": 10.415874481201172, + "rewards/rejected": -10.490272521972656, + "step": 2262 + }, + { + "epoch": 0.35, + "learning_rate": 1.2487558483323824e-05, + "logits/chosen": -3.02052903175354, + "logits/rejected": -2.650552749633789, + "logps/chosen": -293.45361328125, + "logps/rejected": -239.63613891601562, + "loss": 0.2778, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3374130725860596, + "rewards/margins": 2.4064736366271973, + "rewards/rejected": -4.743886470794678, + "step": 2263 + }, + { + "epoch": 0.35, + "learning_rate": 1.2486825042792676e-05, + "logits/chosen": -2.20294189453125, + "logits/rejected": -3.044522285461426, + "logps/chosen": -79.45417022705078, + "logps/rejected": -367.8377685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1859902143478394, + "rewards/margins": 11.20930290222168, + "rewards/rejected": -12.395294189453125, + "step": 2264 + }, + { + "epoch": 0.35, + "learning_rate": 1.2486091602261528e-05, + "logits/chosen": -2.2137136459350586, + "logits/rejected": -2.6579396724700928, + "logps/chosen": -133.0003662109375, + "logps/rejected": -351.7707214355469, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6197166442871094, + "rewards/margins": 9.04485034942627, + "rewards/rejected": -10.664567947387695, + "step": 2265 + }, + { + "epoch": 0.35, + "learning_rate": 1.248535816173038e-05, + "logits/chosen": -1.3626649379730225, + "logits/rejected": -2.843123435974121, + "logps/chosen": -100.97272491455078, + "logps/rejected": -456.2353515625, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.12213397026062, + "rewards/margins": 9.206884384155273, + "rewards/rejected": -11.329018592834473, + "step": 2266 + }, + { + "epoch": 0.35, + "learning_rate": 1.2484624721199232e-05, + "logits/chosen": -2.4473891258239746, + "logits/rejected": -2.746852397918701, + "logps/chosen": -226.74940490722656, + "logps/rejected": -276.7601013183594, + "loss": 2.4412, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8861382007598877, + "rewards/margins": 1.5798978805541992, + "rewards/rejected": -5.466036319732666, + "step": 2267 + }, + { + "epoch": 0.35, + "learning_rate": 1.2483891280668085e-05, + "logits/chosen": -3.0712711811065674, + "logits/rejected": -2.855848550796509, + "logps/chosen": -165.37384033203125, + "logps/rejected": -229.73361206054688, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2631175220012665, + "rewards/margins": 8.02341079711914, + "rewards/rejected": -8.286528587341309, + "step": 2268 + }, + { + "epoch": 0.35, + "learning_rate": 1.2483157840136937e-05, + "logits/chosen": -2.898535966873169, + "logits/rejected": -2.5517444610595703, + "logps/chosen": -396.50482177734375, + "logps/rejected": -379.16229248046875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3482766151428223, + "rewards/margins": 7.867376327514648, + "rewards/rejected": -9.215652465820312, + "step": 2269 + }, + { + "epoch": 0.35, + "learning_rate": 1.2482424399605789e-05, + "logits/chosen": -2.8508126735687256, + "logits/rejected": -2.2024292945861816, + "logps/chosen": -271.8451843261719, + "logps/rejected": -195.12538146972656, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6890024542808533, + "rewards/margins": 7.458059310913086, + "rewards/rejected": -8.147062301635742, + "step": 2270 + }, + { + "epoch": 0.35, + "learning_rate": 1.248169095907464e-05, + "logits/chosen": -3.071204423904419, + "logits/rejected": -2.841843843460083, + "logps/chosen": -73.50834655761719, + "logps/rejected": -135.41555786132812, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0363080501556396, + "rewards/margins": 6.782293319702148, + "rewards/rejected": -7.818601608276367, + "step": 2271 + }, + { + "epoch": 0.35, + "learning_rate": 1.2480957518543493e-05, + "logits/chosen": -2.6535444259643555, + "logits/rejected": -2.9715752601623535, + "logps/chosen": -78.66996765136719, + "logps/rejected": -449.61474609375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9657193422317505, + "rewards/margins": 9.180957794189453, + "rewards/rejected": -11.146677017211914, + "step": 2272 + }, + { + "epoch": 0.35, + "learning_rate": 1.2480224078012345e-05, + "logits/chosen": -3.2745914459228516, + "logits/rejected": -3.2461962699890137, + "logps/chosen": -72.49690246582031, + "logps/rejected": -108.9852066040039, + "loss": 0.2645, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2419562339782715, + "rewards/margins": 1.2830700874328613, + "rewards/rejected": -4.525026321411133, + "step": 2273 + }, + { + "epoch": 0.35, + "learning_rate": 1.2479490637481196e-05, + "logits/chosen": -0.4828855097293854, + "logits/rejected": -2.9119577407836914, + "logps/chosen": -24.7083797454834, + "logps/rejected": -308.9490966796875, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6630116701126099, + "rewards/margins": 3.6422200202941895, + "rewards/rejected": -5.30523157119751, + "step": 2274 + }, + { + "epoch": 0.35, + "learning_rate": 1.2478757196950048e-05, + "logits/chosen": -2.9557807445526123, + "logits/rejected": -1.8043583631515503, + "logps/chosen": -421.22186279296875, + "logps/rejected": -278.6374206542969, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7359512448310852, + "rewards/margins": 5.353631019592285, + "rewards/rejected": -6.089582443237305, + "step": 2275 + }, + { + "epoch": 0.35, + "learning_rate": 1.24780237564189e-05, + "logits/chosen": -3.0286803245544434, + "logits/rejected": -2.4800543785095215, + "logps/chosen": -187.14306640625, + "logps/rejected": -194.41815185546875, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3590534925460815, + "rewards/margins": 4.332277297973633, + "rewards/rejected": -5.691330909729004, + "step": 2276 + }, + { + "epoch": 0.35, + "learning_rate": 1.2477290315887754e-05, + "logits/chosen": -1.2924498319625854, + "logits/rejected": -1.848616123199463, + "logps/chosen": -33.423282623291016, + "logps/rejected": -178.3167266845703, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5167062282562256, + "rewards/margins": 6.231040954589844, + "rewards/rejected": -7.747747421264648, + "step": 2277 + }, + { + "epoch": 0.35, + "learning_rate": 1.2476556875356606e-05, + "logits/chosen": -2.9274613857269287, + "logits/rejected": -3.1522574424743652, + "logps/chosen": -288.3782043457031, + "logps/rejected": -304.7103271484375, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0899481773376465, + "rewards/margins": 5.256050109863281, + "rewards/rejected": -6.3459978103637695, + "step": 2278 + }, + { + "epoch": 0.35, + "learning_rate": 1.2475823434825457e-05, + "logits/chosen": -3.013922691345215, + "logits/rejected": -2.7911105155944824, + "logps/chosen": -234.761962890625, + "logps/rejected": -318.834228515625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4885247945785522, + "rewards/margins": 5.558110237121582, + "rewards/rejected": -7.046635627746582, + "step": 2279 + }, + { + "epoch": 0.35, + "learning_rate": 1.247508999429431e-05, + "logits/chosen": -2.8018200397491455, + "logits/rejected": -3.1429059505462646, + "logps/chosen": -657.1937255859375, + "logps/rejected": -176.75729370117188, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.642605781555176, + "rewards/margins": 4.438258171081543, + "rewards/rejected": -7.080863952636719, + "step": 2280 + }, + { + "epoch": 0.35, + "learning_rate": 1.2474356553763161e-05, + "logits/chosen": -2.1095075607299805, + "logits/rejected": -2.9428741931915283, + "logps/chosen": -121.97811889648438, + "logps/rejected": -314.7051086425781, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6137025356292725, + "rewards/margins": 5.852556228637695, + "rewards/rejected": -7.466258525848389, + "step": 2281 + }, + { + "epoch": 0.35, + "learning_rate": 1.2473623113232015e-05, + "logits/chosen": -3.132366895675659, + "logits/rejected": -3.061821460723877, + "logps/chosen": -200.3068084716797, + "logps/rejected": -299.4180603027344, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7674102783203125, + "rewards/margins": 7.889797210693359, + "rewards/rejected": -8.657207489013672, + "step": 2282 + }, + { + "epoch": 0.36, + "learning_rate": 1.2472889672700867e-05, + "logits/chosen": -3.1227164268493652, + "logits/rejected": -2.347456455230713, + "logps/chosen": -746.3847045898438, + "logps/rejected": -468.1571044921875, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.000532865524292, + "rewards/margins": 4.160549640655518, + "rewards/rejected": -6.1610822677612305, + "step": 2283 + }, + { + "epoch": 0.36, + "learning_rate": 1.2472156232169719e-05, + "logits/chosen": -0.84183269739151, + "logits/rejected": -2.088259696960449, + "logps/chosen": -172.0576934814453, + "logps/rejected": -458.60003662109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.069042205810547, + "rewards/margins": 7.497988700866699, + "rewards/rejected": -9.56702995300293, + "step": 2284 + }, + { + "epoch": 0.36, + "learning_rate": 1.247142279163857e-05, + "logits/chosen": -2.189188003540039, + "logits/rejected": -2.5527806282043457, + "logps/chosen": -92.4044418334961, + "logps/rejected": -216.60580444335938, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.168684959411621, + "rewards/margins": 5.0524702072143555, + "rewards/rejected": -8.221155166625977, + "step": 2285 + }, + { + "epoch": 0.36, + "learning_rate": 1.2470689351107424e-05, + "logits/chosen": -1.588423490524292, + "logits/rejected": -2.586224317550659, + "logps/chosen": -140.10850524902344, + "logps/rejected": -564.6386108398438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2610985040664673, + "rewards/margins": 14.126922607421875, + "rewards/rejected": -15.388020515441895, + "step": 2286 + }, + { + "epoch": 0.36, + "learning_rate": 1.2469955910576276e-05, + "logits/chosen": -3.048489570617676, + "logits/rejected": -1.9862810373306274, + "logps/chosen": -387.04852294921875, + "logps/rejected": -294.9540710449219, + "loss": 3.5439, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.69542121887207, + "rewards/margins": -3.5124313831329346, + "rewards/rejected": -2.1829895973205566, + "step": 2287 + }, + { + "epoch": 0.36, + "learning_rate": 1.2469222470045128e-05, + "logits/chosen": -2.6997201442718506, + "logits/rejected": -3.116917610168457, + "logps/chosen": -18.764436721801758, + "logps/rejected": -225.69407653808594, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9887062311172485, + "rewards/margins": 7.701341152191162, + "rewards/rejected": -8.690047264099121, + "step": 2288 + }, + { + "epoch": 0.36, + "learning_rate": 1.246848902951398e-05, + "logits/chosen": -2.451580047607422, + "logits/rejected": -2.72204852104187, + "logps/chosen": -166.80877685546875, + "logps/rejected": -483.13836669921875, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9743660092353821, + "rewards/margins": 9.481886863708496, + "rewards/rejected": -10.456253051757812, + "step": 2289 + }, + { + "epoch": 0.36, + "learning_rate": 1.2467755588982832e-05, + "logits/chosen": -2.7132396697998047, + "logits/rejected": -3.074324369430542, + "logps/chosen": -45.113243103027344, + "logps/rejected": -223.4329833984375, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.530991792678833, + "rewards/margins": 6.858030319213867, + "rewards/rejected": -8.389022827148438, + "step": 2290 + }, + { + "epoch": 0.36, + "learning_rate": 1.2467022148451683e-05, + "logits/chosen": -2.2277493476867676, + "logits/rejected": -3.0199668407440186, + "logps/chosen": -338.33428955078125, + "logps/rejected": -322.15130615234375, + "loss": 4.3956, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.936590671539307, + "rewards/margins": -1.5305507183074951, + "rewards/rejected": -3.4060394763946533, + "step": 2291 + }, + { + "epoch": 0.36, + "learning_rate": 1.2466288707920535e-05, + "logits/chosen": -2.808302402496338, + "logits/rejected": -3.345797538757324, + "logps/chosen": -81.55902099609375, + "logps/rejected": -273.4667663574219, + "loss": 0.028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6045559644699097, + "rewards/margins": 4.315727710723877, + "rewards/rejected": -5.920283794403076, + "step": 2292 + }, + { + "epoch": 0.36, + "learning_rate": 1.2465555267389387e-05, + "logits/chosen": -3.1886250972747803, + "logits/rejected": -2.5292041301727295, + "logps/chosen": -337.1267395019531, + "logps/rejected": -284.44036865234375, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3032623529434204, + "rewards/margins": 6.254313945770264, + "rewards/rejected": -7.5575761795043945, + "step": 2293 + }, + { + "epoch": 0.36, + "learning_rate": 1.2464821826858239e-05, + "logits/chosen": -1.8356291055679321, + "logits/rejected": -2.6883082389831543, + "logps/chosen": -32.97294998168945, + "logps/rejected": -331.3649597167969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.105836033821106, + "rewards/margins": 10.263792037963867, + "rewards/rejected": -11.36962890625, + "step": 2294 + }, + { + "epoch": 0.36, + "learning_rate": 1.2464088386327093e-05, + "logits/chosen": -3.153765916824341, + "logits/rejected": -3.192101001739502, + "logps/chosen": -90.46476745605469, + "logps/rejected": -183.48118591308594, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1445976495742798, + "rewards/margins": 7.012617111206055, + "rewards/rejected": -8.157214164733887, + "step": 2295 + }, + { + "epoch": 0.36, + "learning_rate": 1.2463354945795944e-05, + "logits/chosen": -2.392174243927002, + "logits/rejected": -2.9986350536346436, + "logps/chosen": -133.3641357421875, + "logps/rejected": -400.17315673828125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5709511041641235, + "rewards/margins": 6.137401580810547, + "rewards/rejected": -6.708353042602539, + "step": 2296 + }, + { + "epoch": 0.36, + "learning_rate": 1.2462621505264796e-05, + "logits/chosen": -1.2306517362594604, + "logits/rejected": -1.8880099058151245, + "logps/chosen": -139.3606719970703, + "logps/rejected": -412.63525390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6280345916748047, + "rewards/margins": 9.02393913269043, + "rewards/rejected": -10.651973724365234, + "step": 2297 + }, + { + "epoch": 0.36, + "learning_rate": 1.2461888064733648e-05, + "logits/chosen": -3.2121758460998535, + "logits/rejected": -3.0209619998931885, + "logps/chosen": -352.5372619628906, + "logps/rejected": -405.10736083984375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9612060785293579, + "rewards/margins": 8.031062126159668, + "rewards/rejected": -8.992268562316895, + "step": 2298 + }, + { + "epoch": 0.36, + "learning_rate": 1.24611546242025e-05, + "logits/chosen": -2.8116366863250732, + "logits/rejected": -3.0195441246032715, + "logps/chosen": -74.65528869628906, + "logps/rejected": -161.9013671875, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.043975830078125, + "rewards/margins": 4.264908790588379, + "rewards/rejected": -6.308884620666504, + "step": 2299 + }, + { + "epoch": 0.36, + "learning_rate": 1.2460421183671352e-05, + "logits/chosen": -2.3431785106658936, + "logits/rejected": -2.9751017093658447, + "logps/chosen": -347.904052734375, + "logps/rejected": -401.6926574707031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7113044857978821, + "rewards/margins": 9.566112518310547, + "rewards/rejected": -8.85480785369873, + "step": 2300 + }, + { + "epoch": 0.36, + "learning_rate": 1.2459687743140204e-05, + "logits/chosen": -1.4352107048034668, + "logits/rejected": -2.809931755065918, + "logps/chosen": -114.7621841430664, + "logps/rejected": -406.32763671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.062382459640503, + "rewards/margins": 10.865084648132324, + "rewards/rejected": -11.927467346191406, + "step": 2301 + }, + { + "epoch": 0.36, + "learning_rate": 1.2458954302609056e-05, + "logits/chosen": -2.7470786571502686, + "logits/rejected": -3.009845733642578, + "logps/chosen": -166.81484985351562, + "logps/rejected": -179.48277282714844, + "loss": 3.3393, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.037842273712158, + "rewards/margins": -0.2968628406524658, + "rewards/rejected": -4.740979194641113, + "step": 2302 + }, + { + "epoch": 0.36, + "learning_rate": 1.245822086207791e-05, + "logits/chosen": -2.5508477687835693, + "logits/rejected": -2.9216525554656982, + "logps/chosen": -70.6085205078125, + "logps/rejected": -422.73492431640625, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5832128524780273, + "rewards/margins": 6.452463150024414, + "rewards/rejected": -8.035676002502441, + "step": 2303 + }, + { + "epoch": 0.36, + "learning_rate": 1.2457487421546761e-05, + "logits/chosen": -2.830152988433838, + "logits/rejected": -2.4841842651367188, + "logps/chosen": -340.68878173828125, + "logps/rejected": -160.02053833007812, + "loss": 9.1166, + "rewards/accuracies": 0.0, + "rewards/chosen": -10.906641006469727, + "rewards/margins": -9.116329193115234, + "rewards/rejected": -1.7903122901916504, + "step": 2304 + }, + { + "epoch": 0.36, + "learning_rate": 1.2456753981015613e-05, + "logits/chosen": -1.7910975217819214, + "logits/rejected": -3.0690817832946777, + "logps/chosen": -85.9500732421875, + "logps/rejected": -394.1694030761719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7489525079727173, + "rewards/margins": 9.110649108886719, + "rewards/rejected": -9.859601974487305, + "step": 2305 + }, + { + "epoch": 0.36, + "learning_rate": 1.2456020540484465e-05, + "logits/chosen": -2.9128341674804688, + "logits/rejected": -2.8861594200134277, + "logps/chosen": -243.31442260742188, + "logps/rejected": -296.51202392578125, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6704803705215454, + "rewards/margins": 5.390031814575195, + "rewards/rejected": -6.060511589050293, + "step": 2306 + }, + { + "epoch": 0.36, + "learning_rate": 1.2455287099953317e-05, + "logits/chosen": -3.207427978515625, + "logits/rejected": -2.450225353240967, + "logps/chosen": -685.8550415039062, + "logps/rejected": -488.97711181640625, + "loss": 7.4045, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.837928771972656, + "rewards/margins": -7.401961326599121, + "rewards/rejected": -0.4359680116176605, + "step": 2307 + }, + { + "epoch": 0.36, + "learning_rate": 1.2454553659422169e-05, + "logits/chosen": -2.1906187534332275, + "logits/rejected": -3.0249569416046143, + "logps/chosen": -80.99609375, + "logps/rejected": -327.1406555175781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8792133331298828, + "rewards/margins": 9.385225296020508, + "rewards/rejected": -10.26443862915039, + "step": 2308 + }, + { + "epoch": 0.36, + "learning_rate": 1.245382021889102e-05, + "logits/chosen": -2.6910500526428223, + "logits/rejected": -2.968384027481079, + "logps/chosen": -106.78717041015625, + "logps/rejected": -172.96316528320312, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3332104682922363, + "rewards/margins": 4.098259925842285, + "rewards/rejected": -6.431469917297363, + "step": 2309 + }, + { + "epoch": 0.36, + "learning_rate": 1.2453086778359872e-05, + "logits/chosen": -1.5927553176879883, + "logits/rejected": -2.699659824371338, + "logps/chosen": -168.41079711914062, + "logps/rejected": -391.8023681640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.094886064529419, + "rewards/margins": 9.147622108459473, + "rewards/rejected": -10.242507934570312, + "step": 2310 + }, + { + "epoch": 0.36, + "learning_rate": 1.2452353337828724e-05, + "logits/chosen": -2.965627670288086, + "logits/rejected": -2.437148332595825, + "logps/chosen": -438.5943298339844, + "logps/rejected": -543.6522216796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5128661394119263, + "rewards/margins": 8.920167922973633, + "rewards/rejected": -10.43303394317627, + "step": 2311 + }, + { + "epoch": 0.36, + "learning_rate": 1.2451619897297578e-05, + "logits/chosen": -2.870924949645996, + "logits/rejected": -3.1268153190612793, + "logps/chosen": -69.56916809082031, + "logps/rejected": -207.1565399169922, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4605367183685303, + "rewards/margins": 7.335618019104004, + "rewards/rejected": -8.796154975891113, + "step": 2312 + }, + { + "epoch": 0.36, + "learning_rate": 1.245088645676643e-05, + "logits/chosen": -0.9376654624938965, + "logits/rejected": -2.785170316696167, + "logps/chosen": -24.60996437072754, + "logps/rejected": -383.1368713378906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8180705308914185, + "rewards/margins": 9.716756820678711, + "rewards/rejected": -10.53482723236084, + "step": 2313 + }, + { + "epoch": 0.36, + "learning_rate": 1.2450153016235282e-05, + "logits/chosen": -2.7951819896698, + "logits/rejected": -2.873753547668457, + "logps/chosen": -396.87298583984375, + "logps/rejected": -410.45367431640625, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6406333446502686, + "rewards/margins": 4.7039594650268555, + "rewards/rejected": -8.344593048095703, + "step": 2314 + }, + { + "epoch": 0.36, + "learning_rate": 1.2449419575704134e-05, + "logits/chosen": -3.064711093902588, + "logits/rejected": -1.317633867263794, + "logps/chosen": -248.63052368164062, + "logps/rejected": -81.59115600585938, + "loss": 4.519, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.601508617401123, + "rewards/margins": -0.6732344627380371, + "rewards/rejected": -4.928274154663086, + "step": 2315 + }, + { + "epoch": 0.36, + "learning_rate": 1.2448686135172987e-05, + "logits/chosen": -2.986813545227051, + "logits/rejected": -2.523048162460327, + "logps/chosen": -144.51760864257812, + "logps/rejected": -277.53863525390625, + "loss": 3.3308, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.155410289764404, + "rewards/margins": 2.112668991088867, + "rewards/rejected": -7.2680792808532715, + "step": 2316 + }, + { + "epoch": 0.36, + "learning_rate": 1.2447952694641839e-05, + "logits/chosen": -2.9206039905548096, + "logits/rejected": -2.661949396133423, + "logps/chosen": -96.56976318359375, + "logps/rejected": -222.5021514892578, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.34991455078125, + "rewards/margins": 6.463118553161621, + "rewards/rejected": -7.813033580780029, + "step": 2317 + }, + { + "epoch": 0.36, + "learning_rate": 1.2447219254110691e-05, + "logits/chosen": -2.3109421730041504, + "logits/rejected": -2.9160122871398926, + "logps/chosen": -114.98825073242188, + "logps/rejected": -348.013427734375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0619969367980957, + "rewards/margins": 6.996992111206055, + "rewards/rejected": -8.058988571166992, + "step": 2318 + }, + { + "epoch": 0.36, + "learning_rate": 1.2446485813579543e-05, + "logits/chosen": -2.876215934753418, + "logits/rejected": -3.3249704837799072, + "logps/chosen": -56.826595306396484, + "logps/rejected": -219.5266876220703, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6893908977508545, + "rewards/margins": 6.407467365264893, + "rewards/rejected": -7.096858024597168, + "step": 2319 + }, + { + "epoch": 0.36, + "learning_rate": 1.2445752373048395e-05, + "logits/chosen": -2.523563861846924, + "logits/rejected": -2.8304009437561035, + "logps/chosen": -119.93885803222656, + "logps/rejected": -109.48619079589844, + "loss": 1.0953, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.504669189453125, + "rewards/margins": 1.40522038936615, + "rewards/rejected": -4.9098896980285645, + "step": 2320 + }, + { + "epoch": 0.36, + "learning_rate": 1.2445018932517248e-05, + "logits/chosen": -2.425893545150757, + "logits/rejected": -3.2796900272369385, + "logps/chosen": -238.4094696044922, + "logps/rejected": -316.0322570800781, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.390756607055664, + "rewards/margins": 5.811027526855469, + "rewards/rejected": -9.201784133911133, + "step": 2321 + }, + { + "epoch": 0.36, + "learning_rate": 1.24442854919861e-05, + "logits/chosen": -2.736549139022827, + "logits/rejected": -2.8405466079711914, + "logps/chosen": -267.60601806640625, + "logps/rejected": -222.52078247070312, + "loss": 3.3586, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.237607479095459, + "rewards/margins": 0.9105381965637207, + "rewards/rejected": -6.14814567565918, + "step": 2322 + }, + { + "epoch": 0.36, + "learning_rate": 1.2443552051454952e-05, + "logits/chosen": -2.857287883758545, + "logits/rejected": -2.8250162601470947, + "logps/chosen": -463.61346435546875, + "logps/rejected": -331.4539794921875, + "loss": 3.2184, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.663107395172119, + "rewards/margins": 0.4683043956756592, + "rewards/rejected": -5.131411552429199, + "step": 2323 + }, + { + "epoch": 0.36, + "learning_rate": 1.2442818610923804e-05, + "logits/chosen": -2.4031059741973877, + "logits/rejected": -2.593585968017578, + "logps/chosen": -225.45916748046875, + "logps/rejected": -318.7455749511719, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.535546064376831, + "rewards/margins": 5.593904495239258, + "rewards/rejected": -7.129450798034668, + "step": 2324 + }, + { + "epoch": 0.36, + "learning_rate": 1.2442085170392656e-05, + "logits/chosen": -1.9311466217041016, + "logits/rejected": -3.0698678493499756, + "logps/chosen": -157.42788696289062, + "logps/rejected": -235.6998291015625, + "loss": 2.0805, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.00293493270874, + "rewards/margins": 1.1634960174560547, + "rewards/rejected": -5.166430950164795, + "step": 2325 + }, + { + "epoch": 0.36, + "learning_rate": 1.2441351729861508e-05, + "logits/chosen": -1.961983323097229, + "logits/rejected": -3.1350603103637695, + "logps/chosen": -140.44149780273438, + "logps/rejected": -407.11767578125, + "loss": 0.0465, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0435237884521484, + "rewards/margins": 5.4255170822143555, + "rewards/rejected": -7.469040870666504, + "step": 2326 + }, + { + "epoch": 0.36, + "learning_rate": 1.244061828933036e-05, + "logits/chosen": -2.8409197330474854, + "logits/rejected": -2.822312831878662, + "logps/chosen": -86.28784942626953, + "logps/rejected": -285.04736328125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39300835132598877, + "rewards/margins": 9.341841697692871, + "rewards/rejected": -9.73484992980957, + "step": 2327 + }, + { + "epoch": 0.36, + "learning_rate": 1.2439884848799211e-05, + "logits/chosen": -2.9759716987609863, + "logits/rejected": -2.0947277545928955, + "logps/chosen": -813.8884887695312, + "logps/rejected": -582.1658325195312, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7092800140380859, + "rewards/margins": 9.244632720947266, + "rewards/rejected": -9.953912734985352, + "step": 2328 + }, + { + "epoch": 0.36, + "learning_rate": 1.2439151408268063e-05, + "logits/chosen": -3.1339869499206543, + "logits/rejected": -3.1605064868927, + "logps/chosen": -31.648645401000977, + "logps/rejected": -172.11447143554688, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6389319896697998, + "rewards/margins": 5.762082576751709, + "rewards/rejected": -7.401014804840088, + "step": 2329 + }, + { + "epoch": 0.36, + "learning_rate": 1.2438417967736917e-05, + "logits/chosen": -3.176447868347168, + "logits/rejected": -3.0626819133758545, + "logps/chosen": -157.7530975341797, + "logps/rejected": -147.5796661376953, + "loss": 1.5838, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0321097373962402, + "rewards/margins": 1.975149393081665, + "rewards/rejected": -5.007259368896484, + "step": 2330 + }, + { + "epoch": 0.36, + "learning_rate": 1.2437684527205769e-05, + "logits/chosen": -2.9857821464538574, + "logits/rejected": -2.73307466506958, + "logps/chosen": -239.6163787841797, + "logps/rejected": -171.5880889892578, + "loss": 4.2356, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.6848578453063965, + "rewards/margins": -2.0536372661590576, + "rewards/rejected": -3.6312203407287598, + "step": 2331 + }, + { + "epoch": 0.36, + "learning_rate": 1.243695108667462e-05, + "logits/chosen": -3.2189176082611084, + "logits/rejected": -1.9949625730514526, + "logps/chosen": -376.3518371582031, + "logps/rejected": -352.34381103515625, + "loss": 1.8624, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.402569532394409, + "rewards/margins": 4.122631549835205, + "rewards/rejected": -7.525200843811035, + "step": 2332 + }, + { + "epoch": 0.36, + "learning_rate": 1.2436217646143472e-05, + "logits/chosen": -3.1817359924316406, + "logits/rejected": -2.705958366394043, + "logps/chosen": -255.41978454589844, + "logps/rejected": -243.67323303222656, + "loss": 0.5235, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.1117233037948608, + "rewards/margins": 3.9057321548461914, + "rewards/rejected": -5.017455101013184, + "step": 2333 + }, + { + "epoch": 0.36, + "learning_rate": 1.2435484205612324e-05, + "logits/chosen": -3.1462628841400146, + "logits/rejected": -1.6019214391708374, + "logps/chosen": -539.8628540039062, + "logps/rejected": -167.501708984375, + "loss": 3.0092, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5109305381774902, + "rewards/margins": 0.3075838088989258, + "rewards/rejected": -3.818514347076416, + "step": 2334 + }, + { + "epoch": 0.36, + "learning_rate": 1.2434750765081176e-05, + "logits/chosen": -2.237656354904175, + "logits/rejected": -3.1108455657958984, + "logps/chosen": -209.2181396484375, + "logps/rejected": -288.78533935546875, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.045370101928711, + "rewards/margins": 4.710483074188232, + "rewards/rejected": -6.755853176116943, + "step": 2335 + }, + { + "epoch": 0.36, + "learning_rate": 1.2434017324550028e-05, + "logits/chosen": -2.9092183113098145, + "logits/rejected": -3.1488091945648193, + "logps/chosen": -29.998802185058594, + "logps/rejected": -274.22552490234375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7771003246307373, + "rewards/margins": 8.137779235839844, + "rewards/rejected": -9.91487979888916, + "step": 2336 + }, + { + "epoch": 0.36, + "learning_rate": 1.243328388401888e-05, + "logits/chosen": -2.133533000946045, + "logits/rejected": -3.0382001399993896, + "logps/chosen": -86.10462951660156, + "logps/rejected": -337.63006591796875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5285453796386719, + "rewards/margins": 6.8460798263549805, + "rewards/rejected": -8.374625205993652, + "step": 2337 + }, + { + "epoch": 0.36, + "learning_rate": 1.2432550443487732e-05, + "logits/chosen": -3.1294429302215576, + "logits/rejected": -3.003260850906372, + "logps/chosen": -99.12741088867188, + "logps/rejected": -231.49349975585938, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7721938490867615, + "rewards/margins": 7.322841644287109, + "rewards/rejected": -8.095035552978516, + "step": 2338 + }, + { + "epoch": 0.36, + "learning_rate": 1.2431817002956585e-05, + "logits/chosen": -2.956350326538086, + "logits/rejected": -1.6466056108474731, + "logps/chosen": -303.9208068847656, + "logps/rejected": -190.13015747070312, + "loss": 2.7379, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.0599212646484375, + "rewards/margins": 0.4974396228790283, + "rewards/rejected": -4.557360649108887, + "step": 2339 + }, + { + "epoch": 0.36, + "learning_rate": 1.2431083562425437e-05, + "logits/chosen": -3.0605509281158447, + "logits/rejected": -1.8836079835891724, + "logps/chosen": -309.6412658691406, + "logps/rejected": -264.7367858886719, + "loss": 2.038, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.728275299072266, + "rewards/margins": 1.0326151847839355, + "rewards/rejected": -5.760890483856201, + "step": 2340 + }, + { + "epoch": 0.36, + "learning_rate": 1.243035012189429e-05, + "logits/chosen": -3.0446343421936035, + "logits/rejected": -2.563117027282715, + "logps/chosen": -427.3900451660156, + "logps/rejected": -355.7784423828125, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5617873668670654, + "rewards/margins": 5.015374660491943, + "rewards/rejected": -7.57716178894043, + "step": 2341 + }, + { + "epoch": 0.36, + "learning_rate": 1.2429616681363141e-05, + "logits/chosen": -0.7980518937110901, + "logits/rejected": -2.4430325031280518, + "logps/chosen": -92.88410949707031, + "logps/rejected": -401.7435302734375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1243698596954346, + "rewards/margins": 7.681262493133545, + "rewards/rejected": -10.805631637573242, + "step": 2342 + }, + { + "epoch": 0.36, + "learning_rate": 1.2428883240831993e-05, + "logits/chosen": -2.026116132736206, + "logits/rejected": -2.9150826930999756, + "logps/chosen": -266.05181884765625, + "logps/rejected": -549.9300537109375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.908513069152832, + "rewards/margins": 7.143695831298828, + "rewards/rejected": -12.052207946777344, + "step": 2343 + }, + { + "epoch": 0.36, + "learning_rate": 1.2428149800300845e-05, + "logits/chosen": -2.1065526008605957, + "logits/rejected": -2.990992307662964, + "logps/chosen": -25.870670318603516, + "logps/rejected": -399.6092529296875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.025583028793335, + "rewards/margins": 5.3327789306640625, + "rewards/rejected": -6.358362197875977, + "step": 2344 + }, + { + "epoch": 0.36, + "learning_rate": 1.2427416359769697e-05, + "logits/chosen": -3.222105026245117, + "logits/rejected": -2.670276641845703, + "logps/chosen": -97.31539154052734, + "logps/rejected": -201.37063598632812, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.640485405921936, + "rewards/margins": 5.5945305824279785, + "rewards/rejected": -7.235015869140625, + "step": 2345 + }, + { + "epoch": 0.36, + "learning_rate": 1.2426682919238549e-05, + "logits/chosen": -1.2918753623962402, + "logits/rejected": -3.10305118560791, + "logps/chosen": -159.56536865234375, + "logps/rejected": -336.0235595703125, + "loss": 1.9731, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.85638689994812, + "rewards/margins": -0.5827306509017944, + "rewards/rejected": -3.2736563682556152, + "step": 2346 + }, + { + "epoch": 0.37, + "learning_rate": 1.24259494787074e-05, + "logits/chosen": -1.8233332633972168, + "logits/rejected": -2.8109755516052246, + "logps/chosen": -211.56024169921875, + "logps/rejected": -465.0611267089844, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0181411504745483, + "rewards/margins": 9.388644218444824, + "rewards/rejected": -10.40678596496582, + "step": 2347 + }, + { + "epoch": 0.37, + "learning_rate": 1.2425216038176254e-05, + "logits/chosen": -1.6634935140609741, + "logits/rejected": -2.93660044670105, + "logps/chosen": -145.2157440185547, + "logps/rejected": -357.7220764160156, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2389488220214844, + "rewards/margins": 5.071578502655029, + "rewards/rejected": -7.3105268478393555, + "step": 2348 + }, + { + "epoch": 0.37, + "learning_rate": 1.2424482597645106e-05, + "logits/chosen": -2.9256396293640137, + "logits/rejected": -2.673853874206543, + "logps/chosen": -208.36868286132812, + "logps/rejected": -200.36880493164062, + "loss": 0.0345, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2093384265899658, + "rewards/margins": 3.3488409519195557, + "rewards/rejected": -4.5581793785095215, + "step": 2349 + }, + { + "epoch": 0.37, + "learning_rate": 1.242374915711396e-05, + "logits/chosen": -3.1286563873291016, + "logits/rejected": -3.3926467895507812, + "logps/chosen": -20.07632064819336, + "logps/rejected": -177.5972900390625, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6678714752197266, + "rewards/margins": 5.649837493896484, + "rewards/rejected": -6.317708969116211, + "step": 2350 + }, + { + "epoch": 0.37, + "learning_rate": 1.2423015716582811e-05, + "logits/chosen": -1.8860764503479004, + "logits/rejected": -2.8731367588043213, + "logps/chosen": -164.13082885742188, + "logps/rejected": -367.05078125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2562782764434814, + "rewards/margins": 7.980340003967285, + "rewards/rejected": -9.236618041992188, + "step": 2351 + }, + { + "epoch": 0.37, + "learning_rate": 1.2422282276051663e-05, + "logits/chosen": -2.36576247215271, + "logits/rejected": -3.0929417610168457, + "logps/chosen": -181.85350036621094, + "logps/rejected": -349.03277587890625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9462225437164307, + "rewards/margins": 6.00093412399292, + "rewards/rejected": -7.94715690612793, + "step": 2352 + }, + { + "epoch": 0.37, + "learning_rate": 1.2421548835520515e-05, + "logits/chosen": -2.6665585041046143, + "logits/rejected": -3.063011646270752, + "logps/chosen": -68.32491302490234, + "logps/rejected": -315.3329162597656, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3496439456939697, + "rewards/margins": 7.364037990570068, + "rewards/rejected": -8.713682174682617, + "step": 2353 + }, + { + "epoch": 0.37, + "learning_rate": 1.2420815394989367e-05, + "logits/chosen": -3.09995698928833, + "logits/rejected": -2.6248691082000732, + "logps/chosen": -154.03988647460938, + "logps/rejected": -170.79006958007812, + "loss": 0.2306, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0289714336395264, + "rewards/margins": 3.246408462524414, + "rewards/rejected": -5.2753801345825195, + "step": 2354 + }, + { + "epoch": 0.37, + "learning_rate": 1.2420081954458219e-05, + "logits/chosen": -2.3182437419891357, + "logits/rejected": -3.013874053955078, + "logps/chosen": -34.678932189941406, + "logps/rejected": -157.1172637939453, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8082700967788696, + "rewards/margins": 3.1614279747009277, + "rewards/rejected": -3.969697952270508, + "step": 2355 + }, + { + "epoch": 0.37, + "learning_rate": 1.241934851392707e-05, + "logits/chosen": -1.9915926456451416, + "logits/rejected": -2.5792276859283447, + "logps/chosen": -126.23303985595703, + "logps/rejected": -339.9290771484375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0772321224212646, + "rewards/margins": 6.568122863769531, + "rewards/rejected": -9.645355224609375, + "step": 2356 + }, + { + "epoch": 0.37, + "learning_rate": 1.2418615073395924e-05, + "logits/chosen": -2.8808467388153076, + "logits/rejected": -3.079942226409912, + "logps/chosen": -561.2989501953125, + "logps/rejected": -522.19921875, + "loss": 0.5986, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2827072143554688, + "rewards/margins": 2.6612703800201416, + "rewards/rejected": -5.9439778327941895, + "step": 2357 + }, + { + "epoch": 0.37, + "learning_rate": 1.2417881632864776e-05, + "logits/chosen": -1.798233985900879, + "logits/rejected": -2.377171039581299, + "logps/chosen": -104.35301208496094, + "logps/rejected": -345.6178894042969, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9776524305343628, + "rewards/margins": 5.948638916015625, + "rewards/rejected": -6.926291465759277, + "step": 2358 + }, + { + "epoch": 0.37, + "learning_rate": 1.2417148192333628e-05, + "logits/chosen": -3.200549602508545, + "logits/rejected": -2.0391321182250977, + "logps/chosen": -347.5936279296875, + "logps/rejected": -339.1119079589844, + "loss": 2.045, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.948061466217041, + "rewards/margins": 1.3423171043395996, + "rewards/rejected": -4.290378570556641, + "step": 2359 + }, + { + "epoch": 0.37, + "learning_rate": 1.241641475180248e-05, + "logits/chosen": -2.8346312046051025, + "logits/rejected": -1.8773967027664185, + "logps/chosen": -324.9004821777344, + "logps/rejected": -140.20187377929688, + "loss": 5.2888, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.166964530944824, + "rewards/margins": -5.279688835144043, + "rewards/rejected": -1.88727605342865, + "step": 2360 + }, + { + "epoch": 0.37, + "learning_rate": 1.2415681311271332e-05, + "logits/chosen": -3.0886735916137695, + "logits/rejected": -2.613800048828125, + "logps/chosen": -171.4952392578125, + "logps/rejected": -227.11509704589844, + "loss": 3.569, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.904613971710205, + "rewards/margins": 0.5840256214141846, + "rewards/rejected": -6.4886393547058105, + "step": 2361 + }, + { + "epoch": 0.37, + "learning_rate": 1.2414947870740184e-05, + "logits/chosen": -3.2618043422698975, + "logits/rejected": -3.1557350158691406, + "logps/chosen": -106.82421112060547, + "logps/rejected": -135.77687072753906, + "loss": 1.4777, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.461766242980957, + "rewards/margins": 1.6846624612808228, + "rewards/rejected": -4.146429061889648, + "step": 2362 + }, + { + "epoch": 0.37, + "learning_rate": 1.2414214430209036e-05, + "logits/chosen": -2.757476568222046, + "logits/rejected": -2.971651077270508, + "logps/chosen": -497.03515625, + "logps/rejected": -281.3380432128906, + "loss": 1.5886, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1753480434417725, + "rewards/margins": 1.6084158420562744, + "rewards/rejected": -4.783763885498047, + "step": 2363 + }, + { + "epoch": 0.37, + "learning_rate": 1.2413480989677887e-05, + "logits/chosen": -2.959171772003174, + "logits/rejected": -3.0892088413238525, + "logps/chosen": -130.92169189453125, + "logps/rejected": -329.48388671875, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1859104633331299, + "rewards/margins": 4.670034408569336, + "rewards/rejected": -5.855944633483887, + "step": 2364 + }, + { + "epoch": 0.37, + "learning_rate": 1.241274754914674e-05, + "logits/chosen": -2.696967124938965, + "logits/rejected": -3.1706061363220215, + "logps/chosen": -366.9273376464844, + "logps/rejected": -383.74237060546875, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0564643144607544, + "rewards/margins": 4.979578971862793, + "rewards/rejected": -6.036043167114258, + "step": 2365 + }, + { + "epoch": 0.37, + "learning_rate": 1.2412014108615593e-05, + "logits/chosen": -2.5409722328186035, + "logits/rejected": -3.239123582839966, + "logps/chosen": -297.36578369140625, + "logps/rejected": -411.00799560546875, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8267120122909546, + "rewards/margins": 7.0439043045043945, + "rewards/rejected": -8.870616912841797, + "step": 2366 + }, + { + "epoch": 0.37, + "learning_rate": 1.2411280668084445e-05, + "logits/chosen": -3.1485490798950195, + "logits/rejected": -2.6247177124023438, + "logps/chosen": -1065.261474609375, + "logps/rejected": -823.583740234375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.039663791656494, + "rewards/margins": 7.277154922485352, + "rewards/rejected": -9.316818237304688, + "step": 2367 + }, + { + "epoch": 0.37, + "learning_rate": 1.2410547227553297e-05, + "logits/chosen": -2.6074090003967285, + "logits/rejected": -3.1303694248199463, + "logps/chosen": -195.74078369140625, + "logps/rejected": -282.68585205078125, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9531723260879517, + "rewards/margins": 5.529747009277344, + "rewards/rejected": -7.482919692993164, + "step": 2368 + }, + { + "epoch": 0.37, + "learning_rate": 1.2409813787022149e-05, + "logits/chosen": -3.154912233352661, + "logits/rejected": -2.7741787433624268, + "logps/chosen": -570.45458984375, + "logps/rejected": -433.07086181640625, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2667653560638428, + "rewards/margins": 5.675070762634277, + "rewards/rejected": -6.941836357116699, + "step": 2369 + }, + { + "epoch": 0.37, + "learning_rate": 1.2409080346491e-05, + "logits/chosen": -3.06069016456604, + "logits/rejected": -2.6375303268432617, + "logps/chosen": -1070.112548828125, + "logps/rejected": -553.3980102539062, + "loss": 0.0341, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.000213623046875, + "rewards/margins": 3.70869779586792, + "rewards/rejected": -5.708910942077637, + "step": 2370 + }, + { + "epoch": 0.37, + "learning_rate": 1.2408346905959852e-05, + "logits/chosen": -1.882895827293396, + "logits/rejected": -3.2089498043060303, + "logps/chosen": -442.8896179199219, + "logps/rejected": -587.9307250976562, + "loss": 0.037, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.088397741317749, + "rewards/margins": 4.157675266265869, + "rewards/rejected": -5.246073246002197, + "step": 2371 + }, + { + "epoch": 0.37, + "learning_rate": 1.2407613465428704e-05, + "logits/chosen": -3.25458025932312, + "logits/rejected": -2.245279550552368, + "logps/chosen": -219.09609985351562, + "logps/rejected": -137.61399841308594, + "loss": 1.1182, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7287797927856445, + "rewards/margins": 1.357840657234192, + "rewards/rejected": -5.086620330810547, + "step": 2372 + }, + { + "epoch": 0.37, + "learning_rate": 1.2406880024897556e-05, + "logits/chosen": -2.5985605716705322, + "logits/rejected": -3.1116344928741455, + "logps/chosen": -320.878662109375, + "logps/rejected": -497.2998046875, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0266895294189453, + "rewards/margins": 5.631951332092285, + "rewards/rejected": -7.6586408615112305, + "step": 2373 + }, + { + "epoch": 0.37, + "learning_rate": 1.2406146584366408e-05, + "logits/chosen": -2.995199680328369, + "logits/rejected": -1.0157335996627808, + "logps/chosen": -165.829345703125, + "logps/rejected": -120.82615661621094, + "loss": 2.7685, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.2597737312316895, + "rewards/margins": -1.719504475593567, + "rewards/rejected": -3.540269374847412, + "step": 2374 + }, + { + "epoch": 0.37, + "learning_rate": 1.2405413143835261e-05, + "logits/chosen": -2.4061081409454346, + "logits/rejected": -3.294853925704956, + "logps/chosen": -73.18911743164062, + "logps/rejected": -268.44659423828125, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9513390064239502, + "rewards/margins": 6.357658386230469, + "rewards/rejected": -7.30899715423584, + "step": 2375 + }, + { + "epoch": 0.37, + "learning_rate": 1.2404679703304113e-05, + "logits/chosen": -2.5409183502197266, + "logits/rejected": -2.408627510070801, + "logps/chosen": -580.3605346679688, + "logps/rejected": -645.127685546875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0313920974731445, + "rewards/margins": 6.986406326293945, + "rewards/rejected": -9.017797470092773, + "step": 2376 + }, + { + "epoch": 0.37, + "learning_rate": 1.2403946262772965e-05, + "logits/chosen": -2.529615640640259, + "logits/rejected": -2.8275458812713623, + "logps/chosen": -108.42295837402344, + "logps/rejected": -257.1523132324219, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0559849739074707, + "rewards/margins": 5.268308162689209, + "rewards/rejected": -8.32429313659668, + "step": 2377 + }, + { + "epoch": 0.37, + "learning_rate": 1.2403212822241817e-05, + "logits/chosen": -3.179842948913574, + "logits/rejected": -2.501307249069214, + "logps/chosen": -332.7612609863281, + "logps/rejected": -259.9151916503906, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1002098321914673, + "rewards/margins": 4.275817394256592, + "rewards/rejected": -5.3760271072387695, + "step": 2378 + }, + { + "epoch": 0.37, + "learning_rate": 1.2402479381710669e-05, + "logits/chosen": -2.7611889839172363, + "logits/rejected": -3.201517105102539, + "logps/chosen": -60.00656509399414, + "logps/rejected": -191.53262329101562, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3900797367095947, + "rewards/margins": 4.451262950897217, + "rewards/rejected": -5.841342449188232, + "step": 2379 + }, + { + "epoch": 0.37, + "learning_rate": 1.2401745941179521e-05, + "logits/chosen": -2.0815773010253906, + "logits/rejected": -2.4902267456054688, + "logps/chosen": -226.2318115234375, + "logps/rejected": -377.1648254394531, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2276055812835693, + "rewards/margins": 6.519034385681152, + "rewards/rejected": -8.7466402053833, + "step": 2380 + }, + { + "epoch": 0.37, + "learning_rate": 1.2401012500648373e-05, + "logits/chosen": -3.146761417388916, + "logits/rejected": -2.375403881072998, + "logps/chosen": -277.2467041015625, + "logps/rejected": -194.79562377929688, + "loss": 4.2908, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.528253555297852, + "rewards/margins": -4.264959812164307, + "rewards/rejected": -2.263293504714966, + "step": 2381 + }, + { + "epoch": 0.37, + "learning_rate": 1.2400279060117226e-05, + "logits/chosen": -1.6086153984069824, + "logits/rejected": -2.76381254196167, + "logps/chosen": -124.24783325195312, + "logps/rejected": -403.59783935546875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9199638366699219, + "rewards/margins": 6.962212562561035, + "rewards/rejected": -8.882176399230957, + "step": 2382 + }, + { + "epoch": 0.37, + "learning_rate": 1.2399545619586078e-05, + "logits/chosen": -3.2038495540618896, + "logits/rejected": -2.884155511856079, + "logps/chosen": -142.4091796875, + "logps/rejected": -262.615966796875, + "loss": 0.2249, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.598179817199707, + "rewards/margins": 3.7554237842559814, + "rewards/rejected": -6.353603363037109, + "step": 2383 + }, + { + "epoch": 0.37, + "learning_rate": 1.2398812179054932e-05, + "logits/chosen": -1.787829041481018, + "logits/rejected": -3.123777151107788, + "logps/chosen": -234.21707153320312, + "logps/rejected": -460.287841796875, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.319498300552368, + "rewards/margins": 5.078374862670898, + "rewards/rejected": -8.397872924804688, + "step": 2384 + }, + { + "epoch": 0.37, + "learning_rate": 1.2398078738523784e-05, + "logits/chosen": -2.676440477371216, + "logits/rejected": -3.308238983154297, + "logps/chosen": -274.80010986328125, + "logps/rejected": -406.3433532714844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3721494674682617, + "rewards/margins": 9.561656951904297, + "rewards/rejected": -11.933805465698242, + "step": 2385 + }, + { + "epoch": 0.37, + "learning_rate": 1.2397345297992636e-05, + "logits/chosen": -3.2072272300720215, + "logits/rejected": -3.138962745666504, + "logps/chosen": -458.2494201660156, + "logps/rejected": -326.8311767578125, + "loss": 1.508, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.609149217605591, + "rewards/margins": 2.1478261947631836, + "rewards/rejected": -5.756975173950195, + "step": 2386 + }, + { + "epoch": 0.37, + "learning_rate": 1.2396611857461487e-05, + "logits/chosen": -3.0757839679718018, + "logits/rejected": -3.231637716293335, + "logps/chosen": -138.64028930664062, + "logps/rejected": -203.93002319335938, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.062197685241699, + "rewards/margins": 4.275021553039551, + "rewards/rejected": -7.33721923828125, + "step": 2387 + }, + { + "epoch": 0.37, + "learning_rate": 1.239587841693034e-05, + "logits/chosen": -2.7318661212921143, + "logits/rejected": -3.281522035598755, + "logps/chosen": -46.46303939819336, + "logps/rejected": -246.87518310546875, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0866036415100098, + "rewards/margins": 4.926630973815918, + "rewards/rejected": -7.013235092163086, + "step": 2388 + }, + { + "epoch": 0.37, + "learning_rate": 1.2395144976399191e-05, + "logits/chosen": -2.4481499195098877, + "logits/rejected": -2.5849361419677734, + "logps/chosen": -515.6240844726562, + "logps/rejected": -343.1148986816406, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2650047540664673, + "rewards/margins": 4.75559139251709, + "rewards/rejected": -6.020596504211426, + "step": 2389 + }, + { + "epoch": 0.37, + "learning_rate": 1.2394411535868043e-05, + "logits/chosen": -2.5856070518493652, + "logits/rejected": -3.2199409008026123, + "logps/chosen": -108.88710021972656, + "logps/rejected": -241.05284118652344, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9671945571899414, + "rewards/margins": 4.94635534286499, + "rewards/rejected": -8.91355037689209, + "step": 2390 + }, + { + "epoch": 0.37, + "learning_rate": 1.2393678095336895e-05, + "logits/chosen": -1.2378617525100708, + "logits/rejected": -2.875650644302368, + "logps/chosen": -195.2471923828125, + "logps/rejected": -483.9071044921875, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3024251461029053, + "rewards/margins": 5.319279670715332, + "rewards/rejected": -7.621705055236816, + "step": 2391 + }, + { + "epoch": 0.37, + "learning_rate": 1.2392944654805747e-05, + "logits/chosen": -3.128612518310547, + "logits/rejected": -2.1823933124542236, + "logps/chosen": -499.29498291015625, + "logps/rejected": -546.2953491210938, + "loss": 2.9057, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.411554336547852, + "rewards/margins": -0.7614319324493408, + "rewards/rejected": -5.650122165679932, + "step": 2392 + }, + { + "epoch": 0.37, + "learning_rate": 1.23922112142746e-05, + "logits/chosen": -2.394024610519409, + "logits/rejected": -3.287014961242676, + "logps/chosen": -206.98641967773438, + "logps/rejected": -356.6724853515625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2919363975524902, + "rewards/margins": 6.0970330238342285, + "rewards/rejected": -8.388969421386719, + "step": 2393 + }, + { + "epoch": 0.37, + "learning_rate": 1.2391477773743452e-05, + "logits/chosen": -2.5327353477478027, + "logits/rejected": -3.1491384506225586, + "logps/chosen": -698.0013427734375, + "logps/rejected": -840.8145751953125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0201858282089233, + "rewards/margins": 7.816300868988037, + "rewards/rejected": -8.83648681640625, + "step": 2394 + }, + { + "epoch": 0.37, + "learning_rate": 1.2390744333212304e-05, + "logits/chosen": -3.015986442565918, + "logits/rejected": -2.6899986267089844, + "logps/chosen": -261.96527099609375, + "logps/rejected": -288.8546142578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.30964732170105, + "rewards/margins": 8.974658012390137, + "rewards/rejected": -11.284305572509766, + "step": 2395 + }, + { + "epoch": 0.37, + "learning_rate": 1.2390010892681156e-05, + "logits/chosen": -3.03700852394104, + "logits/rejected": -2.313202142715454, + "logps/chosen": -434.95697021484375, + "logps/rejected": -301.52337646484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1750504970550537, + "rewards/margins": 9.339807510375977, + "rewards/rejected": -11.51485824584961, + "step": 2396 + }, + { + "epoch": 0.37, + "learning_rate": 1.2389277452150008e-05, + "logits/chosen": -2.523238182067871, + "logits/rejected": -3.047321319580078, + "logps/chosen": -195.62408447265625, + "logps/rejected": -135.75057983398438, + "loss": 1.0366, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.011444568634033, + "rewards/margins": 2.346379041671753, + "rewards/rejected": -6.357823371887207, + "step": 2397 + }, + { + "epoch": 0.37, + "learning_rate": 1.238854401161886e-05, + "logits/chosen": -3.1098272800445557, + "logits/rejected": -3.1635239124298096, + "logps/chosen": -197.09292602539062, + "logps/rejected": -361.9783935546875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04144975543022156, + "rewards/margins": 5.69636344909668, + "rewards/rejected": -5.654913425445557, + "step": 2398 + }, + { + "epoch": 0.37, + "learning_rate": 1.2387810571087712e-05, + "logits/chosen": -2.897636890411377, + "logits/rejected": -2.9471161365509033, + "logps/chosen": -50.87522888183594, + "logps/rejected": -246.9038543701172, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.119429111480713, + "rewards/margins": 7.024111747741699, + "rewards/rejected": -9.143540382385254, + "step": 2399 + }, + { + "epoch": 0.37, + "learning_rate": 1.2387077130556564e-05, + "logits/chosen": -3.0255727767944336, + "logits/rejected": -2.67340087890625, + "logps/chosen": -374.3197937011719, + "logps/rejected": -333.48138427734375, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.330846071243286, + "rewards/margins": 5.0794267654418945, + "rewards/rejected": -7.410272598266602, + "step": 2400 + }, + { + "epoch": 0.37, + "learning_rate": 1.2386343690025417e-05, + "logits/chosen": -1.8339499235153198, + "logits/rejected": -2.991168737411499, + "logps/chosen": -234.615478515625, + "logps/rejected": -512.912109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3738563060760498, + "rewards/margins": 9.838724136352539, + "rewards/rejected": -11.212580680847168, + "step": 2401 + }, + { + "epoch": 0.37, + "learning_rate": 1.2385610249494269e-05, + "logits/chosen": -2.5959558486938477, + "logits/rejected": -3.2034857273101807, + "logps/chosen": -228.21839904785156, + "logps/rejected": -386.8330383300781, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.279482364654541, + "rewards/margins": 4.476059913635254, + "rewards/rejected": -6.755542278289795, + "step": 2402 + }, + { + "epoch": 0.37, + "learning_rate": 1.2384876808963121e-05, + "logits/chosen": -2.4077024459838867, + "logits/rejected": -2.96093487739563, + "logps/chosen": -386.26849365234375, + "logps/rejected": -555.4693603515625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7660999298095703, + "rewards/margins": 7.410393714904785, + "rewards/rejected": -9.176493644714355, + "step": 2403 + }, + { + "epoch": 0.37, + "learning_rate": 1.2384143368431973e-05, + "logits/chosen": -3.0698487758636475, + "logits/rejected": -2.617913007736206, + "logps/chosen": -216.96774291992188, + "logps/rejected": -269.6300048828125, + "loss": 3.0932, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.161452293395996, + "rewards/margins": -1.496838927268982, + "rewards/rejected": -4.664613246917725, + "step": 2404 + }, + { + "epoch": 0.37, + "learning_rate": 1.2383409927900825e-05, + "logits/chosen": -2.059852361679077, + "logits/rejected": -3.0104103088378906, + "logps/chosen": -187.76461791992188, + "logps/rejected": -306.99383544921875, + "loss": 4.0126, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.7757978439331055, + "rewards/margins": -1.9048995971679688, + "rewards/rejected": -3.870898485183716, + "step": 2405 + }, + { + "epoch": 0.37, + "learning_rate": 1.2382676487369677e-05, + "logits/chosen": -1.9899311065673828, + "logits/rejected": -3.085620164871216, + "logps/chosen": -219.8873748779297, + "logps/rejected": -216.2877197265625, + "loss": 2.0747, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.060732364654541, + "rewards/margins": 1.803908348083496, + "rewards/rejected": -6.864640712738037, + "step": 2406 + }, + { + "epoch": 0.37, + "learning_rate": 1.2381943046838528e-05, + "logits/chosen": -2.173806667327881, + "logits/rejected": -3.087556838989258, + "logps/chosen": -88.42725372314453, + "logps/rejected": -487.27276611328125, + "loss": 0.0541, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1959691047668457, + "rewards/margins": 5.457003593444824, + "rewards/rejected": -7.652972221374512, + "step": 2407 + }, + { + "epoch": 0.37, + "learning_rate": 1.238120960630738e-05, + "logits/chosen": -2.9994852542877197, + "logits/rejected": -2.897284507751465, + "logps/chosen": -597.2570190429688, + "logps/rejected": -489.825927734375, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7507514953613281, + "rewards/margins": 4.790360927581787, + "rewards/rejected": -6.541112899780273, + "step": 2408 + }, + { + "epoch": 0.37, + "learning_rate": 1.2380476165776232e-05, + "logits/chosen": -3.1057424545288086, + "logits/rejected": -2.463566780090332, + "logps/chosen": -944.8560791015625, + "logps/rejected": -720.6080322265625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.755523681640625, + "rewards/margins": 6.938511848449707, + "rewards/rejected": -8.694035530090332, + "step": 2409 + }, + { + "epoch": 0.37, + "learning_rate": 1.2379742725245086e-05, + "logits/chosen": -2.9161548614501953, + "logits/rejected": -2.781182289123535, + "logps/chosen": -265.6598205566406, + "logps/rejected": -294.88079833984375, + "loss": 3.9876, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.121264934539795, + "rewards/margins": -1.8449797630310059, + "rewards/rejected": -4.276285171508789, + "step": 2410 + }, + { + "epoch": 0.37, + "learning_rate": 1.2379009284713938e-05, + "logits/chosen": -3.0012004375457764, + "logits/rejected": -2.8682990074157715, + "logps/chosen": -276.6990661621094, + "logps/rejected": -339.2750244140625, + "loss": 0.0359, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.323744297027588, + "rewards/margins": 4.052433013916016, + "rewards/rejected": -5.3761773109436035, + "step": 2411 + }, + { + "epoch": 0.38, + "learning_rate": 1.237827584418279e-05, + "logits/chosen": -2.7656068801879883, + "logits/rejected": -3.156827449798584, + "logps/chosen": -50.43912887573242, + "logps/rejected": -263.1122741699219, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.041344404220581, + "rewards/margins": 9.274857521057129, + "rewards/rejected": -11.316202163696289, + "step": 2412 + }, + { + "epoch": 0.38, + "learning_rate": 1.2377542403651641e-05, + "logits/chosen": -2.7887895107269287, + "logits/rejected": -2.91951322555542, + "logps/chosen": -200.46170043945312, + "logps/rejected": -128.5747833251953, + "loss": 1.3338, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.589312553405762, + "rewards/margins": 0.5214016437530518, + "rewards/rejected": -6.110714435577393, + "step": 2413 + }, + { + "epoch": 0.38, + "learning_rate": 1.2376808963120493e-05, + "logits/chosen": -1.946395754814148, + "logits/rejected": -3.0328774452209473, + "logps/chosen": -398.548583984375, + "logps/rejected": -514.937744140625, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4732863903045654, + "rewards/margins": 7.011109352111816, + "rewards/rejected": -9.484395980834961, + "step": 2414 + }, + { + "epoch": 0.38, + "learning_rate": 1.2376075522589345e-05, + "logits/chosen": -1.4933857917785645, + "logits/rejected": -2.987208366394043, + "logps/chosen": -202.27027893066406, + "logps/rejected": -463.3726806640625, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6831085681915283, + "rewards/margins": 5.610825061798096, + "rewards/rejected": -7.293933391571045, + "step": 2415 + }, + { + "epoch": 0.38, + "learning_rate": 1.2375342082058199e-05, + "logits/chosen": -2.820483684539795, + "logits/rejected": -2.6612532138824463, + "logps/chosen": -342.15985107421875, + "logps/rejected": -604.2857666015625, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4789953231811523, + "rewards/margins": 8.53425407409668, + "rewards/rejected": -12.013250350952148, + "step": 2416 + }, + { + "epoch": 0.38, + "learning_rate": 1.237460864152705e-05, + "logits/chosen": -2.320146083831787, + "logits/rejected": -2.5898959636688232, + "logps/chosen": -168.9651641845703, + "logps/rejected": -316.1351013183594, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0620017051696777, + "rewards/margins": 7.070152759552002, + "rewards/rejected": -10.13215446472168, + "step": 2417 + }, + { + "epoch": 0.38, + "learning_rate": 1.2373875200995902e-05, + "logits/chosen": -3.0766799449920654, + "logits/rejected": -2.5454773902893066, + "logps/chosen": -478.66619873046875, + "logps/rejected": -382.1605224609375, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4557998180389404, + "rewards/margins": 6.227439880371094, + "rewards/rejected": -7.683239936828613, + "step": 2418 + }, + { + "epoch": 0.38, + "learning_rate": 1.2373141760464756e-05, + "logits/chosen": -1.4075026512145996, + "logits/rejected": -2.835696220397949, + "logps/chosen": -202.16390991210938, + "logps/rejected": -340.2896423339844, + "loss": 1.4886, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5352776050567627, + "rewards/margins": 3.9626636505126953, + "rewards/rejected": -7.497941017150879, + "step": 2419 + }, + { + "epoch": 0.38, + "learning_rate": 1.2372408319933608e-05, + "logits/chosen": -1.8848705291748047, + "logits/rejected": -3.05564284324646, + "logps/chosen": -169.12486267089844, + "logps/rejected": -470.49285888671875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5851311683654785, + "rewards/margins": 5.546947479248047, + "rewards/rejected": -10.132078170776367, + "step": 2420 + }, + { + "epoch": 0.38, + "learning_rate": 1.237167487940246e-05, + "logits/chosen": -2.9933183193206787, + "logits/rejected": -1.7707401514053345, + "logps/chosen": -604.7205200195312, + "logps/rejected": -404.12762451171875, + "loss": 0.0553, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.893174648284912, + "rewards/margins": 5.76920747756958, + "rewards/rejected": -8.662382125854492, + "step": 2421 + }, + { + "epoch": 0.38, + "learning_rate": 1.2370941438871312e-05, + "logits/chosen": -3.154520273208618, + "logits/rejected": -3.155256986618042, + "logps/chosen": -402.04425048828125, + "logps/rejected": -375.19140625, + "loss": 2.0317, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7966904640197754, + "rewards/margins": -0.01883101463317871, + "rewards/rejected": -3.7778594493865967, + "step": 2422 + }, + { + "epoch": 0.38, + "learning_rate": 1.2370207998340164e-05, + "logits/chosen": -2.8468947410583496, + "logits/rejected": -3.2394587993621826, + "logps/chosen": -128.66552734375, + "logps/rejected": -261.7102966308594, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6490731239318848, + "rewards/margins": 4.1241326332092285, + "rewards/rejected": -7.773205757141113, + "step": 2423 + }, + { + "epoch": 0.38, + "learning_rate": 1.2369474557809015e-05, + "logits/chosen": -3.240039348602295, + "logits/rejected": -3.050755739212036, + "logps/chosen": -155.40850830078125, + "logps/rejected": -131.28695678710938, + "loss": 2.8506, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.550594329833984, + "rewards/margins": -0.17101144790649414, + "rewards/rejected": -4.37958288192749, + "step": 2424 + }, + { + "epoch": 0.38, + "learning_rate": 1.2368741117277867e-05, + "logits/chosen": -2.0167202949523926, + "logits/rejected": -2.3115041255950928, + "logps/chosen": -244.9832763671875, + "logps/rejected": -237.33074951171875, + "loss": 1.6556, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.3490705490112305, + "rewards/margins": 1.8148372173309326, + "rewards/rejected": -6.163908004760742, + "step": 2425 + }, + { + "epoch": 0.38, + "learning_rate": 1.2368007676746719e-05, + "logits/chosen": -1.7737534046173096, + "logits/rejected": -2.503460645675659, + "logps/chosen": -154.16201782226562, + "logps/rejected": -464.2763366699219, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5574688911437988, + "rewards/margins": 9.64700698852539, + "rewards/rejected": -11.204475402832031, + "step": 2426 + }, + { + "epoch": 0.38, + "learning_rate": 1.2367274236215571e-05, + "logits/chosen": -2.56949520111084, + "logits/rejected": -3.10810923576355, + "logps/chosen": -153.04100036621094, + "logps/rejected": -263.741455078125, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6516830921173096, + "rewards/margins": 5.564558982849121, + "rewards/rejected": -8.216241836547852, + "step": 2427 + }, + { + "epoch": 0.38, + "learning_rate": 1.2366540795684425e-05, + "logits/chosen": -2.6990511417388916, + "logits/rejected": -2.9388251304626465, + "logps/chosen": -126.31048583984375, + "logps/rejected": -257.511962890625, + "loss": 0.1362, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.329174757003784, + "rewards/margins": 3.0795834064483643, + "rewards/rejected": -5.408758163452148, + "step": 2428 + }, + { + "epoch": 0.38, + "learning_rate": 1.2365807355153276e-05, + "logits/chosen": -3.085491418838501, + "logits/rejected": -2.8927948474884033, + "logps/chosen": -774.70263671875, + "logps/rejected": -540.208984375, + "loss": 2.333, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.953736305236816, + "rewards/margins": 0.7422215938568115, + "rewards/rejected": -5.695958137512207, + "step": 2429 + }, + { + "epoch": 0.38, + "learning_rate": 1.2365073914622128e-05, + "logits/chosen": -2.733863115310669, + "logits/rejected": -3.121798038482666, + "logps/chosen": -271.2882385253906, + "logps/rejected": -368.9445495605469, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4921936988830566, + "rewards/margins": 6.788613319396973, + "rewards/rejected": -8.280806541442871, + "step": 2430 + }, + { + "epoch": 0.38, + "learning_rate": 1.236434047409098e-05, + "logits/chosen": -3.0523464679718018, + "logits/rejected": -2.4206764698028564, + "logps/chosen": -283.25048828125, + "logps/rejected": -211.6528778076172, + "loss": 0.0764, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.714005470275879, + "rewards/margins": 4.520087718963623, + "rewards/rejected": -7.234092712402344, + "step": 2431 + }, + { + "epoch": 0.38, + "learning_rate": 1.2363607033559832e-05, + "logits/chosen": -2.9746522903442383, + "logits/rejected": -2.922804117202759, + "logps/chosen": -112.43467712402344, + "logps/rejected": -129.55018615722656, + "loss": 0.0697, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.041395902633667, + "rewards/margins": 2.8588638305664062, + "rewards/rejected": -5.900259971618652, + "step": 2432 + }, + { + "epoch": 0.38, + "learning_rate": 1.2362873593028684e-05, + "logits/chosen": -3.1325621604919434, + "logits/rejected": -1.4077578783035278, + "logps/chosen": -233.06712341308594, + "logps/rejected": -140.3822021484375, + "loss": 0.6702, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.354942798614502, + "rewards/margins": 2.780693292617798, + "rewards/rejected": -7.135636329650879, + "step": 2433 + }, + { + "epoch": 0.38, + "learning_rate": 1.2362140152497536e-05, + "logits/chosen": -2.315329074859619, + "logits/rejected": -1.970327377319336, + "logps/chosen": -596.640869140625, + "logps/rejected": -424.7996520996094, + "loss": 0.0405, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.891668677330017, + "rewards/margins": 6.563092231750488, + "rewards/rejected": -8.454761505126953, + "step": 2434 + }, + { + "epoch": 0.38, + "learning_rate": 1.2361406711966388e-05, + "logits/chosen": -2.9780759811401367, + "logits/rejected": -3.0340332984924316, + "logps/chosen": -100.03765106201172, + "logps/rejected": -158.7153778076172, + "loss": 0.1713, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.455763339996338, + "rewards/margins": 2.902289867401123, + "rewards/rejected": -5.358053207397461, + "step": 2435 + }, + { + "epoch": 0.38, + "learning_rate": 1.236067327143524e-05, + "logits/chosen": -2.869340658187866, + "logits/rejected": -1.9447425603866577, + "logps/chosen": -242.28477478027344, + "logps/rejected": -236.03399658203125, + "loss": 2.3448, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.267573833465576, + "rewards/margins": 1.3009819984436035, + "rewards/rejected": -5.5685553550720215, + "step": 2436 + }, + { + "epoch": 0.38, + "learning_rate": 1.2359939830904093e-05, + "logits/chosen": -2.1330530643463135, + "logits/rejected": -2.893256187438965, + "logps/chosen": -125.33606719970703, + "logps/rejected": -281.18707275390625, + "loss": 0.042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4160648584365845, + "rewards/margins": 4.364989280700684, + "rewards/rejected": -5.7810540199279785, + "step": 2437 + }, + { + "epoch": 0.38, + "learning_rate": 1.2359206390372945e-05, + "logits/chosen": -3.0975632667541504, + "logits/rejected": -2.9668452739715576, + "logps/chosen": -128.37677001953125, + "logps/rejected": -175.90882873535156, + "loss": 1.7121, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.550076961517334, + "rewards/margins": 0.7768745422363281, + "rewards/rejected": -4.326951503753662, + "step": 2438 + }, + { + "epoch": 0.38, + "learning_rate": 1.2358472949841797e-05, + "logits/chosen": -3.0966503620147705, + "logits/rejected": -2.549989938735962, + "logps/chosen": -260.0142822265625, + "logps/rejected": -238.72496032714844, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0106804370880127, + "rewards/margins": 4.082337379455566, + "rewards/rejected": -6.093018054962158, + "step": 2439 + }, + { + "epoch": 0.38, + "learning_rate": 1.2357739509310649e-05, + "logits/chosen": -3.085000991821289, + "logits/rejected": -3.0754177570343018, + "logps/chosen": -197.14837646484375, + "logps/rejected": -219.66307067871094, + "loss": 0.1986, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6821532249450684, + "rewards/margins": 1.5443291664123535, + "rewards/rejected": -4.226482391357422, + "step": 2440 + }, + { + "epoch": 0.38, + "learning_rate": 1.23570060687795e-05, + "logits/chosen": -2.8162386417388916, + "logits/rejected": -2.8081583976745605, + "logps/chosen": -354.830322265625, + "logps/rejected": -427.594482421875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3426711559295654, + "rewards/margins": 6.202765464782715, + "rewards/rejected": -7.545436382293701, + "step": 2441 + }, + { + "epoch": 0.38, + "learning_rate": 1.2356272628248353e-05, + "logits/chosen": -1.7836500406265259, + "logits/rejected": -3.125232219696045, + "logps/chosen": -135.6710968017578, + "logps/rejected": -558.500732421875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.381484031677246, + "rewards/margins": 7.878015518188477, + "rewards/rejected": -12.259499549865723, + "step": 2442 + }, + { + "epoch": 0.38, + "learning_rate": 1.2355539187717204e-05, + "logits/chosen": -2.569502830505371, + "logits/rejected": -3.13642954826355, + "logps/chosen": -171.1798553466797, + "logps/rejected": -184.91873168945312, + "loss": 0.1297, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.324359893798828, + "rewards/margins": 3.482736110687256, + "rewards/rejected": -6.807096004486084, + "step": 2443 + }, + { + "epoch": 0.38, + "learning_rate": 1.2354805747186056e-05, + "logits/chosen": -3.258657693862915, + "logits/rejected": -3.1104722023010254, + "logps/chosen": -319.0214538574219, + "logps/rejected": -253.33526611328125, + "loss": 4.4052, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.486917495727539, + "rewards/margins": -2.3488688468933105, + "rewards/rejected": -3.1380486488342285, + "step": 2444 + }, + { + "epoch": 0.38, + "learning_rate": 1.2354072306654908e-05, + "logits/chosen": -2.7534053325653076, + "logits/rejected": -2.962874174118042, + "logps/chosen": -233.16873168945312, + "logps/rejected": -336.55780029296875, + "loss": 4.0015, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.836860656738281, + "rewards/margins": 0.8367891311645508, + "rewards/rejected": -7.673649787902832, + "step": 2445 + }, + { + "epoch": 0.38, + "learning_rate": 1.2353338866123762e-05, + "logits/chosen": -2.932957172393799, + "logits/rejected": -3.1761820316314697, + "logps/chosen": -451.1827392578125, + "logps/rejected": -430.59832763671875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8058221340179443, + "rewards/margins": 6.599054336547852, + "rewards/rejected": -8.404876708984375, + "step": 2446 + }, + { + "epoch": 0.38, + "learning_rate": 1.2352605425592614e-05, + "logits/chosen": -1.766243815422058, + "logits/rejected": -3.030336618423462, + "logps/chosen": -229.70712280273438, + "logps/rejected": -495.9521789550781, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.78054141998291, + "rewards/margins": 4.79373836517334, + "rewards/rejected": -7.57427978515625, + "step": 2447 + }, + { + "epoch": 0.38, + "learning_rate": 1.2351871985061466e-05, + "logits/chosen": -3.166684627532959, + "logits/rejected": -2.3286044597625732, + "logps/chosen": -355.3165283203125, + "logps/rejected": -315.9302978515625, + "loss": 0.7089, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1347289085388184, + "rewards/margins": 6.973039627075195, + "rewards/rejected": -10.107769012451172, + "step": 2448 + }, + { + "epoch": 0.38, + "learning_rate": 1.2351138544530317e-05, + "logits/chosen": -2.994555950164795, + "logits/rejected": -2.5410220623016357, + "logps/chosen": -104.33685302734375, + "logps/rejected": -193.41531372070312, + "loss": 1.1749, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.46373176574707, + "rewards/margins": 0.8304331302642822, + "rewards/rejected": -5.294164657592773, + "step": 2449 + }, + { + "epoch": 0.38, + "learning_rate": 1.2350405103999171e-05, + "logits/chosen": -1.7839428186416626, + "logits/rejected": -2.549010992050171, + "logps/chosen": -113.52729797363281, + "logps/rejected": -333.0640869140625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5284132957458496, + "rewards/margins": 6.410577774047852, + "rewards/rejected": -8.93899154663086, + "step": 2450 + }, + { + "epoch": 0.38, + "learning_rate": 1.2349671663468023e-05, + "logits/chosen": -3.111830711364746, + "logits/rejected": -2.815826177597046, + "logps/chosen": -517.0418701171875, + "logps/rejected": -494.38397216796875, + "loss": 3.703, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.643560886383057, + "rewards/margins": -0.4120774269104004, + "rewards/rejected": -5.231483459472656, + "step": 2451 + }, + { + "epoch": 0.38, + "learning_rate": 1.2348938222936875e-05, + "logits/chosen": -2.3988380432128906, + "logits/rejected": -2.366701364517212, + "logps/chosen": -179.39480590820312, + "logps/rejected": -238.22463989257812, + "loss": 2.3122, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.376975059509277, + "rewards/margins": 1.7811713218688965, + "rewards/rejected": -7.158146381378174, + "step": 2452 + }, + { + "epoch": 0.38, + "learning_rate": 1.2348204782405727e-05, + "logits/chosen": -3.0751891136169434, + "logits/rejected": -2.102036952972412, + "logps/chosen": -691.031494140625, + "logps/rejected": -350.60430908203125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.062295436859131, + "rewards/margins": 6.415016174316406, + "rewards/rejected": -8.477312088012695, + "step": 2453 + }, + { + "epoch": 0.38, + "learning_rate": 1.2347471341874579e-05, + "logits/chosen": -2.039365530014038, + "logits/rejected": -3.1241049766540527, + "logps/chosen": -148.19833374023438, + "logps/rejected": -320.47821044921875, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4266819953918457, + "rewards/margins": 5.386433124542236, + "rewards/rejected": -7.813115119934082, + "step": 2454 + }, + { + "epoch": 0.38, + "learning_rate": 1.2346737901343432e-05, + "logits/chosen": -2.120002031326294, + "logits/rejected": -3.1484267711639404, + "logps/chosen": -116.85279846191406, + "logps/rejected": -392.5643310546875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.041916847229004, + "rewards/margins": 7.493697643280029, + "rewards/rejected": -9.535614013671875, + "step": 2455 + }, + { + "epoch": 0.38, + "learning_rate": 1.2346004460812284e-05, + "logits/chosen": -3.1229467391967773, + "logits/rejected": -1.7046586275100708, + "logps/chosen": -629.581298828125, + "logps/rejected": -510.85296630859375, + "loss": 3.1704, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.7988739013671875, + "rewards/margins": -0.32999110221862793, + "rewards/rejected": -4.4688825607299805, + "step": 2456 + }, + { + "epoch": 0.38, + "learning_rate": 1.2345271020281136e-05, + "logits/chosen": -1.919756531715393, + "logits/rejected": -2.9898080825805664, + "logps/chosen": -73.49223327636719, + "logps/rejected": -255.40200805664062, + "loss": 0.0592, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6525051593780518, + "rewards/margins": 3.847935199737549, + "rewards/rejected": -7.5004401206970215, + "step": 2457 + }, + { + "epoch": 0.38, + "learning_rate": 1.2344537579749988e-05, + "logits/chosen": -3.2154178619384766, + "logits/rejected": -3.205587863922119, + "logps/chosen": -424.85137939453125, + "logps/rejected": -464.7130432128906, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5640335083007812, + "rewards/margins": 6.995170593261719, + "rewards/rejected": -8.5592041015625, + "step": 2458 + }, + { + "epoch": 0.38, + "learning_rate": 1.234380413921884e-05, + "logits/chosen": -2.611664295196533, + "logits/rejected": -3.029780864715576, + "logps/chosen": -123.80989074707031, + "logps/rejected": -276.99346923828125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2073864936828613, + "rewards/margins": 6.69535493850708, + "rewards/rejected": -8.902741432189941, + "step": 2459 + }, + { + "epoch": 0.38, + "learning_rate": 1.2343070698687691e-05, + "logits/chosen": -3.0858592987060547, + "logits/rejected": -3.023254632949829, + "logps/chosen": -225.22833251953125, + "logps/rejected": -406.78289794921875, + "loss": 1.201, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.586329698562622, + "rewards/margins": 1.3344316482543945, + "rewards/rejected": -4.9207611083984375, + "step": 2460 + }, + { + "epoch": 0.38, + "learning_rate": 1.2342337258156543e-05, + "logits/chosen": -1.7983728647232056, + "logits/rejected": -2.9026012420654297, + "logps/chosen": -71.18118286132812, + "logps/rejected": -257.8499755859375, + "loss": 0.043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4123363494873047, + "rewards/margins": 4.188922882080078, + "rewards/rejected": -7.601259231567383, + "step": 2461 + }, + { + "epoch": 0.38, + "learning_rate": 1.2341603817625395e-05, + "logits/chosen": -2.680629014968872, + "logits/rejected": -3.0311954021453857, + "logps/chosen": -94.30029296875, + "logps/rejected": -229.0403594970703, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3349101543426514, + "rewards/margins": 5.5910749435424805, + "rewards/rejected": -8.925985336303711, + "step": 2462 + }, + { + "epoch": 0.38, + "learning_rate": 1.2340870377094247e-05, + "logits/chosen": -3.1463918685913086, + "logits/rejected": -2.942221164703369, + "logps/chosen": -340.216064453125, + "logps/rejected": -361.80859375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9209022521972656, + "rewards/margins": 6.311424255371094, + "rewards/rejected": -9.23232650756836, + "step": 2463 + }, + { + "epoch": 0.38, + "learning_rate": 1.23401369365631e-05, + "logits/chosen": -2.4600706100463867, + "logits/rejected": -2.9989311695098877, + "logps/chosen": -71.88243865966797, + "logps/rejected": -263.0412292480469, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.234694719314575, + "rewards/margins": 6.416708946228027, + "rewards/rejected": -9.651403427124023, + "step": 2464 + }, + { + "epoch": 0.38, + "learning_rate": 1.2339403496031953e-05, + "logits/chosen": -1.7254705429077148, + "logits/rejected": -3.133847236633301, + "logps/chosen": -134.6241455078125, + "logps/rejected": -303.1197509765625, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.762969970703125, + "rewards/margins": 1.252189040184021, + "rewards/rejected": -6.0151591300964355, + "step": 2465 + }, + { + "epoch": 0.38, + "learning_rate": 1.2338670055500804e-05, + "logits/chosen": -2.9298112392425537, + "logits/rejected": -3.023555278778076, + "logps/chosen": -121.70952606201172, + "logps/rejected": -176.15097045898438, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.174753189086914, + "rewards/margins": 6.418229103088379, + "rewards/rejected": -8.592982292175293, + "step": 2466 + }, + { + "epoch": 0.38, + "learning_rate": 1.2337936614969656e-05, + "logits/chosen": -2.9790730476379395, + "logits/rejected": -3.1143667697906494, + "logps/chosen": -189.76417541503906, + "logps/rejected": -285.8751525878906, + "loss": 0.1007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2047011852264404, + "rewards/margins": 5.285755157470703, + "rewards/rejected": -7.490456581115723, + "step": 2467 + }, + { + "epoch": 0.38, + "learning_rate": 1.2337203174438508e-05, + "logits/chosen": -2.1474478244781494, + "logits/rejected": -2.8935060501098633, + "logps/chosen": -325.87823486328125, + "logps/rejected": -606.161376953125, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.570622444152832, + "rewards/margins": 7.9546799659729, + "rewards/rejected": -10.52530288696289, + "step": 2468 + }, + { + "epoch": 0.38, + "learning_rate": 1.233646973390736e-05, + "logits/chosen": -2.874251365661621, + "logits/rejected": -3.110140800476074, + "logps/chosen": -127.66700744628906, + "logps/rejected": -159.18362426757812, + "loss": 2.3399, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.70017671585083, + "rewards/margins": -0.5626612901687622, + "rewards/rejected": -5.137515544891357, + "step": 2469 + }, + { + "epoch": 0.38, + "learning_rate": 1.2335736293376212e-05, + "logits/chosen": -2.5413014888763428, + "logits/rejected": -3.016390800476074, + "logps/chosen": -676.1962280273438, + "logps/rejected": -599.1277465820312, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9028074741363525, + "rewards/margins": 6.835576057434082, + "rewards/rejected": -8.738383293151855, + "step": 2470 + }, + { + "epoch": 0.38, + "learning_rate": 1.2335002852845064e-05, + "logits/chosen": -3.3046579360961914, + "logits/rejected": -3.1338655948638916, + "logps/chosen": -208.73410034179688, + "logps/rejected": -166.2401885986328, + "loss": 2.0134, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.640893936157227, + "rewards/margins": -1.1352825164794922, + "rewards/rejected": -3.5056111812591553, + "step": 2471 + }, + { + "epoch": 0.38, + "learning_rate": 1.2334269412313916e-05, + "logits/chosen": -3.020057439804077, + "logits/rejected": -2.664686679840088, + "logps/chosen": -191.15228271484375, + "logps/rejected": -200.69158935546875, + "loss": 2.3194, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.850857734680176, + "rewards/margins": -0.5219953060150146, + "rewards/rejected": -5.32886266708374, + "step": 2472 + }, + { + "epoch": 0.38, + "learning_rate": 1.233353597178277e-05, + "logits/chosen": -2.9650375843048096, + "logits/rejected": -2.1919479370117188, + "logps/chosen": -282.9584655761719, + "logps/rejected": -280.5195617675781, + "loss": 2.1057, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.510545492172241, + "rewards/margins": 1.8901770114898682, + "rewards/rejected": -5.400722503662109, + "step": 2473 + }, + { + "epoch": 0.38, + "learning_rate": 1.2332802531251621e-05, + "logits/chosen": -2.636772871017456, + "logits/rejected": -2.99701189994812, + "logps/chosen": -301.04388427734375, + "logps/rejected": -356.12921142578125, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8084917068481445, + "rewards/margins": 6.123504638671875, + "rewards/rejected": -8.93199634552002, + "step": 2474 + }, + { + "epoch": 0.38, + "learning_rate": 1.2332069090720473e-05, + "logits/chosen": -0.9034981727600098, + "logits/rejected": -2.9661619663238525, + "logps/chosen": -91.75344848632812, + "logps/rejected": -610.6243896484375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9026615619659424, + "rewards/margins": 9.27773666381836, + "rewards/rejected": -12.180398941040039, + "step": 2475 + }, + { + "epoch": 0.39, + "learning_rate": 1.2331335650189325e-05, + "logits/chosen": -2.9930553436279297, + "logits/rejected": -3.056648015975952, + "logps/chosen": -160.64512634277344, + "logps/rejected": -181.28028869628906, + "loss": 1.7655, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.402752637863159, + "rewards/margins": 1.0109083652496338, + "rewards/rejected": -4.413661003112793, + "step": 2476 + }, + { + "epoch": 0.39, + "learning_rate": 1.2330602209658177e-05, + "logits/chosen": -2.756768226623535, + "logits/rejected": -3.1481711864471436, + "logps/chosen": -121.96726989746094, + "logps/rejected": -293.5254821777344, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.778034210205078, + "rewards/margins": 6.237730026245117, + "rewards/rejected": -10.015764236450195, + "step": 2477 + }, + { + "epoch": 0.39, + "learning_rate": 1.2329868769127029e-05, + "logits/chosen": -2.2517855167388916, + "logits/rejected": -2.9289491176605225, + "logps/chosen": -126.01262664794922, + "logps/rejected": -303.1589050292969, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.536515235900879, + "rewards/margins": 6.450289249420166, + "rewards/rejected": -8.986804008483887, + "step": 2478 + }, + { + "epoch": 0.39, + "learning_rate": 1.232913532859588e-05, + "logits/chosen": -2.680736780166626, + "logits/rejected": -3.0753610134124756, + "logps/chosen": -110.96284484863281, + "logps/rejected": -268.95428466796875, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9599575996398926, + "rewards/margins": 4.998542785644531, + "rewards/rejected": -7.958499908447266, + "step": 2479 + }, + { + "epoch": 0.39, + "learning_rate": 1.2328401888064732e-05, + "logits/chosen": -2.8136115074157715, + "logits/rejected": -1.5537497997283936, + "logps/chosen": -298.39703369140625, + "logps/rejected": -126.29815673828125, + "loss": 3.1191, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.92458438873291, + "rewards/margins": -2.275763988494873, + "rewards/rejected": -3.648820161819458, + "step": 2480 + }, + { + "epoch": 0.39, + "learning_rate": 1.2327668447533584e-05, + "logits/chosen": -1.1725986003875732, + "logits/rejected": -2.3097643852233887, + "logps/chosen": -555.6771240234375, + "logps/rejected": -750.8897705078125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1960151195526123, + "rewards/margins": 10.652788162231445, + "rewards/rejected": -12.848804473876953, + "step": 2481 + }, + { + "epoch": 0.39, + "learning_rate": 1.2326935007002438e-05, + "logits/chosen": -2.5397164821624756, + "logits/rejected": -2.9949090480804443, + "logps/chosen": -118.76143646240234, + "logps/rejected": -298.87451171875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.977731704711914, + "rewards/margins": 6.49037504196167, + "rewards/rejected": -8.468107223510742, + "step": 2482 + }, + { + "epoch": 0.39, + "learning_rate": 1.232620156647129e-05, + "logits/chosen": -2.0771427154541016, + "logits/rejected": -2.0486748218536377, + "logps/chosen": -232.32901000976562, + "logps/rejected": -272.7049865722656, + "loss": 2.3231, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.284697532653809, + "rewards/margins": -1.909570574760437, + "rewards/rejected": -3.375126838684082, + "step": 2483 + }, + { + "epoch": 0.39, + "learning_rate": 1.2325468125940143e-05, + "logits/chosen": -2.9446516036987305, + "logits/rejected": -2.776388645172119, + "logps/chosen": -592.4139404296875, + "logps/rejected": -582.3424072265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45797425508499146, + "rewards/margins": 8.962442398071289, + "rewards/rejected": -9.420415878295898, + "step": 2484 + }, + { + "epoch": 0.39, + "learning_rate": 1.2324734685408995e-05, + "logits/chosen": -3.109912157058716, + "logits/rejected": -2.7061595916748047, + "logps/chosen": -528.0396728515625, + "logps/rejected": -315.9796142578125, + "loss": 1.0851, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.814924716949463, + "rewards/margins": 4.36513614654541, + "rewards/rejected": -8.180060386657715, + "step": 2485 + }, + { + "epoch": 0.39, + "learning_rate": 1.2324001244877847e-05, + "logits/chosen": -2.9099724292755127, + "logits/rejected": -2.9675076007843018, + "logps/chosen": -421.4920654296875, + "logps/rejected": -208.86404418945312, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.377896547317505, + "rewards/margins": 4.091455936431885, + "rewards/rejected": -6.469352722167969, + "step": 2486 + }, + { + "epoch": 0.39, + "learning_rate": 1.2323267804346699e-05, + "logits/chosen": -3.004488945007324, + "logits/rejected": -2.8759071826934814, + "logps/chosen": -185.02676391601562, + "logps/rejected": -239.03662109375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.203730821609497, + "rewards/margins": 6.028690814971924, + "rewards/rejected": -8.232421875, + "step": 2487 + }, + { + "epoch": 0.39, + "learning_rate": 1.2322534363815551e-05, + "logits/chosen": -2.72361421585083, + "logits/rejected": -2.9693331718444824, + "logps/chosen": -114.33185577392578, + "logps/rejected": -268.3368225097656, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1119093894958496, + "rewards/margins": 5.143274307250977, + "rewards/rejected": -7.255183696746826, + "step": 2488 + }, + { + "epoch": 0.39, + "learning_rate": 1.2321800923284403e-05, + "logits/chosen": -2.7972683906555176, + "logits/rejected": -2.867565631866455, + "logps/chosen": -167.641845703125, + "logps/rejected": -228.65487670898438, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.11405086517334, + "rewards/margins": 4.649760723114014, + "rewards/rejected": -7.7638115882873535, + "step": 2489 + }, + { + "epoch": 0.39, + "learning_rate": 1.2321067482753255e-05, + "logits/chosen": -2.300807237625122, + "logits/rejected": -2.9921820163726807, + "logps/chosen": -126.00880432128906, + "logps/rejected": -196.39166259765625, + "loss": 0.0762, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.705444812774658, + "rewards/margins": 5.054937362670898, + "rewards/rejected": -7.760382652282715, + "step": 2490 + }, + { + "epoch": 0.39, + "learning_rate": 1.2320334042222108e-05, + "logits/chosen": -2.1496100425720215, + "logits/rejected": -3.2309465408325195, + "logps/chosen": -99.67394256591797, + "logps/rejected": -497.9202880859375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.830768346786499, + "rewards/margins": 7.5980682373046875, + "rewards/rejected": -9.428836822509766, + "step": 2491 + }, + { + "epoch": 0.39, + "learning_rate": 1.231960060169096e-05, + "logits/chosen": -2.954803943634033, + "logits/rejected": -3.1862423419952393, + "logps/chosen": -133.9455108642578, + "logps/rejected": -255.00143432617188, + "loss": 0.0664, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5286481380462646, + "rewards/margins": 4.112747669219971, + "rewards/rejected": -6.641395568847656, + "step": 2492 + }, + { + "epoch": 0.39, + "learning_rate": 1.2318867161159812e-05, + "logits/chosen": -2.2637274265289307, + "logits/rejected": -2.96905517578125, + "logps/chosen": -71.2105941772461, + "logps/rejected": -344.17144775390625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8252313137054443, + "rewards/margins": 7.111438751220703, + "rewards/rejected": -10.936670303344727, + "step": 2493 + }, + { + "epoch": 0.39, + "learning_rate": 1.2318133720628664e-05, + "logits/chosen": -1.5706933736801147, + "logits/rejected": -2.7508206367492676, + "logps/chosen": -93.53143310546875, + "logps/rejected": -316.5953369140625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1323485374450684, + "rewards/margins": 7.507181167602539, + "rewards/rejected": -10.639530181884766, + "step": 2494 + }, + { + "epoch": 0.39, + "learning_rate": 1.2317400280097516e-05, + "logits/chosen": -2.571601152420044, + "logits/rejected": -2.8413515090942383, + "logps/chosen": -373.7525634765625, + "logps/rejected": -548.3221435546875, + "loss": 4.0478, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.3521318435668945, + "rewards/margins": -1.3961658477783203, + "rewards/rejected": -3.9559662342071533, + "step": 2495 + }, + { + "epoch": 0.39, + "learning_rate": 1.2316666839566368e-05, + "logits/chosen": -2.708712577819824, + "logits/rejected": -2.1962685585021973, + "logps/chosen": -226.49786376953125, + "logps/rejected": -185.09652709960938, + "loss": 2.659, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.1481614112854, + "rewards/margins": 0.5325844287872314, + "rewards/rejected": -4.680746078491211, + "step": 2496 + }, + { + "epoch": 0.39, + "learning_rate": 1.231593339903522e-05, + "logits/chosen": -3.007162570953369, + "logits/rejected": -2.998491048812866, + "logps/chosen": -370.7880859375, + "logps/rejected": -341.1926574707031, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.785426378250122, + "rewards/margins": 8.395763397216797, + "rewards/rejected": -11.181190490722656, + "step": 2497 + }, + { + "epoch": 0.39, + "learning_rate": 1.2315199958504071e-05, + "logits/chosen": -2.8684210777282715, + "logits/rejected": -3.103853464126587, + "logps/chosen": -137.77737426757812, + "logps/rejected": -225.73281860351562, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.156446933746338, + "rewards/margins": 4.0113444328308105, + "rewards/rejected": -6.167791366577148, + "step": 2498 + }, + { + "epoch": 0.39, + "learning_rate": 1.2314466517972925e-05, + "logits/chosen": -2.990307092666626, + "logits/rejected": -2.953350305557251, + "logps/chosen": -126.80731201171875, + "logps/rejected": -295.7278137207031, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9251708984375, + "rewards/margins": 8.805644989013672, + "rewards/rejected": -9.730815887451172, + "step": 2499 + }, + { + "epoch": 0.39, + "learning_rate": 1.2313733077441777e-05, + "logits/chosen": -1.6655412912368774, + "logits/rejected": -3.1916017532348633, + "logps/chosen": -118.34575653076172, + "logps/rejected": -505.68328857421875, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.798577308654785, + "rewards/margins": 3.9686532020568848, + "rewards/rejected": -6.767230033874512, + "step": 2500 + }, + { + "epoch": 0.39, + "learning_rate": 1.2312999636910629e-05, + "logits/chosen": -3.04081654548645, + "logits/rejected": -2.7163896560668945, + "logps/chosen": -464.8416748046875, + "logps/rejected": -454.51654052734375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4272356033325195, + "rewards/margins": 6.1327314376831055, + "rewards/rejected": -8.559967041015625, + "step": 2501 + }, + { + "epoch": 0.39, + "learning_rate": 1.231226619637948e-05, + "logits/chosen": -2.7584166526794434, + "logits/rejected": -2.4247045516967773, + "logps/chosen": -197.26544189453125, + "logps/rejected": -224.97242736816406, + "loss": 2.1212, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.083052158355713, + "rewards/margins": 0.3721177577972412, + "rewards/rejected": -5.455169677734375, + "step": 2502 + }, + { + "epoch": 0.39, + "learning_rate": 1.2311532755848332e-05, + "logits/chosen": -2.5715558528900146, + "logits/rejected": -3.0251195430755615, + "logps/chosen": -169.1168670654297, + "logps/rejected": -327.1434326171875, + "loss": 0.0414, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7632455825805664, + "rewards/margins": 3.8123855590820312, + "rewards/rejected": -6.575631141662598, + "step": 2503 + }, + { + "epoch": 0.39, + "learning_rate": 1.2310799315317184e-05, + "logits/chosen": -0.9476284980773926, + "logits/rejected": -3.16900634765625, + "logps/chosen": -115.62493896484375, + "logps/rejected": -430.2144775390625, + "loss": 0.0317, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2125710248947144, + "rewards/margins": 6.263758659362793, + "rewards/rejected": -7.476329326629639, + "step": 2504 + }, + { + "epoch": 0.39, + "learning_rate": 1.2310065874786036e-05, + "logits/chosen": -3.0715513229370117, + "logits/rejected": -0.9835927486419678, + "logps/chosen": -652.79150390625, + "logps/rejected": -251.9849853515625, + "loss": 0.0692, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.412748098373413, + "rewards/margins": 4.283931255340576, + "rewards/rejected": -7.69667911529541, + "step": 2505 + }, + { + "epoch": 0.39, + "learning_rate": 1.2309332434254888e-05, + "logits/chosen": -3.116899251937866, + "logits/rejected": -2.9660682678222656, + "logps/chosen": -175.44345092773438, + "logps/rejected": -120.42665100097656, + "loss": 0.4486, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.90110182762146, + "rewards/margins": 2.495375871658325, + "rewards/rejected": -6.396477699279785, + "step": 2506 + }, + { + "epoch": 0.39, + "learning_rate": 1.230859899372374e-05, + "logits/chosen": -3.071153402328491, + "logits/rejected": -2.5763590335845947, + "logps/chosen": -122.93224334716797, + "logps/rejected": -194.71368408203125, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.918919324874878, + "rewards/margins": 5.183689117431641, + "rewards/rejected": -7.102608680725098, + "step": 2507 + }, + { + "epoch": 0.39, + "learning_rate": 1.2307865553192593e-05, + "logits/chosen": -2.8389225006103516, + "logits/rejected": -2.7293875217437744, + "logps/chosen": -130.97621154785156, + "logps/rejected": -223.88784790039062, + "loss": 0.9185, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.705472469329834, + "rewards/margins": 2.8779959678649902, + "rewards/rejected": -7.583468437194824, + "step": 2508 + }, + { + "epoch": 0.39, + "learning_rate": 1.2307132112661445e-05, + "logits/chosen": -3.1199755668640137, + "logits/rejected": -1.2144497632980347, + "logps/chosen": -608.6307373046875, + "logps/rejected": -283.80267333984375, + "loss": 2.3348, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.665600776672363, + "rewards/margins": 1.060471773147583, + "rewards/rejected": -6.726072311401367, + "step": 2509 + }, + { + "epoch": 0.39, + "learning_rate": 1.2306398672130297e-05, + "logits/chosen": -3.202634811401367, + "logits/rejected": -2.9439868927001953, + "logps/chosen": -446.77996826171875, + "logps/rejected": -243.99624633789062, + "loss": 6.2379, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.018218040466309, + "rewards/margins": -6.235810279846191, + "rewards/rejected": -1.7824077606201172, + "step": 2510 + }, + { + "epoch": 0.39, + "learning_rate": 1.2305665231599149e-05, + "logits/chosen": -3.1153974533081055, + "logits/rejected": -2.634575128555298, + "logps/chosen": -291.3053894042969, + "logps/rejected": -172.40060424804688, + "loss": 2.4153, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.317730903625488, + "rewards/margins": 1.3916926383972168, + "rewards/rejected": -7.709423065185547, + "step": 2511 + }, + { + "epoch": 0.39, + "learning_rate": 1.2304931791068001e-05, + "logits/chosen": -2.1346492767333984, + "logits/rejected": -2.927898406982422, + "logps/chosen": -225.86834716796875, + "logps/rejected": -407.6182556152344, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2241730690002441, + "rewards/margins": 7.889584541320801, + "rewards/rejected": -9.113757133483887, + "step": 2512 + }, + { + "epoch": 0.39, + "learning_rate": 1.2304198350536853e-05, + "logits/chosen": -2.8583531379699707, + "logits/rejected": -2.5354197025299072, + "logps/chosen": -130.8127899169922, + "logps/rejected": -284.4910888671875, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6507512331008911, + "rewards/margins": 5.642725944519043, + "rewards/rejected": -7.2934770584106445, + "step": 2513 + }, + { + "epoch": 0.39, + "learning_rate": 1.2303464910005705e-05, + "logits/chosen": -1.621288537979126, + "logits/rejected": -3.0898585319519043, + "logps/chosen": -43.52057647705078, + "logps/rejected": -205.25082397460938, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9622435569763184, + "rewards/margins": 5.572423934936523, + "rewards/rejected": -8.53466796875, + "step": 2514 + }, + { + "epoch": 0.39, + "learning_rate": 1.2302731469474557e-05, + "logits/chosen": -2.879251003265381, + "logits/rejected": -2.7409610748291016, + "logps/chosen": -153.24887084960938, + "logps/rejected": -172.168701171875, + "loss": 0.0429, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0303359031677246, + "rewards/margins": 3.2157387733459473, + "rewards/rejected": -6.246074676513672, + "step": 2515 + }, + { + "epoch": 0.39, + "learning_rate": 1.230199802894341e-05, + "logits/chosen": -1.7006906270980835, + "logits/rejected": -2.672044038772583, + "logps/chosen": -371.9987487792969, + "logps/rejected": -437.82806396484375, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7562882900238037, + "rewards/margins": 6.384520053863525, + "rewards/rejected": -8.14080810546875, + "step": 2516 + }, + { + "epoch": 0.39, + "learning_rate": 1.2301264588412262e-05, + "logits/chosen": -2.3824050426483154, + "logits/rejected": -2.971745252609253, + "logps/chosen": -267.53521728515625, + "logps/rejected": -400.2401428222656, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9143197536468506, + "rewards/margins": 6.327861785888672, + "rewards/rejected": -8.242181777954102, + "step": 2517 + }, + { + "epoch": 0.39, + "learning_rate": 1.2300531147881116e-05, + "logits/chosen": -0.954186201095581, + "logits/rejected": -2.725036144256592, + "logps/chosen": -50.86640930175781, + "logps/rejected": -179.0286407470703, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1104846000671387, + "rewards/margins": 4.63363790512085, + "rewards/rejected": -7.744122505187988, + "step": 2518 + }, + { + "epoch": 0.39, + "learning_rate": 1.2299797707349968e-05, + "logits/chosen": -2.9937543869018555, + "logits/rejected": -1.6801936626434326, + "logps/chosen": -252.3710479736328, + "logps/rejected": -147.8945770263672, + "loss": 0.768, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.789860725402832, + "rewards/margins": 0.9991205930709839, + "rewards/rejected": -5.7889814376831055, + "step": 2519 + }, + { + "epoch": 0.39, + "learning_rate": 1.229906426681882e-05, + "logits/chosen": -2.8386895656585693, + "logits/rejected": -3.1209115982055664, + "logps/chosen": -285.42608642578125, + "logps/rejected": -412.8739929199219, + "loss": 5.6596, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.062027931213379, + "rewards/margins": -3.0419182777404785, + "rewards/rejected": -3.0201096534729004, + "step": 2520 + }, + { + "epoch": 0.39, + "learning_rate": 1.2298330826287671e-05, + "logits/chosen": -2.1561124324798584, + "logits/rejected": -3.027172088623047, + "logps/chosen": -390.947998046875, + "logps/rejected": -380.6377868652344, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2801971435546875, + "rewards/margins": 6.606410026550293, + "rewards/rejected": -6.8866071701049805, + "step": 2521 + }, + { + "epoch": 0.39, + "learning_rate": 1.2297597385756523e-05, + "logits/chosen": -1.7333166599273682, + "logits/rejected": -3.0000715255737305, + "logps/chosen": -129.08168029785156, + "logps/rejected": -263.4664306640625, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.470059394836426, + "rewards/margins": 3.9973676204681396, + "rewards/rejected": -7.4674272537231445, + "step": 2522 + }, + { + "epoch": 0.39, + "learning_rate": 1.2296863945225375e-05, + "logits/chosen": -2.937227725982666, + "logits/rejected": -2.4795000553131104, + "logps/chosen": -319.70953369140625, + "logps/rejected": -202.0889892578125, + "loss": 2.1089, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.080296516418457, + "rewards/margins": 1.2070794105529785, + "rewards/rejected": -6.2873759269714355, + "step": 2523 + }, + { + "epoch": 0.39, + "learning_rate": 1.2296130504694227e-05, + "logits/chosen": -2.278747320175171, + "logits/rejected": -3.06612491607666, + "logps/chosen": -170.56333923339844, + "logps/rejected": -301.7130432128906, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9462733268737793, + "rewards/margins": 6.330514907836914, + "rewards/rejected": -8.276788711547852, + "step": 2524 + }, + { + "epoch": 0.39, + "learning_rate": 1.2295397064163079e-05, + "logits/chosen": -3.078132152557373, + "logits/rejected": -2.778111696243286, + "logps/chosen": -368.6163330078125, + "logps/rejected": -369.2427978515625, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7245521545410156, + "rewards/margins": 4.2491679191589355, + "rewards/rejected": -5.973720073699951, + "step": 2525 + }, + { + "epoch": 0.39, + "learning_rate": 1.2294663623631932e-05, + "logits/chosen": -2.9186997413635254, + "logits/rejected": -3.243891477584839, + "logps/chosen": -125.35616302490234, + "logps/rejected": -236.4926300048828, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6560800075531006, + "rewards/margins": 7.222999095916748, + "rewards/rejected": -7.879079341888428, + "step": 2526 + }, + { + "epoch": 0.39, + "learning_rate": 1.2293930183100784e-05, + "logits/chosen": -2.96751070022583, + "logits/rejected": -2.3242154121398926, + "logps/chosen": -754.25390625, + "logps/rejected": -533.528076171875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4150147438049316, + "rewards/margins": 7.555203437805176, + "rewards/rejected": -9.97021770477295, + "step": 2527 + }, + { + "epoch": 0.39, + "learning_rate": 1.2293196742569636e-05, + "logits/chosen": -2.968500852584839, + "logits/rejected": -2.115784168243408, + "logps/chosen": -210.6703338623047, + "logps/rejected": -103.45124053955078, + "loss": 0.1212, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7523512840270996, + "rewards/margins": 2.5584425926208496, + "rewards/rejected": -5.310793876647949, + "step": 2528 + }, + { + "epoch": 0.39, + "learning_rate": 1.2292463302038488e-05, + "logits/chosen": -2.392799139022827, + "logits/rejected": -2.763396739959717, + "logps/chosen": -139.940673828125, + "logps/rejected": -162.79977416992188, + "loss": 0.0209, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6062965393066406, + "rewards/margins": 5.360275745391846, + "rewards/rejected": -7.966572284698486, + "step": 2529 + }, + { + "epoch": 0.39, + "learning_rate": 1.229172986150734e-05, + "logits/chosen": -2.2996065616607666, + "logits/rejected": -2.5845422744750977, + "logps/chosen": -238.31967163085938, + "logps/rejected": -337.1622314453125, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.138512134552002, + "rewards/margins": 5.778674125671387, + "rewards/rejected": -6.917186260223389, + "step": 2530 + }, + { + "epoch": 0.39, + "learning_rate": 1.2290996420976192e-05, + "logits/chosen": -2.5536487102508545, + "logits/rejected": -3.146911382675171, + "logps/chosen": -106.23698425292969, + "logps/rejected": -356.01153564453125, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.101503372192383, + "rewards/margins": 4.238859176635742, + "rewards/rejected": -6.340362548828125, + "step": 2531 + }, + { + "epoch": 0.39, + "learning_rate": 1.2290262980445044e-05, + "logits/chosen": -2.598215103149414, + "logits/rejected": -3.0295982360839844, + "logps/chosen": -63.54545211791992, + "logps/rejected": -148.6624298095703, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.15504789352417, + "rewards/margins": 4.731184005737305, + "rewards/rejected": -5.886231422424316, + "step": 2532 + }, + { + "epoch": 0.39, + "learning_rate": 1.2289529539913896e-05, + "logits/chosen": -1.8714232444763184, + "logits/rejected": -2.945939064025879, + "logps/chosen": -177.86911010742188, + "logps/rejected": -336.4322509765625, + "loss": 2.1514, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.913778781890869, + "rewards/margins": -0.06527376174926758, + "rewards/rejected": -4.848505020141602, + "step": 2533 + }, + { + "epoch": 0.39, + "learning_rate": 1.2288796099382747e-05, + "logits/chosen": -1.62875497341156, + "logits/rejected": -2.997051954269409, + "logps/chosen": -109.44169616699219, + "logps/rejected": -406.18780517578125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1030642986297607, + "rewards/margins": 8.09451675415039, + "rewards/rejected": -11.197580337524414, + "step": 2534 + }, + { + "epoch": 0.39, + "learning_rate": 1.2288062658851601e-05, + "logits/chosen": -1.7528033256530762, + "logits/rejected": -2.780531644821167, + "logps/chosen": -54.14544677734375, + "logps/rejected": -236.1905517578125, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2730050086975098, + "rewards/margins": 4.290963172912598, + "rewards/rejected": -6.563968181610107, + "step": 2535 + }, + { + "epoch": 0.39, + "learning_rate": 1.2287329218320453e-05, + "logits/chosen": -2.630317449569702, + "logits/rejected": -3.1073200702667236, + "logps/chosen": -100.34449768066406, + "logps/rejected": -208.52099609375, + "loss": 0.1457, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4890480041503906, + "rewards/margins": 2.3875646591186523, + "rewards/rejected": -5.876612663269043, + "step": 2536 + }, + { + "epoch": 0.39, + "learning_rate": 1.2286595777789305e-05, + "logits/chosen": -2.8492720127105713, + "logits/rejected": -2.330742597579956, + "logps/chosen": -207.66497802734375, + "logps/rejected": -289.93914794921875, + "loss": 0.4139, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1624772548675537, + "rewards/margins": 2.9585061073303223, + "rewards/rejected": -5.120983123779297, + "step": 2537 + }, + { + "epoch": 0.39, + "learning_rate": 1.2285862337258157e-05, + "logits/chosen": -1.8955867290496826, + "logits/rejected": -2.7210164070129395, + "logps/chosen": -198.20187377929688, + "logps/rejected": -412.5298767089844, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.680325508117676, + "rewards/margins": 4.4491167068481445, + "rewards/rejected": -7.12944221496582, + "step": 2538 + }, + { + "epoch": 0.39, + "learning_rate": 1.2285128896727009e-05, + "logits/chosen": -1.6138468980789185, + "logits/rejected": -2.999176025390625, + "logps/chosen": -218.24526977539062, + "logps/rejected": -214.56948852539062, + "loss": 2.7696, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.62902307510376, + "rewards/margins": 1.6740355491638184, + "rewards/rejected": -7.303058624267578, + "step": 2539 + }, + { + "epoch": 0.4, + "learning_rate": 1.228439545619586e-05, + "logits/chosen": -3.0331015586853027, + "logits/rejected": -2.591507911682129, + "logps/chosen": -135.42062377929688, + "logps/rejected": -168.02316284179688, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4671943187713623, + "rewards/margins": 5.353734970092773, + "rewards/rejected": -7.820929527282715, + "step": 2540 + }, + { + "epoch": 0.4, + "learning_rate": 1.2283662015664712e-05, + "logits/chosen": -1.5699578523635864, + "logits/rejected": -2.735177516937256, + "logps/chosen": -160.6529998779297, + "logps/rejected": -349.80670166015625, + "loss": 0.5155, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1300156116485596, + "rewards/margins": 4.4817094802856445, + "rewards/rejected": -6.611724853515625, + "step": 2541 + }, + { + "epoch": 0.4, + "learning_rate": 1.2282928575133564e-05, + "logits/chosen": -1.9508048295974731, + "logits/rejected": -2.928299903869629, + "logps/chosen": -173.3417205810547, + "logps/rejected": -315.59375, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0168159008026123, + "rewards/margins": 5.384145736694336, + "rewards/rejected": -7.400961875915527, + "step": 2542 + }, + { + "epoch": 0.4, + "learning_rate": 1.2282195134602416e-05, + "logits/chosen": -3.14503812789917, + "logits/rejected": -2.857912063598633, + "logps/chosen": -265.10888671875, + "logps/rejected": -239.68284606933594, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.026576280593872, + "rewards/margins": 3.8184080123901367, + "rewards/rejected": -5.844984531402588, + "step": 2543 + }, + { + "epoch": 0.4, + "learning_rate": 1.228146169407127e-05, + "logits/chosen": -2.7895357608795166, + "logits/rejected": -3.1630759239196777, + "logps/chosen": -44.346092224121094, + "logps/rejected": -206.29566955566406, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.36629056930542, + "rewards/margins": 6.8392181396484375, + "rewards/rejected": -10.205509185791016, + "step": 2544 + }, + { + "epoch": 0.4, + "learning_rate": 1.2280728253540121e-05, + "logits/chosen": -2.667099952697754, + "logits/rejected": -2.3096981048583984, + "logps/chosen": -163.8880615234375, + "logps/rejected": -142.56002807617188, + "loss": 3.9745, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.874617099761963, + "rewards/margins": -3.9265127182006836, + "rewards/rejected": -2.9481046199798584, + "step": 2545 + }, + { + "epoch": 0.4, + "learning_rate": 1.2279994813008973e-05, + "logits/chosen": -2.9577181339263916, + "logits/rejected": -2.195944309234619, + "logps/chosen": -218.5091094970703, + "logps/rejected": -146.67266845703125, + "loss": 3.7827, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.110811710357666, + "rewards/margins": -0.001230001449584961, + "rewards/rejected": -5.109581470489502, + "step": 2546 + }, + { + "epoch": 0.4, + "learning_rate": 1.2279261372477825e-05, + "logits/chosen": -2.4718363285064697, + "logits/rejected": -3.058037042617798, + "logps/chosen": -206.16880798339844, + "logps/rejected": -340.08135986328125, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.30359947681427, + "rewards/margins": 6.152606964111328, + "rewards/rejected": -7.456206321716309, + "step": 2547 + }, + { + "epoch": 0.4, + "learning_rate": 1.2278527931946677e-05, + "logits/chosen": -1.9502004384994507, + "logits/rejected": -3.2511723041534424, + "logps/chosen": -67.09819030761719, + "logps/rejected": -497.2239074707031, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2052135467529297, + "rewards/margins": 6.187358379364014, + "rewards/rejected": -8.392572402954102, + "step": 2548 + }, + { + "epoch": 0.4, + "learning_rate": 1.2277794491415529e-05, + "logits/chosen": -2.308316230773926, + "logits/rejected": -2.9518322944641113, + "logps/chosen": -178.97149658203125, + "logps/rejected": -336.63140869140625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3355129957199097, + "rewards/margins": 7.628937244415283, + "rewards/rejected": -8.96445083618164, + "step": 2549 + }, + { + "epoch": 0.4, + "learning_rate": 1.2277061050884383e-05, + "logits/chosen": -3.0216636657714844, + "logits/rejected": -2.9817206859588623, + "logps/chosen": -195.53639221191406, + "logps/rejected": -332.8892822265625, + "loss": 0.031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9895839691162109, + "rewards/margins": 5.61452579498291, + "rewards/rejected": -6.604109764099121, + "step": 2550 + }, + { + "epoch": 0.4, + "learning_rate": 1.2276327610353234e-05, + "logits/chosen": -1.5047979354858398, + "logits/rejected": -2.6807024478912354, + "logps/chosen": -134.59854125976562, + "logps/rejected": -361.45849609375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8118128776550293, + "rewards/margins": 6.185849189758301, + "rewards/rejected": -8.997662544250488, + "step": 2551 + }, + { + "epoch": 0.4, + "learning_rate": 1.2275594169822086e-05, + "logits/chosen": -2.494749069213867, + "logits/rejected": -2.956822395324707, + "logps/chosen": -253.02626037597656, + "logps/rejected": -370.617919921875, + "loss": 5.5636, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.734987258911133, + "rewards/margins": -2.7232892513275146, + "rewards/rejected": -6.011697769165039, + "step": 2552 + }, + { + "epoch": 0.4, + "learning_rate": 1.227486072929094e-05, + "logits/chosen": -3.0076522827148438, + "logits/rejected": -1.4638956785202026, + "logps/chosen": -574.80029296875, + "logps/rejected": -285.7705078125, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.085249423980713, + "rewards/margins": 4.678194999694824, + "rewards/rejected": -5.763444900512695, + "step": 2553 + }, + { + "epoch": 0.4, + "learning_rate": 1.2274127288759792e-05, + "logits/chosen": -3.043275833129883, + "logits/rejected": -3.0084571838378906, + "logps/chosen": -322.89117431640625, + "logps/rejected": -438.93719482421875, + "loss": 2.9403, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.154277324676514, + "rewards/margins": -2.2649614810943604, + "rewards/rejected": -2.8893158435821533, + "step": 2554 + }, + { + "epoch": 0.4, + "learning_rate": 1.2273393848228644e-05, + "logits/chosen": -2.9967596530914307, + "logits/rejected": -3.032395601272583, + "logps/chosen": -124.17818450927734, + "logps/rejected": -224.8262176513672, + "loss": 1.7141, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2963364124298096, + "rewards/margins": 0.3222815990447998, + "rewards/rejected": -3.6186180114746094, + "step": 2555 + }, + { + "epoch": 0.4, + "learning_rate": 1.2272660407697496e-05, + "logits/chosen": -2.5121564865112305, + "logits/rejected": -2.7925288677215576, + "logps/chosen": -141.3387451171875, + "logps/rejected": -285.39776611328125, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.373234987258911, + "rewards/margins": 5.061916351318359, + "rewards/rejected": -7.435151100158691, + "step": 2556 + }, + { + "epoch": 0.4, + "learning_rate": 1.2271926967166347e-05, + "logits/chosen": -2.939035415649414, + "logits/rejected": -1.7774227857589722, + "logps/chosen": -469.0342102050781, + "logps/rejected": -1124.34619140625, + "loss": 4.1443, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.514761447906494, + "rewards/margins": -1.1585206985473633, + "rewards/rejected": -4.356240749359131, + "step": 2557 + }, + { + "epoch": 0.4, + "learning_rate": 1.22711935266352e-05, + "logits/chosen": -2.113384962081909, + "logits/rejected": -3.1257688999176025, + "logps/chosen": -94.20506286621094, + "logps/rejected": -417.8529968261719, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3678548336029053, + "rewards/margins": 7.05367374420166, + "rewards/rejected": -9.421528816223145, + "step": 2558 + }, + { + "epoch": 0.4, + "learning_rate": 1.2270460086104051e-05, + "logits/chosen": -1.6435577869415283, + "logits/rejected": -2.6902575492858887, + "logps/chosen": -139.1855010986328, + "logps/rejected": -415.44805908203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1071720123291016, + "rewards/margins": 8.867910385131836, + "rewards/rejected": -11.975082397460938, + "step": 2559 + }, + { + "epoch": 0.4, + "learning_rate": 1.2269726645572903e-05, + "logits/chosen": -2.3716113567352295, + "logits/rejected": -2.8372561931610107, + "logps/chosen": -233.39599609375, + "logps/rejected": -197.2154541015625, + "loss": 2.2377, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.626796007156372, + "rewards/margins": 1.2187771797180176, + "rewards/rejected": -4.845573425292969, + "step": 2560 + }, + { + "epoch": 0.4, + "learning_rate": 1.2268993205041755e-05, + "logits/chosen": -3.040992021560669, + "logits/rejected": -2.8267757892608643, + "logps/chosen": -312.83074951171875, + "logps/rejected": -328.2152404785156, + "loss": 0.1224, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2648096084594727, + "rewards/margins": 2.6885364055633545, + "rewards/rejected": -4.953345775604248, + "step": 2561 + }, + { + "epoch": 0.4, + "learning_rate": 1.2268259764510608e-05, + "logits/chosen": -2.9865872859954834, + "logits/rejected": -2.766411066055298, + "logps/chosen": -173.27444458007812, + "logps/rejected": -215.26751708984375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0510406494140625, + "rewards/margins": 7.0568084716796875, + "rewards/rejected": -7.10784912109375, + "step": 2562 + }, + { + "epoch": 0.4, + "learning_rate": 1.226752632397946e-05, + "logits/chosen": -2.3869547843933105, + "logits/rejected": -3.157114028930664, + "logps/chosen": -166.1096954345703, + "logps/rejected": -436.6978759765625, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.156956195831299, + "rewards/margins": 5.939271926879883, + "rewards/rejected": -10.096227645874023, + "step": 2563 + }, + { + "epoch": 0.4, + "learning_rate": 1.2266792883448312e-05, + "logits/chosen": -2.5029165744781494, + "logits/rejected": -3.034310817718506, + "logps/chosen": -64.65157318115234, + "logps/rejected": -203.50437927246094, + "loss": 0.1612, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5545551776885986, + "rewards/margins": 2.34922456741333, + "rewards/rejected": -4.903779983520508, + "step": 2564 + }, + { + "epoch": 0.4, + "learning_rate": 1.2266059442917164e-05, + "logits/chosen": -2.540335178375244, + "logits/rejected": -2.6914899349212646, + "logps/chosen": -126.5580825805664, + "logps/rejected": -391.3632507324219, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0883934497833252, + "rewards/margins": 10.4849853515625, + "rewards/rejected": -11.573378562927246, + "step": 2565 + }, + { + "epoch": 0.4, + "learning_rate": 1.2265326002386016e-05, + "logits/chosen": -2.36004376411438, + "logits/rejected": -3.026017904281616, + "logps/chosen": -181.60498046875, + "logps/rejected": -297.3226318359375, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9366731643676758, + "rewards/margins": 4.354743957519531, + "rewards/rejected": -6.291417121887207, + "step": 2566 + }, + { + "epoch": 0.4, + "learning_rate": 1.2264592561854868e-05, + "logits/chosen": -2.1319315433502197, + "logits/rejected": -3.1017770767211914, + "logps/chosen": -231.15255737304688, + "logps/rejected": -228.11073303222656, + "loss": 0.1235, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2636423110961914, + "rewards/margins": 3.521373748779297, + "rewards/rejected": -4.785016059875488, + "step": 2567 + }, + { + "epoch": 0.4, + "learning_rate": 1.226385912132372e-05, + "logits/chosen": -2.24267315864563, + "logits/rejected": -3.1303646564483643, + "logps/chosen": -140.78121948242188, + "logps/rejected": -587.725341796875, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7237484455108643, + "rewards/margins": 9.752082824707031, + "rewards/rejected": -13.475831985473633, + "step": 2568 + }, + { + "epoch": 0.4, + "learning_rate": 1.2263125680792572e-05, + "logits/chosen": -2.15893292427063, + "logits/rejected": -2.8253021240234375, + "logps/chosen": -200.32017517089844, + "logps/rejected": -207.38809204101562, + "loss": 2.0276, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.1315388679504395, + "rewards/margins": 2.7187981605529785, + "rewards/rejected": -7.850337028503418, + "step": 2569 + }, + { + "epoch": 0.4, + "learning_rate": 1.2262392240261424e-05, + "logits/chosen": -2.663141965866089, + "logits/rejected": -3.1127970218658447, + "logps/chosen": -72.5574722290039, + "logps/rejected": -342.4417419433594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9223394393920898, + "rewards/margins": 7.959984302520752, + "rewards/rejected": -9.88232421875, + "step": 2570 + }, + { + "epoch": 0.4, + "learning_rate": 1.2261658799730277e-05, + "logits/chosen": -2.1360745429992676, + "logits/rejected": -2.820166826248169, + "logps/chosen": -225.11724853515625, + "logps/rejected": -251.16183471679688, + "loss": 3.0988, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.307183265686035, + "rewards/margins": -0.13193535804748535, + "rewards/rejected": -4.175248146057129, + "step": 2571 + }, + { + "epoch": 0.4, + "learning_rate": 1.2260925359199129e-05, + "logits/chosen": -2.807093381881714, + "logits/rejected": -3.023322582244873, + "logps/chosen": -215.10958862304688, + "logps/rejected": -205.188720703125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023172184824943542, + "rewards/margins": 6.301170825958252, + "rewards/rejected": -6.324343204498291, + "step": 2572 + }, + { + "epoch": 0.4, + "learning_rate": 1.226019191866798e-05, + "logits/chosen": -2.573741912841797, + "logits/rejected": -2.929934024810791, + "logps/chosen": -244.13580322265625, + "logps/rejected": -382.1396484375, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8152930736541748, + "rewards/margins": 5.305373668670654, + "rewards/rejected": -7.12066650390625, + "step": 2573 + }, + { + "epoch": 0.4, + "learning_rate": 1.2259458478136833e-05, + "logits/chosen": -2.985337018966675, + "logits/rejected": -2.9998717308044434, + "logps/chosen": -38.641380310058594, + "logps/rejected": -196.29196166992188, + "loss": 0.1013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6079492568969727, + "rewards/margins": 3.55991792678833, + "rewards/rejected": -5.167867183685303, + "step": 2574 + }, + { + "epoch": 0.4, + "learning_rate": 1.2258725037605685e-05, + "logits/chosen": -3.1206252574920654, + "logits/rejected": -2.285114288330078, + "logps/chosen": -293.9153747558594, + "logps/rejected": -275.580322265625, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006817251443862915, + "rewards/margins": 7.649905681610107, + "rewards/rejected": -7.643088340759277, + "step": 2575 + }, + { + "epoch": 0.4, + "learning_rate": 1.2257991597074536e-05, + "logits/chosen": -3.021836280822754, + "logits/rejected": -3.137688159942627, + "logps/chosen": -352.3694152832031, + "logps/rejected": -436.2318420410156, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5245117545127869, + "rewards/margins": 6.8939619064331055, + "rewards/rejected": -7.418473720550537, + "step": 2576 + }, + { + "epoch": 0.4, + "learning_rate": 1.2257258156543388e-05, + "logits/chosen": -1.6602110862731934, + "logits/rejected": -2.803557872772217, + "logps/chosen": -71.64299774169922, + "logps/rejected": -322.9743957519531, + "loss": 0.0467, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7914466857910156, + "rewards/margins": 5.740602493286133, + "rewards/rejected": -8.532049179077148, + "step": 2577 + }, + { + "epoch": 0.4, + "learning_rate": 1.225652471601224e-05, + "logits/chosen": -2.438397169113159, + "logits/rejected": -2.9884369373321533, + "logps/chosen": -366.4928894042969, + "logps/rejected": -445.41510009765625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.291253685951233, + "rewards/margins": 5.855227470397949, + "rewards/rejected": -7.146481037139893, + "step": 2578 + }, + { + "epoch": 0.4, + "learning_rate": 1.2255791275481092e-05, + "logits/chosen": -1.6014140844345093, + "logits/rejected": -2.8793785572052, + "logps/chosen": -170.00152587890625, + "logps/rejected": -333.19818115234375, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4562020301818848, + "rewards/margins": 4.609287261962891, + "rewards/rejected": -6.065489292144775, + "step": 2579 + }, + { + "epoch": 0.4, + "learning_rate": 1.2255057834949946e-05, + "logits/chosen": -2.1244771480560303, + "logits/rejected": -2.8217613697052, + "logps/chosen": -132.42210388183594, + "logps/rejected": -454.7295227050781, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3530211448669434, + "rewards/margins": 7.4669294357299805, + "rewards/rejected": -9.819951057434082, + "step": 2580 + }, + { + "epoch": 0.4, + "learning_rate": 1.2254324394418798e-05, + "logits/chosen": -2.66349720954895, + "logits/rejected": -3.2261734008789062, + "logps/chosen": -186.08770751953125, + "logps/rejected": -222.05587768554688, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.66300630569458, + "rewards/margins": 5.0653839111328125, + "rewards/rejected": -6.728390693664551, + "step": 2581 + }, + { + "epoch": 0.4, + "learning_rate": 1.225359095388765e-05, + "logits/chosen": -2.9762978553771973, + "logits/rejected": -1.8646258115768433, + "logps/chosen": -189.05630493164062, + "logps/rejected": -177.4217529296875, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8234882354736328, + "rewards/margins": 4.794960975646973, + "rewards/rejected": -5.6184492111206055, + "step": 2582 + }, + { + "epoch": 0.4, + "learning_rate": 1.2252857513356501e-05, + "logits/chosen": -1.277801275253296, + "logits/rejected": -2.830688714981079, + "logps/chosen": -103.39654541015625, + "logps/rejected": -346.0506591796875, + "loss": 1.6711, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.540493011474609, + "rewards/margins": 3.73367977142334, + "rewards/rejected": -8.27417278289795, + "step": 2583 + }, + { + "epoch": 0.4, + "learning_rate": 1.2252124072825355e-05, + "logits/chosen": -2.9075491428375244, + "logits/rejected": -3.174586296081543, + "logps/chosen": -188.3035125732422, + "logps/rejected": -462.89837646484375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9348033666610718, + "rewards/margins": 6.2626237869262695, + "rewards/rejected": -7.197427749633789, + "step": 2584 + }, + { + "epoch": 0.4, + "learning_rate": 1.2251390632294207e-05, + "logits/chosen": -2.809264898300171, + "logits/rejected": -2.870448350906372, + "logps/chosen": -242.04013061523438, + "logps/rejected": -205.31680297851562, + "loss": 1.9043, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.39166259765625, + "rewards/margins": 1.3917784690856934, + "rewards/rejected": -3.7834410667419434, + "step": 2585 + }, + { + "epoch": 0.4, + "learning_rate": 1.2250657191763059e-05, + "logits/chosen": -3.1910576820373535, + "logits/rejected": -2.721661329269409, + "logps/chosen": -626.8238525390625, + "logps/rejected": -451.932373046875, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9909698963165283, + "rewards/margins": 5.037988662719727, + "rewards/rejected": -8.028958320617676, + "step": 2586 + }, + { + "epoch": 0.4, + "learning_rate": 1.224992375123191e-05, + "logits/chosen": -2.8946444988250732, + "logits/rejected": -3.301401138305664, + "logps/chosen": -25.28329849243164, + "logps/rejected": -203.1893310546875, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3522040843963623, + "rewards/margins": 5.131565570831299, + "rewards/rejected": -6.48376989364624, + "step": 2587 + }, + { + "epoch": 0.4, + "learning_rate": 1.2249190310700764e-05, + "logits/chosen": -3.0479650497436523, + "logits/rejected": -3.1020379066467285, + "logps/chosen": -205.95880126953125, + "logps/rejected": -259.3638916015625, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.834254264831543, + "rewards/margins": 4.41765022277832, + "rewards/rejected": -6.251904487609863, + "step": 2588 + }, + { + "epoch": 0.4, + "learning_rate": 1.2248456870169616e-05, + "logits/chosen": -3.1919145584106445, + "logits/rejected": -3.2831645011901855, + "logps/chosen": -75.04167175292969, + "logps/rejected": -132.14306640625, + "loss": 0.0432, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.63101863861084, + "rewards/margins": 3.8823494911193848, + "rewards/rejected": -6.513368129730225, + "step": 2589 + }, + { + "epoch": 0.4, + "learning_rate": 1.2247723429638468e-05, + "logits/chosen": -2.369621992111206, + "logits/rejected": -3.098897933959961, + "logps/chosen": -94.58016967773438, + "logps/rejected": -127.51868438720703, + "loss": 1.6173, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.745737075805664, + "rewards/margins": 0.8172521591186523, + "rewards/rejected": -4.562989234924316, + "step": 2590 + }, + { + "epoch": 0.4, + "learning_rate": 1.224698998910732e-05, + "logits/chosen": -2.4759743213653564, + "logits/rejected": -2.947148323059082, + "logps/chosen": -111.7541732788086, + "logps/rejected": -145.55865478515625, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3583946228027344, + "rewards/margins": 5.522769927978516, + "rewards/rejected": -6.88116455078125, + "step": 2591 + }, + { + "epoch": 0.4, + "learning_rate": 1.2246256548576172e-05, + "logits/chosen": -3.089261293411255, + "logits/rejected": -2.832627773284912, + "logps/chosen": -541.6478881835938, + "logps/rejected": -500.43450927734375, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1481049060821533, + "rewards/margins": 3.9354476928710938, + "rewards/rejected": -6.083552837371826, + "step": 2592 + }, + { + "epoch": 0.4, + "learning_rate": 1.2245523108045023e-05, + "logits/chosen": -2.8285770416259766, + "logits/rejected": -2.9202957153320312, + "logps/chosen": -408.8668212890625, + "logps/rejected": -281.20806884765625, + "loss": 1.7093, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3116965293884277, + "rewards/margins": 1.5374979972839355, + "rewards/rejected": -4.849194526672363, + "step": 2593 + }, + { + "epoch": 0.4, + "learning_rate": 1.2244789667513875e-05, + "logits/chosen": -1.7955708503723145, + "logits/rejected": -2.6260244846343994, + "logps/chosen": -119.42916870117188, + "logps/rejected": -476.58636474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0745110511779785, + "rewards/margins": 13.755739212036133, + "rewards/rejected": -14.83025074005127, + "step": 2594 + }, + { + "epoch": 0.4, + "learning_rate": 1.2244056226982727e-05, + "logits/chosen": -3.309518575668335, + "logits/rejected": -2.948291063308716, + "logps/chosen": -314.8976135253906, + "logps/rejected": -213.3080596923828, + "loss": 3.1152, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.861697673797607, + "rewards/margins": 0.3246936798095703, + "rewards/rejected": -5.186391353607178, + "step": 2595 + }, + { + "epoch": 0.4, + "learning_rate": 1.2243322786451579e-05, + "logits/chosen": -2.56290602684021, + "logits/rejected": -2.895176649093628, + "logps/chosen": -90.69731140136719, + "logps/rejected": -304.47198486328125, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.496638536453247, + "rewards/margins": 7.3023271560668945, + "rewards/rejected": -9.798965454101562, + "step": 2596 + }, + { + "epoch": 0.4, + "learning_rate": 1.2242589345920433e-05, + "logits/chosen": -2.843966245651245, + "logits/rejected": -2.42633056640625, + "logps/chosen": -371.03741455078125, + "logps/rejected": -387.0435485839844, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4897886514663696, + "rewards/margins": 7.2028398513793945, + "rewards/rejected": -8.692628860473633, + "step": 2597 + }, + { + "epoch": 0.4, + "learning_rate": 1.2241855905389285e-05, + "logits/chosen": -3.1032729148864746, + "logits/rejected": -2.973254919052124, + "logps/chosen": -444.908935546875, + "logps/rejected": -281.6451416015625, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8293884992599487, + "rewards/margins": 4.138443946838379, + "rewards/rejected": -5.967832565307617, + "step": 2598 + }, + { + "epoch": 0.4, + "learning_rate": 1.2241122464858136e-05, + "logits/chosen": -1.8598296642303467, + "logits/rejected": -2.7413992881774902, + "logps/chosen": -107.66500854492188, + "logps/rejected": -426.34332275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2827587127685547, + "rewards/margins": 10.64948558807373, + "rewards/rejected": -12.932245254516602, + "step": 2599 + }, + { + "epoch": 0.4, + "learning_rate": 1.2240389024326988e-05, + "logits/chosen": -2.3868355751037598, + "logits/rejected": -2.8749637603759766, + "logps/chosen": -142.49313354492188, + "logps/rejected": -342.9283447265625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9101982116699219, + "rewards/margins": 10.402149200439453, + "rewards/rejected": -12.312347412109375, + "step": 2600 + }, + { + "epoch": 0.4, + "learning_rate": 1.223965558379584e-05, + "logits/chosen": -1.7010283470153809, + "logits/rejected": -3.0589544773101807, + "logps/chosen": -102.03205108642578, + "logps/rejected": -374.91839599609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9069433212280273, + "rewards/margins": 9.242074966430664, + "rewards/rejected": -10.149019241333008, + "step": 2601 + }, + { + "epoch": 0.4, + "learning_rate": 1.2238922143264692e-05, + "logits/chosen": -2.8671517372131348, + "logits/rejected": -2.9592695236206055, + "logps/chosen": -194.91571044921875, + "logps/rejected": -398.28314208984375, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.569045305252075, + "rewards/margins": 5.685102462768555, + "rewards/rejected": -9.25414752960205, + "step": 2602 + }, + { + "epoch": 0.4, + "learning_rate": 1.2238188702733544e-05, + "logits/chosen": -2.91554856300354, + "logits/rejected": -3.003085136413574, + "logps/chosen": -198.92857360839844, + "logps/rejected": -164.96090698242188, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.816666841506958, + "rewards/margins": 4.449087619781494, + "rewards/rejected": -6.265754699707031, + "step": 2603 + }, + { + "epoch": 0.4, + "learning_rate": 1.2237455262202396e-05, + "logits/chosen": -2.619694709777832, + "logits/rejected": -3.1374623775482178, + "logps/chosen": -184.5458526611328, + "logps/rejected": -143.4619903564453, + "loss": 3.6482, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.3692307472229, + "rewards/margins": -3.518122911453247, + "rewards/rejected": -1.8511078357696533, + "step": 2604 + }, + { + "epoch": 0.41, + "learning_rate": 1.2236721821671248e-05, + "logits/chosen": -2.8293943405151367, + "logits/rejected": -2.8254144191741943, + "logps/chosen": -169.37115478515625, + "logps/rejected": -273.5832214355469, + "loss": 1.3732, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.738070011138916, + "rewards/margins": 2.692797899246216, + "rewards/rejected": -5.430868148803711, + "step": 2605 + }, + { + "epoch": 0.41, + "learning_rate": 1.2235988381140101e-05, + "logits/chosen": -1.5905653238296509, + "logits/rejected": -2.496532440185547, + "logps/chosen": -93.79754638671875, + "logps/rejected": -311.37701416015625, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.677021026611328, + "rewards/margins": 4.615915298461914, + "rewards/rejected": -7.292936325073242, + "step": 2606 + }, + { + "epoch": 0.41, + "learning_rate": 1.2235254940608953e-05, + "logits/chosen": -1.247369647026062, + "logits/rejected": -2.888267755508423, + "logps/chosen": -237.54176330566406, + "logps/rejected": -430.9411926269531, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.887829601764679, + "rewards/margins": 6.689605712890625, + "rewards/rejected": -7.577435493469238, + "step": 2607 + }, + { + "epoch": 0.41, + "learning_rate": 1.2234521500077805e-05, + "logits/chosen": -2.40449857711792, + "logits/rejected": -3.003788948059082, + "logps/chosen": -89.25735473632812, + "logps/rejected": -162.42849731445312, + "loss": 0.0413, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.39495849609375, + "rewards/margins": 3.2308192253112793, + "rewards/rejected": -7.625777244567871, + "step": 2608 + }, + { + "epoch": 0.41, + "learning_rate": 1.2233788059546657e-05, + "logits/chosen": -2.9085214138031006, + "logits/rejected": -3.132565498352051, + "logps/chosen": -122.74969482421875, + "logps/rejected": -211.36294555664062, + "loss": 0.0616, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9535812735557556, + "rewards/margins": 3.9265923500061035, + "rewards/rejected": -4.880173683166504, + "step": 2609 + }, + { + "epoch": 0.41, + "learning_rate": 1.2233054619015509e-05, + "logits/chosen": -2.8356940746307373, + "logits/rejected": -2.919438600540161, + "logps/chosen": -165.4930877685547, + "logps/rejected": -160.47030639648438, + "loss": 0.5138, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4257092475891113, + "rewards/margins": 2.1029062271118164, + "rewards/rejected": -5.528615474700928, + "step": 2610 + }, + { + "epoch": 0.41, + "learning_rate": 1.223232117848436e-05, + "logits/chosen": -2.88310170173645, + "logits/rejected": -2.5320992469787598, + "logps/chosen": -586.8896484375, + "logps/rejected": -553.3021240234375, + "loss": 5.4687, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.669768810272217, + "rewards/margins": -3.379925012588501, + "rewards/rejected": -4.289844036102295, + "step": 2611 + }, + { + "epoch": 0.41, + "learning_rate": 1.2231587737953213e-05, + "logits/chosen": -2.562896728515625, + "logits/rejected": -2.738922357559204, + "logps/chosen": -67.86402893066406, + "logps/rejected": -267.0596618652344, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5629167556762695, + "rewards/margins": 6.9947099685668945, + "rewards/rejected": -10.557626724243164, + "step": 2612 + }, + { + "epoch": 0.41, + "learning_rate": 1.2230854297422064e-05, + "logits/chosen": -3.0259194374084473, + "logits/rejected": -2.6705524921417236, + "logps/chosen": -457.5964050292969, + "logps/rejected": -593.38330078125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.054997444152832, + "rewards/margins": 8.530133247375488, + "rewards/rejected": -11.58513069152832, + "step": 2613 + }, + { + "epoch": 0.41, + "learning_rate": 1.2230120856890916e-05, + "logits/chosen": -2.489849090576172, + "logits/rejected": -2.945376396179199, + "logps/chosen": -261.3477783203125, + "logps/rejected": -300.2749938964844, + "loss": 2.332, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.292289733886719, + "rewards/margins": 0.7242836952209473, + "rewards/rejected": -5.016573429107666, + "step": 2614 + }, + { + "epoch": 0.41, + "learning_rate": 1.222938741635977e-05, + "logits/chosen": -2.6447670459747314, + "logits/rejected": -3.1806647777557373, + "logps/chosen": -174.76983642578125, + "logps/rejected": -368.79949951171875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.676642656326294, + "rewards/margins": 7.825600624084473, + "rewards/rejected": -9.502243041992188, + "step": 2615 + }, + { + "epoch": 0.41, + "learning_rate": 1.2228653975828622e-05, + "logits/chosen": -1.7588025331497192, + "logits/rejected": -3.191155433654785, + "logps/chosen": -188.409423828125, + "logps/rejected": -520.5440673828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.301980972290039, + "rewards/margins": 7.821589469909668, + "rewards/rejected": -9.123570442199707, + "step": 2616 + }, + { + "epoch": 0.41, + "learning_rate": 1.2227920535297474e-05, + "logits/chosen": -3.2139039039611816, + "logits/rejected": -3.0467562675476074, + "logps/chosen": -86.73216247558594, + "logps/rejected": -143.29856872558594, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.536940813064575, + "rewards/margins": 6.536865711212158, + "rewards/rejected": -9.073806762695312, + "step": 2617 + }, + { + "epoch": 0.41, + "learning_rate": 1.2227187094766327e-05, + "logits/chosen": -1.140168309211731, + "logits/rejected": -2.9583818912506104, + "logps/chosen": -103.98347473144531, + "logps/rejected": -446.15313720703125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.280390739440918, + "rewards/margins": 7.5252461433410645, + "rewards/rejected": -10.80563735961914, + "step": 2618 + }, + { + "epoch": 0.41, + "learning_rate": 1.2226453654235179e-05, + "logits/chosen": -2.651134490966797, + "logits/rejected": -2.585894823074341, + "logps/chosen": -65.70330047607422, + "logps/rejected": -211.991943359375, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2130961418151855, + "rewards/margins": 7.076741695404053, + "rewards/rejected": -10.289837837219238, + "step": 2619 + }, + { + "epoch": 0.41, + "learning_rate": 1.2225720213704031e-05, + "logits/chosen": -2.33739972114563, + "logits/rejected": -2.6668853759765625, + "logps/chosen": -104.66282653808594, + "logps/rejected": -264.3236083984375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7762325406074524, + "rewards/margins": 6.614865303039551, + "rewards/rejected": -7.3910980224609375, + "step": 2620 + }, + { + "epoch": 0.41, + "learning_rate": 1.2224986773172883e-05, + "logits/chosen": -3.2407848834991455, + "logits/rejected": -3.303892135620117, + "logps/chosen": -324.9358825683594, + "logps/rejected": -594.3942260742188, + "loss": 0.1032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.613671898841858, + "rewards/margins": 5.432309150695801, + "rewards/rejected": -7.045980930328369, + "step": 2621 + }, + { + "epoch": 0.41, + "learning_rate": 1.2224253332641735e-05, + "logits/chosen": -3.0703999996185303, + "logits/rejected": -2.272897243499756, + "logps/chosen": -279.6788635253906, + "logps/rejected": -270.83544921875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2257388830184937, + "rewards/margins": 8.216753005981445, + "rewards/rejected": -9.442492485046387, + "step": 2622 + }, + { + "epoch": 0.41, + "learning_rate": 1.2223519892110587e-05, + "logits/chosen": -2.023715019226074, + "logits/rejected": -2.72992205619812, + "logps/chosen": -45.78061294555664, + "logps/rejected": -154.16578674316406, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9562485218048096, + "rewards/margins": 5.662773132324219, + "rewards/rejected": -7.619021892547607, + "step": 2623 + }, + { + "epoch": 0.41, + "learning_rate": 1.222278645157944e-05, + "logits/chosen": -2.777878522872925, + "logits/rejected": -2.9720420837402344, + "logps/chosen": -161.83404541015625, + "logps/rejected": -137.25393676757812, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.290074348449707, + "rewards/margins": 6.231870651245117, + "rewards/rejected": -8.521944999694824, + "step": 2624 + }, + { + "epoch": 0.41, + "learning_rate": 1.2222053011048292e-05, + "logits/chosen": -2.3752851486206055, + "logits/rejected": -3.036428451538086, + "logps/chosen": -153.32943725585938, + "logps/rejected": -434.574951171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7956788539886475, + "rewards/margins": 8.23873519897461, + "rewards/rejected": -10.03441333770752, + "step": 2625 + }, + { + "epoch": 0.41, + "learning_rate": 1.2221319570517144e-05, + "logits/chosen": -2.8830742835998535, + "logits/rejected": -2.941835641860962, + "logps/chosen": -334.3187255859375, + "logps/rejected": -482.45037841796875, + "loss": 3.5039, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.6062822341918945, + "rewards/margins": 0.20796775817871094, + "rewards/rejected": -5.8142499923706055, + "step": 2626 + }, + { + "epoch": 0.41, + "learning_rate": 1.2220586129985996e-05, + "logits/chosen": -3.1243112087249756, + "logits/rejected": -3.202139377593994, + "logps/chosen": -36.36529541015625, + "logps/rejected": -187.1254119873047, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.69077467918396, + "rewards/margins": 8.277227401733398, + "rewards/rejected": -8.968002319335938, + "step": 2627 + }, + { + "epoch": 0.41, + "learning_rate": 1.2219852689454848e-05, + "logits/chosen": -2.6472177505493164, + "logits/rejected": -3.0962882041931152, + "logps/chosen": -464.5256042480469, + "logps/rejected": -421.5639343261719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9428920745849609, + "rewards/margins": 8.691524505615234, + "rewards/rejected": -9.634416580200195, + "step": 2628 + }, + { + "epoch": 0.41, + "learning_rate": 1.22191192489237e-05, + "logits/chosen": -3.1261701583862305, + "logits/rejected": -2.9057540893554688, + "logps/chosen": -626.0328369140625, + "logps/rejected": -504.3486022949219, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.737722873687744, + "rewards/margins": 5.5145416259765625, + "rewards/rejected": -8.252264022827148, + "step": 2629 + }, + { + "epoch": 0.41, + "learning_rate": 1.2218385808392551e-05, + "logits/chosen": -2.3098058700561523, + "logits/rejected": -2.995701313018799, + "logps/chosen": -35.08896255493164, + "logps/rejected": -320.8209533691406, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.606719970703125, + "rewards/margins": 7.225593566894531, + "rewards/rejected": -8.832313537597656, + "step": 2630 + }, + { + "epoch": 0.41, + "learning_rate": 1.2217652367861403e-05, + "logits/chosen": -2.633408784866333, + "logits/rejected": -3.1507568359375, + "logps/chosen": -82.0495376586914, + "logps/rejected": -177.69467163085938, + "loss": 1.7197, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.570417881011963, + "rewards/margins": 1.2574496269226074, + "rewards/rejected": -5.82786750793457, + "step": 2631 + }, + { + "epoch": 0.41, + "learning_rate": 1.2216918927330255e-05, + "logits/chosen": -1.8918955326080322, + "logits/rejected": -2.8210439682006836, + "logps/chosen": -60.538814544677734, + "logps/rejected": -235.3804931640625, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1280735731124878, + "rewards/margins": 5.158083438873291, + "rewards/rejected": -6.28615665435791, + "step": 2632 + }, + { + "epoch": 0.41, + "learning_rate": 1.2216185486799109e-05, + "logits/chosen": -3.2203609943389893, + "logits/rejected": -2.73173451423645, + "logps/chosen": -137.23446655273438, + "logps/rejected": -126.00662231445312, + "loss": 1.0104, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.01864767074585, + "rewards/margins": 2.1059646606445312, + "rewards/rejected": -6.124612331390381, + "step": 2633 + }, + { + "epoch": 0.41, + "learning_rate": 1.221545204626796e-05, + "logits/chosen": -2.3665149211883545, + "logits/rejected": -2.5991592407226562, + "logps/chosen": -216.24179077148438, + "logps/rejected": -454.26483154296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.365187168121338, + "rewards/margins": 10.250263214111328, + "rewards/rejected": -11.615449905395508, + "step": 2634 + }, + { + "epoch": 0.41, + "learning_rate": 1.2214718605736813e-05, + "logits/chosen": -3.251823902130127, + "logits/rejected": -3.0435848236083984, + "logps/chosen": -154.71893310546875, + "logps/rejected": -66.63522338867188, + "loss": 2.0632, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.049980640411377, + "rewards/margins": -0.7438791990280151, + "rewards/rejected": -4.3061017990112305, + "step": 2635 + }, + { + "epoch": 0.41, + "learning_rate": 1.2213985165205664e-05, + "logits/chosen": -2.1085190773010254, + "logits/rejected": -3.164311647415161, + "logps/chosen": -142.66375732421875, + "logps/rejected": -250.13768005371094, + "loss": 1.3931, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.951040267944336, + "rewards/margins": 3.0685958862304688, + "rewards/rejected": -7.019636154174805, + "step": 2636 + }, + { + "epoch": 0.41, + "learning_rate": 1.2213251724674516e-05, + "logits/chosen": -2.9281527996063232, + "logits/rejected": -2.882462739944458, + "logps/chosen": -406.1511535644531, + "logps/rejected": -221.1068572998047, + "loss": 2.9085, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.33150053024292, + "rewards/margins": 1.4031651020050049, + "rewards/rejected": -6.734665870666504, + "step": 2637 + }, + { + "epoch": 0.41, + "learning_rate": 1.2212518284143368e-05, + "logits/chosen": -2.7842888832092285, + "logits/rejected": -3.2042315006256104, + "logps/chosen": -46.278709411621094, + "logps/rejected": -358.2813720703125, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3928990364074707, + "rewards/margins": 4.781895637512207, + "rewards/rejected": -8.174795150756836, + "step": 2638 + }, + { + "epoch": 0.41, + "learning_rate": 1.221178484361222e-05, + "logits/chosen": -2.651750087738037, + "logits/rejected": -2.979398727416992, + "logps/chosen": -143.888427734375, + "logps/rejected": -150.64724731445312, + "loss": 1.0871, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.381864547729492, + "rewards/margins": 2.358388662338257, + "rewards/rejected": -5.740253448486328, + "step": 2639 + }, + { + "epoch": 0.41, + "learning_rate": 1.2211051403081072e-05, + "logits/chosen": -2.7318878173828125, + "logits/rejected": -2.8111352920532227, + "logps/chosen": -234.7796630859375, + "logps/rejected": -223.37918090820312, + "loss": 3.5739, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.258986949920654, + "rewards/margins": 0.7388348579406738, + "rewards/rejected": -4.997821807861328, + "step": 2640 + }, + { + "epoch": 0.41, + "learning_rate": 1.2210317962549924e-05, + "logits/chosen": -3.066516876220703, + "logits/rejected": -2.393562078475952, + "logps/chosen": -618.5176391601562, + "logps/rejected": -525.0912475585938, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3769088983535767, + "rewards/margins": 8.897781372070312, + "rewards/rejected": -10.274690628051758, + "step": 2641 + }, + { + "epoch": 0.41, + "learning_rate": 1.2209584522018777e-05, + "logits/chosen": -3.0033280849456787, + "logits/rejected": -2.8741793632507324, + "logps/chosen": -118.72732543945312, + "logps/rejected": -272.94427490234375, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9141945838928223, + "rewards/margins": 5.207589626312256, + "rewards/rejected": -7.121784210205078, + "step": 2642 + }, + { + "epoch": 0.41, + "learning_rate": 1.220885108148763e-05, + "logits/chosen": -3.2640914916992188, + "logits/rejected": -2.9850571155548096, + "logps/chosen": -649.5205078125, + "logps/rejected": -462.3055419921875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27489012479782104, + "rewards/margins": 6.798303604125977, + "rewards/rejected": -6.523413181304932, + "step": 2643 + }, + { + "epoch": 0.41, + "learning_rate": 1.2208117640956481e-05, + "logits/chosen": -2.7408246994018555, + "logits/rejected": -2.5571067333221436, + "logps/chosen": -214.74447631835938, + "logps/rejected": -251.53282165527344, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5901398062705994, + "rewards/margins": 6.962231636047363, + "rewards/rejected": -7.552371025085449, + "step": 2644 + }, + { + "epoch": 0.41, + "learning_rate": 1.2207384200425333e-05, + "logits/chosen": -2.3611080646514893, + "logits/rejected": -3.098627805709839, + "logps/chosen": -803.8582763671875, + "logps/rejected": -349.53692626953125, + "loss": 0.9474, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6578369140625, + "rewards/margins": 1.3336814641952515, + "rewards/rejected": -3.991518497467041, + "step": 2645 + }, + { + "epoch": 0.41, + "learning_rate": 1.2206650759894185e-05, + "logits/chosen": -3.0041520595550537, + "logits/rejected": -1.754488229751587, + "logps/chosen": -545.439453125, + "logps/rejected": -377.68646240234375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0981521606445312, + "rewards/margins": 7.244397163391113, + "rewards/rejected": -9.342549324035645, + "step": 2646 + }, + { + "epoch": 0.41, + "learning_rate": 1.2205917319363037e-05, + "logits/chosen": -2.945920467376709, + "logits/rejected": -1.3547885417938232, + "logps/chosen": -615.1917724609375, + "logps/rejected": -389.50494384765625, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5655922889709473, + "rewards/margins": 6.674060344696045, + "rewards/rejected": -8.239652633666992, + "step": 2647 + }, + { + "epoch": 0.41, + "learning_rate": 1.2205183878831889e-05, + "logits/chosen": -2.4845354557037354, + "logits/rejected": -3.0817575454711914, + "logps/chosen": -292.0191650390625, + "logps/rejected": -213.9554443359375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.060103178024292, + "rewards/margins": 7.317107200622559, + "rewards/rejected": -8.37721061706543, + "step": 2648 + }, + { + "epoch": 0.41, + "learning_rate": 1.220445043830074e-05, + "logits/chosen": -2.8743107318878174, + "logits/rejected": -2.4965226650238037, + "logps/chosen": -522.2360229492188, + "logps/rejected": -590.6943969726562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5081074237823486, + "rewards/margins": 10.735711097717285, + "rewards/rejected": -12.243818283081055, + "step": 2649 + }, + { + "epoch": 0.41, + "learning_rate": 1.2203716997769592e-05, + "logits/chosen": -3.2247681617736816, + "logits/rejected": -3.2367732524871826, + "logps/chosen": -40.771141052246094, + "logps/rejected": -86.40618896484375, + "loss": 0.0829, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7438132762908936, + "rewards/margins": 2.6602392196655273, + "rewards/rejected": -5.404052734375, + "step": 2650 + }, + { + "epoch": 0.41, + "learning_rate": 1.2202983557238446e-05, + "logits/chosen": -2.7038204669952393, + "logits/rejected": -3.0349442958831787, + "logps/chosen": -191.89715576171875, + "logps/rejected": -413.62786865234375, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4404146671295166, + "rewards/margins": 5.719290733337402, + "rewards/rejected": -7.15970516204834, + "step": 2651 + }, + { + "epoch": 0.41, + "learning_rate": 1.22022501167073e-05, + "logits/chosen": -3.2356491088867188, + "logits/rejected": -2.7544896602630615, + "logps/chosen": -701.5611572265625, + "logps/rejected": -701.2601318359375, + "loss": 4.6912, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.081085205078125, + "rewards/margins": -2.4119043350219727, + "rewards/rejected": -4.669180393218994, + "step": 2652 + }, + { + "epoch": 0.41, + "learning_rate": 1.2201516676176151e-05, + "logits/chosen": -2.3195786476135254, + "logits/rejected": -3.1005191802978516, + "logps/chosen": -68.44557189941406, + "logps/rejected": -191.18112182617188, + "loss": 0.041, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7457854747772217, + "rewards/margins": 3.4691224098205566, + "rewards/rejected": -6.214908123016357, + "step": 2653 + }, + { + "epoch": 0.41, + "learning_rate": 1.2200783235645003e-05, + "logits/chosen": -1.969752550125122, + "logits/rejected": -3.2126574516296387, + "logps/chosen": -246.7445068359375, + "logps/rejected": -495.87109375, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6166054010391235, + "rewards/margins": 5.112913131713867, + "rewards/rejected": -6.729517936706543, + "step": 2654 + }, + { + "epoch": 0.41, + "learning_rate": 1.2200049795113855e-05, + "logits/chosen": -2.880169153213501, + "logits/rejected": -3.0718836784362793, + "logps/chosen": -71.33543395996094, + "logps/rejected": -201.19158935546875, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3154640197753906, + "rewards/margins": 5.7717413902282715, + "rewards/rejected": -9.08720588684082, + "step": 2655 + }, + { + "epoch": 0.41, + "learning_rate": 1.2199316354582707e-05, + "logits/chosen": -1.0059517621994019, + "logits/rejected": -2.939159631729126, + "logps/chosen": -64.00752258300781, + "logps/rejected": -274.8812255859375, + "loss": 0.1497, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.746654987335205, + "rewards/margins": 4.179441452026367, + "rewards/rejected": -7.926096439361572, + "step": 2656 + }, + { + "epoch": 0.41, + "learning_rate": 1.2198582914051559e-05, + "logits/chosen": -3.193425178527832, + "logits/rejected": -2.206247568130493, + "logps/chosen": -441.1580810546875, + "logps/rejected": -251.29824829101562, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34072569012641907, + "rewards/margins": 6.030483245849609, + "rewards/rejected": -6.371209144592285, + "step": 2657 + }, + { + "epoch": 0.41, + "learning_rate": 1.219784947352041e-05, + "logits/chosen": -3.1644959449768066, + "logits/rejected": -1.997506022453308, + "logps/chosen": -742.38720703125, + "logps/rejected": -354.1331481933594, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4628723859786987, + "rewards/margins": 8.27710247039795, + "rewards/rejected": -6.814229965209961, + "step": 2658 + }, + { + "epoch": 0.41, + "learning_rate": 1.2197116032989263e-05, + "logits/chosen": -3.147372007369995, + "logits/rejected": -3.1940712928771973, + "logps/chosen": -171.9615020751953, + "logps/rejected": -333.3086853027344, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3401412963867188, + "rewards/margins": 5.580023288726807, + "rewards/rejected": -6.920164585113525, + "step": 2659 + }, + { + "epoch": 0.41, + "learning_rate": 1.2196382592458116e-05, + "logits/chosen": -1.1116114854812622, + "logits/rejected": -3.0604050159454346, + "logps/chosen": -64.11239624023438, + "logps/rejected": -527.8807373046875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.560182809829712, + "rewards/margins": 6.966691017150879, + "rewards/rejected": -8.526874542236328, + "step": 2660 + }, + { + "epoch": 0.41, + "learning_rate": 1.2195649151926968e-05, + "logits/chosen": -2.9256770610809326, + "logits/rejected": -2.9055440425872803, + "logps/chosen": -198.4423065185547, + "logps/rejected": -356.9473876953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9248199462890625, + "rewards/margins": 9.460653305053711, + "rewards/rejected": -12.38547420501709, + "step": 2661 + }, + { + "epoch": 0.41, + "learning_rate": 1.219491571139582e-05, + "logits/chosen": -3.140345573425293, + "logits/rejected": -2.76578688621521, + "logps/chosen": -248.37374877929688, + "logps/rejected": -124.38763427734375, + "loss": 1.9048, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.441237449645996, + "rewards/margins": 1.1068224906921387, + "rewards/rejected": -6.548060417175293, + "step": 2662 + }, + { + "epoch": 0.41, + "learning_rate": 1.2194182270864672e-05, + "logits/chosen": -1.8659132719039917, + "logits/rejected": -3.0514070987701416, + "logps/chosen": -83.5772705078125, + "logps/rejected": -236.30767822265625, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6146130561828613, + "rewards/margins": 4.716919898986816, + "rewards/rejected": -7.331533432006836, + "step": 2663 + }, + { + "epoch": 0.41, + "learning_rate": 1.2193448830333524e-05, + "logits/chosen": -2.8095545768737793, + "logits/rejected": -2.9672842025756836, + "logps/chosen": -62.02309799194336, + "logps/rejected": -180.8624725341797, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2391912937164307, + "rewards/margins": 5.889884948730469, + "rewards/rejected": -7.12907600402832, + "step": 2664 + }, + { + "epoch": 0.41, + "learning_rate": 1.2192715389802376e-05, + "logits/chosen": -3.0701448917388916, + "logits/rejected": -3.235506772994995, + "logps/chosen": -126.77128601074219, + "logps/rejected": -248.2713623046875, + "loss": 0.0373, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.882625102996826, + "rewards/margins": 4.390918254852295, + "rewards/rejected": -7.273543357849121, + "step": 2665 + }, + { + "epoch": 0.41, + "learning_rate": 1.2191981949271228e-05, + "logits/chosen": -3.2571895122528076, + "logits/rejected": -3.23753023147583, + "logps/chosen": -610.3294677734375, + "logps/rejected": -256.40972900390625, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0054435729980469, + "rewards/margins": 5.363712310791016, + "rewards/rejected": -4.358268737792969, + "step": 2666 + }, + { + "epoch": 0.41, + "learning_rate": 1.219124850874008e-05, + "logits/chosen": -3.100705623626709, + "logits/rejected": -2.498884439468384, + "logps/chosen": -654.1888427734375, + "logps/rejected": -459.61590576171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9883224964141846, + "rewards/margins": 9.165996551513672, + "rewards/rejected": -10.154319763183594, + "step": 2667 + }, + { + "epoch": 0.41, + "learning_rate": 1.2190515068208931e-05, + "logits/chosen": -2.9391157627105713, + "logits/rejected": -3.1324527263641357, + "logps/chosen": -287.36083984375, + "logps/rejected": -310.6501159667969, + "loss": 0.0312, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.289864420890808, + "rewards/margins": 4.30488395690918, + "rewards/rejected": -5.594748497009277, + "step": 2668 + }, + { + "epoch": 0.42, + "learning_rate": 1.2189781627677785e-05, + "logits/chosen": -3.199038028717041, + "logits/rejected": -2.167448043823242, + "logps/chosen": -710.07275390625, + "logps/rejected": -277.51904296875, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5437896847724915, + "rewards/margins": 6.478947639465332, + "rewards/rejected": -7.022737503051758, + "step": 2669 + }, + { + "epoch": 0.42, + "learning_rate": 1.2189048187146637e-05, + "logits/chosen": -3.0655996799468994, + "logits/rejected": -3.194237232208252, + "logps/chosen": -120.45954895019531, + "logps/rejected": -211.13677978515625, + "loss": 0.1312, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.370691299438477, + "rewards/margins": 2.295389175415039, + "rewards/rejected": -6.666080474853516, + "step": 2670 + }, + { + "epoch": 0.42, + "learning_rate": 1.2188314746615489e-05, + "logits/chosen": -2.000091075897217, + "logits/rejected": -3.192490577697754, + "logps/chosen": -73.44039916992188, + "logps/rejected": -327.10968017578125, + "loss": 0.3248, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.158925771713257, + "rewards/margins": 1.910642147064209, + "rewards/rejected": -5.069567680358887, + "step": 2671 + }, + { + "epoch": 0.42, + "learning_rate": 1.218758130608434e-05, + "logits/chosen": -3.0328805446624756, + "logits/rejected": -3.1089529991149902, + "logps/chosen": -616.6405639648438, + "logps/rejected": -573.5466918945312, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05848541855812073, + "rewards/margins": 5.814830303192139, + "rewards/rejected": -5.873315811157227, + "step": 2672 + }, + { + "epoch": 0.42, + "learning_rate": 1.2186847865553192e-05, + "logits/chosen": -3.1403133869171143, + "logits/rejected": -2.6105668544769287, + "logps/chosen": -421.64923095703125, + "logps/rejected": -192.04685974121094, + "loss": 1.2557, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.796299695968628, + "rewards/margins": 2.8093979358673096, + "rewards/rejected": -4.6056976318359375, + "step": 2673 + }, + { + "epoch": 0.42, + "learning_rate": 1.2186114425022044e-05, + "logits/chosen": -1.6368972063064575, + "logits/rejected": -3.1633009910583496, + "logps/chosen": -130.23004150390625, + "logps/rejected": -364.50396728515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.885354995727539, + "rewards/margins": 7.557548522949219, + "rewards/rejected": -9.442903518676758, + "step": 2674 + }, + { + "epoch": 0.42, + "learning_rate": 1.2185380984490896e-05, + "logits/chosen": -2.241333484649658, + "logits/rejected": -3.141390800476074, + "logps/chosen": -100.87240600585938, + "logps/rejected": -313.8336486816406, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4917309284210205, + "rewards/margins": 8.357704162597656, + "rewards/rejected": -9.849435806274414, + "step": 2675 + }, + { + "epoch": 0.42, + "learning_rate": 1.2184647543959748e-05, + "logits/chosen": -1.4527151584625244, + "logits/rejected": -2.508035182952881, + "logps/chosen": -136.93173217773438, + "logps/rejected": -434.748291015625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3589954376220703, + "rewards/margins": 6.435601711273193, + "rewards/rejected": -8.794597625732422, + "step": 2676 + }, + { + "epoch": 0.42, + "learning_rate": 1.21839141034286e-05, + "logits/chosen": -2.214329957962036, + "logits/rejected": -2.945467948913574, + "logps/chosen": -233.73878479003906, + "logps/rejected": -261.32989501953125, + "loss": 1.0925, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.560186862945557, + "rewards/margins": 0.8284271955490112, + "rewards/rejected": -5.388614177703857, + "step": 2677 + }, + { + "epoch": 0.42, + "learning_rate": 1.2183180662897453e-05, + "logits/chosen": -2.9649174213409424, + "logits/rejected": -3.1766695976257324, + "logps/chosen": -75.0941162109375, + "logps/rejected": -279.3558044433594, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8214738368988037, + "rewards/margins": 5.2446088790893555, + "rewards/rejected": -9.066082954406738, + "step": 2678 + }, + { + "epoch": 0.42, + "learning_rate": 1.2182447222366305e-05, + "logits/chosen": -1.6791822910308838, + "logits/rejected": -2.8217813968658447, + "logps/chosen": -60.234100341796875, + "logps/rejected": -200.3633575439453, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9783355593681335, + "rewards/margins": 4.408123016357422, + "rewards/rejected": -5.386458396911621, + "step": 2679 + }, + { + "epoch": 0.42, + "learning_rate": 1.2181713781835157e-05, + "logits/chosen": -1.978328824043274, + "logits/rejected": -2.7868785858154297, + "logps/chosen": -96.9248275756836, + "logps/rejected": -342.149169921875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.63987398147583, + "rewards/margins": 12.963156700134277, + "rewards/rejected": -15.603031158447266, + "step": 2680 + }, + { + "epoch": 0.42, + "learning_rate": 1.2180980341304009e-05, + "logits/chosen": -3.0926761627197266, + "logits/rejected": -3.1725785732269287, + "logps/chosen": -108.87498474121094, + "logps/rejected": -87.83245849609375, + "loss": 4.3955, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.386569499969482, + "rewards/margins": -1.736617088317871, + "rewards/rejected": -4.649952411651611, + "step": 2681 + }, + { + "epoch": 0.42, + "learning_rate": 1.2180246900772861e-05, + "logits/chosen": -3.0071799755096436, + "logits/rejected": -2.236865282058716, + "logps/chosen": -138.75999450683594, + "logps/rejected": -238.41226196289062, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.313004493713379, + "rewards/margins": 6.327140808105469, + "rewards/rejected": -8.640144348144531, + "step": 2682 + }, + { + "epoch": 0.42, + "learning_rate": 1.2179513460241713e-05, + "logits/chosen": -1.1260137557983398, + "logits/rejected": -3.025132417678833, + "logps/chosen": -67.38798522949219, + "logps/rejected": -408.97613525390625, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7684407234191895, + "rewards/margins": 9.053749084472656, + "rewards/rejected": -10.822189331054688, + "step": 2683 + }, + { + "epoch": 0.42, + "learning_rate": 1.2178780019710565e-05, + "logits/chosen": -2.672800302505493, + "logits/rejected": -3.1750214099884033, + "logps/chosen": -192.27215576171875, + "logps/rejected": -471.00897216796875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23800544440746307, + "rewards/margins": 8.178852081298828, + "rewards/rejected": -8.416857719421387, + "step": 2684 + }, + { + "epoch": 0.42, + "learning_rate": 1.2178046579179418e-05, + "logits/chosen": -1.6017796993255615, + "logits/rejected": -3.1476798057556152, + "logps/chosen": -138.56906127929688, + "logps/rejected": -581.4911499023438, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7142016887664795, + "rewards/margins": 5.925359725952148, + "rewards/rejected": -7.639561653137207, + "step": 2685 + }, + { + "epoch": 0.42, + "learning_rate": 1.217731313864827e-05, + "logits/chosen": -3.131920099258423, + "logits/rejected": -1.5337804555892944, + "logps/chosen": -344.531982421875, + "logps/rejected": -206.73696899414062, + "loss": 3.0259, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.205079555511475, + "rewards/margins": -0.6058566570281982, + "rewards/rejected": -3.5992226600646973, + "step": 2686 + }, + { + "epoch": 0.42, + "learning_rate": 1.2176579698117124e-05, + "logits/chosen": -1.1267106533050537, + "logits/rejected": -3.0016002655029297, + "logps/chosen": -86.13601684570312, + "logps/rejected": -470.898193359375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.203857898712158, + "rewards/margins": 7.272538185119629, + "rewards/rejected": -10.476396560668945, + "step": 2687 + }, + { + "epoch": 0.42, + "learning_rate": 1.2175846257585976e-05, + "logits/chosen": -2.076341390609741, + "logits/rejected": -3.2841732501983643, + "logps/chosen": -61.95280838012695, + "logps/rejected": -267.86932373046875, + "loss": 0.1775, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.039508819580078, + "rewards/margins": 1.6831893920898438, + "rewards/rejected": -6.722698211669922, + "step": 2688 + }, + { + "epoch": 0.42, + "learning_rate": 1.2175112817054828e-05, + "logits/chosen": -2.657524347305298, + "logits/rejected": -2.834090232849121, + "logps/chosen": -144.08338928222656, + "logps/rejected": -174.1695556640625, + "loss": 0.6172, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2942311763763428, + "rewards/margins": 3.491636276245117, + "rewards/rejected": -5.785867691040039, + "step": 2689 + }, + { + "epoch": 0.42, + "learning_rate": 1.217437937652368e-05, + "logits/chosen": -2.8264546394348145, + "logits/rejected": -3.338358163833618, + "logps/chosen": -98.89585876464844, + "logps/rejected": -208.64389038085938, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5259861946105957, + "rewards/margins": 6.0363240242004395, + "rewards/rejected": -8.562310218811035, + "step": 2690 + }, + { + "epoch": 0.42, + "learning_rate": 1.2173645935992531e-05, + "logits/chosen": -2.2407784461975098, + "logits/rejected": -3.117234945297241, + "logps/chosen": -287.41156005859375, + "logps/rejected": -428.59503173828125, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.731170654296875, + "rewards/margins": 4.7835540771484375, + "rewards/rejected": -6.5147247314453125, + "step": 2691 + }, + { + "epoch": 0.42, + "learning_rate": 1.2172912495461383e-05, + "logits/chosen": -3.0222041606903076, + "logits/rejected": -3.0491316318511963, + "logps/chosen": -53.918785095214844, + "logps/rejected": -180.0008544921875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3710169792175293, + "rewards/margins": 6.996105194091797, + "rewards/rejected": -8.367121696472168, + "step": 2692 + }, + { + "epoch": 0.42, + "learning_rate": 1.2172179054930235e-05, + "logits/chosen": -3.0980312824249268, + "logits/rejected": -1.6612889766693115, + "logps/chosen": -387.3539733886719, + "logps/rejected": -291.76605224609375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6292431354522705, + "rewards/margins": 6.5369462966918945, + "rewards/rejected": -8.166189193725586, + "step": 2693 + }, + { + "epoch": 0.42, + "learning_rate": 1.2171445614399087e-05, + "logits/chosen": -2.2577261924743652, + "logits/rejected": -2.991584539413452, + "logps/chosen": -232.16574096679688, + "logps/rejected": -231.357421875, + "loss": 3.1074, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.200021982192993, + "rewards/margins": 1.2714152336120605, + "rewards/rejected": -4.471436977386475, + "step": 2694 + }, + { + "epoch": 0.42, + "learning_rate": 1.217071217386794e-05, + "logits/chosen": -2.7623982429504395, + "logits/rejected": -3.2064473628997803, + "logps/chosen": -379.2948913574219, + "logps/rejected": -475.8719177246094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0959986448287964, + "rewards/margins": 10.527190208435059, + "rewards/rejected": -11.623188972473145, + "step": 2695 + }, + { + "epoch": 0.42, + "learning_rate": 1.2169978733336792e-05, + "logits/chosen": -2.9456522464752197, + "logits/rejected": -3.1475770473480225, + "logps/chosen": -79.82905578613281, + "logps/rejected": -153.93927001953125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2000703811645508, + "rewards/margins": 7.541296482086182, + "rewards/rejected": -8.74136734008789, + "step": 2696 + }, + { + "epoch": 0.42, + "learning_rate": 1.2169245292805644e-05, + "logits/chosen": -2.801560401916504, + "logits/rejected": -3.1903927326202393, + "logps/chosen": -65.73445129394531, + "logps/rejected": -246.08963012695312, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8004066944122314, + "rewards/margins": 5.195041656494141, + "rewards/rejected": -7.995448112487793, + "step": 2697 + }, + { + "epoch": 0.42, + "learning_rate": 1.2168511852274496e-05, + "logits/chosen": -2.9931273460388184, + "logits/rejected": -2.0429446697235107, + "logps/chosen": -256.93243408203125, + "logps/rejected": -213.9117431640625, + "loss": 0.3852, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.309161424636841, + "rewards/margins": 2.2763328552246094, + "rewards/rejected": -5.585494041442871, + "step": 2698 + }, + { + "epoch": 0.42, + "learning_rate": 1.2167778411743348e-05, + "logits/chosen": -2.9897730350494385, + "logits/rejected": -3.0008468627929688, + "logps/chosen": -51.208038330078125, + "logps/rejected": -341.2280578613281, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14936542510986328, + "rewards/margins": 7.510537147521973, + "rewards/rejected": -7.659902572631836, + "step": 2699 + }, + { + "epoch": 0.42, + "learning_rate": 1.21670449712122e-05, + "logits/chosen": -3.05881667137146, + "logits/rejected": -1.0775189399719238, + "logps/chosen": -605.505615234375, + "logps/rejected": -234.80628967285156, + "loss": 2.4446, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.039394855499268, + "rewards/margins": 2.1041011810302734, + "rewards/rejected": -7.143496036529541, + "step": 2700 + }, + { + "epoch": 0.42, + "learning_rate": 1.2166311530681052e-05, + "logits/chosen": -2.948883056640625, + "logits/rejected": -3.13529372215271, + "logps/chosen": -399.60467529296875, + "logps/rejected": -479.56781005859375, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07838213443756104, + "rewards/margins": 5.664216041564941, + "rewards/rejected": -5.742598056793213, + "step": 2701 + }, + { + "epoch": 0.42, + "learning_rate": 1.2165578090149904e-05, + "logits/chosen": -1.5790724754333496, + "logits/rejected": -3.2989375591278076, + "logps/chosen": -160.84645080566406, + "logps/rejected": -666.6746826171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.387141466140747, + "rewards/margins": 9.635881423950195, + "rewards/rejected": -12.023022651672363, + "step": 2702 + }, + { + "epoch": 0.42, + "learning_rate": 1.2164844649618756e-05, + "logits/chosen": -3.0956106185913086, + "logits/rejected": -3.3241872787475586, + "logps/chosen": -16.47823715209961, + "logps/rejected": -188.28378295898438, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24658948183059692, + "rewards/margins": 7.333003044128418, + "rewards/rejected": -7.579592704772949, + "step": 2703 + }, + { + "epoch": 0.42, + "learning_rate": 1.2164111209087609e-05, + "logits/chosen": -3.199152708053589, + "logits/rejected": -2.8935883045196533, + "logps/chosen": -101.56999206542969, + "logps/rejected": -215.49058532714844, + "loss": 3.1007, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.61246395111084, + "rewards/margins": 0.922069787979126, + "rewards/rejected": -4.534533500671387, + "step": 2704 + }, + { + "epoch": 0.42, + "learning_rate": 1.2163377768556461e-05, + "logits/chosen": -3.1419272422790527, + "logits/rejected": -2.3015925884246826, + "logps/chosen": -307.0868225097656, + "logps/rejected": -305.08740234375, + "loss": 4.9292, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.238970756530762, + "rewards/margins": -0.8766098022460938, + "rewards/rejected": -5.362360954284668, + "step": 2705 + }, + { + "epoch": 0.42, + "learning_rate": 1.2162644328025313e-05, + "logits/chosen": -1.6913150548934937, + "logits/rejected": -2.708221673965454, + "logps/chosen": -144.41412353515625, + "logps/rejected": -316.72515869140625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2514266967773438, + "rewards/margins": 7.972589492797852, + "rewards/rejected": -9.224016189575195, + "step": 2706 + }, + { + "epoch": 0.42, + "learning_rate": 1.2161910887494165e-05, + "logits/chosen": -2.333115577697754, + "logits/rejected": -2.850769519805908, + "logps/chosen": -97.52228546142578, + "logps/rejected": -286.6284484863281, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7912380695343018, + "rewards/margins": 7.086024284362793, + "rewards/rejected": -9.877262115478516, + "step": 2707 + }, + { + "epoch": 0.42, + "learning_rate": 1.2161177446963017e-05, + "logits/chosen": -3.063744068145752, + "logits/rejected": -2.594566583633423, + "logps/chosen": -302.3575744628906, + "logps/rejected": -318.2942199707031, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8433017730712891, + "rewards/margins": 4.962827682495117, + "rewards/rejected": -5.806129455566406, + "step": 2708 + }, + { + "epoch": 0.42, + "learning_rate": 1.2160444006431868e-05, + "logits/chosen": -2.6116015911102295, + "logits/rejected": -3.0470712184906006, + "logps/chosen": -39.280738830566406, + "logps/rejected": -155.2547149658203, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0586073398590088, + "rewards/margins": 5.047574996948242, + "rewards/rejected": -6.106182098388672, + "step": 2709 + }, + { + "epoch": 0.42, + "learning_rate": 1.215971056590072e-05, + "logits/chosen": -2.9804539680480957, + "logits/rejected": -2.387220621109009, + "logps/chosen": -266.2675476074219, + "logps/rejected": -248.5596923828125, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1987295150756836, + "rewards/margins": 5.850866317749023, + "rewards/rejected": -8.049595832824707, + "step": 2710 + }, + { + "epoch": 0.42, + "learning_rate": 1.2158977125369572e-05, + "logits/chosen": -3.0206336975097656, + "logits/rejected": -2.1487221717834473, + "logps/chosen": -311.5080871582031, + "logps/rejected": -299.2852783203125, + "loss": 3.8385, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.676028728485107, + "rewards/margins": 0.5586943626403809, + "rewards/rejected": -5.234723091125488, + "step": 2711 + }, + { + "epoch": 0.42, + "learning_rate": 1.2158243684838424e-05, + "logits/chosen": -2.5074403285980225, + "logits/rejected": -3.1766374111175537, + "logps/chosen": -201.8378143310547, + "logps/rejected": -190.9127197265625, + "loss": 1.0765, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6538138389587402, + "rewards/margins": 2.276425838470459, + "rewards/rejected": -4.930239677429199, + "step": 2712 + }, + { + "epoch": 0.42, + "learning_rate": 1.2157510244307278e-05, + "logits/chosen": -2.738842487335205, + "logits/rejected": -1.7626192569732666, + "logps/chosen": -241.8914794921875, + "logps/rejected": -127.3484878540039, + "loss": 4.0461, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.365137577056885, + "rewards/margins": -0.929173469543457, + "rewards/rejected": -4.435964107513428, + "step": 2713 + }, + { + "epoch": 0.42, + "learning_rate": 1.215677680377613e-05, + "logits/chosen": -2.7729296684265137, + "logits/rejected": -3.0486085414886475, + "logps/chosen": -567.4097900390625, + "logps/rejected": -435.85455322265625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0862258672714233, + "rewards/margins": 7.63018798828125, + "rewards/rejected": -8.716413497924805, + "step": 2714 + }, + { + "epoch": 0.42, + "learning_rate": 1.2156043363244981e-05, + "logits/chosen": -2.5037081241607666, + "logits/rejected": -3.211165189743042, + "logps/chosen": -59.689170837402344, + "logps/rejected": -239.5487060546875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32915496826171875, + "rewards/margins": 7.747071266174316, + "rewards/rejected": -8.076226234436035, + "step": 2715 + }, + { + "epoch": 0.42, + "learning_rate": 1.2155309922713833e-05, + "logits/chosen": -2.889400005340576, + "logits/rejected": -2.980404853820801, + "logps/chosen": -135.4422149658203, + "logps/rejected": -248.49139404296875, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2902052402496338, + "rewards/margins": 4.549847602844238, + "rewards/rejected": -5.840052604675293, + "step": 2716 + }, + { + "epoch": 0.42, + "learning_rate": 1.2154576482182685e-05, + "logits/chosen": -2.2609362602233887, + "logits/rejected": -2.819610118865967, + "logps/chosen": -124.12826538085938, + "logps/rejected": -98.58224487304688, + "loss": 0.899, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.6381683349609375, + "rewards/margins": 1.6382055282592773, + "rewards/rejected": -6.276373863220215, + "step": 2717 + }, + { + "epoch": 0.42, + "learning_rate": 1.2153843041651537e-05, + "logits/chosen": -1.9248278141021729, + "logits/rejected": -2.627359390258789, + "logps/chosen": -189.51629638671875, + "logps/rejected": -327.26800537109375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7278950214385986, + "rewards/margins": 7.1856689453125, + "rewards/rejected": -8.913564682006836, + "step": 2718 + }, + { + "epoch": 0.42, + "learning_rate": 1.215310960112039e-05, + "logits/chosen": -3.008854627609253, + "logits/rejected": -3.1410961151123047, + "logps/chosen": -1007.9183349609375, + "logps/rejected": -415.1257629394531, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2550125122070312, + "rewards/margins": 7.522063255310059, + "rewards/rejected": -6.2670512199401855, + "step": 2719 + }, + { + "epoch": 0.42, + "learning_rate": 1.2152376160589243e-05, + "logits/chosen": -2.5806875228881836, + "logits/rejected": -3.1149420738220215, + "logps/chosen": -213.086181640625, + "logps/rejected": -453.27532958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2849823236465454, + "rewards/margins": 9.683088302612305, + "rewards/rejected": -9.968070983886719, + "step": 2720 + }, + { + "epoch": 0.42, + "learning_rate": 1.2151642720058094e-05, + "logits/chosen": -1.9687464237213135, + "logits/rejected": -2.9155378341674805, + "logps/chosen": -426.9156799316406, + "logps/rejected": -511.6409912109375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.957867443561554, + "rewards/margins": 8.166119575500488, + "rewards/rejected": -9.123987197875977, + "step": 2721 + }, + { + "epoch": 0.42, + "learning_rate": 1.2150909279526948e-05, + "logits/chosen": -3.003758192062378, + "logits/rejected": -2.7126028537750244, + "logps/chosen": -381.4806213378906, + "logps/rejected": -379.6109619140625, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3125538229942322, + "rewards/margins": 8.13406753540039, + "rewards/rejected": -8.44662094116211, + "step": 2722 + }, + { + "epoch": 0.42, + "learning_rate": 1.21501758389958e-05, + "logits/chosen": -2.087660551071167, + "logits/rejected": -2.575730800628662, + "logps/chosen": -485.0318603515625, + "logps/rejected": -521.9764404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6913284063339233, + "rewards/margins": 11.01295280456543, + "rewards/rejected": -11.7042818069458, + "step": 2723 + }, + { + "epoch": 0.42, + "learning_rate": 1.2149442398464652e-05, + "logits/chosen": -2.98496150970459, + "logits/rejected": -2.6392650604248047, + "logps/chosen": -260.8916015625, + "logps/rejected": -332.9367980957031, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8435535430908203, + "rewards/margins": 4.915453910827637, + "rewards/rejected": -5.759007453918457, + "step": 2724 + }, + { + "epoch": 0.42, + "learning_rate": 1.2148708957933504e-05, + "logits/chosen": -0.8353179097175598, + "logits/rejected": -3.0532987117767334, + "logps/chosen": -124.37673950195312, + "logps/rejected": -605.4019165039062, + "loss": 3.558, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6492958068847656, + "rewards/margins": -1.523216724395752, + "rewards/rejected": -2.1260790824890137, + "step": 2725 + }, + { + "epoch": 0.42, + "learning_rate": 1.2147975517402355e-05, + "logits/chosen": -3.2026329040527344, + "logits/rejected": -3.173999547958374, + "logps/chosen": -184.1263885498047, + "logps/rejected": -212.88690185546875, + "loss": 1.617, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.670992136001587, + "rewards/margins": 1.2718924283981323, + "rewards/rejected": -3.9428844451904297, + "step": 2726 + }, + { + "epoch": 0.42, + "learning_rate": 1.2147242076871207e-05, + "logits/chosen": -2.518394708633423, + "logits/rejected": -3.123934745788574, + "logps/chosen": -215.2421112060547, + "logps/rejected": -359.38299560546875, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0177319049835205, + "rewards/margins": 5.45152473449707, + "rewards/rejected": -6.469256401062012, + "step": 2727 + }, + { + "epoch": 0.42, + "learning_rate": 1.214650863634006e-05, + "logits/chosen": -2.995993137359619, + "logits/rejected": -2.9913105964660645, + "logps/chosen": -318.5115966796875, + "logps/rejected": -187.75967407226562, + "loss": 0.3556, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2901833057403564, + "rewards/margins": 3.657453775405884, + "rewards/rejected": -5.94763708114624, + "step": 2728 + }, + { + "epoch": 0.42, + "learning_rate": 1.2145775195808911e-05, + "logits/chosen": -2.983670473098755, + "logits/rejected": -3.3652377128601074, + "logps/chosen": -78.62934875488281, + "logps/rejected": -184.90174865722656, + "loss": 0.1533, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5080686807632446, + "rewards/margins": 3.4148483276367188, + "rewards/rejected": -4.922916889190674, + "step": 2729 + }, + { + "epoch": 0.42, + "learning_rate": 1.2145041755277763e-05, + "logits/chosen": -3.108691930770874, + "logits/rejected": -2.689695358276367, + "logps/chosen": -409.58551025390625, + "logps/rejected": -464.17144775390625, + "loss": 1.1169, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.1100068092346191, + "rewards/margins": 1.5815553665161133, + "rewards/rejected": -2.6915619373321533, + "step": 2730 + }, + { + "epoch": 0.42, + "learning_rate": 1.2144308314746617e-05, + "logits/chosen": -2.0869076251983643, + "logits/rejected": -3.0449774265289307, + "logps/chosen": -81.94223022460938, + "logps/rejected": -376.4793701171875, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.990537166595459, + "rewards/margins": 10.402311325073242, + "rewards/rejected": -13.39284896850586, + "step": 2731 + }, + { + "epoch": 0.42, + "learning_rate": 1.2143574874215468e-05, + "logits/chosen": -1.925653100013733, + "logits/rejected": -3.0132410526275635, + "logps/chosen": -162.91104125976562, + "logps/rejected": -277.20098876953125, + "loss": 0.0818, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.155302047729492, + "rewards/margins": 4.88848876953125, + "rewards/rejected": -8.043790817260742, + "step": 2732 + }, + { + "epoch": 0.43, + "learning_rate": 1.214284143368432e-05, + "logits/chosen": -1.003286361694336, + "logits/rejected": -2.9419922828674316, + "logps/chosen": -56.77117919921875, + "logps/rejected": -433.054931640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7999176383018494, + "rewards/margins": 10.016092300415039, + "rewards/rejected": -10.816010475158691, + "step": 2733 + }, + { + "epoch": 0.43, + "learning_rate": 1.2142107993153172e-05, + "logits/chosen": -2.585040330886841, + "logits/rejected": -3.1153483390808105, + "logps/chosen": -130.58535766601562, + "logps/rejected": -200.12893676757812, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2841034233570099, + "rewards/margins": 6.059700965881348, + "rewards/rejected": -6.343804359436035, + "step": 2734 + }, + { + "epoch": 0.43, + "learning_rate": 1.2141374552622024e-05, + "logits/chosen": -2.0213940143585205, + "logits/rejected": -2.9312307834625244, + "logps/chosen": -104.9960708618164, + "logps/rejected": -339.64453125, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9879441261291504, + "rewards/margins": 8.024099349975586, + "rewards/rejected": -10.012043952941895, + "step": 2735 + }, + { + "epoch": 0.43, + "learning_rate": 1.2140641112090876e-05, + "logits/chosen": -1.2785052061080933, + "logits/rejected": -3.0535457134246826, + "logps/chosen": -111.5752944946289, + "logps/rejected": -257.83782958984375, + "loss": 2.3659, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.9039149284362793, + "rewards/margins": -2.1078526973724365, + "rewards/rejected": -1.7960621118545532, + "step": 2736 + }, + { + "epoch": 0.43, + "learning_rate": 1.2139907671559728e-05, + "logits/chosen": -3.1604840755462646, + "logits/rejected": -3.2277698516845703, + "logps/chosen": -157.7400360107422, + "logps/rejected": -223.98129272460938, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9479311108589172, + "rewards/margins": 6.003989219665527, + "rewards/rejected": -6.951920032501221, + "step": 2737 + }, + { + "epoch": 0.43, + "learning_rate": 1.213917423102858e-05, + "logits/chosen": -3.2435858249664307, + "logits/rejected": -2.891918420791626, + "logps/chosen": -323.77398681640625, + "logps/rejected": -188.29356384277344, + "loss": 4.6128, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.717543125152588, + "rewards/margins": -4.553993225097656, + "rewards/rejected": -1.1635501384735107, + "step": 2738 + }, + { + "epoch": 0.43, + "learning_rate": 1.2138440790497432e-05, + "logits/chosen": -1.9454858303070068, + "logits/rejected": -3.1570370197296143, + "logps/chosen": -39.27789306640625, + "logps/rejected": -399.2545471191406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8271592855453491, + "rewards/margins": 9.320089340209961, + "rewards/rejected": -11.147249221801758, + "step": 2739 + }, + { + "epoch": 0.43, + "learning_rate": 1.2137707349966285e-05, + "logits/chosen": -2.570453405380249, + "logits/rejected": -3.2009778022766113, + "logps/chosen": -23.18216323852539, + "logps/rejected": -228.24110412597656, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4301972389221191, + "rewards/margins": 6.4329118728637695, + "rewards/rejected": -7.863109111785889, + "step": 2740 + }, + { + "epoch": 0.43, + "learning_rate": 1.2136973909435137e-05, + "logits/chosen": -2.5981979370117188, + "logits/rejected": -2.9747989177703857, + "logps/chosen": -164.2208709716797, + "logps/rejected": -237.94400024414062, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.683673083782196, + "rewards/margins": 4.50141716003418, + "rewards/rejected": -5.1850905418396, + "step": 2741 + }, + { + "epoch": 0.43, + "learning_rate": 1.2136240468903989e-05, + "logits/chosen": -3.0610246658325195, + "logits/rejected": -3.2104251384735107, + "logps/chosen": -57.67413330078125, + "logps/rejected": -233.43414306640625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.191888451576233, + "rewards/margins": 6.048123359680176, + "rewards/rejected": -7.240012168884277, + "step": 2742 + }, + { + "epoch": 0.43, + "learning_rate": 1.213550702837284e-05, + "logits/chosen": -2.1868605613708496, + "logits/rejected": -3.073883533477783, + "logps/chosen": -46.430084228515625, + "logps/rejected": -290.72174072265625, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.455704927444458, + "rewards/margins": 5.713502883911133, + "rewards/rejected": -7.16920804977417, + "step": 2743 + }, + { + "epoch": 0.43, + "learning_rate": 1.2134773587841693e-05, + "logits/chosen": -2.99351167678833, + "logits/rejected": -1.494004726409912, + "logps/chosen": -170.67291259765625, + "logps/rejected": -83.3524169921875, + "loss": 2.5861, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.194576263427734, + "rewards/margins": -2.4780547618865967, + "rewards/rejected": -3.7165215015411377, + "step": 2744 + }, + { + "epoch": 0.43, + "learning_rate": 1.2134040147310545e-05, + "logits/chosen": -2.554948091506958, + "logits/rejected": -3.2085089683532715, + "logps/chosen": -42.96342086791992, + "logps/rejected": -235.4086456298828, + "loss": 0.0495, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.036090612411499, + "rewards/margins": 3.1892313957214355, + "rewards/rejected": -6.2253217697143555, + "step": 2745 + }, + { + "epoch": 0.43, + "learning_rate": 1.2133306706779396e-05, + "logits/chosen": -2.7006475925445557, + "logits/rejected": -3.160780906677246, + "logps/chosen": -65.10162353515625, + "logps/rejected": -270.6204833984375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.115207552909851, + "rewards/margins": 7.17598295211792, + "rewards/rejected": -8.291191101074219, + "step": 2746 + }, + { + "epoch": 0.43, + "learning_rate": 1.2132573266248248e-05, + "logits/chosen": -3.138823986053467, + "logits/rejected": -2.6316370964050293, + "logps/chosen": -472.5221252441406, + "logps/rejected": -475.60369873046875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7136054039001465, + "rewards/margins": 5.750998497009277, + "rewards/rejected": -6.464604377746582, + "step": 2747 + }, + { + "epoch": 0.43, + "learning_rate": 1.21318398257171e-05, + "logits/chosen": -2.8612518310546875, + "logits/rejected": -2.9322755336761475, + "logps/chosen": -311.6602783203125, + "logps/rejected": -321.94744873046875, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4884636402130127, + "rewards/margins": 6.106411933898926, + "rewards/rejected": -7.594875335693359, + "step": 2748 + }, + { + "epoch": 0.43, + "learning_rate": 1.2131106385185954e-05, + "logits/chosen": -3.0298614501953125, + "logits/rejected": -3.319716691970825, + "logps/chosen": -79.87611389160156, + "logps/rejected": -266.34405517578125, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05180281400680542, + "rewards/margins": 5.567203521728516, + "rewards/rejected": -5.619006156921387, + "step": 2749 + }, + { + "epoch": 0.43, + "learning_rate": 1.2130372944654806e-05, + "logits/chosen": -2.9811177253723145, + "logits/rejected": -3.0548524856567383, + "logps/chosen": -217.03573608398438, + "logps/rejected": -294.2458801269531, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6842453479766846, + "rewards/margins": 6.709149360656738, + "rewards/rejected": -7.393394470214844, + "step": 2750 + }, + { + "epoch": 0.43, + "learning_rate": 1.2129639504123658e-05, + "logits/chosen": -3.1588680744171143, + "logits/rejected": -2.996626615524292, + "logps/chosen": -95.52306365966797, + "logps/rejected": -256.81756591796875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6804356575012207, + "rewards/margins": 8.323345184326172, + "rewards/rejected": -10.003780364990234, + "step": 2751 + }, + { + "epoch": 0.43, + "learning_rate": 1.212890606359251e-05, + "logits/chosen": -2.4404125213623047, + "logits/rejected": -3.2067832946777344, + "logps/chosen": -61.86823272705078, + "logps/rejected": -288.31634521484375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2908951044082642, + "rewards/margins": 6.563198089599609, + "rewards/rejected": -7.854093074798584, + "step": 2752 + }, + { + "epoch": 0.43, + "learning_rate": 1.2128172623061363e-05, + "logits/chosen": -2.8571228981018066, + "logits/rejected": -3.302044630050659, + "logps/chosen": -184.837646484375, + "logps/rejected": -306.7012939453125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.578780472278595, + "rewards/margins": 7.9619622230529785, + "rewards/rejected": -8.540742874145508, + "step": 2753 + }, + { + "epoch": 0.43, + "learning_rate": 1.2127439182530215e-05, + "logits/chosen": -3.054243326187134, + "logits/rejected": -3.226600408554077, + "logps/chosen": -30.023420333862305, + "logps/rejected": -203.30880737304688, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08424706012010574, + "rewards/margins": 6.578953742980957, + "rewards/rejected": -6.663200855255127, + "step": 2754 + }, + { + "epoch": 0.43, + "learning_rate": 1.2126705741999067e-05, + "logits/chosen": -2.4438986778259277, + "logits/rejected": -3.1052446365356445, + "logps/chosen": -30.049734115600586, + "logps/rejected": -254.41064453125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0982216596603394, + "rewards/margins": 7.034831523895264, + "rewards/rejected": -8.133052825927734, + "step": 2755 + }, + { + "epoch": 0.43, + "learning_rate": 1.2125972301467919e-05, + "logits/chosen": -3.1591408252716064, + "logits/rejected": -2.6301913261413574, + "logps/chosen": -360.29034423828125, + "logps/rejected": -252.0332489013672, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1169929504394531, + "rewards/margins": 5.742931365966797, + "rewards/rejected": -6.85992431640625, + "step": 2756 + }, + { + "epoch": 0.43, + "learning_rate": 1.212523886093677e-05, + "logits/chosen": -2.011972427368164, + "logits/rejected": -3.0029053688049316, + "logps/chosen": -180.9534454345703, + "logps/rejected": -474.8312072753906, + "loss": 3.1114, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.157822608947754, + "rewards/margins": 3.3529789447784424, + "rewards/rejected": -7.510801792144775, + "step": 2757 + }, + { + "epoch": 0.43, + "learning_rate": 1.2124505420405624e-05, + "logits/chosen": -2.8849334716796875, + "logits/rejected": -1.737862229347229, + "logps/chosen": -204.65533447265625, + "logps/rejected": -234.26502990722656, + "loss": 0.7266, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4023499488830566, + "rewards/margins": 5.0654988288879395, + "rewards/rejected": -8.467848777770996, + "step": 2758 + }, + { + "epoch": 0.43, + "learning_rate": 1.2123771979874476e-05, + "logits/chosen": -2.7153756618499756, + "logits/rejected": -3.2253341674804688, + "logps/chosen": -92.40440368652344, + "logps/rejected": -291.4412536621094, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1147290468215942, + "rewards/margins": 7.517370223999023, + "rewards/rejected": -8.632099151611328, + "step": 2759 + }, + { + "epoch": 0.43, + "learning_rate": 1.2123038539343328e-05, + "logits/chosen": -2.7565934658050537, + "logits/rejected": -3.007197856903076, + "logps/chosen": -137.4778289794922, + "logps/rejected": -257.7731018066406, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9338247179985046, + "rewards/margins": 6.5114946365356445, + "rewards/rejected": -7.445319652557373, + "step": 2760 + }, + { + "epoch": 0.43, + "learning_rate": 1.212230509881218e-05, + "logits/chosen": -3.190068006515503, + "logits/rejected": -2.801699161529541, + "logps/chosen": -245.22000122070312, + "logps/rejected": -277.6800537109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7963242530822754, + "rewards/margins": 6.955531120300293, + "rewards/rejected": -10.751855850219727, + "step": 2761 + }, + { + "epoch": 0.43, + "learning_rate": 1.2121571658281032e-05, + "logits/chosen": -3.194627285003662, + "logits/rejected": -3.1132702827453613, + "logps/chosen": -230.41671752929688, + "logps/rejected": -214.47537231445312, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43128281831741333, + "rewards/margins": 5.549218654632568, + "rewards/rejected": -5.117935657501221, + "step": 2762 + }, + { + "epoch": 0.43, + "learning_rate": 1.2120838217749883e-05, + "logits/chosen": -1.5849502086639404, + "logits/rejected": -3.061655282974243, + "logps/chosen": -74.59872436523438, + "logps/rejected": -257.08837890625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1191215515136719, + "rewards/margins": 6.811590194702148, + "rewards/rejected": -7.93071174621582, + "step": 2763 + }, + { + "epoch": 0.43, + "learning_rate": 1.2120104777218735e-05, + "logits/chosen": -3.067401885986328, + "logits/rejected": -3.150430917739868, + "logps/chosen": -206.15982055664062, + "logps/rejected": -98.45362091064453, + "loss": 3.6536, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.654238224029541, + "rewards/margins": -0.9378030300140381, + "rewards/rejected": -3.716435194015503, + "step": 2764 + }, + { + "epoch": 0.43, + "learning_rate": 1.2119371336687587e-05, + "logits/chosen": -3.2322700023651123, + "logits/rejected": -2.298801898956299, + "logps/chosen": -457.92559814453125, + "logps/rejected": -357.67913818359375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0530118942260742, + "rewards/margins": 6.197257995605469, + "rewards/rejected": -7.250269889831543, + "step": 2765 + }, + { + "epoch": 0.43, + "learning_rate": 1.2118637896156439e-05, + "logits/chosen": -1.848567008972168, + "logits/rejected": -2.732621431350708, + "logps/chosen": -282.9053649902344, + "logps/rejected": -571.7246704101562, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5346130132675171, + "rewards/margins": 7.545900344848633, + "rewards/rejected": -8.080513000488281, + "step": 2766 + }, + { + "epoch": 0.43, + "learning_rate": 1.2117904455625293e-05, + "logits/chosen": -2.4350805282592773, + "logits/rejected": -3.073983907699585, + "logps/chosen": -519.2294921875, + "logps/rejected": -634.6175537109375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.139642357826233, + "rewards/margins": 7.378018379211426, + "rewards/rejected": -8.517661094665527, + "step": 2767 + }, + { + "epoch": 0.43, + "learning_rate": 1.2117171015094145e-05, + "logits/chosen": -2.004545211791992, + "logits/rejected": -2.87677001953125, + "logps/chosen": -234.3399658203125, + "logps/rejected": -390.85009765625, + "loss": 4.3909, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.881746768951416, + "rewards/margins": 2.576735019683838, + "rewards/rejected": -7.458481788635254, + "step": 2768 + }, + { + "epoch": 0.43, + "learning_rate": 1.2116437574562996e-05, + "logits/chosen": -2.8642654418945312, + "logits/rejected": -3.1247804164886475, + "logps/chosen": -264.9767761230469, + "logps/rejected": -238.30319213867188, + "loss": 2.9514, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.33107852935791, + "rewards/margins": 0.0700981616973877, + "rewards/rejected": -4.401176452636719, + "step": 2769 + }, + { + "epoch": 0.43, + "learning_rate": 1.2115704134031848e-05, + "logits/chosen": -2.19830322265625, + "logits/rejected": -3.0956475734710693, + "logps/chosen": -304.0971374511719, + "logps/rejected": -366.04498291015625, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5519989728927612, + "rewards/margins": 6.773163795471191, + "rewards/rejected": -8.325162887573242, + "step": 2770 + }, + { + "epoch": 0.43, + "learning_rate": 1.21149706935007e-05, + "logits/chosen": -2.699122190475464, + "logits/rejected": -2.556102991104126, + "logps/chosen": -482.17333984375, + "logps/rejected": -424.5329284667969, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.015527367591858, + "rewards/margins": 5.798211574554443, + "rewards/rejected": -6.813738822937012, + "step": 2771 + }, + { + "epoch": 0.43, + "learning_rate": 1.2114237252969552e-05, + "logits/chosen": -2.9303596019744873, + "logits/rejected": -3.0993430614471436, + "logps/chosen": -89.23941040039062, + "logps/rejected": -155.07144165039062, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0336267948150635, + "rewards/margins": 4.123164176940918, + "rewards/rejected": -6.156790733337402, + "step": 2772 + }, + { + "epoch": 0.43, + "learning_rate": 1.2113503812438404e-05, + "logits/chosen": -0.8558287620544434, + "logits/rejected": -2.9283714294433594, + "logps/chosen": -43.12151336669922, + "logps/rejected": -477.92425537109375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6374964118003845, + "rewards/margins": 7.900463581085205, + "rewards/rejected": -8.537960052490234, + "step": 2773 + }, + { + "epoch": 0.43, + "learning_rate": 1.2112770371907256e-05, + "logits/chosen": -2.7536449432373047, + "logits/rejected": -3.235353708267212, + "logps/chosen": -419.71282958984375, + "logps/rejected": -433.8873291015625, + "loss": 0.0673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7788883447647095, + "rewards/margins": 4.3580732345581055, + "rewards/rejected": -5.136961460113525, + "step": 2774 + }, + { + "epoch": 0.43, + "learning_rate": 1.211203693137611e-05, + "logits/chosen": -2.134929895401001, + "logits/rejected": -2.720778226852417, + "logps/chosen": -188.26736450195312, + "logps/rejected": -276.1216125488281, + "loss": 2.908, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.310408115386963, + "rewards/margins": 1.240626573562622, + "rewards/rejected": -5.551034450531006, + "step": 2775 + }, + { + "epoch": 0.43, + "learning_rate": 1.2111303490844961e-05, + "logits/chosen": -2.4594645500183105, + "logits/rejected": -2.993104934692383, + "logps/chosen": -392.7308349609375, + "logps/rejected": -407.37506103515625, + "loss": 4.0149, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.194188117980957, + "rewards/margins": -2.452969551086426, + "rewards/rejected": -3.7412185668945312, + "step": 2776 + }, + { + "epoch": 0.43, + "learning_rate": 1.2110570050313813e-05, + "logits/chosen": -1.1884437799453735, + "logits/rejected": -2.4782514572143555, + "logps/chosen": -184.17420959472656, + "logps/rejected": -461.3683776855469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.436209112405777, + "rewards/margins": 9.164833068847656, + "rewards/rejected": -8.72862434387207, + "step": 2777 + }, + { + "epoch": 0.43, + "learning_rate": 1.2109836609782665e-05, + "logits/chosen": -3.141347646713257, + "logits/rejected": -2.9050259590148926, + "logps/chosen": -485.0594482421875, + "logps/rejected": -543.6893310546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5359203815460205, + "rewards/margins": 8.709395408630371, + "rewards/rejected": -8.17347526550293, + "step": 2778 + }, + { + "epoch": 0.43, + "learning_rate": 1.2109103169251517e-05, + "logits/chosen": -2.977152109146118, + "logits/rejected": -3.01778507232666, + "logps/chosen": -71.425048828125, + "logps/rejected": -153.421142578125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13970565795898438, + "rewards/margins": 6.501925945281982, + "rewards/rejected": -6.641631603240967, + "step": 2779 + }, + { + "epoch": 0.43, + "learning_rate": 1.2108369728720369e-05, + "logits/chosen": -2.3755571842193604, + "logits/rejected": -3.188664436340332, + "logps/chosen": -174.83322143554688, + "logps/rejected": -303.68572998046875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9217790961265564, + "rewards/margins": 6.272785186767578, + "rewards/rejected": -7.194564342498779, + "step": 2780 + }, + { + "epoch": 0.43, + "learning_rate": 1.210763628818922e-05, + "logits/chosen": -2.3531553745269775, + "logits/rejected": -2.625791549682617, + "logps/chosen": -89.72343444824219, + "logps/rejected": -242.5611572265625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9795930981636047, + "rewards/margins": 6.640908718109131, + "rewards/rejected": -7.620501518249512, + "step": 2781 + }, + { + "epoch": 0.43, + "learning_rate": 1.2106902847658073e-05, + "logits/chosen": -3.2602665424346924, + "logits/rejected": -2.830813407897949, + "logps/chosen": -567.6001586914062, + "logps/rejected": -378.32763671875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5723267793655396, + "rewards/margins": 8.916718482971191, + "rewards/rejected": -7.344391822814941, + "step": 2782 + }, + { + "epoch": 0.43, + "learning_rate": 1.2106169407126924e-05, + "logits/chosen": -2.2200562953948975, + "logits/rejected": -2.845223903656006, + "logps/chosen": -97.8523941040039, + "logps/rejected": -319.13519287109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5012516975402832, + "rewards/margins": 8.254352569580078, + "rewards/rejected": -9.75560474395752, + "step": 2783 + }, + { + "epoch": 0.43, + "learning_rate": 1.2105435966595778e-05, + "logits/chosen": -3.0361671447753906, + "logits/rejected": -3.122771739959717, + "logps/chosen": -226.83460998535156, + "logps/rejected": -257.7161865234375, + "loss": 2.8748, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.2840423583984375, + "rewards/margins": -1.0084030628204346, + "rewards/rejected": -3.275639533996582, + "step": 2784 + }, + { + "epoch": 0.43, + "learning_rate": 1.210470252606463e-05, + "logits/chosen": -2.168536901473999, + "logits/rejected": -2.925008773803711, + "logps/chosen": -49.07562255859375, + "logps/rejected": -154.57289123535156, + "loss": 0.26, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7093071937561035, + "rewards/margins": 1.252219319343567, + "rewards/rejected": -3.96152663230896, + "step": 2785 + }, + { + "epoch": 0.43, + "learning_rate": 1.2103969085533482e-05, + "logits/chosen": -3.091458559036255, + "logits/rejected": -3.0956804752349854, + "logps/chosen": -238.3556671142578, + "logps/rejected": -86.80827331542969, + "loss": 4.1676, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.286047458648682, + "rewards/margins": -2.904837131500244, + "rewards/rejected": -1.3812099695205688, + "step": 2786 + }, + { + "epoch": 0.43, + "learning_rate": 1.2103235645002335e-05, + "logits/chosen": -1.3203725814819336, + "logits/rejected": -2.920048475265503, + "logps/chosen": -151.8048095703125, + "logps/rejected": -330.7129821777344, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9710696935653687, + "rewards/margins": 7.328815460205078, + "rewards/rejected": -8.299885749816895, + "step": 2787 + }, + { + "epoch": 0.43, + "learning_rate": 1.2102502204471187e-05, + "logits/chosen": -2.9712910652160645, + "logits/rejected": -2.9073991775512695, + "logps/chosen": -346.55694580078125, + "logps/rejected": -430.6204833984375, + "loss": 0.0404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6080421209335327, + "rewards/margins": 4.475655555725098, + "rewards/rejected": -5.08369779586792, + "step": 2788 + }, + { + "epoch": 0.43, + "learning_rate": 1.2101768763940039e-05, + "logits/chosen": -3.1564695835113525, + "logits/rejected": -3.173929214477539, + "logps/chosen": -152.88858032226562, + "logps/rejected": -323.2915954589844, + "loss": 1.0131, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.7765873670578003, + "rewards/margins": 3.269937515258789, + "rewards/rejected": -4.046525001525879, + "step": 2789 + }, + { + "epoch": 0.43, + "learning_rate": 1.2101035323408891e-05, + "logits/chosen": -2.3303303718566895, + "logits/rejected": -2.988900661468506, + "logps/chosen": -198.88919067382812, + "logps/rejected": -207.64259338378906, + "loss": 3.2514, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5361740589141846, + "rewards/margins": 0.8733749389648438, + "rewards/rejected": -4.409549236297607, + "step": 2790 + }, + { + "epoch": 0.43, + "learning_rate": 1.2100301882877743e-05, + "logits/chosen": -2.519883632659912, + "logits/rejected": -3.2469685077667236, + "logps/chosen": -233.21942138671875, + "logps/rejected": -594.33056640625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9698562622070312, + "rewards/margins": 8.126660346984863, + "rewards/rejected": -9.096516609191895, + "step": 2791 + }, + { + "epoch": 0.43, + "learning_rate": 1.2099568442346595e-05, + "logits/chosen": -3.258037567138672, + "logits/rejected": -3.11633563041687, + "logps/chosen": -151.84042358398438, + "logps/rejected": -151.3438720703125, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26839834451675415, + "rewards/margins": 5.618669033050537, + "rewards/rejected": -5.8870673179626465, + "step": 2792 + }, + { + "epoch": 0.43, + "learning_rate": 1.2098835001815448e-05, + "logits/chosen": -2.394059181213379, + "logits/rejected": -3.113233804702759, + "logps/chosen": -342.4797668457031, + "logps/rejected": -357.88116455078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.48395615816116333, + "rewards/margins": 8.138657569885254, + "rewards/rejected": -8.622613906860352, + "step": 2793 + }, + { + "epoch": 0.43, + "learning_rate": 1.20981015612843e-05, + "logits/chosen": -2.9450223445892334, + "logits/rejected": -2.947589635848999, + "logps/chosen": -117.35513305664062, + "logps/rejected": -236.17095947265625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4362536668777466, + "rewards/margins": 6.17140007019043, + "rewards/rejected": -7.607653617858887, + "step": 2794 + }, + { + "epoch": 0.43, + "learning_rate": 1.2097368120753152e-05, + "logits/chosen": -2.6641786098480225, + "logits/rejected": -3.1823906898498535, + "logps/chosen": -266.8495178222656, + "logps/rejected": -425.996826171875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3932976722717285, + "rewards/margins": 6.664871692657471, + "rewards/rejected": -8.0581693649292, + "step": 2795 + }, + { + "epoch": 0.43, + "learning_rate": 1.2096634680222004e-05, + "logits/chosen": -2.7623746395111084, + "logits/rejected": -3.2758471965789795, + "logps/chosen": -132.31558227539062, + "logps/rejected": -274.0592956542969, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0903478860855103, + "rewards/margins": 5.321435928344727, + "rewards/rejected": -6.411783695220947, + "step": 2796 + }, + { + "epoch": 0.43, + "learning_rate": 1.2095901239690856e-05, + "logits/chosen": -3.146649122238159, + "logits/rejected": -2.469733238220215, + "logps/chosen": -632.8612060546875, + "logps/rejected": -460.6456604003906, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8192015886306763, + "rewards/margins": 4.2466888427734375, + "rewards/rejected": -6.065890312194824, + "step": 2797 + }, + { + "epoch": 0.44, + "learning_rate": 1.2095167799159708e-05, + "logits/chosen": -3.18725848197937, + "logits/rejected": -2.903111219406128, + "logps/chosen": -99.48470306396484, + "logps/rejected": -191.30795288085938, + "loss": 4.608, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.994223594665527, + "rewards/margins": -2.0648908615112305, + "rewards/rejected": -2.929332971572876, + "step": 2798 + }, + { + "epoch": 0.44, + "learning_rate": 1.209443435862856e-05, + "logits/chosen": -2.785247802734375, + "logits/rejected": -2.8571786880493164, + "logps/chosen": -123.95228576660156, + "logps/rejected": -148.7242889404297, + "loss": 0.0206, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8702129125595093, + "rewards/margins": 4.316267967224121, + "rewards/rejected": -5.186480522155762, + "step": 2799 + }, + { + "epoch": 0.44, + "learning_rate": 1.2093700918097411e-05, + "logits/chosen": -2.8716471195220947, + "logits/rejected": -3.1449594497680664, + "logps/chosen": -606.938720703125, + "logps/rejected": -496.1337890625, + "loss": 0.2387, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5812627077102661, + "rewards/margins": 2.620753288269043, + "rewards/rejected": -3.2020158767700195, + "step": 2800 + }, + { + "epoch": 0.44, + "learning_rate": 1.2092967477566263e-05, + "logits/chosen": -3.266617774963379, + "logits/rejected": -2.6456427574157715, + "logps/chosen": -643.15234375, + "logps/rejected": -385.3457946777344, + "loss": 0.1062, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06108555197715759, + "rewards/margins": 4.772517204284668, + "rewards/rejected": -4.711431980133057, + "step": 2801 + }, + { + "epoch": 0.44, + "learning_rate": 1.2092234037035117e-05, + "logits/chosen": -2.6978743076324463, + "logits/rejected": -3.1836791038513184, + "logps/chosen": -428.191650390625, + "logps/rejected": -644.5714721679688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05838240683078766, + "rewards/margins": 11.402315139770508, + "rewards/rejected": -11.34393310546875, + "step": 2802 + }, + { + "epoch": 0.44, + "learning_rate": 1.2091500596503969e-05, + "logits/chosen": -2.510336399078369, + "logits/rejected": -3.038357734680176, + "logps/chosen": -361.7240295410156, + "logps/rejected": -689.1054077148438, + "loss": 4.7592, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.179104328155518, + "rewards/margins": -0.25121259689331055, + "rewards/rejected": -3.927891731262207, + "step": 2803 + }, + { + "epoch": 0.44, + "learning_rate": 1.209076715597282e-05, + "logits/chosen": -2.690478563308716, + "logits/rejected": -2.9513063430786133, + "logps/chosen": -179.92245483398438, + "logps/rejected": -367.1768798828125, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9459487795829773, + "rewards/margins": 4.311119079589844, + "rewards/rejected": -5.257067680358887, + "step": 2804 + }, + { + "epoch": 0.44, + "learning_rate": 1.2090033715441673e-05, + "logits/chosen": -2.5853891372680664, + "logits/rejected": -3.0562522411346436, + "logps/chosen": -112.4500732421875, + "logps/rejected": -302.91204833984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06571522355079651, + "rewards/margins": 8.29630184173584, + "rewards/rejected": -8.362016677856445, + "step": 2805 + }, + { + "epoch": 0.44, + "learning_rate": 1.2089300274910524e-05, + "logits/chosen": -1.5289661884307861, + "logits/rejected": -3.086946487426758, + "logps/chosen": -92.6607666015625, + "logps/rejected": -352.78741455078125, + "loss": 0.1791, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7280887365341187, + "rewards/margins": 4.902321815490723, + "rewards/rejected": -6.630410194396973, + "step": 2806 + }, + { + "epoch": 0.44, + "learning_rate": 1.2088566834379376e-05, + "logits/chosen": -3.0486557483673096, + "logits/rejected": -3.1402649879455566, + "logps/chosen": -114.81352996826172, + "logps/rejected": -204.94874572753906, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4111591577529907, + "rewards/margins": 5.832420349121094, + "rewards/rejected": -6.243578910827637, + "step": 2807 + }, + { + "epoch": 0.44, + "learning_rate": 1.2087833393848228e-05, + "logits/chosen": -3.038444995880127, + "logits/rejected": -2.0544278621673584, + "logps/chosen": -105.41901397705078, + "logps/rejected": -131.04232788085938, + "loss": 2.3001, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9741523265838623, + "rewards/margins": 0.15351080894470215, + "rewards/rejected": -3.1276631355285645, + "step": 2808 + }, + { + "epoch": 0.44, + "learning_rate": 1.208709995331708e-05, + "logits/chosen": -2.416088104248047, + "logits/rejected": -3.0740718841552734, + "logps/chosen": -90.67173767089844, + "logps/rejected": -229.85562133789062, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4394054412841797, + "rewards/margins": 5.04620361328125, + "rewards/rejected": -5.48560905456543, + "step": 2809 + }, + { + "epoch": 0.44, + "learning_rate": 1.2086366512785932e-05, + "logits/chosen": -2.29608154296875, + "logits/rejected": -2.941847085952759, + "logps/chosen": -180.31832885742188, + "logps/rejected": -281.93896484375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1677074432373047, + "rewards/margins": 5.798079490661621, + "rewards/rejected": -6.965786933898926, + "step": 2810 + }, + { + "epoch": 0.44, + "learning_rate": 1.2085633072254785e-05, + "logits/chosen": -3.144711494445801, + "logits/rejected": -3.091057300567627, + "logps/chosen": -88.59788513183594, + "logps/rejected": -247.2751922607422, + "loss": 1.1126, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.531278133392334, + "rewards/margins": 4.637157440185547, + "rewards/rejected": -7.168435573577881, + "step": 2811 + }, + { + "epoch": 0.44, + "learning_rate": 1.2084899631723637e-05, + "logits/chosen": -3.094255208969116, + "logits/rejected": -2.5723869800567627, + "logps/chosen": -463.0919189453125, + "logps/rejected": -363.7106018066406, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019319921731948853, + "rewards/margins": 7.970017433166504, + "rewards/rejected": -7.989336967468262, + "step": 2812 + }, + { + "epoch": 0.44, + "learning_rate": 1.208416619119249e-05, + "logits/chosen": -2.8812060356140137, + "logits/rejected": -1.661818504333496, + "logps/chosen": -310.783203125, + "logps/rejected": -223.2542724609375, + "loss": 2.8608, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0505728721618652, + "rewards/margins": 0.16994261741638184, + "rewards/rejected": -3.220515489578247, + "step": 2813 + }, + { + "epoch": 0.44, + "learning_rate": 1.2083432750661341e-05, + "logits/chosen": -3.2281270027160645, + "logits/rejected": -3.165850877761841, + "logps/chosen": -376.875732421875, + "logps/rejected": -255.12474060058594, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.51604425907135, + "rewards/margins": 4.660317420959473, + "rewards/rejected": -6.176362037658691, + "step": 2814 + }, + { + "epoch": 0.44, + "learning_rate": 1.2082699310130193e-05, + "logits/chosen": -2.8297030925750732, + "logits/rejected": -3.092839002609253, + "logps/chosen": -151.9839630126953, + "logps/rejected": -315.68402099609375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.48962992429733276, + "rewards/margins": 6.015451431274414, + "rewards/rejected": -6.5050811767578125, + "step": 2815 + }, + { + "epoch": 0.44, + "learning_rate": 1.2081965869599045e-05, + "logits/chosen": -3.138253688812256, + "logits/rejected": -3.1322691440582275, + "logps/chosen": -68.72490692138672, + "logps/rejected": -241.49932861328125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4013484716415405, + "rewards/margins": 7.241701602935791, + "rewards/rejected": -8.643050193786621, + "step": 2816 + }, + { + "epoch": 0.44, + "learning_rate": 1.2081232429067897e-05, + "logits/chosen": -2.908721685409546, + "logits/rejected": -2.9506094455718994, + "logps/chosen": -330.24420166015625, + "logps/rejected": -197.9663543701172, + "loss": 2.7318, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.047486782073975, + "rewards/margins": 0.49080538749694824, + "rewards/rejected": -4.538292407989502, + "step": 2817 + }, + { + "epoch": 0.44, + "learning_rate": 1.2080498988536749e-05, + "logits/chosen": -1.9852378368377686, + "logits/rejected": -2.7392942905426025, + "logps/chosen": -162.06512451171875, + "logps/rejected": -262.670166015625, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1710962057113647, + "rewards/margins": 5.378902435302734, + "rewards/rejected": -6.5499982833862305, + "step": 2818 + }, + { + "epoch": 0.44, + "learning_rate": 1.2079765548005602e-05, + "logits/chosen": -2.7311012744903564, + "logits/rejected": -3.1254818439483643, + "logps/chosen": -92.48912048339844, + "logps/rejected": -273.05609130859375, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.016314774751663208, + "rewards/margins": 6.39872932434082, + "rewards/rejected": -6.3824143409729, + "step": 2819 + }, + { + "epoch": 0.44, + "learning_rate": 1.2079032107474454e-05, + "logits/chosen": -3.292855978012085, + "logits/rejected": -3.0729401111602783, + "logps/chosen": -149.31077575683594, + "logps/rejected": -149.23606872558594, + "loss": 2.1245, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2006373405456543, + "rewards/margins": 1.750044345855713, + "rewards/rejected": -4.950681209564209, + "step": 2820 + }, + { + "epoch": 0.44, + "learning_rate": 1.2078298666943308e-05, + "logits/chosen": -3.0875461101531982, + "logits/rejected": -3.2185118198394775, + "logps/chosen": -36.00177001953125, + "logps/rejected": -145.4031982421875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5565740466117859, + "rewards/margins": 6.057000637054443, + "rewards/rejected": -6.613574981689453, + "step": 2821 + }, + { + "epoch": 0.44, + "learning_rate": 1.207756522641216e-05, + "logits/chosen": -2.8246490955352783, + "logits/rejected": -3.2448368072509766, + "logps/chosen": -383.7017822265625, + "logps/rejected": -393.587890625, + "loss": 0.423, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5565614700317383, + "rewards/margins": 3.5224311351776123, + "rewards/rejected": -6.0789923667907715, + "step": 2822 + }, + { + "epoch": 0.44, + "learning_rate": 1.2076831785881011e-05, + "logits/chosen": -2.1359851360321045, + "logits/rejected": -3.0473737716674805, + "logps/chosen": -191.301025390625, + "logps/rejected": -324.6551513671875, + "loss": 0.0275, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0505378246307373, + "rewards/margins": 3.6523070335388184, + "rewards/rejected": -5.702845096588135, + "step": 2823 + }, + { + "epoch": 0.44, + "learning_rate": 1.2076098345349863e-05, + "logits/chosen": -3.0436742305755615, + "logits/rejected": -2.5113203525543213, + "logps/chosen": -143.610595703125, + "logps/rejected": -131.50299072265625, + "loss": 0.6186, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.002911329269409, + "rewards/margins": 2.0256757736206055, + "rewards/rejected": -4.028587341308594, + "step": 2824 + }, + { + "epoch": 0.44, + "learning_rate": 1.2075364904818715e-05, + "logits/chosen": -1.9058952331542969, + "logits/rejected": -3.1173362731933594, + "logps/chosen": -60.71428680419922, + "logps/rejected": -417.119140625, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0032756328582764, + "rewards/margins": 5.865509033203125, + "rewards/rejected": -6.868784427642822, + "step": 2825 + }, + { + "epoch": 0.44, + "learning_rate": 1.2074631464287567e-05, + "logits/chosen": -2.750279426574707, + "logits/rejected": -3.0928955078125, + "logps/chosen": -167.8546905517578, + "logps/rejected": -481.94140625, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4415965974330902, + "rewards/margins": 7.004002571105957, + "rewards/rejected": -7.445599555969238, + "step": 2826 + }, + { + "epoch": 0.44, + "learning_rate": 1.2073898023756419e-05, + "logits/chosen": -3.0293257236480713, + "logits/rejected": -2.003784656524658, + "logps/chosen": -214.8343505859375, + "logps/rejected": -279.98663330078125, + "loss": 3.1293, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.611988067626953, + "rewards/margins": 0.7924251556396484, + "rewards/rejected": -5.404413223266602, + "step": 2827 + }, + { + "epoch": 0.44, + "learning_rate": 1.207316458322527e-05, + "logits/chosen": -3.0768306255340576, + "logits/rejected": -2.5077261924743652, + "logps/chosen": -172.7678985595703, + "logps/rejected": -147.42822265625, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8312816619873047, + "rewards/margins": 4.500640869140625, + "rewards/rejected": -6.33192253112793, + "step": 2828 + }, + { + "epoch": 0.44, + "learning_rate": 1.2072431142694124e-05, + "logits/chosen": -1.9846004247665405, + "logits/rejected": -3.1900484561920166, + "logps/chosen": -122.90862274169922, + "logps/rejected": -390.55499267578125, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5234642028808594, + "rewards/margins": 4.660236358642578, + "rewards/rejected": -5.1837005615234375, + "step": 2829 + }, + { + "epoch": 0.44, + "learning_rate": 1.2071697702162976e-05, + "logits/chosen": -2.974186897277832, + "logits/rejected": -3.116851329803467, + "logps/chosen": -341.54925537109375, + "logps/rejected": -338.5841369628906, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5704803466796875, + "rewards/margins": 7.564643859863281, + "rewards/rejected": -8.135124206542969, + "step": 2830 + }, + { + "epoch": 0.44, + "learning_rate": 1.2070964261631828e-05, + "logits/chosen": -1.3157354593276978, + "logits/rejected": -2.9625515937805176, + "logps/chosen": -50.064353942871094, + "logps/rejected": -208.71478271484375, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1221551895141602, + "rewards/margins": 4.955677032470703, + "rewards/rejected": -6.077832221984863, + "step": 2831 + }, + { + "epoch": 0.44, + "learning_rate": 1.207023082110068e-05, + "logits/chosen": -2.296250343322754, + "logits/rejected": -3.2402288913726807, + "logps/chosen": -234.59178161621094, + "logps/rejected": -363.9158630371094, + "loss": 0.047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19057312607765198, + "rewards/margins": 4.2347564697265625, + "rewards/rejected": -4.044183254241943, + "step": 2832 + }, + { + "epoch": 0.44, + "learning_rate": 1.2069497380569532e-05, + "logits/chosen": -2.7735984325408936, + "logits/rejected": -3.1312665939331055, + "logps/chosen": -167.068115234375, + "logps/rejected": -165.24502563476562, + "loss": 0.4619, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.768531084060669, + "rewards/margins": 2.1419613361358643, + "rewards/rejected": -3.910492420196533, + "step": 2833 + }, + { + "epoch": 0.44, + "learning_rate": 1.2068763940038384e-05, + "logits/chosen": -2.496610641479492, + "logits/rejected": -3.0558409690856934, + "logps/chosen": -103.54025268554688, + "logps/rejected": -338.28167724609375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.246240258216858, + "rewards/margins": 6.76617956161499, + "rewards/rejected": -8.012419700622559, + "step": 2834 + }, + { + "epoch": 0.44, + "learning_rate": 1.2068030499507236e-05, + "logits/chosen": -3.169689178466797, + "logits/rejected": -3.1669328212738037, + "logps/chosen": -30.98870849609375, + "logps/rejected": -101.92195129394531, + "loss": 0.1215, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1491365432739258, + "rewards/margins": 3.0994410514831543, + "rewards/rejected": -4.24857759475708, + "step": 2835 + }, + { + "epoch": 0.44, + "learning_rate": 1.2067297058976088e-05, + "logits/chosen": -1.6438751220703125, + "logits/rejected": -3.0224759578704834, + "logps/chosen": -81.61947631835938, + "logps/rejected": -288.02294921875, + "loss": 0.3565, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0082192420959473, + "rewards/margins": 4.966835021972656, + "rewards/rejected": -7.975053787231445, + "step": 2836 + }, + { + "epoch": 0.44, + "learning_rate": 1.206656361844494e-05, + "logits/chosen": -3.230367422103882, + "logits/rejected": -3.0992021560668945, + "logps/chosen": -60.67327880859375, + "logps/rejected": -268.5758056640625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5410928726196289, + "rewards/margins": 7.568502426147461, + "rewards/rejected": -8.10959529876709, + "step": 2837 + }, + { + "epoch": 0.44, + "learning_rate": 1.2065830177913793e-05, + "logits/chosen": -3.0782196521759033, + "logits/rejected": -2.9675025939941406, + "logps/chosen": -111.38491821289062, + "logps/rejected": -183.00473022460938, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5457923412322998, + "rewards/margins": 6.018812656402588, + "rewards/rejected": -7.564604759216309, + "step": 2838 + }, + { + "epoch": 0.44, + "learning_rate": 1.2065096737382645e-05, + "logits/chosen": -2.9718387126922607, + "logits/rejected": -2.3922836780548096, + "logps/chosen": -236.7523956298828, + "logps/rejected": -206.20330810546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31240519881248474, + "rewards/margins": 8.370006561279297, + "rewards/rejected": -8.057601928710938, + "step": 2839 + }, + { + "epoch": 0.44, + "learning_rate": 1.2064363296851497e-05, + "logits/chosen": -3.3096256256103516, + "logits/rejected": -2.0196139812469482, + "logps/chosen": -649.6375732421875, + "logps/rejected": -354.7189636230469, + "loss": 2.4503, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2710418701171875, + "rewards/margins": 1.3672194480895996, + "rewards/rejected": -4.638261318206787, + "step": 2840 + }, + { + "epoch": 0.44, + "learning_rate": 1.2063629856320349e-05, + "logits/chosen": -2.9730396270751953, + "logits/rejected": -3.190824270248413, + "logps/chosen": -132.70172119140625, + "logps/rejected": -217.5010986328125, + "loss": 0.1423, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.440378189086914, + "rewards/margins": 3.719373941421509, + "rewards/rejected": -5.159751892089844, + "step": 2841 + }, + { + "epoch": 0.44, + "learning_rate": 1.20628964157892e-05, + "logits/chosen": -3.2673144340515137, + "logits/rejected": -3.3575174808502197, + "logps/chosen": -163.2227325439453, + "logps/rejected": -155.45626831054688, + "loss": 3.3137, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.918445348739624, + "rewards/margins": -1.1165707111358643, + "rewards/rejected": -2.8018746376037598, + "step": 2842 + }, + { + "epoch": 0.44, + "learning_rate": 1.2062162975258052e-05, + "logits/chosen": -2.532799005508423, + "logits/rejected": -2.6648099422454834, + "logps/chosen": -124.77664184570312, + "logps/rejected": -271.82183837890625, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.545513391494751, + "rewards/margins": 5.646724700927734, + "rewards/rejected": -7.192237854003906, + "step": 2843 + }, + { + "epoch": 0.44, + "learning_rate": 1.2061429534726904e-05, + "logits/chosen": -3.03947377204895, + "logits/rejected": -2.404402017593384, + "logps/chosen": -167.4124298095703, + "logps/rejected": -51.273502349853516, + "loss": 5.2684, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.098459243774414, + "rewards/margins": -5.236147880554199, + "rewards/rejected": -1.8623112440109253, + "step": 2844 + }, + { + "epoch": 0.44, + "learning_rate": 1.2060696094195756e-05, + "logits/chosen": -2.5588953495025635, + "logits/rejected": -3.158153533935547, + "logps/chosen": -51.66376876831055, + "logps/rejected": -186.13336181640625, + "loss": 0.1985, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3583755493164062, + "rewards/margins": 1.8718008995056152, + "rewards/rejected": -4.2301764488220215, + "step": 2845 + }, + { + "epoch": 0.44, + "learning_rate": 1.2059962653664608e-05, + "logits/chosen": -2.5142745971679688, + "logits/rejected": -3.2615981101989746, + "logps/chosen": -86.9871597290039, + "logps/rejected": -276.39923095703125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9557445049285889, + "rewards/margins": 5.804255485534668, + "rewards/rejected": -6.760000228881836, + "step": 2846 + }, + { + "epoch": 0.44, + "learning_rate": 1.2059229213133462e-05, + "logits/chosen": -2.9300389289855957, + "logits/rejected": -3.2331907749176025, + "logps/chosen": -227.8140869140625, + "logps/rejected": -392.0146484375, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5137313604354858, + "rewards/margins": 5.803840637207031, + "rewards/rejected": -7.317572116851807, + "step": 2847 + }, + { + "epoch": 0.44, + "learning_rate": 1.2058495772602313e-05, + "logits/chosen": -2.0867021083831787, + "logits/rejected": -2.9228098392486572, + "logps/chosen": -50.43887710571289, + "logps/rejected": -182.4130096435547, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.079765796661377, + "rewards/margins": 4.385984420776367, + "rewards/rejected": -5.465750217437744, + "step": 2848 + }, + { + "epoch": 0.44, + "learning_rate": 1.2057762332071165e-05, + "logits/chosen": -3.0156850814819336, + "logits/rejected": -2.7765371799468994, + "logps/chosen": -200.9566650390625, + "logps/rejected": -376.3780517578125, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8723349571228027, + "rewards/margins": 5.330549716949463, + "rewards/rejected": -8.202884674072266, + "step": 2849 + }, + { + "epoch": 0.44, + "learning_rate": 1.2057028891540017e-05, + "logits/chosen": -0.8418710827827454, + "logits/rejected": -2.760801076889038, + "logps/chosen": -94.16529083251953, + "logps/rejected": -380.91851806640625, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5258850455284119, + "rewards/margins": 5.631343841552734, + "rewards/rejected": -6.157228946685791, + "step": 2850 + }, + { + "epoch": 0.44, + "learning_rate": 1.2056295451008869e-05, + "logits/chosen": -2.8150129318237305, + "logits/rejected": -3.1732964515686035, + "logps/chosen": -230.97918701171875, + "logps/rejected": -353.6410217285156, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8062324523925781, + "rewards/margins": 6.843418121337891, + "rewards/rejected": -8.649650573730469, + "step": 2851 + }, + { + "epoch": 0.44, + "learning_rate": 1.2055562010477721e-05, + "logits/chosen": -3.096127510070801, + "logits/rejected": -2.1267824172973633, + "logps/chosen": -419.526611328125, + "logps/rejected": -313.78814697265625, + "loss": 2.4832, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.183300733566284, + "rewards/margins": 1.220360517501831, + "rewards/rejected": -4.403661251068115, + "step": 2852 + }, + { + "epoch": 0.44, + "learning_rate": 1.2054828569946575e-05, + "logits/chosen": -3.0153958797454834, + "logits/rejected": -2.650797128677368, + "logps/chosen": -77.9188461303711, + "logps/rejected": -170.5290985107422, + "loss": 0.1418, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7665235996246338, + "rewards/margins": 2.673311710357666, + "rewards/rejected": -4.439835548400879, + "step": 2853 + }, + { + "epoch": 0.44, + "learning_rate": 1.2054095129415426e-05, + "logits/chosen": -3.264854907989502, + "logits/rejected": -2.5584194660186768, + "logps/chosen": -128.5493621826172, + "logps/rejected": -86.0053482055664, + "loss": 1.5205, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.802267074584961, + "rewards/margins": 1.522019863128662, + "rewards/rejected": -4.324286937713623, + "step": 2854 + }, + { + "epoch": 0.44, + "learning_rate": 1.2053361688884278e-05, + "logits/chosen": -2.916245937347412, + "logits/rejected": -3.338372230529785, + "logps/chosen": -112.87632751464844, + "logps/rejected": -161.99383544921875, + "loss": 0.899, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1840908527374268, + "rewards/margins": 2.3636882305145264, + "rewards/rejected": -4.547779083251953, + "step": 2855 + }, + { + "epoch": 0.44, + "learning_rate": 1.2052628248353132e-05, + "logits/chosen": -2.4332292079925537, + "logits/rejected": -3.243807315826416, + "logps/chosen": -204.2498016357422, + "logps/rejected": -383.8991394042969, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9151058197021484, + "rewards/margins": 5.8209733963012695, + "rewards/rejected": -6.736079216003418, + "step": 2856 + }, + { + "epoch": 0.44, + "learning_rate": 1.2051894807821984e-05, + "logits/chosen": -2.903360366821289, + "logits/rejected": -3.2879929542541504, + "logps/chosen": -146.44891357421875, + "logps/rejected": -338.0903625488281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.554931640625, + "rewards/margins": 9.085464477539062, + "rewards/rejected": -10.640396118164062, + "step": 2857 + }, + { + "epoch": 0.44, + "learning_rate": 1.2051161367290836e-05, + "logits/chosen": -3.153566837310791, + "logits/rejected": -3.066770076751709, + "logps/chosen": -155.40255737304688, + "logps/rejected": -57.79499053955078, + "loss": 3.417, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.928664684295654, + "rewards/margins": -2.0078930854797363, + "rewards/rejected": -2.920771837234497, + "step": 2858 + }, + { + "epoch": 0.44, + "learning_rate": 1.2050427926759687e-05, + "logits/chosen": -3.288637638092041, + "logits/rejected": -3.1026134490966797, + "logps/chosen": -196.81350708007812, + "logps/rejected": -46.2891960144043, + "loss": 6.0994, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.7661213874816895, + "rewards/margins": -6.097048759460449, + "rewards/rejected": -0.6690723896026611, + "step": 2859 + }, + { + "epoch": 0.44, + "learning_rate": 1.204969448622854e-05, + "logits/chosen": -2.6687543392181396, + "logits/rejected": -3.022979259490967, + "logps/chosen": -110.13473510742188, + "logps/rejected": -297.9602966308594, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1928459405899048, + "rewards/margins": 5.826934337615967, + "rewards/rejected": -7.019780158996582, + "step": 2860 + }, + { + "epoch": 0.44, + "learning_rate": 1.2048961045697391e-05, + "logits/chosen": -2.8422000408172607, + "logits/rejected": -3.2168707847595215, + "logps/chosen": -637.3004150390625, + "logps/rejected": -620.7781982421875, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1694306135177612, + "rewards/margins": 5.354513168334961, + "rewards/rejected": -6.523943901062012, + "step": 2861 + }, + { + "epoch": 0.45, + "learning_rate": 1.2048227605166243e-05, + "logits/chosen": -1.9082170724868774, + "logits/rejected": -3.1809895038604736, + "logps/chosen": -330.1628112792969, + "logps/rejected": -626.791748046875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3127068281173706, + "rewards/margins": 6.868078231811523, + "rewards/rejected": -8.180785179138184, + "step": 2862 + }, + { + "epoch": 0.45, + "learning_rate": 1.2047494164635095e-05, + "logits/chosen": -2.6211225986480713, + "logits/rejected": -3.089840888977051, + "logps/chosen": -354.0186462402344, + "logps/rejected": -215.43450927734375, + "loss": 4.9832, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.309136390686035, + "rewards/margins": -1.878485918045044, + "rewards/rejected": -4.430650234222412, + "step": 2863 + }, + { + "epoch": 0.45, + "learning_rate": 1.2046760724103947e-05, + "logits/chosen": -2.6947181224823, + "logits/rejected": -3.198984384536743, + "logps/chosen": -161.93484497070312, + "logps/rejected": -271.49676513671875, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.280921220779419, + "rewards/margins": 5.178738594055176, + "rewards/rejected": -6.459659576416016, + "step": 2864 + }, + { + "epoch": 0.45, + "learning_rate": 1.20460272835728e-05, + "logits/chosen": -3.231226682662964, + "logits/rejected": -2.500807523727417, + "logps/chosen": -218.62420654296875, + "logps/rejected": -116.37771606445312, + "loss": 3.9105, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.913269996643066, + "rewards/margins": -1.9022419452667236, + "rewards/rejected": -3.0110278129577637, + "step": 2865 + }, + { + "epoch": 0.45, + "learning_rate": 1.2045293843041652e-05, + "logits/chosen": -1.886537790298462, + "logits/rejected": -2.998490571975708, + "logps/chosen": -304.3255920410156, + "logps/rejected": -503.5570983886719, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2261215448379517, + "rewards/margins": 6.150547981262207, + "rewards/rejected": -7.376669406890869, + "step": 2866 + }, + { + "epoch": 0.45, + "learning_rate": 1.2044560402510504e-05, + "logits/chosen": -1.8717682361602783, + "logits/rejected": -3.1964571475982666, + "logps/chosen": -212.32144165039062, + "logps/rejected": -339.49334716796875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0692695677280426, + "rewards/margins": 6.085636138916016, + "rewards/rejected": -6.154905796051025, + "step": 2867 + }, + { + "epoch": 0.45, + "learning_rate": 1.2043826961979356e-05, + "logits/chosen": -2.7437524795532227, + "logits/rejected": -3.0908799171447754, + "logps/chosen": -285.2881164550781, + "logps/rejected": -469.93603515625, + "loss": 4.1394, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.796060085296631, + "rewards/margins": -2.272831439971924, + "rewards/rejected": -2.523228645324707, + "step": 2868 + }, + { + "epoch": 0.45, + "learning_rate": 1.2043093521448208e-05, + "logits/chosen": -1.4095083475112915, + "logits/rejected": -3.049241065979004, + "logps/chosen": -39.51438903808594, + "logps/rejected": -280.9021911621094, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3241076469421387, + "rewards/margins": 6.980367660522461, + "rewards/rejected": -9.304475784301758, + "step": 2869 + }, + { + "epoch": 0.45, + "learning_rate": 1.204236008091706e-05, + "logits/chosen": -3.20574688911438, + "logits/rejected": -3.1867129802703857, + "logps/chosen": -167.0451202392578, + "logps/rejected": -269.46661376953125, + "loss": 0.1816, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9171078205108643, + "rewards/margins": 5.319677352905273, + "rewards/rejected": -7.236784934997559, + "step": 2870 + }, + { + "epoch": 0.45, + "learning_rate": 1.2041626640385912e-05, + "logits/chosen": -2.2627108097076416, + "logits/rejected": -3.15643572807312, + "logps/chosen": -290.8416748046875, + "logps/rejected": -437.68408203125, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.419054627418518, + "rewards/margins": 5.147355079650879, + "rewards/rejected": -6.566409111022949, + "step": 2871 + }, + { + "epoch": 0.45, + "learning_rate": 1.2040893199854764e-05, + "logits/chosen": -3.1491949558258057, + "logits/rejected": -2.9483158588409424, + "logps/chosen": -488.7398681640625, + "logps/rejected": -470.55224609375, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.010765790939331, + "rewards/margins": 5.008388519287109, + "rewards/rejected": -6.0191545486450195, + "step": 2872 + }, + { + "epoch": 0.45, + "learning_rate": 1.2040159759323617e-05, + "logits/chosen": -3.114978075027466, + "logits/rejected": -3.231346845626831, + "logps/chosen": -155.27630615234375, + "logps/rejected": -108.96702575683594, + "loss": 2.3694, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7125802040100098, + "rewards/margins": -0.6803175210952759, + "rewards/rejected": -3.0322628021240234, + "step": 2873 + }, + { + "epoch": 0.45, + "learning_rate": 1.2039426318792469e-05, + "logits/chosen": -3.146667718887329, + "logits/rejected": -2.9121012687683105, + "logps/chosen": -181.58282470703125, + "logps/rejected": -131.38174438476562, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.333589553833008, + "rewards/margins": 3.2809839248657227, + "rewards/rejected": -5.6145734786987305, + "step": 2874 + }, + { + "epoch": 0.45, + "learning_rate": 1.2038692878261321e-05, + "logits/chosen": -2.6767477989196777, + "logits/rejected": -3.2498722076416016, + "logps/chosen": -116.37631225585938, + "logps/rejected": -150.75192260742188, + "loss": 2.5941, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.336155891418457, + "rewards/margins": 1.0538969039916992, + "rewards/rejected": -4.390052795410156, + "step": 2875 + }, + { + "epoch": 0.45, + "learning_rate": 1.2037959437730173e-05, + "logits/chosen": -1.4588431119918823, + "logits/rejected": -2.7354753017425537, + "logps/chosen": -332.2587890625, + "logps/rejected": -455.567138671875, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.88787841796875, + "rewards/margins": 4.810128688812256, + "rewards/rejected": -6.698007583618164, + "step": 2876 + }, + { + "epoch": 0.45, + "learning_rate": 1.2037225997199025e-05, + "logits/chosen": -3.212284803390503, + "logits/rejected": -3.005551338195801, + "logps/chosen": -589.2401733398438, + "logps/rejected": -562.85107421875, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9377068281173706, + "rewards/margins": 5.108880043029785, + "rewards/rejected": -7.046586990356445, + "step": 2877 + }, + { + "epoch": 0.45, + "learning_rate": 1.2036492556667877e-05, + "logits/chosen": -2.267969846725464, + "logits/rejected": -3.2539243698120117, + "logps/chosen": -368.2520751953125, + "logps/rejected": -444.67364501953125, + "loss": 2.3048, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.297407627105713, + "rewards/margins": 0.9555025100708008, + "rewards/rejected": -4.252910137176514, + "step": 2878 + }, + { + "epoch": 0.45, + "learning_rate": 1.2035759116136728e-05, + "logits/chosen": -2.694767713546753, + "logits/rejected": -3.1707231998443604, + "logps/chosen": -93.73863220214844, + "logps/rejected": -267.6180419921875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8450143933296204, + "rewards/margins": 6.981709957122803, + "rewards/rejected": -7.826724052429199, + "step": 2879 + }, + { + "epoch": 0.45, + "learning_rate": 1.203502567560558e-05, + "logits/chosen": -2.035658121109009, + "logits/rejected": -3.080549716949463, + "logps/chosen": -35.46420669555664, + "logps/rejected": -343.0513916015625, + "loss": 0.1058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2645858526229858, + "rewards/margins": 5.739883899688721, + "rewards/rejected": -7.004469871520996, + "step": 2880 + }, + { + "epoch": 0.45, + "learning_rate": 1.2034292235074432e-05, + "logits/chosen": -2.138134002685547, + "logits/rejected": -3.016939640045166, + "logps/chosen": -27.560535430908203, + "logps/rejected": -112.06428527832031, + "loss": 0.1146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3976025581359863, + "rewards/margins": 2.472470760345459, + "rewards/rejected": -3.8700733184814453, + "step": 2881 + }, + { + "epoch": 0.45, + "learning_rate": 1.2033558794543286e-05, + "logits/chosen": -3.095607280731201, + "logits/rejected": -2.5250473022460938, + "logps/chosen": -238.90740966796875, + "logps/rejected": -222.02215576171875, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7426398992538452, + "rewards/margins": 4.474379539489746, + "rewards/rejected": -5.217019081115723, + "step": 2882 + }, + { + "epoch": 0.45, + "learning_rate": 1.2032825354012138e-05, + "logits/chosen": -3.247687816619873, + "logits/rejected": -3.3102149963378906, + "logps/chosen": -241.07000732421875, + "logps/rejected": -233.58230590820312, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035495758056640625, + "rewards/margins": 3.50418758392334, + "rewards/rejected": -3.507737159729004, + "step": 2883 + }, + { + "epoch": 0.45, + "learning_rate": 1.203209191348099e-05, + "logits/chosen": -3.097433090209961, + "logits/rejected": -2.078528642654419, + "logps/chosen": -345.2200927734375, + "logps/rejected": -282.46380615234375, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.236019104719162, + "rewards/margins": 6.605986595153809, + "rewards/rejected": -6.369967460632324, + "step": 2884 + }, + { + "epoch": 0.45, + "learning_rate": 1.2031358472949841e-05, + "logits/chosen": -2.7419323921203613, + "logits/rejected": -3.2578628063201904, + "logps/chosen": -181.8997802734375, + "logps/rejected": -203.6614990234375, + "loss": 0.1561, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.210198163986206, + "rewards/margins": 3.503089666366577, + "rewards/rejected": -5.713287830352783, + "step": 2885 + }, + { + "epoch": 0.45, + "learning_rate": 1.2030625032418693e-05, + "logits/chosen": -1.120333194732666, + "logits/rejected": -3.0367472171783447, + "logps/chosen": -87.50752258300781, + "logps/rejected": -341.97210693359375, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7707857489585876, + "rewards/margins": 4.79015588760376, + "rewards/rejected": -5.560941696166992, + "step": 2886 + }, + { + "epoch": 0.45, + "learning_rate": 1.2029891591887547e-05, + "logits/chosen": -2.9523444175720215, + "logits/rejected": -2.0666022300720215, + "logps/chosen": -285.6256408691406, + "logps/rejected": -213.4877471923828, + "loss": 1.8192, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4534432888031006, + "rewards/margins": 1.5880645513534546, + "rewards/rejected": -5.041507720947266, + "step": 2887 + }, + { + "epoch": 0.45, + "learning_rate": 1.2029158151356399e-05, + "logits/chosen": -2.9062607288360596, + "logits/rejected": -3.254857301712036, + "logps/chosen": -131.48243713378906, + "logps/rejected": -212.77699279785156, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1700843870639801, + "rewards/margins": 4.06611442565918, + "rewards/rejected": -4.236198425292969, + "step": 2888 + }, + { + "epoch": 0.45, + "learning_rate": 1.202842471082525e-05, + "logits/chosen": -2.435321569442749, + "logits/rejected": -2.767495632171631, + "logps/chosen": -587.0612182617188, + "logps/rejected": -853.40234375, + "loss": 3.4843, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8544249534606934, + "rewards/margins": 0.0989229679107666, + "rewards/rejected": -3.95334792137146, + "step": 2889 + }, + { + "epoch": 0.45, + "learning_rate": 1.2027691270294102e-05, + "logits/chosen": -3.120666027069092, + "logits/rejected": -3.109605312347412, + "logps/chosen": -198.80010986328125, + "logps/rejected": -151.35235595703125, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9527595639228821, + "rewards/margins": 3.9688303470611572, + "rewards/rejected": -4.9215898513793945, + "step": 2890 + }, + { + "epoch": 0.45, + "learning_rate": 1.2026957829762956e-05, + "logits/chosen": -3.1482975482940674, + "logits/rejected": -3.170544385910034, + "logps/chosen": -405.579345703125, + "logps/rejected": -466.8786926269531, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4787094295024872, + "rewards/margins": 6.030993461608887, + "rewards/rejected": -6.509703159332275, + "step": 2891 + }, + { + "epoch": 0.45, + "learning_rate": 1.2026224389231808e-05, + "logits/chosen": -1.155231237411499, + "logits/rejected": -2.9622905254364014, + "logps/chosen": -129.6085205078125, + "logps/rejected": -464.4765930175781, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6962432861328125, + "rewards/margins": 6.681465148925781, + "rewards/rejected": -8.377708435058594, + "step": 2892 + }, + { + "epoch": 0.45, + "learning_rate": 1.202549094870066e-05, + "logits/chosen": -2.5358433723449707, + "logits/rejected": -3.006958484649658, + "logps/chosen": -263.9505615234375, + "logps/rejected": -194.553466796875, + "loss": 1.5235, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0393266677856445, + "rewards/margins": 0.960576057434082, + "rewards/rejected": -3.9999027252197266, + "step": 2893 + }, + { + "epoch": 0.45, + "learning_rate": 1.2024757508169512e-05, + "logits/chosen": -2.986741065979004, + "logits/rejected": -3.088351249694824, + "logps/chosen": -39.32037353515625, + "logps/rejected": -206.81539916992188, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.368660569190979, + "rewards/margins": 4.479276657104492, + "rewards/rejected": -5.847937107086182, + "step": 2894 + }, + { + "epoch": 0.45, + "learning_rate": 1.2024024067638364e-05, + "logits/chosen": -1.5997731685638428, + "logits/rejected": -3.1662726402282715, + "logps/chosen": -77.58641052246094, + "logps/rejected": -337.3470458984375, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2315248250961304, + "rewards/margins": 4.717120170593262, + "rewards/rejected": -5.948645114898682, + "step": 2895 + }, + { + "epoch": 0.45, + "learning_rate": 1.2023290627107215e-05, + "logits/chosen": -2.5103330612182617, + "logits/rejected": -3.0797250270843506, + "logps/chosen": -99.20405578613281, + "logps/rejected": -196.54922485351562, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6358028650283813, + "rewards/margins": 4.566608905792236, + "rewards/rejected": -5.202411651611328, + "step": 2896 + }, + { + "epoch": 0.45, + "learning_rate": 1.2022557186576067e-05, + "logits/chosen": -3.208505153656006, + "logits/rejected": -3.2821426391601562, + "logps/chosen": -106.97976684570312, + "logps/rejected": -241.88294982910156, + "loss": 0.1166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46749573945999146, + "rewards/margins": 2.5775535106658936, + "rewards/rejected": -3.0450491905212402, + "step": 2897 + }, + { + "epoch": 0.45, + "learning_rate": 1.202182374604492e-05, + "logits/chosen": -1.1421332359313965, + "logits/rejected": -2.5718696117401123, + "logps/chosen": -129.51315307617188, + "logps/rejected": -521.0172729492188, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7120614051818848, + "rewards/margins": 7.010776519775391, + "rewards/rejected": -8.722837448120117, + "step": 2898 + }, + { + "epoch": 0.45, + "learning_rate": 1.2021090305513771e-05, + "logits/chosen": -2.96219801902771, + "logits/rejected": -2.9887168407440186, + "logps/chosen": -225.19488525390625, + "logps/rejected": -201.9580078125, + "loss": 0.6687, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.671535611152649, + "rewards/margins": 1.6963300704956055, + "rewards/rejected": -3.367865562438965, + "step": 2899 + }, + { + "epoch": 0.45, + "learning_rate": 1.2020356864982625e-05, + "logits/chosen": -2.8744211196899414, + "logits/rejected": -3.1053264141082764, + "logps/chosen": -780.0855712890625, + "logps/rejected": -629.330322265625, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006095878779888153, + "rewards/margins": 5.245312690734863, + "rewards/rejected": -5.2392168045043945, + "step": 2900 + }, + { + "epoch": 0.45, + "learning_rate": 1.2019623424451477e-05, + "logits/chosen": -2.62508487701416, + "logits/rejected": -3.2391538619995117, + "logps/chosen": -38.49335861206055, + "logps/rejected": -278.18804931640625, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06489495933055878, + "rewards/margins": 5.707943916320801, + "rewards/rejected": -5.6430487632751465, + "step": 2901 + }, + { + "epoch": 0.45, + "learning_rate": 1.2018889983920328e-05, + "logits/chosen": -3.0757179260253906, + "logits/rejected": -2.4155986309051514, + "logps/chosen": -381.93634033203125, + "logps/rejected": -305.2843322753906, + "loss": 3.6796, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.972891330718994, + "rewards/margins": -1.5356671810150146, + "rewards/rejected": -3.4372239112854004, + "step": 2902 + }, + { + "epoch": 0.45, + "learning_rate": 1.201815654338918e-05, + "logits/chosen": -2.469294786453247, + "logits/rejected": -3.1457035541534424, + "logps/chosen": -299.6103210449219, + "logps/rejected": -384.9524230957031, + "loss": 0.0519, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.606439232826233, + "rewards/margins": 4.379034042358398, + "rewards/rejected": -5.9854736328125, + "step": 2903 + }, + { + "epoch": 0.45, + "learning_rate": 1.2017423102858032e-05, + "logits/chosen": -1.910130500793457, + "logits/rejected": -2.796679973602295, + "logps/chosen": -263.27294921875, + "logps/rejected": -570.9723510742188, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1554539203643799, + "rewards/margins": 7.515860557556152, + "rewards/rejected": -8.671314239501953, + "step": 2904 + }, + { + "epoch": 0.45, + "learning_rate": 1.2016689662326884e-05, + "logits/chosen": -2.2767021656036377, + "logits/rejected": -3.1766867637634277, + "logps/chosen": -35.859169006347656, + "logps/rejected": -146.31533813476562, + "loss": 0.2853, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.085500478744507, + "rewards/margins": 1.8520655632019043, + "rewards/rejected": -3.9375662803649902, + "step": 2905 + }, + { + "epoch": 0.45, + "learning_rate": 1.2015956221795736e-05, + "logits/chosen": -1.6858611106872559, + "logits/rejected": -3.0402166843414307, + "logps/chosen": -226.85000610351562, + "logps/rejected": -572.6676025390625, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0556038618087769, + "rewards/margins": 4.879480838775635, + "rewards/rejected": -5.935084819793701, + "step": 2906 + }, + { + "epoch": 0.45, + "learning_rate": 1.2015222781264588e-05, + "logits/chosen": -2.3003814220428467, + "logits/rejected": -3.097966194152832, + "logps/chosen": -133.2295379638672, + "logps/rejected": -311.4749450683594, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6494923830032349, + "rewards/margins": 4.8859052658081055, + "rewards/rejected": -6.535397529602051, + "step": 2907 + }, + { + "epoch": 0.45, + "learning_rate": 1.201448934073344e-05, + "logits/chosen": -3.0638201236724854, + "logits/rejected": -2.884296417236328, + "logps/chosen": -86.55947875976562, + "logps/rejected": -181.44357299804688, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3200111389160156, + "rewards/margins": 5.735996723175049, + "rewards/rejected": -6.0560078620910645, + "step": 2908 + }, + { + "epoch": 0.45, + "learning_rate": 1.2013755900202293e-05, + "logits/chosen": -2.937974691390991, + "logits/rejected": -2.1602578163146973, + "logps/chosen": -139.30018615722656, + "logps/rejected": -120.27071380615234, + "loss": 0.2711, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6575440168380737, + "rewards/margins": 1.6853618621826172, + "rewards/rejected": -3.3429059982299805, + "step": 2909 + }, + { + "epoch": 0.45, + "learning_rate": 1.2013022459671145e-05, + "logits/chosen": -2.7669711112976074, + "logits/rejected": -3.1422359943389893, + "logps/chosen": -548.5087890625, + "logps/rejected": -819.899169921875, + "loss": 3.4286, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9673125743865967, + "rewards/margins": -1.2262039184570312, + "rewards/rejected": -1.7411086559295654, + "step": 2910 + }, + { + "epoch": 0.45, + "learning_rate": 1.2012289019139997e-05, + "logits/chosen": -3.023279905319214, + "logits/rejected": -2.9552416801452637, + "logps/chosen": -203.37228393554688, + "logps/rejected": -346.0970458984375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3598159551620483, + "rewards/margins": 6.610145568847656, + "rewards/rejected": -7.969961166381836, + "step": 2911 + }, + { + "epoch": 0.45, + "learning_rate": 1.2011555578608849e-05, + "logits/chosen": -2.9914069175720215, + "logits/rejected": -2.8682780265808105, + "logps/chosen": -278.90228271484375, + "logps/rejected": -355.5595703125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.656201183795929, + "rewards/margins": 5.661131858825684, + "rewards/rejected": -5.00493049621582, + "step": 2912 + }, + { + "epoch": 0.45, + "learning_rate": 1.20108221380777e-05, + "logits/chosen": -1.7869269847869873, + "logits/rejected": -2.9639766216278076, + "logps/chosen": -178.01858520507812, + "logps/rejected": -532.86669921875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9218990802764893, + "rewards/margins": 6.434819221496582, + "rewards/rejected": -7.356718063354492, + "step": 2913 + }, + { + "epoch": 0.45, + "learning_rate": 1.2010088697546553e-05, + "logits/chosen": -0.9072009921073914, + "logits/rejected": -2.928145408630371, + "logps/chosen": -50.39292907714844, + "logps/rejected": -345.29132080078125, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3786824941635132, + "rewards/margins": 5.394941329956055, + "rewards/rejected": -6.773623943328857, + "step": 2914 + }, + { + "epoch": 0.45, + "learning_rate": 1.2009355257015405e-05, + "logits/chosen": -1.5298219919204712, + "logits/rejected": -3.191479444503784, + "logps/chosen": -93.28526306152344, + "logps/rejected": -391.0518493652344, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0053828954696655, + "rewards/margins": 6.568068504333496, + "rewards/rejected": -7.573451042175293, + "step": 2915 + }, + { + "epoch": 0.45, + "learning_rate": 1.2008621816484256e-05, + "logits/chosen": -2.8864378929138184, + "logits/rejected": -3.17972993850708, + "logps/chosen": -88.0078125, + "logps/rejected": -248.73898315429688, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5802481174468994, + "rewards/margins": 4.733782768249512, + "rewards/rejected": -6.31403112411499, + "step": 2916 + }, + { + "epoch": 0.45, + "learning_rate": 1.2007888375953108e-05, + "logits/chosen": -2.3771660327911377, + "logits/rejected": -3.0995423793792725, + "logps/chosen": -137.1275177001953, + "logps/rejected": -211.11358642578125, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6968597769737244, + "rewards/margins": 5.811739921569824, + "rewards/rejected": -6.508599281311035, + "step": 2917 + }, + { + "epoch": 0.45, + "learning_rate": 1.2007154935421962e-05, + "logits/chosen": -3.102400541305542, + "logits/rejected": -2.403853178024292, + "logps/chosen": -326.84326171875, + "logps/rejected": -241.66256713867188, + "loss": 3.0439, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4483437538146973, + "rewards/margins": 1.5445218086242676, + "rewards/rejected": -4.992865562438965, + "step": 2918 + }, + { + "epoch": 0.45, + "learning_rate": 1.2006421494890814e-05, + "logits/chosen": -2.8058691024780273, + "logits/rejected": -3.205217123031616, + "logps/chosen": -212.64695739746094, + "logps/rejected": -441.6280517578125, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.218708872795105, + "rewards/margins": 5.873577117919922, + "rewards/rejected": -7.092286109924316, + "step": 2919 + }, + { + "epoch": 0.45, + "learning_rate": 1.2005688054359666e-05, + "logits/chosen": -3.2278614044189453, + "logits/rejected": -2.881678581237793, + "logps/chosen": -472.0848388671875, + "logps/rejected": -262.58203125, + "loss": 2.8117, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8262816667556763, + "rewards/margins": 1.1934783458709717, + "rewards/rejected": -3.0197598934173584, + "step": 2920 + }, + { + "epoch": 0.45, + "learning_rate": 1.200495461382852e-05, + "logits/chosen": -3.1807844638824463, + "logits/rejected": -3.0720784664154053, + "logps/chosen": -347.6524963378906, + "logps/rejected": -166.10800170898438, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46847230195999146, + "rewards/margins": 6.019392013549805, + "rewards/rejected": -5.550919532775879, + "step": 2921 + }, + { + "epoch": 0.45, + "learning_rate": 1.2004221173297371e-05, + "logits/chosen": -2.7943379878997803, + "logits/rejected": -2.830389976501465, + "logps/chosen": -496.4597473144531, + "logps/rejected": -351.75970458984375, + "loss": 3.9489, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.9904069900512695, + "rewards/margins": 1.4286036491394043, + "rewards/rejected": -6.419010639190674, + "step": 2922 + }, + { + "epoch": 0.45, + "learning_rate": 1.2003487732766223e-05, + "logits/chosen": -3.0855681896209717, + "logits/rejected": -1.984720230102539, + "logps/chosen": -120.76171112060547, + "logps/rejected": -104.64337158203125, + "loss": 0.103, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0243641138076782, + "rewards/margins": 3.2903859615325928, + "rewards/rejected": -4.3147501945495605, + "step": 2923 + }, + { + "epoch": 0.45, + "learning_rate": 1.2002754292235075e-05, + "logits/chosen": -2.9030888080596924, + "logits/rejected": -3.2675516605377197, + "logps/chosen": -48.17025375366211, + "logps/rejected": -239.3319091796875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9779741764068604, + "rewards/margins": 5.817390441894531, + "rewards/rejected": -6.7953643798828125, + "step": 2924 + }, + { + "epoch": 0.45, + "learning_rate": 1.2002020851703927e-05, + "logits/chosen": -2.798856258392334, + "logits/rejected": -2.1932692527770996, + "logps/chosen": -892.7802124023438, + "logps/rejected": -787.5235595703125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1717529445886612, + "rewards/margins": 6.4034576416015625, + "rewards/rejected": -6.2317047119140625, + "step": 2925 + }, + { + "epoch": 0.46, + "learning_rate": 1.2001287411172779e-05, + "logits/chosen": -2.256157636642456, + "logits/rejected": -3.155792474746704, + "logps/chosen": -18.208293914794922, + "logps/rejected": -227.68759155273438, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31778478622436523, + "rewards/margins": 6.05757999420166, + "rewards/rejected": -6.375365257263184, + "step": 2926 + }, + { + "epoch": 0.46, + "learning_rate": 1.2000553970641632e-05, + "logits/chosen": -2.9165396690368652, + "logits/rejected": -3.2362313270568848, + "logps/chosen": -206.67970275878906, + "logps/rejected": -251.2484130859375, + "loss": 2.0407, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.411426544189453, + "rewards/margins": 1.6464333534240723, + "rewards/rejected": -4.057859897613525, + "step": 2927 + }, + { + "epoch": 0.46, + "learning_rate": 1.1999820530110484e-05, + "logits/chosen": -2.499300241470337, + "logits/rejected": -3.251203775405884, + "logps/chosen": -361.08660888671875, + "logps/rejected": -412.64453125, + "loss": 3.4883, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.827288866043091, + "rewards/margins": -0.20934510231018066, + "rewards/rejected": -3.61794376373291, + "step": 2928 + }, + { + "epoch": 0.46, + "learning_rate": 1.1999087089579336e-05, + "logits/chosen": -2.7797234058380127, + "logits/rejected": -3.2172508239746094, + "logps/chosen": -373.643310546875, + "logps/rejected": -315.3438720703125, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5134044885635376, + "rewards/margins": 4.477813243865967, + "rewards/rejected": -5.991217613220215, + "step": 2929 + }, + { + "epoch": 0.46, + "learning_rate": 1.1998353649048188e-05, + "logits/chosen": -2.998358964920044, + "logits/rejected": -2.997727394104004, + "logps/chosen": -374.99932861328125, + "logps/rejected": -517.473388671875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.776236355304718, + "rewards/margins": 7.485713958740234, + "rewards/rejected": -8.261950492858887, + "step": 2930 + }, + { + "epoch": 0.46, + "learning_rate": 1.199762020851704e-05, + "logits/chosen": -2.62473201751709, + "logits/rejected": -0.9952349662780762, + "logps/chosen": -330.5744323730469, + "logps/rejected": -144.91110229492188, + "loss": 4.8239, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.927610397338867, + "rewards/margins": -1.4263837337493896, + "rewards/rejected": -4.501226425170898, + "step": 2931 + }, + { + "epoch": 0.46, + "learning_rate": 1.1996886767985892e-05, + "logits/chosen": -2.76076078414917, + "logits/rejected": -3.3802666664123535, + "logps/chosen": -910.9778442382812, + "logps/rejected": -782.1732177734375, + "loss": 0.3692, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.6601547002792358, + "rewards/margins": 3.434195041656494, + "rewards/rejected": -4.0943498611450195, + "step": 2932 + }, + { + "epoch": 0.46, + "learning_rate": 1.1996153327454743e-05, + "logits/chosen": -2.9261059761047363, + "logits/rejected": -3.2670061588287354, + "logps/chosen": -35.95567321777344, + "logps/rejected": -179.17575073242188, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34077367186546326, + "rewards/margins": 4.367165565490723, + "rewards/rejected": -4.707939147949219, + "step": 2933 + }, + { + "epoch": 0.46, + "learning_rate": 1.1995419886923595e-05, + "logits/chosen": -2.4761641025543213, + "logits/rejected": -3.1220054626464844, + "logps/chosen": -72.24280548095703, + "logps/rejected": -260.98114013671875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7570667266845703, + "rewards/margins": 6.312479019165039, + "rewards/rejected": -7.069545745849609, + "step": 2934 + }, + { + "epoch": 0.46, + "learning_rate": 1.1994686446392447e-05, + "logits/chosen": -2.057856321334839, + "logits/rejected": -3.1590399742126465, + "logps/chosen": -273.2435607910156, + "logps/rejected": -427.97735595703125, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.49211347103118896, + "rewards/margins": 5.778105735778809, + "rewards/rejected": -6.270219802856445, + "step": 2935 + }, + { + "epoch": 0.46, + "learning_rate": 1.19939530058613e-05, + "logits/chosen": -2.9437694549560547, + "logits/rejected": -2.760786294937134, + "logps/chosen": -119.02877044677734, + "logps/rejected": -204.091552734375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.729968547821045, + "rewards/margins": 5.98933744430542, + "rewards/rejected": -7.719305992126465, + "step": 2936 + }, + { + "epoch": 0.46, + "learning_rate": 1.1993219565330153e-05, + "logits/chosen": -2.719818115234375, + "logits/rejected": -3.0734472274780273, + "logps/chosen": -72.48385620117188, + "logps/rejected": -242.9265594482422, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.308532238006592, + "rewards/margins": 4.4282708168029785, + "rewards/rejected": -6.73680305480957, + "step": 2937 + }, + { + "epoch": 0.46, + "learning_rate": 1.1992486124799005e-05, + "logits/chosen": -3.136791229248047, + "logits/rejected": -3.205970287322998, + "logps/chosen": -171.81204223632812, + "logps/rejected": -61.72642135620117, + "loss": 1.3222, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4705801010131836, + "rewards/margins": 1.3112046718597412, + "rewards/rejected": -3.781784772872925, + "step": 2938 + }, + { + "epoch": 0.46, + "learning_rate": 1.1991752684267856e-05, + "logits/chosen": -2.475809097290039, + "logits/rejected": -3.247466564178467, + "logps/chosen": -361.57952880859375, + "logps/rejected": -677.9163818359375, + "loss": 3.3716, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.349810838699341, + "rewards/margins": 0.11003565788269043, + "rewards/rejected": -3.4598464965820312, + "step": 2939 + }, + { + "epoch": 0.46, + "learning_rate": 1.1991019243736708e-05, + "logits/chosen": -2.5171828269958496, + "logits/rejected": -3.260467529296875, + "logps/chosen": -17.0140323638916, + "logps/rejected": -179.038818359375, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4265232980251312, + "rewards/margins": 4.713671684265137, + "rewards/rejected": -5.140195369720459, + "step": 2940 + }, + { + "epoch": 0.46, + "learning_rate": 1.199028580320556e-05, + "logits/chosen": -3.304814338684082, + "logits/rejected": -3.1929361820220947, + "logps/chosen": -293.46337890625, + "logps/rejected": -106.357177734375, + "loss": 2.0236, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.0841705799102783, + "rewards/margins": -0.3446784019470215, + "rewards/rejected": -1.7394921779632568, + "step": 2941 + }, + { + "epoch": 0.46, + "learning_rate": 1.1989552362674412e-05, + "logits/chosen": -2.50695538520813, + "logits/rejected": -2.9095633029937744, + "logps/chosen": -88.81874084472656, + "logps/rejected": -344.6291809082031, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7490934133529663, + "rewards/margins": 5.147639751434326, + "rewards/rejected": -6.896733283996582, + "step": 2942 + }, + { + "epoch": 0.46, + "learning_rate": 1.1988818922143264e-05, + "logits/chosen": -2.998525381088257, + "logits/rejected": -3.2801673412323, + "logps/chosen": -68.35755920410156, + "logps/rejected": -176.66928100585938, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4431724548339844, + "rewards/margins": 4.125502586364746, + "rewards/rejected": -5.5686750411987305, + "step": 2943 + }, + { + "epoch": 0.46, + "learning_rate": 1.1988085481612116e-05, + "logits/chosen": -2.7390096187591553, + "logits/rejected": -3.188121795654297, + "logps/chosen": -175.3732452392578, + "logps/rejected": -258.83740234375, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6782509088516235, + "rewards/margins": 4.27101993560791, + "rewards/rejected": -5.949270725250244, + "step": 2944 + }, + { + "epoch": 0.46, + "learning_rate": 1.198735204108097e-05, + "logits/chosen": -3.2735257148742676, + "logits/rejected": -3.168891429901123, + "logps/chosen": -156.8602752685547, + "logps/rejected": -129.98846435546875, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5532894134521484, + "rewards/margins": 5.415278911590576, + "rewards/rejected": -5.968568325042725, + "step": 2945 + }, + { + "epoch": 0.46, + "learning_rate": 1.1986618600549821e-05, + "logits/chosen": -2.572901964187622, + "logits/rejected": -3.2277798652648926, + "logps/chosen": -55.82499313354492, + "logps/rejected": -281.98583984375, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9753149151802063, + "rewards/margins": 4.560338973999023, + "rewards/rejected": -5.535654067993164, + "step": 2946 + }, + { + "epoch": 0.46, + "learning_rate": 1.1985885160018673e-05, + "logits/chosen": -2.3490707874298096, + "logits/rejected": -3.0251963138580322, + "logps/chosen": -93.11090087890625, + "logps/rejected": -240.79945373535156, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7776718139648438, + "rewards/margins": 4.394606113433838, + "rewards/rejected": -5.172277927398682, + "step": 2947 + }, + { + "epoch": 0.46, + "learning_rate": 1.1985151719487525e-05, + "logits/chosen": -2.7820167541503906, + "logits/rejected": -3.1999852657318115, + "logps/chosen": -143.60751342773438, + "logps/rejected": -308.27874755859375, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13382263481616974, + "rewards/margins": 5.742704391479492, + "rewards/rejected": -5.876527309417725, + "step": 2948 + }, + { + "epoch": 0.46, + "learning_rate": 1.1984418278956377e-05, + "logits/chosen": -3.009951591491699, + "logits/rejected": -3.2152795791625977, + "logps/chosen": -185.97604370117188, + "logps/rejected": -380.11322021484375, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0325570106506348, + "rewards/margins": 5.639433860778809, + "rewards/rejected": -6.671991348266602, + "step": 2949 + }, + { + "epoch": 0.46, + "learning_rate": 1.1983684838425229e-05, + "logits/chosen": -2.8658392429351807, + "logits/rejected": -3.1947648525238037, + "logps/chosen": -188.61842346191406, + "logps/rejected": -393.4583740234375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4457812309265137, + "rewards/margins": 9.697593688964844, + "rewards/rejected": -11.143375396728516, + "step": 2950 + }, + { + "epoch": 0.46, + "learning_rate": 1.198295139789408e-05, + "logits/chosen": -1.9924789667129517, + "logits/rejected": -2.8036482334136963, + "logps/chosen": -33.474063873291016, + "logps/rejected": -199.95046997070312, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7069216966629028, + "rewards/margins": 5.128360748291016, + "rewards/rejected": -5.835282325744629, + "step": 2951 + }, + { + "epoch": 0.46, + "learning_rate": 1.1982217957362932e-05, + "logits/chosen": -2.3248980045318604, + "logits/rejected": -3.199878692626953, + "logps/chosen": -269.8457946777344, + "logps/rejected": -532.6332397460938, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6282578110694885, + "rewards/margins": 7.90814208984375, + "rewards/rejected": -8.536399841308594, + "step": 2952 + }, + { + "epoch": 0.46, + "learning_rate": 1.1981484516831786e-05, + "logits/chosen": -2.2963945865631104, + "logits/rejected": -3.0865907669067383, + "logps/chosen": -140.72193908691406, + "logps/rejected": -263.3972473144531, + "loss": 0.0154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28690797090530396, + "rewards/margins": 5.288628578186035, + "rewards/rejected": -5.575536727905273, + "step": 2953 + }, + { + "epoch": 0.46, + "learning_rate": 1.1980751076300638e-05, + "logits/chosen": -1.657397985458374, + "logits/rejected": -2.8280344009399414, + "logps/chosen": -62.569313049316406, + "logps/rejected": -282.590087890625, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4592171907424927, + "rewards/margins": 4.7979655265808105, + "rewards/rejected": -6.257183074951172, + "step": 2954 + }, + { + "epoch": 0.46, + "learning_rate": 1.1980017635769492e-05, + "logits/chosen": -2.721015691757202, + "logits/rejected": -3.3516383171081543, + "logps/chosen": -64.13434600830078, + "logps/rejected": -235.70626831054688, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8834744691848755, + "rewards/margins": 4.693421363830566, + "rewards/rejected": -5.576895713806152, + "step": 2955 + }, + { + "epoch": 0.46, + "learning_rate": 1.1979284195238343e-05, + "logits/chosen": -3.2651755809783936, + "logits/rejected": -2.8940343856811523, + "logps/chosen": -369.58135986328125, + "logps/rejected": -266.75848388671875, + "loss": 0.0182, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.166899561882019, + "rewards/margins": 4.031548023223877, + "rewards/rejected": -5.198447227478027, + "step": 2956 + }, + { + "epoch": 0.46, + "learning_rate": 1.1978550754707195e-05, + "logits/chosen": -3.0570125579833984, + "logits/rejected": -2.6506285667419434, + "logps/chosen": -486.6745910644531, + "logps/rejected": -408.8119812011719, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8090152740478516, + "rewards/margins": 6.334393501281738, + "rewards/rejected": -7.14340877532959, + "step": 2957 + }, + { + "epoch": 0.46, + "learning_rate": 1.1977817314176047e-05, + "logits/chosen": -0.9462196230888367, + "logits/rejected": -1.9144537448883057, + "logps/chosen": -311.9364013671875, + "logps/rejected": -462.079345703125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9190307855606079, + "rewards/margins": 6.179293632507324, + "rewards/rejected": -7.098324775695801, + "step": 2958 + }, + { + "epoch": 0.46, + "learning_rate": 1.1977083873644899e-05, + "logits/chosen": -2.9472885131835938, + "logits/rejected": -3.0026848316192627, + "logps/chosen": -105.06818389892578, + "logps/rejected": -193.09889221191406, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.568745732307434, + "rewards/margins": 4.474649906158447, + "rewards/rejected": -6.043395519256592, + "step": 2959 + }, + { + "epoch": 0.46, + "learning_rate": 1.1976350433113751e-05, + "logits/chosen": -3.2319586277008057, + "logits/rejected": -3.2352213859558105, + "logps/chosen": -133.21987915039062, + "logps/rejected": -151.85592651367188, + "loss": 2.0969, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5496604442596436, + "rewards/margins": -0.2362898588180542, + "rewards/rejected": -2.3133704662323, + "step": 2960 + }, + { + "epoch": 0.46, + "learning_rate": 1.1975616992582603e-05, + "logits/chosen": -1.578417181968689, + "logits/rejected": -3.086569309234619, + "logps/chosen": -77.75881958007812, + "logps/rejected": -518.6725463867188, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4203424453735352, + "rewards/margins": 6.603790760040283, + "rewards/rejected": -8.024133682250977, + "step": 2961 + }, + { + "epoch": 0.46, + "learning_rate": 1.1974883552051456e-05, + "logits/chosen": -2.980012893676758, + "logits/rejected": -3.1971518993377686, + "logps/chosen": -402.9237365722656, + "logps/rejected": -562.9320068359375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31009751558303833, + "rewards/margins": 6.985865116119385, + "rewards/rejected": -6.67576789855957, + "step": 2962 + }, + { + "epoch": 0.46, + "learning_rate": 1.1974150111520308e-05, + "logits/chosen": -2.7488276958465576, + "logits/rejected": -3.1258974075317383, + "logps/chosen": -30.67477798461914, + "logps/rejected": -234.44586181640625, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0996638536453247, + "rewards/margins": 5.645928382873535, + "rewards/rejected": -6.74559211730957, + "step": 2963 + }, + { + "epoch": 0.46, + "learning_rate": 1.197341667098916e-05, + "logits/chosen": -3.1536412239074707, + "logits/rejected": -3.182623863220215, + "logps/chosen": -486.2618103027344, + "logps/rejected": -582.5355834960938, + "loss": 0.0344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7182579040527344, + "rewards/margins": 4.411220550537109, + "rewards/rejected": -5.129478454589844, + "step": 2964 + }, + { + "epoch": 0.46, + "learning_rate": 1.1972683230458012e-05, + "logits/chosen": -3.028942346572876, + "logits/rejected": -3.2582197189331055, + "logps/chosen": -84.40070343017578, + "logps/rejected": -208.35818481445312, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8757317066192627, + "rewards/margins": 4.64157772064209, + "rewards/rejected": -5.517309665679932, + "step": 2965 + }, + { + "epoch": 0.46, + "learning_rate": 1.1971949789926864e-05, + "logits/chosen": -3.141061544418335, + "logits/rejected": -2.887956380844116, + "logps/chosen": -847.5712890625, + "logps/rejected": -642.3272705078125, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6172845363616943, + "rewards/margins": 6.088754653930664, + "rewards/rejected": -7.7060394287109375, + "step": 2966 + }, + { + "epoch": 0.46, + "learning_rate": 1.1971216349395716e-05, + "logits/chosen": -3.100801944732666, + "logits/rejected": -2.628903388977051, + "logps/chosen": -222.69253540039062, + "logps/rejected": -307.94354248046875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30333212018013, + "rewards/margins": 6.3619537353515625, + "rewards/rejected": -6.665285587310791, + "step": 2967 + }, + { + "epoch": 0.46, + "learning_rate": 1.1970482908864568e-05, + "logits/chosen": -2.718871831893921, + "logits/rejected": -2.5639126300811768, + "logps/chosen": -234.23507690429688, + "logps/rejected": -265.349609375, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2322998046875, + "rewards/margins": 4.837848663330078, + "rewards/rejected": -5.070148468017578, + "step": 2968 + }, + { + "epoch": 0.46, + "learning_rate": 1.196974946833342e-05, + "logits/chosen": -2.817974328994751, + "logits/rejected": -3.203191041946411, + "logps/chosen": -145.2903289794922, + "logps/rejected": -162.0113525390625, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6937274932861328, + "rewards/margins": 4.664222717285156, + "rewards/rejected": -5.357950210571289, + "step": 2969 + }, + { + "epoch": 0.46, + "learning_rate": 1.1969016027802271e-05, + "logits/chosen": -2.2210235595703125, + "logits/rejected": -2.7511773109436035, + "logps/chosen": -152.6993408203125, + "logps/rejected": -232.86000061035156, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4541633725166321, + "rewards/margins": 7.004396915435791, + "rewards/rejected": -7.458559989929199, + "step": 2970 + }, + { + "epoch": 0.46, + "learning_rate": 1.1968282587271125e-05, + "logits/chosen": -3.2132749557495117, + "logits/rejected": -3.204954147338867, + "logps/chosen": -173.2787628173828, + "logps/rejected": -104.9783706665039, + "loss": 0.4841, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.764609456062317, + "rewards/margins": 1.9725265502929688, + "rewards/rejected": -3.737135887145996, + "step": 2971 + }, + { + "epoch": 0.46, + "learning_rate": 1.1967549146739977e-05, + "logits/chosen": -2.5486886501312256, + "logits/rejected": -3.2094273567199707, + "logps/chosen": -130.05686950683594, + "logps/rejected": -361.0283203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6131279468536377, + "rewards/margins": 9.82420539855957, + "rewards/rejected": -9.211077690124512, + "step": 2972 + }, + { + "epoch": 0.46, + "learning_rate": 1.1966815706208829e-05, + "logits/chosen": -3.246051549911499, + "logits/rejected": -3.0490195751190186, + "logps/chosen": -475.2077331542969, + "logps/rejected": -595.0032958984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15214157104492188, + "rewards/margins": 10.666372299194336, + "rewards/rejected": -10.818513870239258, + "step": 2973 + }, + { + "epoch": 0.46, + "learning_rate": 1.196608226567768e-05, + "logits/chosen": -2.500464916229248, + "logits/rejected": -2.5083889961242676, + "logps/chosen": -548.5037841796875, + "logps/rejected": -399.206787109375, + "loss": 1.0966, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8313651084899902, + "rewards/margins": 1.085524559020996, + "rewards/rejected": -4.916889667510986, + "step": 2974 + }, + { + "epoch": 0.46, + "learning_rate": 1.1965348825146532e-05, + "logits/chosen": -3.077343225479126, + "logits/rejected": -2.495543956756592, + "logps/chosen": -505.93304443359375, + "logps/rejected": -514.0032958984375, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1774682998657227, + "rewards/margins": 5.915499687194824, + "rewards/rejected": -7.092967987060547, + "step": 2975 + }, + { + "epoch": 0.46, + "learning_rate": 1.1964615384615384e-05, + "logits/chosen": -2.507741689682007, + "logits/rejected": -3.1164586544036865, + "logps/chosen": -217.79342651367188, + "logps/rejected": -212.7587127685547, + "loss": 2.7721, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5942134857177734, + "rewards/margins": 1.3419201374053955, + "rewards/rejected": -3.936133861541748, + "step": 2976 + }, + { + "epoch": 0.46, + "learning_rate": 1.1963881944084236e-05, + "logits/chosen": -2.987508535385132, + "logits/rejected": -1.6572749614715576, + "logps/chosen": -318.83563232421875, + "logps/rejected": -172.38613891601562, + "loss": 3.2653, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9568567276000977, + "rewards/margins": -1.0229339599609375, + "rewards/rejected": -2.93392276763916, + "step": 2977 + }, + { + "epoch": 0.46, + "learning_rate": 1.1963148503553088e-05, + "logits/chosen": -2.1152195930480957, + "logits/rejected": -2.65384840965271, + "logps/chosen": -180.836181640625, + "logps/rejected": -452.97039794921875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09478645771741867, + "rewards/margins": 7.133042335510254, + "rewards/rejected": -7.2278289794921875, + "step": 2978 + }, + { + "epoch": 0.46, + "learning_rate": 1.196241506302194e-05, + "logits/chosen": -2.876929521560669, + "logits/rejected": -2.780673027038574, + "logps/chosen": -121.2513198852539, + "logps/rejected": -113.78926086425781, + "loss": 2.8456, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.587536334991455, + "rewards/margins": 0.025797367095947266, + "rewards/rejected": -3.6133337020874023, + "step": 2979 + }, + { + "epoch": 0.46, + "learning_rate": 1.1961681622490794e-05, + "logits/chosen": -2.003657817840576, + "logits/rejected": -2.917189598083496, + "logps/chosen": -142.1981201171875, + "logps/rejected": -337.0990295410156, + "loss": 0.0224, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4173717498779297, + "rewards/margins": 7.423864364624023, + "rewards/rejected": -8.841236114501953, + "step": 2980 + }, + { + "epoch": 0.46, + "learning_rate": 1.1960948181959645e-05, + "logits/chosen": -3.122220993041992, + "logits/rejected": -2.586496591567993, + "logps/chosen": -984.6478881835938, + "logps/rejected": -877.0401611328125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.49977418780326843, + "rewards/margins": 6.949383735656738, + "rewards/rejected": -7.44915771484375, + "step": 2981 + }, + { + "epoch": 0.46, + "learning_rate": 1.1960214741428497e-05, + "logits/chosen": -2.613884210586548, + "logits/rejected": -3.1585636138916016, + "logps/chosen": -39.741146087646484, + "logps/rejected": -353.8938293457031, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6746827363967896, + "rewards/margins": 8.267525672912598, + "rewards/rejected": -8.942208290100098, + "step": 2982 + }, + { + "epoch": 0.46, + "learning_rate": 1.195948130089735e-05, + "logits/chosen": -3.28200626373291, + "logits/rejected": -2.9526925086975098, + "logps/chosen": -220.4728546142578, + "logps/rejected": -453.1268005371094, + "loss": 3.0408, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.8858866691589355, + "rewards/margins": 3.3466360569000244, + "rewards/rejected": -8.232522964477539, + "step": 2983 + }, + { + "epoch": 0.46, + "learning_rate": 1.1958747860366201e-05, + "logits/chosen": -3.0072922706604004, + "logits/rejected": -2.5377137660980225, + "logps/chosen": -250.98194885253906, + "logps/rejected": -282.8661193847656, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8426158428192139, + "rewards/margins": 6.577444553375244, + "rewards/rejected": -7.420060157775879, + "step": 2984 + }, + { + "epoch": 0.46, + "learning_rate": 1.1958014419835053e-05, + "logits/chosen": -2.378798723220825, + "logits/rejected": -3.1630241870880127, + "logps/chosen": -81.83871459960938, + "logps/rejected": -314.4709167480469, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4316072463989258, + "rewards/margins": 6.370368003845215, + "rewards/rejected": -7.801975250244141, + "step": 2985 + }, + { + "epoch": 0.46, + "learning_rate": 1.1957280979303905e-05, + "logits/chosen": -3.301535129547119, + "logits/rejected": -3.0388832092285156, + "logps/chosen": -107.966064453125, + "logps/rejected": -48.951866149902344, + "loss": 3.176, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9694786071777344, + "rewards/margins": -1.6725143194198608, + "rewards/rejected": -2.296964168548584, + "step": 2986 + }, + { + "epoch": 0.46, + "learning_rate": 1.1956547538772758e-05, + "logits/chosen": -2.689650774002075, + "logits/rejected": -3.210475444793701, + "logps/chosen": -212.00137329101562, + "logps/rejected": -230.78460693359375, + "loss": 3.8989, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.287428379058838, + "rewards/margins": -0.4530320167541504, + "rewards/rejected": -3.8343963623046875, + "step": 2987 + }, + { + "epoch": 0.46, + "learning_rate": 1.195581409824161e-05, + "logits/chosen": -3.116337299346924, + "logits/rejected": -2.8497602939605713, + "logps/chosen": -508.6854248046875, + "logps/rejected": -548.5604248046875, + "loss": 2.4877, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7653794288635254, + "rewards/margins": 1.75337815284729, + "rewards/rejected": -5.5187578201293945, + "step": 2988 + }, + { + "epoch": 0.46, + "learning_rate": 1.1955080657710464e-05, + "logits/chosen": -3.029174327850342, + "logits/rejected": -2.9982175827026367, + "logps/chosen": -198.27293395996094, + "logps/rejected": -263.56658935546875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8970661163330078, + "rewards/margins": 6.31046199798584, + "rewards/rejected": -7.207528591156006, + "step": 2989 + }, + { + "epoch": 0.47, + "learning_rate": 1.1954347217179316e-05, + "logits/chosen": -3.2056031227111816, + "logits/rejected": -2.9157233238220215, + "logps/chosen": -134.32955932617188, + "logps/rejected": -151.21542358398438, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4651931822299957, + "rewards/margins": 7.399996280670166, + "rewards/rejected": -7.865189552307129, + "step": 2990 + }, + { + "epoch": 0.47, + "learning_rate": 1.1953613776648168e-05, + "logits/chosen": -3.4457201957702637, + "logits/rejected": -3.2950010299682617, + "logps/chosen": -152.95362854003906, + "logps/rejected": -51.49268341064453, + "loss": 3.7004, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.754638195037842, + "rewards/margins": -3.675265073776245, + "rewards/rejected": -1.0793731212615967, + "step": 2991 + }, + { + "epoch": 0.47, + "learning_rate": 1.195288033611702e-05, + "logits/chosen": -2.198957920074463, + "logits/rejected": -3.006039619445801, + "logps/chosen": -195.69882202148438, + "logps/rejected": -174.04940795898438, + "loss": 1.0101, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4547133445739746, + "rewards/margins": 1.4449516534805298, + "rewards/rejected": -3.899664878845215, + "step": 2992 + }, + { + "epoch": 0.47, + "learning_rate": 1.1952146895585871e-05, + "logits/chosen": -3.2387895584106445, + "logits/rejected": -2.790166139602661, + "logps/chosen": -266.3401794433594, + "logps/rejected": -99.29401397705078, + "loss": 3.3626, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.036336421966553, + "rewards/margins": -3.2568907737731934, + "rewards/rejected": -0.7794456481933594, + "step": 2993 + }, + { + "epoch": 0.47, + "learning_rate": 1.1951413455054723e-05, + "logits/chosen": -3.0555551052093506, + "logits/rejected": -2.881923198699951, + "logps/chosen": -122.72918701171875, + "logps/rejected": -301.6358337402344, + "loss": 3.2169, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.146990776062012, + "rewards/margins": 1.5341579914093018, + "rewards/rejected": -6.681148529052734, + "step": 2994 + }, + { + "epoch": 0.47, + "learning_rate": 1.1950680014523575e-05, + "logits/chosen": -2.1681106090545654, + "logits/rejected": -3.3160266876220703, + "logps/chosen": -297.2969055175781, + "logps/rejected": -605.5829467773438, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1295688152313232, + "rewards/margins": 6.904808044433594, + "rewards/rejected": -8.034377098083496, + "step": 2995 + }, + { + "epoch": 0.47, + "learning_rate": 1.1949946573992427e-05, + "logits/chosen": -3.02375864982605, + "logits/rejected": -2.742868423461914, + "logps/chosen": -261.72235107421875, + "logps/rejected": -211.1558837890625, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25337809324264526, + "rewards/margins": 7.529971122741699, + "rewards/rejected": -7.27659273147583, + "step": 2996 + }, + { + "epoch": 0.47, + "learning_rate": 1.1949213133461279e-05, + "logits/chosen": -2.6415023803710938, + "logits/rejected": -3.007521629333496, + "logps/chosen": -286.7287292480469, + "logps/rejected": -334.1522216796875, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.102887749671936, + "rewards/margins": 6.650211334228516, + "rewards/rejected": -7.75309944152832, + "step": 2997 + }, + { + "epoch": 0.47, + "learning_rate": 1.1948479692930132e-05, + "logits/chosen": -2.6147661209106445, + "logits/rejected": -3.220032215118408, + "logps/chosen": -242.670166015625, + "logps/rejected": -314.5481872558594, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9576133489608765, + "rewards/margins": 5.990603446960449, + "rewards/rejected": -6.948216915130615, + "step": 2998 + }, + { + "epoch": 0.47, + "learning_rate": 1.1947746252398984e-05, + "logits/chosen": -3.0468547344207764, + "logits/rejected": -3.1235764026641846, + "logps/chosen": -58.359676361083984, + "logps/rejected": -198.48837280273438, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1296948194503784, + "rewards/margins": 5.979384899139404, + "rewards/rejected": -7.109079360961914, + "step": 2999 + }, + { + "epoch": 0.47, + "learning_rate": 1.1947012811867836e-05, + "logits/chosen": -1.7753729820251465, + "logits/rejected": -3.1748907566070557, + "logps/chosen": -178.05166625976562, + "logps/rejected": -160.43138122558594, + "loss": 3.5295, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.985499382019043, + "rewards/margins": -1.5992448329925537, + "rewards/rejected": -3.3862547874450684, + "step": 3000 + }, + { + "epoch": 0.47, + "learning_rate": 1.1946279371336688e-05, + "logits/chosen": -1.2495043277740479, + "logits/rejected": -3.1848113536834717, + "logps/chosen": -124.68350219726562, + "logps/rejected": -401.1352844238281, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3003814220428467, + "rewards/margins": 5.548677444458008, + "rewards/rejected": -6.849059104919434, + "step": 3001 + }, + { + "epoch": 0.47, + "learning_rate": 1.194554593080554e-05, + "logits/chosen": -2.64691162109375, + "logits/rejected": -3.0441441535949707, + "logps/chosen": -62.12017822265625, + "logps/rejected": -265.2796936035156, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25128117203712463, + "rewards/margins": 6.238162040710449, + "rewards/rejected": -6.489443302154541, + "step": 3002 + }, + { + "epoch": 0.47, + "learning_rate": 1.1944812490274392e-05, + "logits/chosen": -3.279332399368286, + "logits/rejected": -2.5312302112579346, + "logps/chosen": -569.6543579101562, + "logps/rejected": -191.10243225097656, + "loss": 1.9057, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2056777477264404, + "rewards/margins": 1.1293222904205322, + "rewards/rejected": -3.3350002765655518, + "step": 3003 + }, + { + "epoch": 0.47, + "learning_rate": 1.1944079049743244e-05, + "logits/chosen": -2.5867342948913574, + "logits/rejected": -3.0340137481689453, + "logps/chosen": -119.19336700439453, + "logps/rejected": -285.619384765625, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8988100290298462, + "rewards/margins": 4.72503662109375, + "rewards/rejected": -5.623846530914307, + "step": 3004 + }, + { + "epoch": 0.47, + "learning_rate": 1.1943345609212096e-05, + "logits/chosen": -3.0269572734832764, + "logits/rejected": -3.183255434036255, + "logps/chosen": -346.86004638671875, + "logps/rejected": -523.9002685546875, + "loss": 4.5555, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.510411262512207, + "rewards/margins": -1.0283873081207275, + "rewards/rejected": -3.4820237159729004, + "step": 3005 + }, + { + "epoch": 0.47, + "learning_rate": 1.1942612168680947e-05, + "logits/chosen": -3.2415108680725098, + "logits/rejected": -3.1706764698028564, + "logps/chosen": -142.28012084960938, + "logps/rejected": -62.04772186279297, + "loss": 1.7783, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5544180870056152, + "rewards/margins": -0.22357845306396484, + "rewards/rejected": -3.3308396339416504, + "step": 3006 + }, + { + "epoch": 0.47, + "learning_rate": 1.1941878728149801e-05, + "logits/chosen": -2.7215209007263184, + "logits/rejected": -3.1897640228271484, + "logps/chosen": -30.165424346923828, + "logps/rejected": -145.68722534179688, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.035282850265503, + "rewards/margins": 3.2031607627868652, + "rewards/rejected": -4.238443374633789, + "step": 3007 + }, + { + "epoch": 0.47, + "learning_rate": 1.1941145287618653e-05, + "logits/chosen": -2.1364543437957764, + "logits/rejected": -2.880746364593506, + "logps/chosen": -282.99005126953125, + "logps/rejected": -349.7110900878906, + "loss": 2.3724, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6081862449645996, + "rewards/margins": 0.9961464405059814, + "rewards/rejected": -3.604332685470581, + "step": 3008 + }, + { + "epoch": 0.47, + "learning_rate": 1.1940411847087505e-05, + "logits/chosen": -3.058410167694092, + "logits/rejected": -3.071063995361328, + "logps/chosen": -89.53998565673828, + "logps/rejected": -166.74411010742188, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2504909038543701, + "rewards/margins": 3.657531261444092, + "rewards/rejected": -4.908021926879883, + "step": 3009 + }, + { + "epoch": 0.47, + "learning_rate": 1.1939678406556357e-05, + "logits/chosen": -2.4199085235595703, + "logits/rejected": -2.818065643310547, + "logps/chosen": -192.93490600585938, + "logps/rejected": -290.8934326171875, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3894115686416626, + "rewards/margins": 4.730277061462402, + "rewards/rejected": -5.119688987731934, + "step": 3010 + }, + { + "epoch": 0.47, + "learning_rate": 1.1938944966025209e-05, + "logits/chosen": -2.8341259956359863, + "logits/rejected": -2.2648561000823975, + "logps/chosen": -56.34755325317383, + "logps/rejected": -133.4885711669922, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9924310445785522, + "rewards/margins": 4.613323211669922, + "rewards/rejected": -5.6057538986206055, + "step": 3011 + }, + { + "epoch": 0.47, + "learning_rate": 1.193821152549406e-05, + "logits/chosen": -0.851585865020752, + "logits/rejected": -2.900589942932129, + "logps/chosen": -93.25328826904297, + "logps/rejected": -393.6878356933594, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9652187824249268, + "rewards/margins": 5.7039947509765625, + "rewards/rejected": -6.66921329498291, + "step": 3012 + }, + { + "epoch": 0.47, + "learning_rate": 1.1937478084962912e-05, + "logits/chosen": -3.053199291229248, + "logits/rejected": -3.219151020050049, + "logps/chosen": -437.66119384765625, + "logps/rejected": -585.2244873046875, + "loss": 0.0418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6217209100723267, + "rewards/margins": 5.162411689758301, + "rewards/rejected": -5.784132480621338, + "step": 3013 + }, + { + "epoch": 0.47, + "learning_rate": 1.1936744644431764e-05, + "logits/chosen": -2.268624782562256, + "logits/rejected": -3.14477276802063, + "logps/chosen": -53.270381927490234, + "logps/rejected": -247.14715576171875, + "loss": 0.0522, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0595980882644653, + "rewards/margins": 2.9828662872314453, + "rewards/rejected": -4.042464256286621, + "step": 3014 + }, + { + "epoch": 0.47, + "learning_rate": 1.1936011203900616e-05, + "logits/chosen": -3.095168352127075, + "logits/rejected": -3.3301899433135986, + "logps/chosen": -392.0462341308594, + "logps/rejected": -547.0166015625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8165562152862549, + "rewards/margins": 8.131021499633789, + "rewards/rejected": -8.947577476501465, + "step": 3015 + }, + { + "epoch": 0.47, + "learning_rate": 1.193527776336947e-05, + "logits/chosen": -2.884101629257202, + "logits/rejected": -3.0809032917022705, + "logps/chosen": -501.9078063964844, + "logps/rejected": -547.758544921875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10955429077148438, + "rewards/margins": 7.257758140563965, + "rewards/rejected": -7.367312431335449, + "step": 3016 + }, + { + "epoch": 0.47, + "learning_rate": 1.1934544322838322e-05, + "logits/chosen": -1.192795753479004, + "logits/rejected": -3.2855095863342285, + "logps/chosen": -22.290119171142578, + "logps/rejected": -509.5254211425781, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2663582563400269, + "rewards/margins": 4.03022575378418, + "rewards/rejected": -5.296584129333496, + "step": 3017 + }, + { + "epoch": 0.47, + "learning_rate": 1.1933810882307173e-05, + "logits/chosen": -2.433657646179199, + "logits/rejected": -3.1680095195770264, + "logps/chosen": -75.46149444580078, + "logps/rejected": -232.74755859375, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.514772653579712, + "rewards/margins": 4.313229560852051, + "rewards/rejected": -5.828001976013184, + "step": 3018 + }, + { + "epoch": 0.47, + "learning_rate": 1.1933077441776025e-05, + "logits/chosen": -3.063382387161255, + "logits/rejected": -2.356017827987671, + "logps/chosen": -131.80056762695312, + "logps/rejected": -158.55941772460938, + "loss": 2.3896, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.013651371002197, + "rewards/margins": -1.2764726877212524, + "rewards/rejected": -2.7371785640716553, + "step": 3019 + }, + { + "epoch": 0.47, + "learning_rate": 1.1932344001244877e-05, + "logits/chosen": -3.1700260639190674, + "logits/rejected": -1.8854646682739258, + "logps/chosen": -234.19590759277344, + "logps/rejected": -212.26596069335938, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23701781034469604, + "rewards/margins": 6.253043174743652, + "rewards/rejected": -6.490061283111572, + "step": 3020 + }, + { + "epoch": 0.47, + "learning_rate": 1.193161056071373e-05, + "logits/chosen": -3.2026560306549072, + "logits/rejected": -1.49997878074646, + "logps/chosen": -465.87677001953125, + "logps/rejected": -177.899169921875, + "loss": 3.8089, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4838409423828125, + "rewards/margins": -0.41692042350769043, + "rewards/rejected": -3.066920518875122, + "step": 3021 + }, + { + "epoch": 0.47, + "learning_rate": 1.1930877120182583e-05, + "logits/chosen": -3.221538543701172, + "logits/rejected": -2.8748860359191895, + "logps/chosen": -355.6750183105469, + "logps/rejected": -245.42652893066406, + "loss": 1.776, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6086130142211914, + "rewards/margins": 0.06954836845397949, + "rewards/rejected": -3.678161382675171, + "step": 3022 + }, + { + "epoch": 0.47, + "learning_rate": 1.1930143679651434e-05, + "logits/chosen": -3.269073486328125, + "logits/rejected": -3.0586090087890625, + "logps/chosen": -470.7181091308594, + "logps/rejected": -455.3668212890625, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.104156494140625, + "rewards/margins": 6.204969882965088, + "rewards/rejected": -6.309126377105713, + "step": 3023 + }, + { + "epoch": 0.47, + "learning_rate": 1.1929410239120286e-05, + "logits/chosen": -2.80329966545105, + "logits/rejected": -3.1009747982025146, + "logps/chosen": -47.59825897216797, + "logps/rejected": -132.24639892578125, + "loss": 0.1199, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3920403718948364, + "rewards/margins": 2.6751136779785156, + "rewards/rejected": -4.0671539306640625, + "step": 3024 + }, + { + "epoch": 0.47, + "learning_rate": 1.192867679858914e-05, + "logits/chosen": -1.8664203882217407, + "logits/rejected": -2.8802237510681152, + "logps/chosen": -79.33674621582031, + "logps/rejected": -400.82501220703125, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8185509443283081, + "rewards/margins": 5.617471694946289, + "rewards/rejected": -6.436022758483887, + "step": 3025 + }, + { + "epoch": 0.47, + "learning_rate": 1.1927943358057992e-05, + "logits/chosen": -3.038335084915161, + "logits/rejected": -3.291924238204956, + "logps/chosen": -752.161376953125, + "logps/rejected": -535.2696533203125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7576080560684204, + "rewards/margins": 5.755345344543457, + "rewards/rejected": -4.997737407684326, + "step": 3026 + }, + { + "epoch": 0.47, + "learning_rate": 1.1927209917526844e-05, + "logits/chosen": -2.933955430984497, + "logits/rejected": -3.100769281387329, + "logps/chosen": -157.55355834960938, + "logps/rejected": -293.80462646484375, + "loss": 0.0546, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.499778151512146, + "rewards/margins": 4.3962860107421875, + "rewards/rejected": -5.896064281463623, + "step": 3027 + }, + { + "epoch": 0.47, + "learning_rate": 1.1926476476995696e-05, + "logits/chosen": -3.1997151374816895, + "logits/rejected": -3.111989736557007, + "logps/chosen": -644.3798828125, + "logps/rejected": -416.8740234375, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7796043753623962, + "rewards/margins": 6.020080089569092, + "rewards/rejected": -5.240475654602051, + "step": 3028 + }, + { + "epoch": 0.47, + "learning_rate": 1.1925743036464547e-05, + "logits/chosen": -1.8133939504623413, + "logits/rejected": -3.0042762756347656, + "logps/chosen": -128.27760314941406, + "logps/rejected": -303.4360046386719, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1602373123168945, + "rewards/margins": 7.455498695373535, + "rewards/rejected": -9.61573600769043, + "step": 3029 + }, + { + "epoch": 0.47, + "learning_rate": 1.19250095959334e-05, + "logits/chosen": -2.2516586780548096, + "logits/rejected": -2.8283543586730957, + "logps/chosen": -428.8141784667969, + "logps/rejected": -506.2184143066406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4398418664932251, + "rewards/margins": 10.277349472045898, + "rewards/rejected": -9.837507247924805, + "step": 3030 + }, + { + "epoch": 0.47, + "learning_rate": 1.1924276155402251e-05, + "logits/chosen": -2.8174498081207275, + "logits/rejected": -3.100985050201416, + "logps/chosen": -611.2383422851562, + "logps/rejected": -799.17822265625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7046710848808289, + "rewards/margins": 7.840865135192871, + "rewards/rejected": -7.136194229125977, + "step": 3031 + }, + { + "epoch": 0.47, + "learning_rate": 1.1923542714871103e-05, + "logits/chosen": -1.3830009698867798, + "logits/rejected": -3.0030617713928223, + "logps/chosen": -57.90877151489258, + "logps/rejected": -247.19224548339844, + "loss": 0.1636, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2237164974212646, + "rewards/margins": 3.758962631225586, + "rewards/rejected": -5.98267936706543, + "step": 3032 + }, + { + "epoch": 0.47, + "learning_rate": 1.1922809274339955e-05, + "logits/chosen": -3.141666889190674, + "logits/rejected": -3.1263396739959717, + "logps/chosen": -261.0196838378906, + "logps/rejected": -223.14828491210938, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3356008231639862, + "rewards/margins": 5.829360008239746, + "rewards/rejected": -6.164960861206055, + "step": 3033 + }, + { + "epoch": 0.47, + "learning_rate": 1.1922075833808809e-05, + "logits/chosen": -3.1868209838867188, + "logits/rejected": -1.7722194194793701, + "logps/chosen": -278.94146728515625, + "logps/rejected": -231.88671875, + "loss": 2.9191, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.710397481918335, + "rewards/margins": -1.2920341491699219, + "rewards/rejected": -2.418363094329834, + "step": 3034 + }, + { + "epoch": 0.47, + "learning_rate": 1.192134239327766e-05, + "logits/chosen": -3.340247392654419, + "logits/rejected": -1.5427504777908325, + "logps/chosen": -264.31146240234375, + "logps/rejected": -144.53616333007812, + "loss": 3.5309, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.568615674972534, + "rewards/margins": 0.6679613590240479, + "rewards/rejected": -4.236577033996582, + "step": 3035 + }, + { + "epoch": 0.47, + "learning_rate": 1.1920608952746512e-05, + "logits/chosen": -2.270831346511841, + "logits/rejected": -2.9919841289520264, + "logps/chosen": -62.311126708984375, + "logps/rejected": -225.752197265625, + "loss": 0.0451, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2238893508911133, + "rewards/margins": 3.203742742538452, + "rewards/rejected": -4.4276323318481445, + "step": 3036 + }, + { + "epoch": 0.47, + "learning_rate": 1.1919875512215364e-05, + "logits/chosen": -2.1565873622894287, + "logits/rejected": -3.187145709991455, + "logps/chosen": -257.3539733886719, + "logps/rejected": -440.38385009765625, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15947455167770386, + "rewards/margins": 5.991034507751465, + "rewards/rejected": -5.831560134887695, + "step": 3037 + }, + { + "epoch": 0.47, + "learning_rate": 1.1919142071684216e-05, + "logits/chosen": -3.289686679840088, + "logits/rejected": -3.1244821548461914, + "logps/chosen": -218.7908172607422, + "logps/rejected": -292.2608337402344, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6218792200088501, + "rewards/margins": 7.005280494689941, + "rewards/rejected": -7.627159118652344, + "step": 3038 + }, + { + "epoch": 0.47, + "learning_rate": 1.1918408631153068e-05, + "logits/chosen": -2.8890838623046875, + "logits/rejected": -2.5980961322784424, + "logps/chosen": -128.71205139160156, + "logps/rejected": -237.46878051757812, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5772415399551392, + "rewards/margins": 6.530911445617676, + "rewards/rejected": -7.108152389526367, + "step": 3039 + }, + { + "epoch": 0.47, + "learning_rate": 1.191767519062192e-05, + "logits/chosen": -3.0610311031341553, + "logits/rejected": -2.8420724868774414, + "logps/chosen": -318.14715576171875, + "logps/rejected": -383.0285949707031, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10328064113855362, + "rewards/margins": 4.958987236022949, + "rewards/rejected": -5.062268257141113, + "step": 3040 + }, + { + "epoch": 0.47, + "learning_rate": 1.1916941750090772e-05, + "logits/chosen": -2.3020496368408203, + "logits/rejected": -3.1750426292419434, + "logps/chosen": -278.4447326660156, + "logps/rejected": -437.9943542480469, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4666283130645752, + "rewards/margins": 8.67465591430664, + "rewards/rejected": -10.141283988952637, + "step": 3041 + }, + { + "epoch": 0.47, + "learning_rate": 1.1916208309559624e-05, + "logits/chosen": -1.4031774997711182, + "logits/rejected": -3.051865816116333, + "logps/chosen": -128.8908233642578, + "logps/rejected": -444.5284423828125, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7946376800537109, + "rewards/margins": 6.148334503173828, + "rewards/rejected": -6.942972183227539, + "step": 3042 + }, + { + "epoch": 0.47, + "learning_rate": 1.1915474869028477e-05, + "logits/chosen": -3.1866984367370605, + "logits/rejected": -2.067934989929199, + "logps/chosen": -183.73220825195312, + "logps/rejected": -145.33633422851562, + "loss": 2.7768, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.489626884460449, + "rewards/margins": -0.5616695880889893, + "rewards/rejected": -3.92795729637146, + "step": 3043 + }, + { + "epoch": 0.47, + "learning_rate": 1.1914741428497329e-05, + "logits/chosen": -3.2181899547576904, + "logits/rejected": -3.0138988494873047, + "logps/chosen": -294.4072265625, + "logps/rejected": -343.7076416015625, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0743026733398438, + "rewards/margins": 5.09549617767334, + "rewards/rejected": -6.169798851013184, + "step": 3044 + }, + { + "epoch": 0.47, + "learning_rate": 1.1914007987966181e-05, + "logits/chosen": -2.602273941040039, + "logits/rejected": -3.152106523513794, + "logps/chosen": -302.70367431640625, + "logps/rejected": -592.4759521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5285346508026123, + "rewards/margins": 11.203344345092773, + "rewards/rejected": -10.674809455871582, + "step": 3045 + }, + { + "epoch": 0.47, + "learning_rate": 1.1913274547435033e-05, + "logits/chosen": -3.1189091205596924, + "logits/rejected": -3.073430061340332, + "logps/chosen": -87.2138671875, + "logps/rejected": -141.89544677734375, + "loss": 0.1031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6342732906341553, + "rewards/margins": 4.254967212677002, + "rewards/rejected": -4.889240741729736, + "step": 3046 + }, + { + "epoch": 0.47, + "learning_rate": 1.1912541106903885e-05, + "logits/chosen": -3.0206141471862793, + "logits/rejected": -3.101494312286377, + "logps/chosen": -364.72137451171875, + "logps/rejected": -375.48626708984375, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.77605801820755, + "rewards/margins": 6.242574691772461, + "rewards/rejected": -7.018632888793945, + "step": 3047 + }, + { + "epoch": 0.47, + "learning_rate": 1.1911807666372737e-05, + "logits/chosen": -2.585261344909668, + "logits/rejected": -2.86727237701416, + "logps/chosen": -351.1221923828125, + "logps/rejected": -528.7352294921875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2746805250644684, + "rewards/margins": 7.044342041015625, + "rewards/rejected": -7.3190226554870605, + "step": 3048 + }, + { + "epoch": 0.47, + "learning_rate": 1.1911074225841588e-05, + "logits/chosen": -3.0130767822265625, + "logits/rejected": -2.0947611331939697, + "logps/chosen": -151.59571838378906, + "logps/rejected": -178.73080444335938, + "loss": 1.7174, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8413679599761963, + "rewards/margins": 1.6232273578643799, + "rewards/rejected": -3.464595317840576, + "step": 3049 + }, + { + "epoch": 0.47, + "learning_rate": 1.191034078531044e-05, + "logits/chosen": -2.19035005569458, + "logits/rejected": -2.92451548576355, + "logps/chosen": -262.34503173828125, + "logps/rejected": -361.00543212890625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8423862457275391, + "rewards/margins": 5.745738983154297, + "rewards/rejected": -6.588125705718994, + "step": 3050 + }, + { + "epoch": 0.47, + "learning_rate": 1.1909607344779292e-05, + "logits/chosen": -2.821687698364258, + "logits/rejected": -3.3924856185913086, + "logps/chosen": -42.13317108154297, + "logps/rejected": -218.06515502929688, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7675731778144836, + "rewards/margins": 7.278573989868164, + "rewards/rejected": -8.046147346496582, + "step": 3051 + }, + { + "epoch": 0.47, + "learning_rate": 1.1908873904248146e-05, + "logits/chosen": -2.4271974563598633, + "logits/rejected": -3.1423752307891846, + "logps/chosen": -180.39767456054688, + "logps/rejected": -313.97125244140625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18195682764053345, + "rewards/margins": 5.272920608520508, + "rewards/rejected": -5.454877853393555, + "step": 3052 + }, + { + "epoch": 0.47, + "learning_rate": 1.1908140463716998e-05, + "logits/chosen": -2.0885090827941895, + "logits/rejected": -2.8895909786224365, + "logps/chosen": -208.5915069580078, + "logps/rejected": -335.1789245605469, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2781455516815186, + "rewards/margins": 5.2370781898498535, + "rewards/rejected": -6.515223503112793, + "step": 3053 + }, + { + "epoch": 0.47, + "learning_rate": 1.190740702318585e-05, + "logits/chosen": -2.960444450378418, + "logits/rejected": -2.45611834526062, + "logps/chosen": -262.4115295410156, + "logps/rejected": -349.18109130859375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2039260864257812, + "rewards/margins": 7.078174114227295, + "rewards/rejected": -8.282099723815918, + "step": 3054 + }, + { + "epoch": 0.48, + "learning_rate": 1.1906673582654701e-05, + "logits/chosen": -2.6331839561462402, + "logits/rejected": -3.088594675064087, + "logps/chosen": -755.4324340820312, + "logps/rejected": -678.8034057617188, + "loss": 3.0693, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.259112596511841, + "rewards/margins": 0.20297551155090332, + "rewards/rejected": -3.462088108062744, + "step": 3055 + }, + { + "epoch": 0.48, + "learning_rate": 1.1905940142123555e-05, + "logits/chosen": -3.2501988410949707, + "logits/rejected": -2.0333213806152344, + "logps/chosen": -435.052978515625, + "logps/rejected": -257.0207214355469, + "loss": 0.0287, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45858001708984375, + "rewards/margins": 4.9859490394592285, + "rewards/rejected": -5.4445295333862305, + "step": 3056 + }, + { + "epoch": 0.48, + "learning_rate": 1.1905206701592407e-05, + "logits/chosen": -2.9608418941497803, + "logits/rejected": -2.6951537132263184, + "logps/chosen": -182.16006469726562, + "logps/rejected": -288.33538818359375, + "loss": 0.0808, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.946387529373169, + "rewards/margins": 4.156415939331055, + "rewards/rejected": -5.1028032302856445, + "step": 3057 + }, + { + "epoch": 0.48, + "learning_rate": 1.1904473261061259e-05, + "logits/chosen": -2.9049558639526367, + "logits/rejected": -3.076228380203247, + "logps/chosen": -403.6636962890625, + "logps/rejected": -371.3209228515625, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12291793525218964, + "rewards/margins": 6.09636116027832, + "rewards/rejected": -5.973443508148193, + "step": 3058 + }, + { + "epoch": 0.48, + "learning_rate": 1.190373982053011e-05, + "logits/chosen": -0.9061658978462219, + "logits/rejected": -3.0576891899108887, + "logps/chosen": -29.609420776367188, + "logps/rejected": -275.5611572265625, + "loss": 0.0342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6035864949226379, + "rewards/margins": 3.93445086479187, + "rewards/rejected": -4.538037300109863, + "step": 3059 + }, + { + "epoch": 0.48, + "learning_rate": 1.1903006379998964e-05, + "logits/chosen": -2.814051389694214, + "logits/rejected": -2.7770447731018066, + "logps/chosen": -356.77044677734375, + "logps/rejected": -298.6422119140625, + "loss": 2.2505, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.482787132263184, + "rewards/margins": 0.7596569061279297, + "rewards/rejected": -5.242444038391113, + "step": 3060 + }, + { + "epoch": 0.48, + "learning_rate": 1.1902272939467816e-05, + "logits/chosen": -3.1578903198242188, + "logits/rejected": -3.0210108757019043, + "logps/chosen": -567.60498046875, + "logps/rejected": -596.4361572265625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34591367840766907, + "rewards/margins": 7.0330657958984375, + "rewards/rejected": -7.378979682922363, + "step": 3061 + }, + { + "epoch": 0.48, + "learning_rate": 1.1901539498936668e-05, + "logits/chosen": -2.3419392108917236, + "logits/rejected": -2.9271886348724365, + "logps/chosen": -87.21441650390625, + "logps/rejected": -151.9044952392578, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.592087984085083, + "rewards/margins": 4.797280788421631, + "rewards/rejected": -6.389368534088135, + "step": 3062 + }, + { + "epoch": 0.48, + "learning_rate": 1.190080605840552e-05, + "logits/chosen": -2.2421581745147705, + "logits/rejected": -3.1583645343780518, + "logps/chosen": -368.790771484375, + "logps/rejected": -715.5429077148438, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2577316462993622, + "rewards/margins": 8.822996139526367, + "rewards/rejected": -9.080728530883789, + "step": 3063 + }, + { + "epoch": 0.48, + "learning_rate": 1.1900072617874372e-05, + "logits/chosen": -2.0118296146392822, + "logits/rejected": -2.9958667755126953, + "logps/chosen": -58.016845703125, + "logps/rejected": -278.26177978515625, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8586444854736328, + "rewards/margins": 6.343447685241699, + "rewards/rejected": -7.20209264755249, + "step": 3064 + }, + { + "epoch": 0.48, + "learning_rate": 1.1899339177343224e-05, + "logits/chosen": -2.541421890258789, + "logits/rejected": -2.9451212882995605, + "logps/chosen": -156.91217041015625, + "logps/rejected": -499.6720275878906, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1517333984375, + "rewards/margins": 8.522574424743652, + "rewards/rejected": -9.674307823181152, + "step": 3065 + }, + { + "epoch": 0.48, + "learning_rate": 1.1898605736812075e-05, + "logits/chosen": -2.6459481716156006, + "logits/rejected": -3.2080624103546143, + "logps/chosen": -150.38607788085938, + "logps/rejected": -305.3099060058594, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2431468963623047, + "rewards/margins": 4.583591938018799, + "rewards/rejected": -5.8267388343811035, + "step": 3066 + }, + { + "epoch": 0.48, + "learning_rate": 1.1897872296280927e-05, + "logits/chosen": -2.8187344074249268, + "logits/rejected": -3.088268280029297, + "logps/chosen": -272.5084228515625, + "logps/rejected": -442.6520080566406, + "loss": 0.0479, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.315403699874878, + "rewards/margins": 4.516722202301025, + "rewards/rejected": -5.832125663757324, + "step": 3067 + }, + { + "epoch": 0.48, + "learning_rate": 1.189713885574978e-05, + "logits/chosen": -3.072596549987793, + "logits/rejected": -2.647005796432495, + "logps/chosen": -158.25741577148438, + "logps/rejected": -304.4206237792969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2065437287092209, + "rewards/margins": 10.931635856628418, + "rewards/rejected": -11.138179779052734, + "step": 3068 + }, + { + "epoch": 0.48, + "learning_rate": 1.1896405415218633e-05, + "logits/chosen": -2.980041265487671, + "logits/rejected": -2.0742812156677246, + "logps/chosen": -208.83778381347656, + "logps/rejected": -101.99729919433594, + "loss": 0.3996, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6672649383544922, + "rewards/margins": 2.7875137329101562, + "rewards/rejected": -4.454778671264648, + "step": 3069 + }, + { + "epoch": 0.48, + "learning_rate": 1.1895671974687485e-05, + "logits/chosen": -3.2341816425323486, + "logits/rejected": -3.167790412902832, + "logps/chosen": -117.02635192871094, + "logps/rejected": -145.03237915039062, + "loss": 1.4388, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3423805236816406, + "rewards/margins": 1.984618902206421, + "rewards/rejected": -4.326999187469482, + "step": 3070 + }, + { + "epoch": 0.48, + "learning_rate": 1.1894938534156337e-05, + "logits/chosen": -2.860583543777466, + "logits/rejected": -3.1104536056518555, + "logps/chosen": -28.477184295654297, + "logps/rejected": -185.61978149414062, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5483617782592773, + "rewards/margins": 4.503425121307373, + "rewards/rejected": -6.05178689956665, + "step": 3071 + }, + { + "epoch": 0.48, + "learning_rate": 1.1894205093625188e-05, + "logits/chosen": -3.188117027282715, + "logits/rejected": -0.8497756123542786, + "logps/chosen": -911.9053344726562, + "logps/rejected": -253.9562530517578, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4878453016281128, + "rewards/margins": 4.554246425628662, + "rewards/rejected": -6.0420918464660645, + "step": 3072 + }, + { + "epoch": 0.48, + "learning_rate": 1.189347165309404e-05, + "logits/chosen": -2.1495776176452637, + "logits/rejected": -3.120388984680176, + "logps/chosen": -72.13253784179688, + "logps/rejected": -215.1956787109375, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0769307613372803, + "rewards/margins": 5.279841423034668, + "rewards/rejected": -7.356771945953369, + "step": 3073 + }, + { + "epoch": 0.48, + "learning_rate": 1.1892738212562892e-05, + "logits/chosen": -3.386636972427368, + "logits/rejected": -3.4082601070404053, + "logps/chosen": -105.68053436279297, + "logps/rejected": -76.42041015625, + "loss": 1.7091, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3982958793640137, + "rewards/margins": 0.8160978555679321, + "rewards/rejected": -4.214393615722656, + "step": 3074 + }, + { + "epoch": 0.48, + "learning_rate": 1.1892004772031744e-05, + "logits/chosen": -1.9341604709625244, + "logits/rejected": -3.0590460300445557, + "logps/chosen": -171.20623779296875, + "logps/rejected": -252.81857299804688, + "loss": 0.2116, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8669991493225098, + "rewards/margins": 3.550109386444092, + "rewards/rejected": -5.417108535766602, + "step": 3075 + }, + { + "epoch": 0.48, + "learning_rate": 1.1891271331500596e-05, + "logits/chosen": -1.9404950141906738, + "logits/rejected": -3.1098413467407227, + "logps/chosen": -233.95889282226562, + "logps/rejected": -320.8313903808594, + "loss": 0.2677, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4160300493240356, + "rewards/margins": 4.612617492675781, + "rewards/rejected": -6.0286478996276855, + "step": 3076 + }, + { + "epoch": 0.48, + "learning_rate": 1.1890537890969448e-05, + "logits/chosen": -3.212740898132324, + "logits/rejected": -1.9138834476470947, + "logps/chosen": -230.16140747070312, + "logps/rejected": -59.46312713623047, + "loss": 0.5747, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.508876323699951, + "rewards/margins": 0.513707160949707, + "rewards/rejected": -3.022583484649658, + "step": 3077 + }, + { + "epoch": 0.48, + "learning_rate": 1.1889804450438301e-05, + "logits/chosen": -1.659706950187683, + "logits/rejected": -3.1955366134643555, + "logps/chosen": -62.02519989013672, + "logps/rejected": -278.6595153808594, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1205556392669678, + "rewards/margins": 3.6208622455596924, + "rewards/rejected": -4.74141788482666, + "step": 3078 + }, + { + "epoch": 0.48, + "learning_rate": 1.1889071009907153e-05, + "logits/chosen": -2.8871631622314453, + "logits/rejected": -3.0209598541259766, + "logps/chosen": -592.3723754882812, + "logps/rejected": -561.591552734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41158488392829895, + "rewards/margins": 8.60464096069336, + "rewards/rejected": -8.193056106567383, + "step": 3079 + }, + { + "epoch": 0.48, + "learning_rate": 1.1888337569376005e-05, + "logits/chosen": -3.1172244548797607, + "logits/rejected": -2.7389955520629883, + "logps/chosen": -137.69723510742188, + "logps/rejected": -170.36187744140625, + "loss": 2.1619, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8718698024749756, + "rewards/margins": 1.8562841415405273, + "rewards/rejected": -5.728154182434082, + "step": 3080 + }, + { + "epoch": 0.48, + "learning_rate": 1.1887604128844857e-05, + "logits/chosen": -2.2059452533721924, + "logits/rejected": -3.1132349967956543, + "logps/chosen": -132.27452087402344, + "logps/rejected": -247.02894592285156, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7307769656181335, + "rewards/margins": 6.074156761169434, + "rewards/rejected": -6.804934024810791, + "step": 3081 + }, + { + "epoch": 0.48, + "learning_rate": 1.1886870688313709e-05, + "logits/chosen": -3.2188689708709717, + "logits/rejected": -2.810577630996704, + "logps/chosen": -268.95660400390625, + "logps/rejected": -227.4174346923828, + "loss": 2.4711, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.426260232925415, + "rewards/margins": 0.6446361541748047, + "rewards/rejected": -4.070896148681641, + "step": 3082 + }, + { + "epoch": 0.48, + "learning_rate": 1.188613724778256e-05, + "logits/chosen": -2.644468307495117, + "logits/rejected": -3.0715208053588867, + "logps/chosen": -67.17780303955078, + "logps/rejected": -139.80938720703125, + "loss": 0.0408, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.911948025226593, + "rewards/margins": 3.80318546295166, + "rewards/rejected": -4.7151336669921875, + "step": 3083 + }, + { + "epoch": 0.48, + "learning_rate": 1.1885403807251413e-05, + "logits/chosen": -2.655118942260742, + "logits/rejected": -3.1352133750915527, + "logps/chosen": -105.8067626953125, + "logps/rejected": -195.98471069335938, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7074666023254395, + "rewards/margins": 5.247269153594971, + "rewards/rejected": -5.95473575592041, + "step": 3084 + }, + { + "epoch": 0.48, + "learning_rate": 1.1884670366720264e-05, + "logits/chosen": -3.157435655593872, + "logits/rejected": -2.913888692855835, + "logps/chosen": -180.9607696533203, + "logps/rejected": -231.0634307861328, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2005093097686768, + "rewards/margins": 5.600461006164551, + "rewards/rejected": -6.800970554351807, + "step": 3085 + }, + { + "epoch": 0.48, + "learning_rate": 1.1883936926189116e-05, + "logits/chosen": -3.149583578109741, + "logits/rejected": -2.6682748794555664, + "logps/chosen": -207.19366455078125, + "logps/rejected": -115.65406799316406, + "loss": 1.641, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2447781562805176, + "rewards/margins": 0.62308669090271, + "rewards/rejected": -2.8678646087646484, + "step": 3086 + }, + { + "epoch": 0.48, + "learning_rate": 1.188320348565797e-05, + "logits/chosen": -3.2630443572998047, + "logits/rejected": -2.9226346015930176, + "logps/chosen": -387.3836975097656, + "logps/rejected": -399.377197265625, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5130729675292969, + "rewards/margins": 4.879860877990723, + "rewards/rejected": -4.366787910461426, + "step": 3087 + }, + { + "epoch": 0.48, + "learning_rate": 1.1882470045126822e-05, + "logits/chosen": -2.441509962081909, + "logits/rejected": -3.2295186519622803, + "logps/chosen": -279.0596923828125, + "logps/rejected": -470.5262451171875, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9734363555908203, + "rewards/margins": 5.6685590744018555, + "rewards/rejected": -6.641995429992676, + "step": 3088 + }, + { + "epoch": 0.48, + "learning_rate": 1.1881736604595674e-05, + "logits/chosen": -2.9736225605010986, + "logits/rejected": -3.2909326553344727, + "logps/chosen": -243.53262329101562, + "logps/rejected": -254.18727111816406, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.112396240234375, + "rewards/margins": 7.198426723480225, + "rewards/rejected": -8.310823440551758, + "step": 3089 + }, + { + "epoch": 0.48, + "learning_rate": 1.1881003164064527e-05, + "logits/chosen": -1.7852782011032104, + "logits/rejected": -2.8182528018951416, + "logps/chosen": -68.04684448242188, + "logps/rejected": -343.68426513671875, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2757482528686523, + "rewards/margins": 6.14139461517334, + "rewards/rejected": -7.417142868041992, + "step": 3090 + }, + { + "epoch": 0.48, + "learning_rate": 1.1880269723533379e-05, + "logits/chosen": -2.8938937187194824, + "logits/rejected": -3.205693244934082, + "logps/chosen": -233.96914672851562, + "logps/rejected": -470.11029052734375, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7336681485176086, + "rewards/margins": 7.76325798034668, + "rewards/rejected": -8.496926307678223, + "step": 3091 + }, + { + "epoch": 0.48, + "learning_rate": 1.1879536283002231e-05, + "logits/chosen": -3.097853422164917, + "logits/rejected": -2.7044687271118164, + "logps/chosen": -243.6559600830078, + "logps/rejected": -236.83477783203125, + "loss": 0.1096, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2274338006973267, + "rewards/margins": 3.5949547290802, + "rewards/rejected": -4.822388648986816, + "step": 3092 + }, + { + "epoch": 0.48, + "learning_rate": 1.1878802842471083e-05, + "logits/chosen": -2.9958717823028564, + "logits/rejected": -3.0308516025543213, + "logps/chosen": -740.910400390625, + "logps/rejected": -917.96044921875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.243183970451355, + "rewards/margins": 11.258477210998535, + "rewards/rejected": -12.50166130065918, + "step": 3093 + }, + { + "epoch": 0.48, + "learning_rate": 1.1878069401939935e-05, + "logits/chosen": -2.522637128829956, + "logits/rejected": -2.972205877304077, + "logps/chosen": -255.14047241210938, + "logps/rejected": -527.0443725585938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4237060546875, + "rewards/margins": 9.61846923828125, + "rewards/rejected": -11.04217529296875, + "step": 3094 + }, + { + "epoch": 0.48, + "learning_rate": 1.1877335961408787e-05, + "logits/chosen": -3.038555383682251, + "logits/rejected": -3.1492486000061035, + "logps/chosen": -132.4004669189453, + "logps/rejected": -225.5318145751953, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6458759307861328, + "rewards/margins": 7.283393383026123, + "rewards/rejected": -7.929269790649414, + "step": 3095 + }, + { + "epoch": 0.48, + "learning_rate": 1.187660252087764e-05, + "logits/chosen": -2.5571885108947754, + "logits/rejected": -2.94978928565979, + "logps/chosen": -125.44174194335938, + "logps/rejected": -345.35858154296875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8479890823364258, + "rewards/margins": 9.208829879760742, + "rewards/rejected": -11.056818962097168, + "step": 3096 + }, + { + "epoch": 0.48, + "learning_rate": 1.1875869080346492e-05, + "logits/chosen": -3.1274032592773438, + "logits/rejected": -3.3262245655059814, + "logps/chosen": -36.31743621826172, + "logps/rejected": -115.75009155273438, + "loss": 0.2553, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2718143463134766, + "rewards/margins": 1.3997843265533447, + "rewards/rejected": -3.6715986728668213, + "step": 3097 + }, + { + "epoch": 0.48, + "learning_rate": 1.1875135639815344e-05, + "logits/chosen": -3.1732499599456787, + "logits/rejected": -3.242076873779297, + "logps/chosen": -43.564964294433594, + "logps/rejected": -164.91392517089844, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8200077414512634, + "rewards/margins": 6.367135047912598, + "rewards/rejected": -7.187142848968506, + "step": 3098 + }, + { + "epoch": 0.48, + "learning_rate": 1.1874402199284196e-05, + "logits/chosen": -2.9984400272369385, + "logits/rejected": -1.8954097032546997, + "logps/chosen": -151.8575439453125, + "logps/rejected": -119.33799743652344, + "loss": 3.1161, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.620495796203613, + "rewards/margins": -0.055998802185058594, + "rewards/rejected": -4.564496994018555, + "step": 3099 + }, + { + "epoch": 0.48, + "learning_rate": 1.1873668758753048e-05, + "logits/chosen": -1.905470609664917, + "logits/rejected": -3.24102783203125, + "logps/chosen": -295.1917724609375, + "logps/rejected": -650.9468383789062, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9335038661956787, + "rewards/margins": 8.004958152770996, + "rewards/rejected": -9.938461303710938, + "step": 3100 + }, + { + "epoch": 0.48, + "learning_rate": 1.18729353182219e-05, + "logits/chosen": -2.1679558753967285, + "logits/rejected": -3.016005754470825, + "logps/chosen": -181.05987548828125, + "logps/rejected": -191.6165008544922, + "loss": 1.5842, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.265100479125977, + "rewards/margins": 2.2680506706237793, + "rewards/rejected": -6.533151149749756, + "step": 3101 + }, + { + "epoch": 0.48, + "learning_rate": 1.1872201877690752e-05, + "logits/chosen": -2.253441095352173, + "logits/rejected": -3.2501516342163086, + "logps/chosen": -35.2061653137207, + "logps/rejected": -238.25567626953125, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7538784742355347, + "rewards/margins": 6.064664363861084, + "rewards/rejected": -6.81854248046875, + "step": 3102 + }, + { + "epoch": 0.48, + "learning_rate": 1.1871468437159603e-05, + "logits/chosen": -2.6379899978637695, + "logits/rejected": -3.0285089015960693, + "logps/chosen": -260.3597106933594, + "logps/rejected": -487.9117736816406, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2829220294952393, + "rewards/margins": 7.852237701416016, + "rewards/rejected": -9.135160446166992, + "step": 3103 + }, + { + "epoch": 0.48, + "learning_rate": 1.1870734996628455e-05, + "logits/chosen": -2.6227731704711914, + "logits/rejected": -3.1942379474639893, + "logps/chosen": -152.42913818359375, + "logps/rejected": -471.7076721191406, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.011536121368408, + "rewards/margins": 8.048650741577148, + "rewards/rejected": -11.060186386108398, + "step": 3104 + }, + { + "epoch": 0.48, + "learning_rate": 1.1870001556097309e-05, + "logits/chosen": -3.1670522689819336, + "logits/rejected": -2.833475351333618, + "logps/chosen": -150.4248046875, + "logps/rejected": -57.71855545043945, + "loss": 3.647, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.78255558013916, + "rewards/margins": -3.552515983581543, + "rewards/rejected": -2.230039358139038, + "step": 3105 + }, + { + "epoch": 0.48, + "learning_rate": 1.186926811556616e-05, + "logits/chosen": -3.1750965118408203, + "logits/rejected": -3.1405301094055176, + "logps/chosen": -953.9259033203125, + "logps/rejected": -623.6865234375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6433746218681335, + "rewards/margins": 6.457178115844727, + "rewards/rejected": -5.813803195953369, + "step": 3106 + }, + { + "epoch": 0.48, + "learning_rate": 1.1868534675035013e-05, + "logits/chosen": -3.038010597229004, + "logits/rejected": -2.0964949131011963, + "logps/chosen": -411.5484313964844, + "logps/rejected": -237.44308471679688, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6733688712120056, + "rewards/margins": 5.179261207580566, + "rewards/rejected": -5.852629661560059, + "step": 3107 + }, + { + "epoch": 0.48, + "learning_rate": 1.1867801234503864e-05, + "logits/chosen": -2.5625789165496826, + "logits/rejected": -3.078174114227295, + "logps/chosen": -939.905029296875, + "logps/rejected": -744.5343627929688, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0995941162109375, + "rewards/margins": 7.726955413818359, + "rewards/rejected": -8.826549530029297, + "step": 3108 + }, + { + "epoch": 0.48, + "learning_rate": 1.1867067793972716e-05, + "logits/chosen": -0.46400439739227295, + "logits/rejected": -2.10412335395813, + "logps/chosen": -181.48635864257812, + "logps/rejected": -334.1612548828125, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.760203242301941, + "rewards/margins": 9.40924072265625, + "rewards/rejected": -11.16944408416748, + "step": 3109 + }, + { + "epoch": 0.48, + "learning_rate": 1.1866334353441568e-05, + "logits/chosen": -1.7665663957595825, + "logits/rejected": -2.9536116123199463, + "logps/chosen": -152.56216430664062, + "logps/rejected": -232.09039306640625, + "loss": 2.3326, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.827113389968872, + "rewards/margins": 0.06363129615783691, + "rewards/rejected": -2.890744686126709, + "step": 3110 + }, + { + "epoch": 0.48, + "learning_rate": 1.186560091291042e-05, + "logits/chosen": -2.183476209640503, + "logits/rejected": -3.123405694961548, + "logps/chosen": -353.1868896484375, + "logps/rejected": -484.14483642578125, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22989502549171448, + "rewards/margins": 4.42083740234375, + "rewards/rejected": -4.650732517242432, + "step": 3111 + }, + { + "epoch": 0.48, + "learning_rate": 1.1864867472379272e-05, + "logits/chosen": -3.0566632747650146, + "logits/rejected": -2.0177011489868164, + "logps/chosen": -371.5867919921875, + "logps/rejected": -217.46189880371094, + "loss": 6.5386, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.824659824371338, + "rewards/margins": -6.537158012390137, + "rewards/rejected": -0.2875015139579773, + "step": 3112 + }, + { + "epoch": 0.48, + "learning_rate": 1.1864134031848124e-05, + "logits/chosen": -2.8151743412017822, + "logits/rejected": -2.9935202598571777, + "logps/chosen": -275.8655700683594, + "logps/rejected": -179.79776000976562, + "loss": 2.2068, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3567826747894287, + "rewards/margins": 1.4570982456207275, + "rewards/rejected": -4.813880920410156, + "step": 3113 + }, + { + "epoch": 0.48, + "learning_rate": 1.1863400591316977e-05, + "logits/chosen": -2.1863462924957275, + "logits/rejected": -2.8184611797332764, + "logps/chosen": -156.9717559814453, + "logps/rejected": -181.15948486328125, + "loss": 1.4109, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2048003673553467, + "rewards/margins": 1.8531098365783691, + "rewards/rejected": -4.057910442352295, + "step": 3114 + }, + { + "epoch": 0.48, + "learning_rate": 1.186266715078583e-05, + "logits/chosen": -2.547419309616089, + "logits/rejected": -2.616271734237671, + "logps/chosen": -115.19715881347656, + "logps/rejected": -317.72613525390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.098960518836975, + "rewards/margins": 7.983246803283691, + "rewards/rejected": -9.082207679748535, + "step": 3115 + }, + { + "epoch": 0.48, + "learning_rate": 1.1861933710254681e-05, + "logits/chosen": -1.3264950513839722, + "logits/rejected": -3.0528297424316406, + "logps/chosen": -47.586002349853516, + "logps/rejected": -288.56640625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4448602795600891, + "rewards/margins": 9.062636375427246, + "rewards/rejected": -9.50749683380127, + "step": 3116 + }, + { + "epoch": 0.48, + "learning_rate": 1.1861200269723533e-05, + "logits/chosen": -3.1014857292175293, + "logits/rejected": -1.8236066102981567, + "logps/chosen": -535.3395385742188, + "logps/rejected": -344.720703125, + "loss": 1.823, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6691436767578125, + "rewards/margins": 0.9191772937774658, + "rewards/rejected": -3.5883209705352783, + "step": 3117 + }, + { + "epoch": 0.48, + "learning_rate": 1.1860466829192385e-05, + "logits/chosen": -3.0514705181121826, + "logits/rejected": -3.275474786758423, + "logps/chosen": -322.0389709472656, + "logps/rejected": -582.1718139648438, + "loss": 3.7034, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.593866348266602, + "rewards/margins": -3.0865283012390137, + "rewards/rejected": -1.507338047027588, + "step": 3118 + }, + { + "epoch": 0.49, + "learning_rate": 1.1859733388661237e-05, + "logits/chosen": -3.0870845317840576, + "logits/rejected": -2.365569591522217, + "logps/chosen": -415.1701965332031, + "logps/rejected": -472.082763671875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1282896101474762, + "rewards/margins": 7.169766426086426, + "rewards/rejected": -7.041476249694824, + "step": 3119 + }, + { + "epoch": 0.49, + "learning_rate": 1.1858999948130089e-05, + "logits/chosen": -3.1589574813842773, + "logits/rejected": -2.7208383083343506, + "logps/chosen": -534.814208984375, + "logps/rejected": -453.04150390625, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6966774463653564, + "rewards/margins": 5.853791236877441, + "rewards/rejected": -6.550468444824219, + "step": 3120 + }, + { + "epoch": 0.49, + "learning_rate": 1.185826650759894e-05, + "logits/chosen": -3.230201005935669, + "logits/rejected": -1.9931315183639526, + "logps/chosen": -183.56776428222656, + "logps/rejected": -99.17042541503906, + "loss": 2.7376, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.247730255126953, + "rewards/margins": -1.0554028749465942, + "rewards/rejected": -3.1923272609710693, + "step": 3121 + }, + { + "epoch": 0.49, + "learning_rate": 1.1857533067067794e-05, + "logits/chosen": -2.7252197265625, + "logits/rejected": -2.6899776458740234, + "logps/chosen": -539.9932250976562, + "logps/rejected": -392.8408508300781, + "loss": 0.1445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7235214114189148, + "rewards/margins": 4.3631591796875, + "rewards/rejected": -5.086680889129639, + "step": 3122 + }, + { + "epoch": 0.49, + "learning_rate": 1.1856799626536646e-05, + "logits/chosen": -2.4609570503234863, + "logits/rejected": -3.1183524131774902, + "logps/chosen": -100.77629089355469, + "logps/rejected": -395.1419677734375, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.391187310218811, + "rewards/margins": 5.322144508361816, + "rewards/rejected": -6.713331699371338, + "step": 3123 + }, + { + "epoch": 0.49, + "learning_rate": 1.18560661860055e-05, + "logits/chosen": -1.9885138273239136, + "logits/rejected": -2.9889376163482666, + "logps/chosen": -187.3934326171875, + "logps/rejected": -353.402587890625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8293517827987671, + "rewards/margins": 5.892549514770508, + "rewards/rejected": -6.721901893615723, + "step": 3124 + }, + { + "epoch": 0.49, + "learning_rate": 1.1855332745474351e-05, + "logits/chosen": -3.125276803970337, + "logits/rejected": -3.3502135276794434, + "logps/chosen": -144.9717254638672, + "logps/rejected": -287.06158447265625, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6118744611740112, + "rewards/margins": 4.836922645568848, + "rewards/rejected": -6.448797225952148, + "step": 3125 + }, + { + "epoch": 0.49, + "learning_rate": 1.1854599304943203e-05, + "logits/chosen": -1.9783172607421875, + "logits/rejected": -3.1579301357269287, + "logps/chosen": -205.5470428466797, + "logps/rejected": -364.80517578125, + "loss": 1.9565, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7618439197540283, + "rewards/margins": 0.27281641960144043, + "rewards/rejected": -3.0346603393554688, + "step": 3126 + }, + { + "epoch": 0.49, + "learning_rate": 1.1853865864412055e-05, + "logits/chosen": -2.8940744400024414, + "logits/rejected": -2.589837074279785, + "logps/chosen": -183.0096435546875, + "logps/rejected": -248.85025024414062, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8156934976577759, + "rewards/margins": 6.230097770690918, + "rewards/rejected": -7.0457916259765625, + "step": 3127 + }, + { + "epoch": 0.49, + "learning_rate": 1.1853132423880907e-05, + "logits/chosen": -3.073976993560791, + "logits/rejected": -0.9638481140136719, + "logps/chosen": -293.95208740234375, + "logps/rejected": -119.23002624511719, + "loss": 1.5328, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.433232069015503, + "rewards/margins": 0.21511006355285645, + "rewards/rejected": -3.6483421325683594, + "step": 3128 + }, + { + "epoch": 0.49, + "learning_rate": 1.1852398983349759e-05, + "logits/chosen": -1.9058232307434082, + "logits/rejected": -3.081233501434326, + "logps/chosen": -64.95941162109375, + "logps/rejected": -209.25070190429688, + "loss": 0.0726, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0728684663772583, + "rewards/margins": 2.920973777770996, + "rewards/rejected": -3.993842124938965, + "step": 3129 + }, + { + "epoch": 0.49, + "learning_rate": 1.1851665542818611e-05, + "logits/chosen": -2.809912919998169, + "logits/rejected": -2.910036563873291, + "logps/chosen": -84.79925537109375, + "logps/rejected": -265.2462158203125, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9828178286552429, + "rewards/margins": 7.684223175048828, + "rewards/rejected": -8.667040824890137, + "step": 3130 + }, + { + "epoch": 0.49, + "learning_rate": 1.1850932102287463e-05, + "logits/chosen": -2.48427414894104, + "logits/rejected": -3.2778587341308594, + "logps/chosen": -281.9840087890625, + "logps/rejected": -333.87420654296875, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2539291381835938, + "rewards/margins": 5.8633551597595215, + "rewards/rejected": -7.117284774780273, + "step": 3131 + }, + { + "epoch": 0.49, + "learning_rate": 1.1850198661756316e-05, + "logits/chosen": -2.455752372741699, + "logits/rejected": -2.9506237506866455, + "logps/chosen": -53.48126220703125, + "logps/rejected": -232.0330047607422, + "loss": 0.2093, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8070682287216187, + "rewards/margins": 4.383558750152588, + "rewards/rejected": -6.190627098083496, + "step": 3132 + }, + { + "epoch": 0.49, + "learning_rate": 1.1849465221225168e-05, + "logits/chosen": -1.9669318199157715, + "logits/rejected": -3.297973871231079, + "logps/chosen": -52.135047912597656, + "logps/rejected": -457.679443359375, + "loss": 0.9416, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.537208080291748, + "rewards/margins": 2.0950489044189453, + "rewards/rejected": -4.632256984710693, + "step": 3133 + }, + { + "epoch": 0.49, + "learning_rate": 1.184873178069402e-05, + "logits/chosen": -1.598518967628479, + "logits/rejected": -2.8708078861236572, + "logps/chosen": -94.38386535644531, + "logps/rejected": -309.77471923828125, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2736797332763672, + "rewards/margins": 6.953375816345215, + "rewards/rejected": -8.227055549621582, + "step": 3134 + }, + { + "epoch": 0.49, + "learning_rate": 1.1847998340162872e-05, + "logits/chosen": -1.9876484870910645, + "logits/rejected": -2.9356679916381836, + "logps/chosen": -160.3587646484375, + "logps/rejected": -343.29656982421875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9395702481269836, + "rewards/margins": 6.556389808654785, + "rewards/rejected": -7.495959758758545, + "step": 3135 + }, + { + "epoch": 0.49, + "learning_rate": 1.1847264899631724e-05, + "logits/chosen": -2.446199893951416, + "logits/rejected": -3.2052454948425293, + "logps/chosen": -133.06002807617188, + "logps/rejected": -293.2364807128906, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7106239199638367, + "rewards/margins": 7.384693145751953, + "rewards/rejected": -8.095316886901855, + "step": 3136 + }, + { + "epoch": 0.49, + "learning_rate": 1.1846531459100576e-05, + "logits/chosen": -3.4247639179229736, + "logits/rejected": -3.3416755199432373, + "logps/chosen": -85.88321685791016, + "logps/rejected": -91.60340881347656, + "loss": 2.7634, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.92659068107605, + "rewards/margins": -0.2022719383239746, + "rewards/rejected": -3.724318742752075, + "step": 3137 + }, + { + "epoch": 0.49, + "learning_rate": 1.1845798018569428e-05, + "logits/chosen": -3.2274138927459717, + "logits/rejected": -3.1168954372406006, + "logps/chosen": -383.1755676269531, + "logps/rejected": -210.32078552246094, + "loss": 3.6268, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.826676845550537, + "rewards/margins": -0.6224594116210938, + "rewards/rejected": -4.204217910766602, + "step": 3138 + }, + { + "epoch": 0.49, + "learning_rate": 1.184506457803828e-05, + "logits/chosen": -3.060548782348633, + "logits/rejected": -3.3468735218048096, + "logps/chosen": -43.68889617919922, + "logps/rejected": -158.45346069335938, + "loss": 0.0441, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2649028301239014, + "rewards/margins": 3.535184860229492, + "rewards/rejected": -5.8000874519348145, + "step": 3139 + }, + { + "epoch": 0.49, + "learning_rate": 1.1844331137507131e-05, + "logits/chosen": -2.917642116546631, + "logits/rejected": -2.7653582096099854, + "logps/chosen": -178.79576110839844, + "logps/rejected": -288.3893737792969, + "loss": 3.349, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.177000045776367, + "rewards/margins": -0.8169221878051758, + "rewards/rejected": -4.36007833480835, + "step": 3140 + }, + { + "epoch": 0.49, + "learning_rate": 1.1843597696975985e-05, + "logits/chosen": -2.828195810317993, + "logits/rejected": -3.026642322540283, + "logps/chosen": -110.336181640625, + "logps/rejected": -235.58270263671875, + "loss": 0.0638, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0133132934570312, + "rewards/margins": 5.773115634918213, + "rewards/rejected": -6.786428451538086, + "step": 3141 + }, + { + "epoch": 0.49, + "learning_rate": 1.1842864256444837e-05, + "logits/chosen": -3.215543746948242, + "logits/rejected": -3.4524855613708496, + "logps/chosen": -32.037200927734375, + "logps/rejected": -138.82888793945312, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7881593704223633, + "rewards/margins": 5.94371223449707, + "rewards/rejected": -7.731871604919434, + "step": 3142 + }, + { + "epoch": 0.49, + "learning_rate": 1.1842130815913689e-05, + "logits/chosen": -3.314685344696045, + "logits/rejected": -3.1454970836639404, + "logps/chosen": -93.09529113769531, + "logps/rejected": -190.1016387939453, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2813961505889893, + "rewards/margins": 4.945054054260254, + "rewards/rejected": -6.226449966430664, + "step": 3143 + }, + { + "epoch": 0.49, + "learning_rate": 1.184139737538254e-05, + "logits/chosen": -3.131817102432251, + "logits/rejected": -3.240748643875122, + "logps/chosen": -194.49569702148438, + "logps/rejected": -310.19854736328125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.128583550453186, + "rewards/margins": 6.964943885803223, + "rewards/rejected": -8.093527793884277, + "step": 3144 + }, + { + "epoch": 0.49, + "learning_rate": 1.1840663934851392e-05, + "logits/chosen": -2.3934850692749023, + "logits/rejected": -3.147879123687744, + "logps/chosen": -240.04917907714844, + "logps/rejected": -399.42645263671875, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1792305707931519, + "rewards/margins": 4.206740379333496, + "rewards/rejected": -5.3859710693359375, + "step": 3145 + }, + { + "epoch": 0.49, + "learning_rate": 1.1839930494320244e-05, + "logits/chosen": -3.2051045894622803, + "logits/rejected": -2.269629716873169, + "logps/chosen": -266.2602233886719, + "logps/rejected": -159.17320251464844, + "loss": 1.681, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0928635597229004, + "rewards/margins": 1.2849116325378418, + "rewards/rejected": -4.377775192260742, + "step": 3146 + }, + { + "epoch": 0.49, + "learning_rate": 1.1839197053789096e-05, + "logits/chosen": -3.1323370933532715, + "logits/rejected": -3.2247719764709473, + "logps/chosen": -158.2863006591797, + "logps/rejected": -217.15582275390625, + "loss": 0.0982, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7902778387069702, + "rewards/margins": 3.113356590270996, + "rewards/rejected": -3.9036343097686768, + "step": 3147 + }, + { + "epoch": 0.49, + "learning_rate": 1.1838463613257948e-05, + "logits/chosen": -2.283466339111328, + "logits/rejected": -2.9395711421966553, + "logps/chosen": -100.05157470703125, + "logps/rejected": -227.93804931640625, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0592880249023438, + "rewards/margins": 5.597021579742432, + "rewards/rejected": -7.656309604644775, + "step": 3148 + }, + { + "epoch": 0.49, + "learning_rate": 1.1837730172726802e-05, + "logits/chosen": -2.997492551803589, + "logits/rejected": -3.2880780696868896, + "logps/chosen": -166.9712371826172, + "logps/rejected": -179.6602325439453, + "loss": 0.0381, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2360241413116455, + "rewards/margins": 3.2975637912750244, + "rewards/rejected": -4.53358793258667, + "step": 3149 + }, + { + "epoch": 0.49, + "learning_rate": 1.1836996732195654e-05, + "logits/chosen": -1.6140106916427612, + "logits/rejected": -3.065349817276001, + "logps/chosen": -54.740196228027344, + "logps/rejected": -215.79983520507812, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8654074668884277, + "rewards/margins": 4.255166053771973, + "rewards/rejected": -7.120573043823242, + "step": 3150 + }, + { + "epoch": 0.49, + "learning_rate": 1.1836263291664505e-05, + "logits/chosen": -2.7426609992980957, + "logits/rejected": -3.0998589992523193, + "logps/chosen": -590.6975708007812, + "logps/rejected": -771.5601196289062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.353468656539917, + "rewards/margins": 12.002138137817383, + "rewards/rejected": -13.355606079101562, + "step": 3151 + }, + { + "epoch": 0.49, + "learning_rate": 1.1835529851133357e-05, + "logits/chosen": -2.601078510284424, + "logits/rejected": -1.8808163404464722, + "logps/chosen": -369.584716796875, + "logps/rejected": -320.107421875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1956843137741089, + "rewards/margins": 7.591923713684082, + "rewards/rejected": -8.78760814666748, + "step": 3152 + }, + { + "epoch": 0.49, + "learning_rate": 1.183479641060221e-05, + "logits/chosen": -2.3887791633605957, + "logits/rejected": -3.0608744621276855, + "logps/chosen": -114.78770446777344, + "logps/rejected": -252.8031005859375, + "loss": 0.0206, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5086981058120728, + "rewards/margins": 6.120020866394043, + "rewards/rejected": -7.628718852996826, + "step": 3153 + }, + { + "epoch": 0.49, + "learning_rate": 1.1834062970071061e-05, + "logits/chosen": -1.8381741046905518, + "logits/rejected": -3.1900546550750732, + "logps/chosen": -307.1485595703125, + "logps/rejected": -453.54461669921875, + "loss": 2.6519, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.977189064025879, + "rewards/margins": 0.6909785270690918, + "rewards/rejected": -5.6681671142578125, + "step": 3154 + }, + { + "epoch": 0.49, + "learning_rate": 1.1833329529539913e-05, + "logits/chosen": -3.2685163021087646, + "logits/rejected": -3.0812244415283203, + "logps/chosen": -141.17079162597656, + "logps/rejected": -177.69651794433594, + "loss": 3.5133, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8571557998657227, + "rewards/margins": 0.2344825267791748, + "rewards/rejected": -4.091638565063477, + "step": 3155 + }, + { + "epoch": 0.49, + "learning_rate": 1.1832596089008766e-05, + "logits/chosen": -0.954713761806488, + "logits/rejected": -3.115185022354126, + "logps/chosen": -104.33683776855469, + "logps/rejected": -497.503662109375, + "loss": 0.0452, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0944832563400269, + "rewards/margins": 3.6322946548461914, + "rewards/rejected": -4.726778030395508, + "step": 3156 + }, + { + "epoch": 0.49, + "learning_rate": 1.1831862648477618e-05, + "logits/chosen": -2.2922537326812744, + "logits/rejected": -2.948715925216675, + "logps/chosen": -137.14181518554688, + "logps/rejected": -223.74620056152344, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09925460815429688, + "rewards/margins": 5.782556533813477, + "rewards/rejected": -5.881811141967773, + "step": 3157 + }, + { + "epoch": 0.49, + "learning_rate": 1.1831129207946472e-05, + "logits/chosen": -2.9120564460754395, + "logits/rejected": -3.2034647464752197, + "logps/chosen": -102.27072143554688, + "logps/rejected": -209.8302001953125, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.060931921005249, + "rewards/margins": 4.660863876342773, + "rewards/rejected": -5.721795558929443, + "step": 3158 + }, + { + "epoch": 0.49, + "learning_rate": 1.1830395767415324e-05, + "logits/chosen": -2.9714479446411133, + "logits/rejected": -3.368731737136841, + "logps/chosen": -271.784423828125, + "logps/rejected": -367.08251953125, + "loss": 0.1205, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.703061819076538, + "rewards/margins": 3.001984119415283, + "rewards/rejected": -4.705045700073242, + "step": 3159 + }, + { + "epoch": 0.49, + "learning_rate": 1.1829662326884176e-05, + "logits/chosen": -3.188951253890991, + "logits/rejected": -3.1103968620300293, + "logps/chosen": -178.989501953125, + "logps/rejected": -212.67739868164062, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5725793242454529, + "rewards/margins": 4.114471912384033, + "rewards/rejected": -4.687051296234131, + "step": 3160 + }, + { + "epoch": 0.49, + "learning_rate": 1.1828928886353028e-05, + "logits/chosen": -2.855494976043701, + "logits/rejected": -2.8131275177001953, + "logps/chosen": -48.01154327392578, + "logps/rejected": -167.5256805419922, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5206422805786133, + "rewards/margins": 5.522970199584961, + "rewards/rejected": -7.043612480163574, + "step": 3161 + }, + { + "epoch": 0.49, + "learning_rate": 1.182819544582188e-05, + "logits/chosen": -1.1849629878997803, + "logits/rejected": -3.059269428253174, + "logps/chosen": -138.75340270996094, + "logps/rejected": -487.8401794433594, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0636085271835327, + "rewards/margins": 6.01577091217041, + "rewards/rejected": -7.079379081726074, + "step": 3162 + }, + { + "epoch": 0.49, + "learning_rate": 1.1827462005290731e-05, + "logits/chosen": -2.9510796070098877, + "logits/rejected": -3.3761284351348877, + "logps/chosen": -205.50442504882812, + "logps/rejected": -353.3275451660156, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9448747634887695, + "rewards/margins": 5.182779312133789, + "rewards/rejected": -7.127654075622559, + "step": 3163 + }, + { + "epoch": 0.49, + "learning_rate": 1.1826728564759583e-05, + "logits/chosen": -3.1952927112579346, + "logits/rejected": -3.2531228065490723, + "logps/chosen": -63.075626373291016, + "logps/rejected": -153.2237548828125, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.452180027961731, + "rewards/margins": 5.711200714111328, + "rewards/rejected": -7.1633806228637695, + "step": 3164 + }, + { + "epoch": 0.49, + "learning_rate": 1.1825995124228435e-05, + "logits/chosen": -2.666598081588745, + "logits/rejected": -3.1828505992889404, + "logps/chosen": -167.77682495117188, + "logps/rejected": -304.8891296386719, + "loss": 0.9533, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.3056694269180298, + "rewards/margins": 3.787479877471924, + "rewards/rejected": -5.093149185180664, + "step": 3165 + }, + { + "epoch": 0.49, + "learning_rate": 1.1825261683697287e-05, + "logits/chosen": -2.402489185333252, + "logits/rejected": -2.9793190956115723, + "logps/chosen": -43.54505157470703, + "logps/rejected": -177.95822143554688, + "loss": 0.0989, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3894622325897217, + "rewards/margins": 4.6428093910217285, + "rewards/rejected": -6.032271385192871, + "step": 3166 + }, + { + "epoch": 0.49, + "learning_rate": 1.182452824316614e-05, + "logits/chosen": -3.0986835956573486, + "logits/rejected": -2.9909493923187256, + "logps/chosen": -178.3541259765625, + "logps/rejected": -402.78619384765625, + "loss": 0.0302, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.009744644165039, + "rewards/margins": 6.37990665435791, + "rewards/rejected": -7.389651775360107, + "step": 3167 + }, + { + "epoch": 0.49, + "learning_rate": 1.1823794802634992e-05, + "logits/chosen": -3.2475130558013916, + "logits/rejected": -1.6787859201431274, + "logps/chosen": -468.9987487792969, + "logps/rejected": -185.16595458984375, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7827529907226562, + "rewards/margins": 4.656335830688477, + "rewards/rejected": -5.439088821411133, + "step": 3168 + }, + { + "epoch": 0.49, + "learning_rate": 1.1823061362103844e-05, + "logits/chosen": -3.0558159351348877, + "logits/rejected": -2.9639580249786377, + "logps/chosen": -186.32937622070312, + "logps/rejected": -290.8762512207031, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5711205005645752, + "rewards/margins": 4.854841232299805, + "rewards/rejected": -6.425961494445801, + "step": 3169 + }, + { + "epoch": 0.49, + "learning_rate": 1.1822327921572696e-05, + "logits/chosen": -3.246840000152588, + "logits/rejected": -1.6269495487213135, + "logps/chosen": -326.98760986328125, + "logps/rejected": -120.619384765625, + "loss": 2.8203, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.173074722290039, + "rewards/margins": -2.7323474884033203, + "rewards/rejected": -1.44072687625885, + "step": 3170 + }, + { + "epoch": 0.49, + "learning_rate": 1.1821594481041548e-05, + "logits/chosen": -2.5248541831970215, + "logits/rejected": -2.701557159423828, + "logps/chosen": -89.95808410644531, + "logps/rejected": -238.2133026123047, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4441295862197876, + "rewards/margins": 4.6059064865112305, + "rewards/rejected": -6.0500359535217285, + "step": 3171 + }, + { + "epoch": 0.49, + "learning_rate": 1.18208610405104e-05, + "logits/chosen": -2.91325306892395, + "logits/rejected": -3.3467366695404053, + "logps/chosen": -150.12063598632812, + "logps/rejected": -343.48089599609375, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8426659107208252, + "rewards/margins": 5.976107597351074, + "rewards/rejected": -7.81877326965332, + "step": 3172 + }, + { + "epoch": 0.49, + "learning_rate": 1.1820127599979252e-05, + "logits/chosen": -2.7006895542144775, + "logits/rejected": -3.1480183601379395, + "logps/chosen": -199.5836181640625, + "logps/rejected": -312.0146179199219, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.14825701713562, + "rewards/margins": 7.005867958068848, + "rewards/rejected": -9.154125213623047, + "step": 3173 + }, + { + "epoch": 0.49, + "learning_rate": 1.1819394159448104e-05, + "logits/chosen": -2.841844320297241, + "logits/rejected": -2.753624439239502, + "logps/chosen": -280.36370849609375, + "logps/rejected": -260.76544189453125, + "loss": 3.717, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.946483612060547, + "rewards/margins": 0.23979592323303223, + "rewards/rejected": -6.186279773712158, + "step": 3174 + }, + { + "epoch": 0.49, + "learning_rate": 1.1818660718916956e-05, + "logits/chosen": -3.1208717823028564, + "logits/rejected": -2.298999071121216, + "logps/chosen": -454.5679016113281, + "logps/rejected": -300.4671630859375, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.459832191467285, + "rewards/margins": 5.1963653564453125, + "rewards/rejected": -8.656196594238281, + "step": 3175 + }, + { + "epoch": 0.49, + "learning_rate": 1.1817927278385809e-05, + "logits/chosen": -3.032694101333618, + "logits/rejected": -2.6556589603424072, + "logps/chosen": -305.0270080566406, + "logps/rejected": -373.3389587402344, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3873077630996704, + "rewards/margins": 6.77949857711792, + "rewards/rejected": -8.1668062210083, + "step": 3176 + }, + { + "epoch": 0.49, + "learning_rate": 1.1817193837854661e-05, + "logits/chosen": -1.621990442276001, + "logits/rejected": -3.1416263580322266, + "logps/chosen": -100.62055206298828, + "logps/rejected": -301.2724609375, + "loss": 4.0718, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.791645050048828, + "rewards/margins": -1.4970431327819824, + "rewards/rejected": -4.2946014404296875, + "step": 3177 + }, + { + "epoch": 0.49, + "learning_rate": 1.1816460397323513e-05, + "logits/chosen": -2.6891162395477295, + "logits/rejected": -2.945702075958252, + "logps/chosen": -82.8955078125, + "logps/rejected": -302.75927734375, + "loss": 0.0761, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3843666315078735, + "rewards/margins": 5.186489105224609, + "rewards/rejected": -6.570856094360352, + "step": 3178 + }, + { + "epoch": 0.49, + "learning_rate": 1.1815726956792365e-05, + "logits/chosen": -2.4169349670410156, + "logits/rejected": -3.0374679565429688, + "logps/chosen": -174.88336181640625, + "logps/rejected": -208.91006469726562, + "loss": 2.9806, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.721235275268555, + "rewards/margins": 0.43127989768981934, + "rewards/rejected": -5.152514934539795, + "step": 3179 + }, + { + "epoch": 0.49, + "learning_rate": 1.1814993516261217e-05, + "logits/chosen": -3.0340688228607178, + "logits/rejected": -3.2069685459136963, + "logps/chosen": -36.65681457519531, + "logps/rejected": -141.30653381347656, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.420577049255371, + "rewards/margins": 4.659022331237793, + "rewards/rejected": -7.079599380493164, + "step": 3180 + }, + { + "epoch": 0.49, + "learning_rate": 1.1814260075730069e-05, + "logits/chosen": -3.152836799621582, + "logits/rejected": -2.9120161533355713, + "logps/chosen": -119.41307067871094, + "logps/rejected": -56.47089385986328, + "loss": 3.1118, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.8804755210876465, + "rewards/margins": -1.9969842433929443, + "rewards/rejected": -2.883491277694702, + "step": 3181 + }, + { + "epoch": 0.49, + "learning_rate": 1.181352663519892e-05, + "logits/chosen": -3.2349252700805664, + "logits/rejected": -3.2054097652435303, + "logps/chosen": -127.28056335449219, + "logps/rejected": -136.29217529296875, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6022554636001587, + "rewards/margins": 5.65443229675293, + "rewards/rejected": -6.256687164306641, + "step": 3182 + }, + { + "epoch": 0.5, + "learning_rate": 1.1812793194667772e-05, + "logits/chosen": -1.9017568826675415, + "logits/rejected": -3.215689182281494, + "logps/chosen": -328.71136474609375, + "logps/rejected": -726.2171630859375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11504361033439636, + "rewards/margins": 7.605543613433838, + "rewards/rejected": -7.490499973297119, + "step": 3183 + }, + { + "epoch": 0.5, + "learning_rate": 1.1812059754136624e-05, + "logits/chosen": -2.780562400817871, + "logits/rejected": -2.4479193687438965, + "logps/chosen": -806.75830078125, + "logps/rejected": -378.0303955078125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2761779725551605, + "rewards/margins": 6.135749816894531, + "rewards/rejected": -5.859571933746338, + "step": 3184 + }, + { + "epoch": 0.5, + "learning_rate": 1.1811326313605478e-05, + "logits/chosen": -2.960444211959839, + "logits/rejected": -3.139772415161133, + "logps/chosen": -113.05667114257812, + "logps/rejected": -195.07504272460938, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.545907974243164, + "rewards/margins": 4.735686302185059, + "rewards/rejected": -6.281594276428223, + "step": 3185 + }, + { + "epoch": 0.5, + "learning_rate": 1.181059287307433e-05, + "logits/chosen": -2.852179765701294, + "logits/rejected": -3.0579473972320557, + "logps/chosen": -41.650146484375, + "logps/rejected": -177.16636657714844, + "loss": 0.119, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0829572677612305, + "rewards/margins": 2.4898500442504883, + "rewards/rejected": -4.572807312011719, + "step": 3186 + }, + { + "epoch": 0.5, + "learning_rate": 1.1809859432543181e-05, + "logits/chosen": -2.6087918281555176, + "logits/rejected": -2.915109634399414, + "logps/chosen": -253.75350952148438, + "logps/rejected": -283.55474853515625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.067407488822937, + "rewards/margins": 5.643723011016846, + "rewards/rejected": -6.711130619049072, + "step": 3187 + }, + { + "epoch": 0.5, + "learning_rate": 1.1809125992012033e-05, + "logits/chosen": -2.350904941558838, + "logits/rejected": -3.12483286857605, + "logps/chosen": -42.44670104980469, + "logps/rejected": -114.25785064697266, + "loss": 0.3477, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7492289543151855, + "rewards/margins": 2.703993797302246, + "rewards/rejected": -4.453222751617432, + "step": 3188 + }, + { + "epoch": 0.5, + "learning_rate": 1.1808392551480885e-05, + "logits/chosen": -2.7609941959381104, + "logits/rejected": -3.175778865814209, + "logps/chosen": -773.1296997070312, + "logps/rejected": -688.450439453125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.442770391702652, + "rewards/margins": 7.2517170906066895, + "rewards/rejected": -7.694487571716309, + "step": 3189 + }, + { + "epoch": 0.5, + "learning_rate": 1.1807659110949739e-05, + "logits/chosen": -3.201108455657959, + "logits/rejected": -1.6994348764419556, + "logps/chosen": -681.0361328125, + "logps/rejected": -373.2886962890625, + "loss": 2.3116, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3782103061676025, + "rewards/margins": 0.9324390888214111, + "rewards/rejected": -3.3106493949890137, + "step": 3190 + }, + { + "epoch": 0.5, + "learning_rate": 1.180692567041859e-05, + "logits/chosen": -3.208439826965332, + "logits/rejected": -3.221374750137329, + "logps/chosen": -200.10447692871094, + "logps/rejected": -277.24432373046875, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2568492889404297, + "rewards/margins": 4.363788604736328, + "rewards/rejected": -6.620637893676758, + "step": 3191 + }, + { + "epoch": 0.5, + "learning_rate": 1.1806192229887443e-05, + "logits/chosen": -2.244481086730957, + "logits/rejected": -2.760420799255371, + "logps/chosen": -346.5880432128906, + "logps/rejected": -338.2479248046875, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0619277954101562, + "rewards/margins": 6.27083158493042, + "rewards/rejected": -8.332759857177734, + "step": 3192 + }, + { + "epoch": 0.5, + "learning_rate": 1.1805458789356294e-05, + "logits/chosen": -2.152132034301758, + "logits/rejected": -3.0387959480285645, + "logps/chosen": -105.89259338378906, + "logps/rejected": -381.65362548828125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7050186395645142, + "rewards/margins": 7.439764499664307, + "rewards/rejected": -8.144783020019531, + "step": 3193 + }, + { + "epoch": 0.5, + "learning_rate": 1.1804725348825148e-05, + "logits/chosen": -2.958862543106079, + "logits/rejected": -2.613162040710449, + "logps/chosen": -206.8964080810547, + "logps/rejected": -322.07720947265625, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1174542903900146, + "rewards/margins": 6.667008399963379, + "rewards/rejected": -8.784462928771973, + "step": 3194 + }, + { + "epoch": 0.5, + "learning_rate": 1.1803991908294e-05, + "logits/chosen": -1.863353967666626, + "logits/rejected": -3.1305384635925293, + "logps/chosen": -117.66311645507812, + "logps/rejected": -348.09942626953125, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8527915477752686, + "rewards/margins": 6.111759185791016, + "rewards/rejected": -7.964550971984863, + "step": 3195 + }, + { + "epoch": 0.5, + "learning_rate": 1.1803258467762852e-05, + "logits/chosen": -3.099170207977295, + "logits/rejected": -2.816223621368408, + "logps/chosen": -258.5003662109375, + "logps/rejected": -385.161376953125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8499191999435425, + "rewards/margins": 6.639532566070557, + "rewards/rejected": -8.48945140838623, + "step": 3196 + }, + { + "epoch": 0.5, + "learning_rate": 1.1802525027231704e-05, + "logits/chosen": -2.787140130996704, + "logits/rejected": -2.9324116706848145, + "logps/chosen": -175.27963256835938, + "logps/rejected": -335.51275634765625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.798766016960144, + "rewards/margins": 6.315962791442871, + "rewards/rejected": -8.114728927612305, + "step": 3197 + }, + { + "epoch": 0.5, + "learning_rate": 1.1801791586700556e-05, + "logits/chosen": -2.1299734115600586, + "logits/rejected": -3.060879945755005, + "logps/chosen": -151.74151611328125, + "logps/rejected": -453.614990234375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3756439685821533, + "rewards/margins": 7.9974212646484375, + "rewards/rejected": -9.373065948486328, + "step": 3198 + }, + { + "epoch": 0.5, + "learning_rate": 1.1801058146169407e-05, + "logits/chosen": -3.246527671813965, + "logits/rejected": -3.3773014545440674, + "logps/chosen": -66.82931518554688, + "logps/rejected": -169.93875122070312, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5637331008911133, + "rewards/margins": 4.284516334533691, + "rewards/rejected": -5.848249435424805, + "step": 3199 + }, + { + "epoch": 0.5, + "learning_rate": 1.180032470563826e-05, + "logits/chosen": -2.9907238483428955, + "logits/rejected": -2.1921374797821045, + "logps/chosen": -312.1131591796875, + "logps/rejected": -262.2295837402344, + "loss": 0.239, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0199310779571533, + "rewards/margins": 2.919877052307129, + "rewards/rejected": -4.939807891845703, + "step": 3200 + }, + { + "epoch": 0.5, + "learning_rate": 1.1799591265107111e-05, + "logits/chosen": -2.6686737537384033, + "logits/rejected": -2.957484483718872, + "logps/chosen": -627.932861328125, + "logps/rejected": -440.4591979980469, + "loss": 2.4898, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3577773571014404, + "rewards/margins": 0.9567503929138184, + "rewards/rejected": -4.314527988433838, + "step": 3201 + }, + { + "epoch": 0.5, + "learning_rate": 1.1798857824575963e-05, + "logits/chosen": -2.9024507999420166, + "logits/rejected": -2.3081812858581543, + "logps/chosen": -113.48269653320312, + "logps/rejected": -174.89605712890625, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.030224084854126, + "rewards/margins": 5.158702373504639, + "rewards/rejected": -7.1889262199401855, + "step": 3202 + }, + { + "epoch": 0.5, + "learning_rate": 1.1798124384044817e-05, + "logits/chosen": -1.2424455881118774, + "logits/rejected": -2.7416727542877197, + "logps/chosen": -239.456298828125, + "logps/rejected": -414.54473876953125, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0314109325408936, + "rewards/margins": 6.6109795570373535, + "rewards/rejected": -7.642390251159668, + "step": 3203 + }, + { + "epoch": 0.5, + "learning_rate": 1.1797390943513668e-05, + "logits/chosen": -3.0072710514068604, + "logits/rejected": -3.2080774307250977, + "logps/chosen": -364.8808288574219, + "logps/rejected": -282.74822998046875, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2762088775634766, + "rewards/margins": 3.7075371742248535, + "rewards/rejected": -4.983746528625488, + "step": 3204 + }, + { + "epoch": 0.5, + "learning_rate": 1.179665750298252e-05, + "logits/chosen": -3.0229549407958984, + "logits/rejected": -2.1183857917785645, + "logps/chosen": -305.7286376953125, + "logps/rejected": -240.52146911621094, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9954994320869446, + "rewards/margins": 5.962140083312988, + "rewards/rejected": -6.957639694213867, + "step": 3205 + }, + { + "epoch": 0.5, + "learning_rate": 1.1795924062451372e-05, + "logits/chosen": -2.3264102935791016, + "logits/rejected": -3.180265188217163, + "logps/chosen": -60.499935150146484, + "logps/rejected": -407.67828369140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8392287492752075, + "rewards/margins": 10.877235412597656, + "rewards/rejected": -11.71646499633789, + "step": 3206 + }, + { + "epoch": 0.5, + "learning_rate": 1.1795190621920224e-05, + "logits/chosen": -3.296900510787964, + "logits/rejected": -3.008835792541504, + "logps/chosen": -134.44845581054688, + "logps/rejected": -128.04461669921875, + "loss": 1.2498, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.848681688308716, + "rewards/margins": 1.1773414611816406, + "rewards/rejected": -4.026022911071777, + "step": 3207 + }, + { + "epoch": 0.5, + "learning_rate": 1.1794457181389076e-05, + "logits/chosen": -1.9060128927230835, + "logits/rejected": -3.3517234325408936, + "logps/chosen": -66.67990112304688, + "logps/rejected": -233.70823669433594, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3366214036941528, + "rewards/margins": 5.280041694641113, + "rewards/rejected": -6.616663455963135, + "step": 3208 + }, + { + "epoch": 0.5, + "learning_rate": 1.1793723740857928e-05, + "logits/chosen": -2.5005764961242676, + "logits/rejected": -2.8990490436553955, + "logps/chosen": -182.83206176757812, + "logps/rejected": -181.06234741210938, + "loss": 3.6412, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.899357795715332, + "rewards/margins": -0.3547515869140625, + "rewards/rejected": -4.544605731964111, + "step": 3209 + }, + { + "epoch": 0.5, + "learning_rate": 1.179299030032678e-05, + "logits/chosen": -1.934966802597046, + "logits/rejected": -3.0561299324035645, + "logps/chosen": -69.91496276855469, + "logps/rejected": -445.36328125, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4587541818618774, + "rewards/margins": 6.476072311401367, + "rewards/rejected": -7.934826850891113, + "step": 3210 + }, + { + "epoch": 0.5, + "learning_rate": 1.1792256859795632e-05, + "logits/chosen": -3.2510976791381836, + "logits/rejected": -2.7229418754577637, + "logps/chosen": -329.9690246582031, + "logps/rejected": -299.6304931640625, + "loss": 0.0523, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5994865894317627, + "rewards/margins": 3.315809965133667, + "rewards/rejected": -4.91529655456543, + "step": 3211 + }, + { + "epoch": 0.5, + "learning_rate": 1.1791523419264485e-05, + "logits/chosen": -2.877910852432251, + "logits/rejected": -3.219238519668579, + "logps/chosen": -346.9312744140625, + "logps/rejected": -333.48016357421875, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7873579263687134, + "rewards/margins": 5.020471572875977, + "rewards/rejected": -5.8078293800354, + "step": 3212 + }, + { + "epoch": 0.5, + "learning_rate": 1.1790789978733337e-05, + "logits/chosen": -3.1886630058288574, + "logits/rejected": -2.4886958599090576, + "logps/chosen": -545.7255249023438, + "logps/rejected": -302.55633544921875, + "loss": 10.0316, + "rewards/accuracies": 0.0, + "rewards/chosen": -10.59532356262207, + "rewards/margins": -10.031574249267578, + "rewards/rejected": -0.5637496709823608, + "step": 3213 + }, + { + "epoch": 0.5, + "learning_rate": 1.1790056538202189e-05, + "logits/chosen": -3.0127673149108887, + "logits/rejected": -2.536478281021118, + "logps/chosen": -128.11509704589844, + "logps/rejected": -404.73687744140625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.361669898033142, + "rewards/margins": 11.04094123840332, + "rewards/rejected": -12.402610778808594, + "step": 3214 + }, + { + "epoch": 0.5, + "learning_rate": 1.1789323097671041e-05, + "logits/chosen": -3.145240545272827, + "logits/rejected": -3.0749051570892334, + "logps/chosen": -399.8978271484375, + "logps/rejected": -368.9870910644531, + "loss": 0.4061, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.989772915840149, + "rewards/margins": 3.5120763778686523, + "rewards/rejected": -5.501849174499512, + "step": 3215 + }, + { + "epoch": 0.5, + "learning_rate": 1.1788589657139893e-05, + "logits/chosen": -1.6120705604553223, + "logits/rejected": -2.8793728351593018, + "logps/chosen": -238.91864013671875, + "logps/rejected": -415.9508056640625, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6132164001464844, + "rewards/margins": 5.800797462463379, + "rewards/rejected": -7.414013862609863, + "step": 3216 + }, + { + "epoch": 0.5, + "learning_rate": 1.1787856216608745e-05, + "logits/chosen": -2.145732879638672, + "logits/rejected": -2.777994155883789, + "logps/chosen": -132.8789825439453, + "logps/rejected": -299.2121276855469, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7593331336975098, + "rewards/margins": 6.232272148132324, + "rewards/rejected": -7.991605281829834, + "step": 3217 + }, + { + "epoch": 0.5, + "learning_rate": 1.1787122776077596e-05, + "logits/chosen": -2.1674792766571045, + "logits/rejected": -3.2042813301086426, + "logps/chosen": -380.9759216308594, + "logps/rejected": -547.826416015625, + "loss": 0.049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7268218994140625, + "rewards/margins": 5.0529890060424805, + "rewards/rejected": -5.779811382293701, + "step": 3218 + }, + { + "epoch": 0.5, + "learning_rate": 1.1786389335546448e-05, + "logits/chosen": -2.817680597305298, + "logits/rejected": -2.893763542175293, + "logps/chosen": -204.5133056640625, + "logps/rejected": -294.2357177734375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9252331256866455, + "rewards/margins": 8.298515319824219, + "rewards/rejected": -10.223748207092285, + "step": 3219 + }, + { + "epoch": 0.5, + "learning_rate": 1.17856558950153e-05, + "logits/chosen": -2.3934221267700195, + "logits/rejected": -2.918020725250244, + "logps/chosen": -137.72933959960938, + "logps/rejected": -288.76806640625, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.448099970817566, + "rewards/margins": 5.136038780212402, + "rewards/rejected": -6.584138870239258, + "step": 3220 + }, + { + "epoch": 0.5, + "learning_rate": 1.1784922454484154e-05, + "logits/chosen": -2.2943637371063232, + "logits/rejected": -3.1777701377868652, + "logps/chosen": -30.841691970825195, + "logps/rejected": -239.35755920410156, + "loss": 0.0468, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8117419481277466, + "rewards/margins": 3.278179168701172, + "rewards/rejected": -5.089920997619629, + "step": 3221 + }, + { + "epoch": 0.5, + "learning_rate": 1.1784189013953006e-05, + "logits/chosen": -2.9199068546295166, + "logits/rejected": -2.021902561187744, + "logps/chosen": -324.0100402832031, + "logps/rejected": -114.52172088623047, + "loss": 3.1289, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.462206840515137, + "rewards/margins": -0.15508651733398438, + "rewards/rejected": -5.307120323181152, + "step": 3222 + }, + { + "epoch": 0.5, + "learning_rate": 1.1783455573421858e-05, + "logits/chosen": -3.196530818939209, + "logits/rejected": -3.4964091777801514, + "logps/chosen": -34.98423767089844, + "logps/rejected": -166.13787841796875, + "loss": 0.0933, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8958945274353027, + "rewards/margins": 2.7174272537231445, + "rewards/rejected": -4.613321781158447, + "step": 3223 + }, + { + "epoch": 0.5, + "learning_rate": 1.1782722132890711e-05, + "logits/chosen": -1.5822314023971558, + "logits/rejected": -2.910123825073242, + "logps/chosen": -82.10607147216797, + "logps/rejected": -457.373046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0557260513305664, + "rewards/margins": 10.489496231079102, + "rewards/rejected": -12.545223236083984, + "step": 3224 + }, + { + "epoch": 0.5, + "learning_rate": 1.1781988692359563e-05, + "logits/chosen": -3.0366413593292236, + "logits/rejected": -2.4734134674072266, + "logps/chosen": -126.30830383300781, + "logps/rejected": -356.77069091796875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9229823350906372, + "rewards/margins": 6.248337268829346, + "rewards/rejected": -8.171319961547852, + "step": 3225 + }, + { + "epoch": 0.5, + "learning_rate": 1.1781255251828415e-05, + "logits/chosen": -2.8400354385375977, + "logits/rejected": -2.9270622730255127, + "logps/chosen": -234.07879638671875, + "logps/rejected": -360.09014892578125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7046806812286377, + "rewards/margins": 6.628352642059326, + "rewards/rejected": -9.333033561706543, + "step": 3226 + }, + { + "epoch": 0.5, + "learning_rate": 1.1780521811297267e-05, + "logits/chosen": -2.5123066902160645, + "logits/rejected": -3.0320029258728027, + "logps/chosen": -501.1829833984375, + "logps/rejected": -564.150634765625, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24013370275497437, + "rewards/margins": 7.442303657531738, + "rewards/rejected": -7.202169895172119, + "step": 3227 + }, + { + "epoch": 0.5, + "learning_rate": 1.1779788370766119e-05, + "logits/chosen": -2.57808780670166, + "logits/rejected": -3.050691604614258, + "logps/chosen": -22.429168701171875, + "logps/rejected": -141.34506225585938, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.079455018043518, + "rewards/margins": 5.216676712036133, + "rewards/rejected": -6.2961320877075195, + "step": 3228 + }, + { + "epoch": 0.5, + "learning_rate": 1.177905493023497e-05, + "logits/chosen": -2.3230085372924805, + "logits/rejected": -3.1041624546051025, + "logps/chosen": -473.29730224609375, + "logps/rejected": -366.1968994140625, + "loss": 5.0603, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.130258560180664, + "rewards/margins": -2.398289680480957, + "rewards/rejected": -4.731968879699707, + "step": 3229 + }, + { + "epoch": 0.5, + "learning_rate": 1.1778321489703824e-05, + "logits/chosen": -2.7175843715667725, + "logits/rejected": -3.2517220973968506, + "logps/chosen": -147.50001525878906, + "logps/rejected": -291.84735107421875, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8363097310066223, + "rewards/margins": 5.132254600524902, + "rewards/rejected": -5.968564033508301, + "step": 3230 + }, + { + "epoch": 0.5, + "learning_rate": 1.1777588049172676e-05, + "logits/chosen": -1.9845681190490723, + "logits/rejected": -2.76947021484375, + "logps/chosen": -103.34025573730469, + "logps/rejected": -217.95428466796875, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9494662284851074, + "rewards/margins": 5.805866241455078, + "rewards/rejected": -7.755332946777344, + "step": 3231 + }, + { + "epoch": 0.5, + "learning_rate": 1.1776854608641528e-05, + "logits/chosen": -3.0521018505096436, + "logits/rejected": -1.8904311656951904, + "logps/chosen": -398.60833740234375, + "logps/rejected": -533.2927856445312, + "loss": 4.7136, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.989826679229736, + "rewards/margins": 0.8028140068054199, + "rewards/rejected": -6.792640686035156, + "step": 3232 + }, + { + "epoch": 0.5, + "learning_rate": 1.177612116811038e-05, + "logits/chosen": -3.0937418937683105, + "logits/rejected": -1.9857378005981445, + "logps/chosen": -380.5978088378906, + "logps/rejected": -301.804443359375, + "loss": 4.2641, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.128517150878906, + "rewards/margins": -1.7218265533447266, + "rewards/rejected": -3.406690835952759, + "step": 3233 + }, + { + "epoch": 0.5, + "learning_rate": 1.1775387727579232e-05, + "logits/chosen": -2.3386390209198, + "logits/rejected": -3.042781352996826, + "logps/chosen": -109.4202880859375, + "logps/rejected": -445.6130065917969, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.595719337463379, + "rewards/margins": 7.127333641052246, + "rewards/rejected": -10.723052978515625, + "step": 3234 + }, + { + "epoch": 0.5, + "learning_rate": 1.1774654287048084e-05, + "logits/chosen": -3.2244038581848145, + "logits/rejected": -2.7262632846832275, + "logps/chosen": -250.90023803710938, + "logps/rejected": -192.48789978027344, + "loss": 1.632, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.794928789138794, + "rewards/margins": 1.8115956783294678, + "rewards/rejected": -4.606524467468262, + "step": 3235 + }, + { + "epoch": 0.5, + "learning_rate": 1.1773920846516935e-05, + "logits/chosen": -2.7609493732452393, + "logits/rejected": -3.1440699100494385, + "logps/chosen": -54.44126892089844, + "logps/rejected": -146.4291534423828, + "loss": 0.0321, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2675533294677734, + "rewards/margins": 4.016219139099121, + "rewards/rejected": -6.2837724685668945, + "step": 3236 + }, + { + "epoch": 0.5, + "learning_rate": 1.1773187405985787e-05, + "logits/chosen": -3.2249488830566406, + "logits/rejected": -1.9578832387924194, + "logps/chosen": -692.24609375, + "logps/rejected": -515.627197265625, + "loss": 2.5658, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.484637498855591, + "rewards/margins": 1.6308989524841309, + "rewards/rejected": -4.115536689758301, + "step": 3237 + }, + { + "epoch": 0.5, + "learning_rate": 1.1772453965454639e-05, + "logits/chosen": -3.1341514587402344, + "logits/rejected": -3.138749122619629, + "logps/chosen": -99.98554992675781, + "logps/rejected": -222.06863403320312, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.38759344816207886, + "rewards/margins": 4.987092971801758, + "rewards/rejected": -5.374686241149902, + "step": 3238 + }, + { + "epoch": 0.5, + "learning_rate": 1.1771720524923493e-05, + "logits/chosen": -3.0526771545410156, + "logits/rejected": -3.1187050342559814, + "logps/chosen": -100.0693588256836, + "logps/rejected": -172.5938262939453, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6948784589767456, + "rewards/margins": 5.837964057922363, + "rewards/rejected": -6.532842636108398, + "step": 3239 + }, + { + "epoch": 0.5, + "learning_rate": 1.1770987084392345e-05, + "logits/chosen": -2.9457297325134277, + "logits/rejected": -2.9518699645996094, + "logps/chosen": -111.444091796875, + "logps/rejected": -418.30718994140625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.056734085083008, + "rewards/margins": 7.3591461181640625, + "rewards/rejected": -9.41588020324707, + "step": 3240 + }, + { + "epoch": 0.5, + "learning_rate": 1.1770253643861196e-05, + "logits/chosen": -3.139890193939209, + "logits/rejected": -3.008408308029175, + "logps/chosen": -807.826416015625, + "logps/rejected": -314.75274658203125, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.35162353515625, + "rewards/margins": 4.260128021240234, + "rewards/rejected": -6.611751556396484, + "step": 3241 + }, + { + "epoch": 0.5, + "learning_rate": 1.1769520203330048e-05, + "logits/chosen": -2.8574347496032715, + "logits/rejected": -3.251490831375122, + "logps/chosen": -303.1086730957031, + "logps/rejected": -472.6453857421875, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6788352727890015, + "rewards/margins": 6.969756126403809, + "rewards/rejected": -8.648591995239258, + "step": 3242 + }, + { + "epoch": 0.5, + "learning_rate": 1.17687867627989e-05, + "logits/chosen": -2.9155311584472656, + "logits/rejected": -2.811046838760376, + "logps/chosen": -248.51113891601562, + "logps/rejected": -567.4598999023438, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3842308521270752, + "rewards/margins": 8.58842945098877, + "rewards/rejected": -9.972660064697266, + "step": 3243 + }, + { + "epoch": 0.5, + "learning_rate": 1.1768053322267752e-05, + "logits/chosen": -1.3472380638122559, + "logits/rejected": -2.9886314868927, + "logps/chosen": -134.4991455078125, + "logps/rejected": -406.3888854980469, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9082748293876648, + "rewards/margins": 7.098048210144043, + "rewards/rejected": -8.006322860717773, + "step": 3244 + }, + { + "epoch": 0.5, + "learning_rate": 1.1767319881736604e-05, + "logits/chosen": -2.7351839542388916, + "logits/rejected": -3.0434069633483887, + "logps/chosen": -99.71310424804688, + "logps/rejected": -230.4027557373047, + "loss": 0.0845, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.83506441116333, + "rewards/margins": 2.7166638374328613, + "rewards/rejected": -4.551728248596191, + "step": 3245 + }, + { + "epoch": 0.5, + "learning_rate": 1.1766586441205456e-05, + "logits/chosen": -3.2027666568756104, + "logits/rejected": -2.9319264888763428, + "logps/chosen": -435.332763671875, + "logps/rejected": -493.33111572265625, + "loss": 2.3199, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5499680042266846, + "rewards/margins": 1.9257912635803223, + "rewards/rejected": -5.475759506225586, + "step": 3246 + }, + { + "epoch": 0.5, + "learning_rate": 1.176585300067431e-05, + "logits/chosen": -3.1326866149902344, + "logits/rejected": -3.2181456089019775, + "logps/chosen": -372.886962890625, + "logps/rejected": -417.7008361816406, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8852508068084717, + "rewards/margins": 5.782196044921875, + "rewards/rejected": -7.667447090148926, + "step": 3247 + }, + { + "epoch": 0.51, + "learning_rate": 1.1765119560143161e-05, + "logits/chosen": -1.5691086053848267, + "logits/rejected": -2.8768017292022705, + "logps/chosen": -72.62907409667969, + "logps/rejected": -173.04315185546875, + "loss": 0.0261, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3166816234588623, + "rewards/margins": 4.478819847106934, + "rewards/rejected": -5.795501232147217, + "step": 3248 + }, + { + "epoch": 0.51, + "learning_rate": 1.1764386119612013e-05, + "logits/chosen": -1.8710641860961914, + "logits/rejected": -2.886854410171509, + "logps/chosen": -107.86579132080078, + "logps/rejected": -302.9901428222656, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4241100549697876, + "rewards/margins": 6.909850120544434, + "rewards/rejected": -8.33396053314209, + "step": 3249 + }, + { + "epoch": 0.51, + "learning_rate": 1.1763652679080865e-05, + "logits/chosen": -3.0874173641204834, + "logits/rejected": -2.5863797664642334, + "logps/chosen": -306.80682373046875, + "logps/rejected": -194.83740234375, + "loss": 3.2962, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.313358783721924, + "rewards/margins": -0.5342047214508057, + "rewards/rejected": -3.7791543006896973, + "step": 3250 + }, + { + "epoch": 0.51, + "learning_rate": 1.1762919238549717e-05, + "logits/chosen": -2.8143393993377686, + "logits/rejected": -3.3877763748168945, + "logps/chosen": -19.942073822021484, + "logps/rejected": -197.2999267578125, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9913794994354248, + "rewards/margins": 5.496969223022461, + "rewards/rejected": -6.488348484039307, + "step": 3251 + }, + { + "epoch": 0.51, + "learning_rate": 1.1762185798018569e-05, + "logits/chosen": -3.2027227878570557, + "logits/rejected": -2.2253220081329346, + "logps/chosen": -664.987060546875, + "logps/rejected": -264.6591796875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2501968145370483, + "rewards/margins": 5.869638919830322, + "rewards/rejected": -4.619441986083984, + "step": 3252 + }, + { + "epoch": 0.51, + "learning_rate": 1.176145235748742e-05, + "logits/chosen": -2.000979423522949, + "logits/rejected": -2.8774282932281494, + "logps/chosen": -145.30435180664062, + "logps/rejected": -356.95220947265625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4105935096740723, + "rewards/margins": 7.133134841918945, + "rewards/rejected": -8.54372787475586, + "step": 3253 + }, + { + "epoch": 0.51, + "learning_rate": 1.1760718916956273e-05, + "logits/chosen": -3.063209056854248, + "logits/rejected": -3.0398035049438477, + "logps/chosen": -450.8301086425781, + "logps/rejected": -883.806396484375, + "loss": 3.6957, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.1938252449035645, + "rewards/margins": 2.6793622970581055, + "rewards/rejected": -6.87318754196167, + "step": 3254 + }, + { + "epoch": 0.51, + "learning_rate": 1.1759985476425124e-05, + "logits/chosen": -2.823730707168579, + "logits/rejected": -3.1464834213256836, + "logps/chosen": -39.92041778564453, + "logps/rejected": -173.43740844726562, + "loss": 0.0301, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8967490196228027, + "rewards/margins": 4.24893045425415, + "rewards/rejected": -6.145679473876953, + "step": 3255 + }, + { + "epoch": 0.51, + "learning_rate": 1.1759252035893978e-05, + "logits/chosen": -2.8014352321624756, + "logits/rejected": -3.367406129837036, + "logps/chosen": -619.7991943359375, + "logps/rejected": -555.9259033203125, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8302467465400696, + "rewards/margins": 4.058322906494141, + "rewards/rejected": -4.8885698318481445, + "step": 3256 + }, + { + "epoch": 0.51, + "learning_rate": 1.175851859536283e-05, + "logits/chosen": -1.6391048431396484, + "logits/rejected": -3.075427532196045, + "logps/chosen": -109.60785675048828, + "logps/rejected": -390.25830078125, + "loss": 0.0894, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5758404731750488, + "rewards/margins": 4.458404541015625, + "rewards/rejected": -6.034245491027832, + "step": 3257 + }, + { + "epoch": 0.51, + "learning_rate": 1.1757785154831683e-05, + "logits/chosen": -2.7878775596618652, + "logits/rejected": -3.2148776054382324, + "logps/chosen": -55.210166931152344, + "logps/rejected": -213.33944702148438, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1663891077041626, + "rewards/margins": 4.740221977233887, + "rewards/rejected": -5.90661096572876, + "step": 3258 + }, + { + "epoch": 0.51, + "learning_rate": 1.1757051714300535e-05, + "logits/chosen": -1.9712331295013428, + "logits/rejected": -3.126633644104004, + "logps/chosen": -164.09991455078125, + "logps/rejected": -394.76519775390625, + "loss": 1.9702, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9163880348205566, + "rewards/margins": 1.479383945465088, + "rewards/rejected": -5.3957719802856445, + "step": 3259 + }, + { + "epoch": 0.51, + "learning_rate": 1.1756318273769387e-05, + "logits/chosen": -1.5522849559783936, + "logits/rejected": -2.5029280185699463, + "logps/chosen": -166.73056030273438, + "logps/rejected": -328.4947509765625, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.577752709388733, + "rewards/margins": 5.816100120544434, + "rewards/rejected": -7.393853187561035, + "step": 3260 + }, + { + "epoch": 0.51, + "learning_rate": 1.1755584833238239e-05, + "logits/chosen": -2.568307399749756, + "logits/rejected": -3.012247085571289, + "logps/chosen": -387.76776123046875, + "logps/rejected": -494.0656433105469, + "loss": 3.2674, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1162643432617188, + "rewards/margins": 0.4661376476287842, + "rewards/rejected": -3.582401990890503, + "step": 3261 + }, + { + "epoch": 0.51, + "learning_rate": 1.1754851392707091e-05, + "logits/chosen": -1.3781193494796753, + "logits/rejected": -2.9922292232513428, + "logps/chosen": -138.0478515625, + "logps/rejected": -354.0232238769531, + "loss": 1.9446, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3693087100982666, + "rewards/margins": 1.6022450923919678, + "rewards/rejected": -3.9715538024902344, + "step": 3262 + }, + { + "epoch": 0.51, + "learning_rate": 1.1754117952175943e-05, + "logits/chosen": -3.0948500633239746, + "logits/rejected": -2.7350080013275146, + "logps/chosen": -415.39715576171875, + "logps/rejected": -437.83837890625, + "loss": 3.3875, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.993826389312744, + "rewards/margins": 0.0921018123626709, + "rewards/rejected": -5.085928440093994, + "step": 3263 + }, + { + "epoch": 0.51, + "learning_rate": 1.1753384511644795e-05, + "logits/chosen": -1.857118844985962, + "logits/rejected": -2.4153831005096436, + "logps/chosen": -263.18695068359375, + "logps/rejected": -359.0633544921875, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7394378185272217, + "rewards/margins": 4.463608741760254, + "rewards/rejected": -6.203046798706055, + "step": 3264 + }, + { + "epoch": 0.51, + "learning_rate": 1.1752651071113648e-05, + "logits/chosen": -2.1771395206451416, + "logits/rejected": -3.003826856613159, + "logps/chosen": -133.04025268554688, + "logps/rejected": -334.746826171875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7331283688545227, + "rewards/margins": 7.194502353668213, + "rewards/rejected": -7.927630424499512, + "step": 3265 + }, + { + "epoch": 0.51, + "learning_rate": 1.17519176305825e-05, + "logits/chosen": -2.9869937896728516, + "logits/rejected": -3.1249420642852783, + "logps/chosen": -239.75653076171875, + "logps/rejected": -174.3898468017578, + "loss": 1.7885, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5233399868011475, + "rewards/margins": 2.766753911972046, + "rewards/rejected": -5.290093898773193, + "step": 3266 + }, + { + "epoch": 0.51, + "learning_rate": 1.1751184190051352e-05, + "logits/chosen": -2.8291571140289307, + "logits/rejected": -2.895033597946167, + "logps/chosen": -141.80726623535156, + "logps/rejected": -230.49057006835938, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9838311672210693, + "rewards/margins": 5.781924724578857, + "rewards/rejected": -7.765755653381348, + "step": 3267 + }, + { + "epoch": 0.51, + "learning_rate": 1.1750450749520204e-05, + "logits/chosen": -2.8979389667510986, + "logits/rejected": -3.26103138923645, + "logps/chosen": -116.22286987304688, + "logps/rejected": -256.4088134765625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42162859439849854, + "rewards/margins": 7.135288238525391, + "rewards/rejected": -7.556917190551758, + "step": 3268 + }, + { + "epoch": 0.51, + "learning_rate": 1.1749717308989056e-05, + "logits/chosen": -2.6803810596466064, + "logits/rejected": -3.1390950679779053, + "logps/chosen": -162.01962280273438, + "logps/rejected": -417.847412109375, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5836505889892578, + "rewards/margins": 5.041216850280762, + "rewards/rejected": -5.6248674392700195, + "step": 3269 + }, + { + "epoch": 0.51, + "learning_rate": 1.1748983868457908e-05, + "logits/chosen": -2.6121394634246826, + "logits/rejected": -3.088512659072876, + "logps/chosen": -227.69789123535156, + "logps/rejected": -253.97296142578125, + "loss": 2.9779, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9577231407165527, + "rewards/margins": -0.02602982521057129, + "rewards/rejected": -3.9316935539245605, + "step": 3270 + }, + { + "epoch": 0.51, + "learning_rate": 1.174825042792676e-05, + "logits/chosen": -2.8271048069000244, + "logits/rejected": -2.122929811477661, + "logps/chosen": -211.47463989257812, + "logps/rejected": -256.0229797363281, + "loss": 0.037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2908852100372314, + "rewards/margins": 3.360276222229004, + "rewards/rejected": -5.651161193847656, + "step": 3271 + }, + { + "epoch": 0.51, + "learning_rate": 1.1747516987395611e-05, + "logits/chosen": -3.125775098800659, + "logits/rejected": -2.2557575702667236, + "logps/chosen": -595.1942749023438, + "logps/rejected": -332.77911376953125, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7528408169746399, + "rewards/margins": 4.231060028076172, + "rewards/rejected": -4.983901023864746, + "step": 3272 + }, + { + "epoch": 0.51, + "learning_rate": 1.1746783546864463e-05, + "logits/chosen": -3.1140313148498535, + "logits/rejected": -1.753838300704956, + "logps/chosen": -198.5791778564453, + "logps/rejected": -64.48258972167969, + "loss": 3.2572, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.075926303863525, + "rewards/margins": -2.513570785522461, + "rewards/rejected": -3.5623555183410645, + "step": 3273 + }, + { + "epoch": 0.51, + "learning_rate": 1.1746050106333317e-05, + "logits/chosen": -2.0237209796905518, + "logits/rejected": -3.2007906436920166, + "logps/chosen": -390.3267517089844, + "logps/rejected": -532.1266479492188, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35925528407096863, + "rewards/margins": 8.232091903686523, + "rewards/rejected": -8.591346740722656, + "step": 3274 + }, + { + "epoch": 0.51, + "learning_rate": 1.1745316665802169e-05, + "logits/chosen": -2.9872076511383057, + "logits/rejected": -3.188101053237915, + "logps/chosen": -134.91574096679688, + "logps/rejected": -111.38356018066406, + "loss": 1.6565, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.320016860961914, + "rewards/margins": -0.2855573892593384, + "rewards/rejected": -3.0344595909118652, + "step": 3275 + }, + { + "epoch": 0.51, + "learning_rate": 1.174458322527102e-05, + "logits/chosen": -1.9661203622817993, + "logits/rejected": -2.75842547416687, + "logps/chosen": -207.31564331054688, + "logps/rejected": -267.391845703125, + "loss": 2.0814, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.253937005996704, + "rewards/margins": 1.6284475326538086, + "rewards/rejected": -4.882384300231934, + "step": 3276 + }, + { + "epoch": 0.51, + "learning_rate": 1.1743849784739873e-05, + "logits/chosen": -2.3208749294281006, + "logits/rejected": -3.2098405361175537, + "logps/chosen": -221.6566162109375, + "logps/rejected": -383.4518737792969, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2835189998149872, + "rewards/margins": 7.026395797729492, + "rewards/rejected": -7.309915065765381, + "step": 3277 + }, + { + "epoch": 0.51, + "learning_rate": 1.1743116344208724e-05, + "logits/chosen": -2.608035087585449, + "logits/rejected": -3.0657129287719727, + "logps/chosen": -235.5634765625, + "logps/rejected": -340.29388427734375, + "loss": 0.9775, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3896918296813965, + "rewards/margins": 1.7367743253707886, + "rewards/rejected": -5.126466274261475, + "step": 3278 + }, + { + "epoch": 0.51, + "learning_rate": 1.1742382903677576e-05, + "logits/chosen": -2.4120349884033203, + "logits/rejected": -3.0924923419952393, + "logps/chosen": -109.51393127441406, + "logps/rejected": -471.55120849609375, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7635411024093628, + "rewards/margins": 6.39730978012085, + "rewards/rejected": -8.160850524902344, + "step": 3279 + }, + { + "epoch": 0.51, + "learning_rate": 1.1741649463146428e-05, + "logits/chosen": -1.2100061178207397, + "logits/rejected": -3.040581703186035, + "logps/chosen": -97.46209716796875, + "logps/rejected": -161.25685119628906, + "loss": 2.5764, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4858927726745605, + "rewards/margins": 0.48770785331726074, + "rewards/rejected": -3.9736006259918213, + "step": 3280 + }, + { + "epoch": 0.51, + "learning_rate": 1.174091602261528e-05, + "logits/chosen": -2.992713689804077, + "logits/rejected": -3.220607042312622, + "logps/chosen": -228.29527282714844, + "logps/rejected": -322.3899230957031, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7144836187362671, + "rewards/margins": 6.20330810546875, + "rewards/rejected": -6.917791366577148, + "step": 3281 + }, + { + "epoch": 0.51, + "learning_rate": 1.1740182582084132e-05, + "logits/chosen": -3.064631462097168, + "logits/rejected": -2.2668628692626953, + "logps/chosen": -230.3335418701172, + "logps/rejected": -133.22756958007812, + "loss": 2.4133, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.097076416015625, + "rewards/margins": -1.6331602334976196, + "rewards/rejected": -3.463916301727295, + "step": 3282 + }, + { + "epoch": 0.51, + "learning_rate": 1.1739449141552986e-05, + "logits/chosen": -2.777627944946289, + "logits/rejected": -2.947762966156006, + "logps/chosen": -346.2036437988281, + "logps/rejected": -490.6923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26351624727249146, + "rewards/margins": 11.268651008605957, + "rewards/rejected": -11.005134582519531, + "step": 3283 + }, + { + "epoch": 0.51, + "learning_rate": 1.1738715701021837e-05, + "logits/chosen": -2.924370765686035, + "logits/rejected": -1.3596605062484741, + "logps/chosen": -225.12811279296875, + "logps/rejected": -111.19584655761719, + "loss": 1.9046, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2291712760925293, + "rewards/margins": 0.5795344114303589, + "rewards/rejected": -2.8087055683135986, + "step": 3284 + }, + { + "epoch": 0.51, + "learning_rate": 1.173798226049069e-05, + "logits/chosen": -2.99635910987854, + "logits/rejected": -2.7236552238464355, + "logps/chosen": -139.09442138671875, + "logps/rejected": -235.170166015625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2939338684082031, + "rewards/margins": 6.966837406158447, + "rewards/rejected": -6.672903537750244, + "step": 3285 + }, + { + "epoch": 0.51, + "learning_rate": 1.1737248819959541e-05, + "logits/chosen": -3.342427968978882, + "logits/rejected": -2.9054465293884277, + "logps/chosen": -619.461669921875, + "logps/rejected": -496.60247802734375, + "loss": 0.0315, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.138146996498108, + "rewards/margins": 4.120479583740234, + "rewards/rejected": -5.258626937866211, + "step": 3286 + }, + { + "epoch": 0.51, + "learning_rate": 1.1736515379428393e-05, + "logits/chosen": -1.916121006011963, + "logits/rejected": -2.8798062801361084, + "logps/chosen": -95.74808502197266, + "logps/rejected": -194.9265594482422, + "loss": 0.1013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8533764481544495, + "rewards/margins": 3.2811241149902344, + "rewards/rejected": -4.134500503540039, + "step": 3287 + }, + { + "epoch": 0.51, + "learning_rate": 1.1735781938897245e-05, + "logits/chosen": -1.8367729187011719, + "logits/rejected": -3.20644474029541, + "logps/chosen": -314.6573486328125, + "logps/rejected": -332.0126037597656, + "loss": 0.058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.476121425628662, + "rewards/margins": 3.8333895206451416, + "rewards/rejected": -5.309511184692383, + "step": 3288 + }, + { + "epoch": 0.51, + "learning_rate": 1.1735048498366097e-05, + "logits/chosen": -3.1697845458984375, + "logits/rejected": -3.125309467315674, + "logps/chosen": -289.25433349609375, + "logps/rejected": -360.537353515625, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.907536506652832, + "rewards/margins": 8.425174713134766, + "rewards/rejected": -11.332712173461914, + "step": 3289 + }, + { + "epoch": 0.51, + "learning_rate": 1.173431505783495e-05, + "logits/chosen": -3.1553211212158203, + "logits/rejected": -2.4126992225646973, + "logps/chosen": -161.99354553222656, + "logps/rejected": -298.94757080078125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9805183410644531, + "rewards/margins": 6.4341139793396, + "rewards/rejected": -8.414632797241211, + "step": 3290 + }, + { + "epoch": 0.51, + "learning_rate": 1.1733581617303802e-05, + "logits/chosen": -3.218818426132202, + "logits/rejected": -2.6678426265716553, + "logps/chosen": -160.56446838378906, + "logps/rejected": -151.75946044921875, + "loss": 1.8849, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.997499942779541, + "rewards/margins": 0.8247935771942139, + "rewards/rejected": -3.822293519973755, + "step": 3291 + }, + { + "epoch": 0.51, + "learning_rate": 1.1732848176772656e-05, + "logits/chosen": -3.2143280506134033, + "logits/rejected": -2.5400843620300293, + "logps/chosen": -424.1830749511719, + "logps/rejected": -362.6766357421875, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3564229011535645, + "rewards/margins": 4.328756332397461, + "rewards/rejected": -5.685179233551025, + "step": 3292 + }, + { + "epoch": 0.51, + "learning_rate": 1.1732114736241508e-05, + "logits/chosen": -1.7887026071548462, + "logits/rejected": -3.140150785446167, + "logps/chosen": -196.83712768554688, + "logps/rejected": -462.11688232421875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4179985523223877, + "rewards/margins": 9.002174377441406, + "rewards/rejected": -10.420172691345215, + "step": 3293 + }, + { + "epoch": 0.51, + "learning_rate": 1.173138129571036e-05, + "logits/chosen": -1.8094794750213623, + "logits/rejected": -2.709669828414917, + "logps/chosen": -87.19125366210938, + "logps/rejected": -349.5818176269531, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12105885148048401, + "rewards/margins": 8.437050819396973, + "rewards/rejected": -8.315991401672363, + "step": 3294 + }, + { + "epoch": 0.51, + "learning_rate": 1.1730647855179211e-05, + "logits/chosen": -2.0668318271636963, + "logits/rejected": -2.914315700531006, + "logps/chosen": -197.47512817382812, + "logps/rejected": -504.94427490234375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1811527013778687, + "rewards/margins": 7.722650527954102, + "rewards/rejected": -8.903802871704102, + "step": 3295 + }, + { + "epoch": 0.51, + "learning_rate": 1.1729914414648063e-05, + "logits/chosen": -2.509612560272217, + "logits/rejected": -3.0644259452819824, + "logps/chosen": -157.7045440673828, + "logps/rejected": -302.30718994140625, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2366927862167358, + "rewards/margins": 5.173958778381348, + "rewards/rejected": -6.410651206970215, + "step": 3296 + }, + { + "epoch": 0.51, + "learning_rate": 1.1729180974116915e-05, + "logits/chosen": -2.737203359603882, + "logits/rejected": -3.2335727214813232, + "logps/chosen": -276.04296875, + "logps/rejected": -256.2931823730469, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.661444902420044, + "rewards/margins": 3.6404404640197754, + "rewards/rejected": -5.301885604858398, + "step": 3297 + }, + { + "epoch": 0.51, + "learning_rate": 1.1728447533585767e-05, + "logits/chosen": -2.3247275352478027, + "logits/rejected": -3.0403194427490234, + "logps/chosen": -263.3444519042969, + "logps/rejected": -279.0802917480469, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013902872800827026, + "rewards/margins": 7.319823265075684, + "rewards/rejected": -7.333725929260254, + "step": 3298 + }, + { + "epoch": 0.51, + "learning_rate": 1.1727714093054619e-05, + "logits/chosen": -2.620185613632202, + "logits/rejected": -3.2324161529541016, + "logps/chosen": -424.9678955078125, + "logps/rejected": -594.8571166992188, + "loss": 0.0525, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3742738962173462, + "rewards/margins": 4.158447265625, + "rewards/rejected": -4.532721042633057, + "step": 3299 + }, + { + "epoch": 0.51, + "learning_rate": 1.172698065252347e-05, + "logits/chosen": -2.9403786659240723, + "logits/rejected": -2.637751817703247, + "logps/chosen": -522.650634765625, + "logps/rejected": -428.2334289550781, + "loss": 0.8404, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.71640944480896, + "rewards/margins": 2.1772243976593018, + "rewards/rejected": -3.8936338424682617, + "step": 3300 + }, + { + "epoch": 0.51, + "learning_rate": 1.1726247211992324e-05, + "logits/chosen": -3.085705518722534, + "logits/rejected": -3.1027612686157227, + "logps/chosen": -623.525634765625, + "logps/rejected": -335.37933349609375, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0043235719203948975, + "rewards/margins": 6.454257488250732, + "rewards/rejected": -6.45858097076416, + "step": 3301 + }, + { + "epoch": 0.51, + "learning_rate": 1.1725513771461176e-05, + "logits/chosen": -1.7388001680374146, + "logits/rejected": -2.8604981899261475, + "logps/chosen": -195.73162841796875, + "logps/rejected": -518.8260498046875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1233925819396973, + "rewards/margins": 6.094717979431152, + "rewards/rejected": -7.21811056137085, + "step": 3302 + }, + { + "epoch": 0.51, + "learning_rate": 1.1724780330930028e-05, + "logits/chosen": -2.1774322986602783, + "logits/rejected": -2.4384942054748535, + "logps/chosen": -214.34652709960938, + "logps/rejected": -276.752685546875, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6957541704177856, + "rewards/margins": 3.977088212966919, + "rewards/rejected": -5.672842502593994, + "step": 3303 + }, + { + "epoch": 0.51, + "learning_rate": 1.172404689039888e-05, + "logits/chosen": -2.403169870376587, + "logits/rejected": -2.7455849647521973, + "logps/chosen": -294.74981689453125, + "logps/rejected": -321.544677734375, + "loss": 1.4942, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.915707588195801, + "rewards/margins": 0.5827528238296509, + "rewards/rejected": -3.498460292816162, + "step": 3304 + }, + { + "epoch": 0.51, + "learning_rate": 1.1723313449867732e-05, + "logits/chosen": -1.5893558263778687, + "logits/rejected": -2.852262020111084, + "logps/chosen": -59.912349700927734, + "logps/rejected": -245.99234008789062, + "loss": 0.0652, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.502455234527588, + "rewards/margins": 5.246592044830322, + "rewards/rejected": -7.74904727935791, + "step": 3305 + }, + { + "epoch": 0.51, + "learning_rate": 1.1722580009336584e-05, + "logits/chosen": -2.957197666168213, + "logits/rejected": -1.3803093433380127, + "logps/chosen": -224.66796875, + "logps/rejected": -136.33934020996094, + "loss": 1.9433, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.520303964614868, + "rewards/margins": 1.0655261278152466, + "rewards/rejected": -3.5858302116394043, + "step": 3306 + }, + { + "epoch": 0.51, + "learning_rate": 1.1721846568805436e-05, + "logits/chosen": -2.1101014614105225, + "logits/rejected": -2.1206886768341064, + "logps/chosen": -320.4679870605469, + "logps/rejected": -355.09295654296875, + "loss": 0.1308, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9663262367248535, + "rewards/margins": 4.634171962738037, + "rewards/rejected": -6.600498199462891, + "step": 3307 + }, + { + "epoch": 0.51, + "learning_rate": 1.1721113128274288e-05, + "logits/chosen": -2.5900285243988037, + "logits/rejected": -2.916365146636963, + "logps/chosen": -210.49884033203125, + "logps/rejected": -257.5, + "loss": 2.0295, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.015910625457764, + "rewards/margins": 0.5264666080474854, + "rewards/rejected": -5.54237699508667, + "step": 3308 + }, + { + "epoch": 0.51, + "learning_rate": 1.172037968774314e-05, + "logits/chosen": -2.9072413444519043, + "logits/rejected": -3.3048830032348633, + "logps/chosen": -226.16358947753906, + "logps/rejected": -355.4703369140625, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5069870948791504, + "rewards/margins": 5.915284156799316, + "rewards/rejected": -7.422270774841309, + "step": 3309 + }, + { + "epoch": 0.51, + "learning_rate": 1.1719646247211993e-05, + "logits/chosen": -2.622638463973999, + "logits/rejected": -3.171980857849121, + "logps/chosen": -87.11780548095703, + "logps/rejected": -248.6748504638672, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4668362140655518, + "rewards/margins": 5.928180694580078, + "rewards/rejected": -7.395017623901367, + "step": 3310 + }, + { + "epoch": 0.51, + "learning_rate": 1.1718912806680845e-05, + "logits/chosen": -1.978071928024292, + "logits/rejected": -3.1258161067962646, + "logps/chosen": -131.02305603027344, + "logps/rejected": -232.61004638671875, + "loss": 2.4878, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.678875923156738, + "rewards/margins": -0.2852048873901367, + "rewards/rejected": -4.393670558929443, + "step": 3311 + }, + { + "epoch": 0.52, + "learning_rate": 1.1718179366149697e-05, + "logits/chosen": -3.1941440105438232, + "logits/rejected": -2.360041379928589, + "logps/chosen": -266.519287109375, + "logps/rejected": -206.33834838867188, + "loss": 2.4444, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9576683044433594, + "rewards/margins": -0.2104170322418213, + "rewards/rejected": -2.747251272201538, + "step": 3312 + }, + { + "epoch": 0.52, + "learning_rate": 1.1717445925618549e-05, + "logits/chosen": -0.6678124666213989, + "logits/rejected": -3.1271636486053467, + "logps/chosen": -25.46538543701172, + "logps/rejected": -314.6068115234375, + "loss": 0.0342, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.350501537322998, + "rewards/margins": 4.2614898681640625, + "rewards/rejected": -5.611990928649902, + "step": 3313 + }, + { + "epoch": 0.52, + "learning_rate": 1.17167124850874e-05, + "logits/chosen": -0.8169947862625122, + "logits/rejected": -3.064147710800171, + "logps/chosen": -38.50567626953125, + "logps/rejected": -501.72998046875, + "loss": 0.0429, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6919196844100952, + "rewards/margins": 5.431672096252441, + "rewards/rejected": -7.123591899871826, + "step": 3314 + }, + { + "epoch": 0.52, + "learning_rate": 1.1715979044556252e-05, + "logits/chosen": -2.440441370010376, + "logits/rejected": -2.950765609741211, + "logps/chosen": -65.76277923583984, + "logps/rejected": -160.8194122314453, + "loss": 0.139, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9011818170547485, + "rewards/margins": 3.004556894302368, + "rewards/rejected": -4.905738830566406, + "step": 3315 + }, + { + "epoch": 0.52, + "learning_rate": 1.1715245604025104e-05, + "logits/chosen": -2.180284261703491, + "logits/rejected": -3.2459850311279297, + "logps/chosen": -140.4210968017578, + "logps/rejected": -193.62237548828125, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006976962089538574, + "rewards/margins": 5.68138313293457, + "rewards/rejected": -5.688360214233398, + "step": 3316 + }, + { + "epoch": 0.52, + "learning_rate": 1.1714512163493956e-05, + "logits/chosen": -2.5883071422576904, + "logits/rejected": -1.2209888696670532, + "logps/chosen": -368.4410095214844, + "logps/rejected": -176.0491943359375, + "loss": 1.2346, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4658470153808594, + "rewards/margins": 1.0820984840393066, + "rewards/rejected": -3.547945499420166, + "step": 3317 + }, + { + "epoch": 0.52, + "learning_rate": 1.1713778722962808e-05, + "logits/chosen": -3.1968066692352295, + "logits/rejected": -2.76151704788208, + "logps/chosen": -198.316162109375, + "logps/rejected": -305.5942077636719, + "loss": 2.119, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.873412847518921, + "rewards/margins": 1.648296594619751, + "rewards/rejected": -4.521709442138672, + "step": 3318 + }, + { + "epoch": 0.52, + "learning_rate": 1.1713045282431662e-05, + "logits/chosen": -1.9235223531723022, + "logits/rejected": -2.9382195472717285, + "logps/chosen": -56.15690994262695, + "logps/rejected": -195.1717987060547, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0142393112182617, + "rewards/margins": 5.356822967529297, + "rewards/rejected": -6.371062755584717, + "step": 3319 + }, + { + "epoch": 0.52, + "learning_rate": 1.1712311841900513e-05, + "logits/chosen": -3.2617359161376953, + "logits/rejected": -2.342017889022827, + "logps/chosen": -355.0511474609375, + "logps/rejected": -139.57180786132812, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8990119695663452, + "rewards/margins": 6.0284833908081055, + "rewards/rejected": -6.927495002746582, + "step": 3320 + }, + { + "epoch": 0.52, + "learning_rate": 1.1711578401369365e-05, + "logits/chosen": -2.8339309692382812, + "logits/rejected": -3.2476398944854736, + "logps/chosen": -417.65533447265625, + "logps/rejected": -431.4482116699219, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4600105285644531, + "rewards/margins": 7.027738094329834, + "rewards/rejected": -8.487749099731445, + "step": 3321 + }, + { + "epoch": 0.52, + "learning_rate": 1.1710844960838217e-05, + "logits/chosen": -0.6416100859642029, + "logits/rejected": -2.862337827682495, + "logps/chosen": -86.13664245605469, + "logps/rejected": -898.308349609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3002071678638458, + "rewards/margins": 9.931119918823242, + "rewards/rejected": -10.231327056884766, + "step": 3322 + }, + { + "epoch": 0.52, + "learning_rate": 1.1710111520307069e-05, + "logits/chosen": -1.0594007968902588, + "logits/rejected": -2.880911350250244, + "logps/chosen": -32.08274841308594, + "logps/rejected": -416.1431884765625, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1242475509643555, + "rewards/margins": 7.833647727966309, + "rewards/rejected": -9.957895278930664, + "step": 3323 + }, + { + "epoch": 0.52, + "learning_rate": 1.1709378079775923e-05, + "logits/chosen": -3.0432698726654053, + "logits/rejected": -3.073051929473877, + "logps/chosen": -444.9773254394531, + "logps/rejected": -292.6256408691406, + "loss": 1.955, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3586671352386475, + "rewards/margins": -0.18339455127716064, + "rewards/rejected": -2.1752724647521973, + "step": 3324 + }, + { + "epoch": 0.52, + "learning_rate": 1.1708644639244775e-05, + "logits/chosen": -3.104891061782837, + "logits/rejected": -2.05135178565979, + "logps/chosen": -198.54571533203125, + "logps/rejected": -110.99458312988281, + "loss": 3.6056, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.888730049133301, + "rewards/margins": -0.9330682754516602, + "rewards/rejected": -3.9556620121002197, + "step": 3325 + }, + { + "epoch": 0.52, + "learning_rate": 1.1707911198713626e-05, + "logits/chosen": -1.5334633588790894, + "logits/rejected": -3.1371467113494873, + "logps/chosen": -122.78950500488281, + "logps/rejected": -425.9067077636719, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.442268967628479, + "rewards/margins": 5.496946334838867, + "rewards/rejected": -6.939215183258057, + "step": 3326 + }, + { + "epoch": 0.52, + "learning_rate": 1.1707177758182478e-05, + "logits/chosen": -3.083571195602417, + "logits/rejected": -3.1506848335266113, + "logps/chosen": -117.53644561767578, + "logps/rejected": -173.3656768798828, + "loss": 0.1496, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4494081735610962, + "rewards/margins": 3.6874876022338867, + "rewards/rejected": -5.136895656585693, + "step": 3327 + }, + { + "epoch": 0.52, + "learning_rate": 1.1706444317651332e-05, + "logits/chosen": -2.8701727390289307, + "logits/rejected": -3.157296657562256, + "logps/chosen": -37.210609436035156, + "logps/rejected": -246.48309326171875, + "loss": 0.0388, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0439316034317017, + "rewards/margins": 3.418614149093628, + "rewards/rejected": -4.462545871734619, + "step": 3328 + }, + { + "epoch": 0.52, + "learning_rate": 1.1705710877120184e-05, + "logits/chosen": -3.1033899784088135, + "logits/rejected": -2.5032918453216553, + "logps/chosen": -114.17930603027344, + "logps/rejected": -95.192138671875, + "loss": 1.9647, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.380747079849243, + "rewards/margins": -0.4579876661300659, + "rewards/rejected": -2.9227592945098877, + "step": 3329 + }, + { + "epoch": 0.52, + "learning_rate": 1.1704977436589036e-05, + "logits/chosen": -2.8981025218963623, + "logits/rejected": -1.3479981422424316, + "logps/chosen": -469.94940185546875, + "logps/rejected": -177.16427612304688, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5364593267440796, + "rewards/margins": 6.000524044036865, + "rewards/rejected": -6.536983489990234, + "step": 3330 + }, + { + "epoch": 0.52, + "learning_rate": 1.1704243996057888e-05, + "logits/chosen": -3.292919397354126, + "logits/rejected": -3.2100508213043213, + "logps/chosen": -164.7476043701172, + "logps/rejected": -151.40618896484375, + "loss": 0.0892, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2345986366271973, + "rewards/margins": 3.0769600868225098, + "rewards/rejected": -4.311558723449707, + "step": 3331 + }, + { + "epoch": 0.52, + "learning_rate": 1.170351055552674e-05, + "logits/chosen": -2.1882917881011963, + "logits/rejected": -3.0335612297058105, + "logps/chosen": -126.09307861328125, + "logps/rejected": -270.0701904296875, + "loss": 0.0463, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9200901985168457, + "rewards/margins": 3.916078567504883, + "rewards/rejected": -5.8361687660217285, + "step": 3332 + }, + { + "epoch": 0.52, + "learning_rate": 1.1702777114995591e-05, + "logits/chosen": -0.8670426607131958, + "logits/rejected": -3.097519874572754, + "logps/chosen": -176.9942626953125, + "logps/rejected": -492.64483642578125, + "loss": 0.0301, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4690773487091064, + "rewards/margins": 7.066658973693848, + "rewards/rejected": -8.535736083984375, + "step": 3333 + }, + { + "epoch": 0.52, + "learning_rate": 1.1702043674464443e-05, + "logits/chosen": -2.9520864486694336, + "logits/rejected": -2.1699345111846924, + "logps/chosen": -358.7192687988281, + "logps/rejected": -306.87109375, + "loss": 2.7309, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.783940553665161, + "rewards/margins": 0.5490658283233643, + "rewards/rejected": -4.333006381988525, + "step": 3334 + }, + { + "epoch": 0.52, + "learning_rate": 1.1701310233933295e-05, + "logits/chosen": -3.194814682006836, + "logits/rejected": -2.2998900413513184, + "logps/chosen": -498.49493408203125, + "logps/rejected": -411.95166015625, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7846221923828125, + "rewards/margins": 5.331010818481445, + "rewards/rejected": -9.115633010864258, + "step": 3335 + }, + { + "epoch": 0.52, + "learning_rate": 1.1700576793402149e-05, + "logits/chosen": -2.6078274250030518, + "logits/rejected": -3.1891303062438965, + "logps/chosen": -347.14202880859375, + "logps/rejected": -283.825927734375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03302231431007385, + "rewards/margins": 6.953102111816406, + "rewards/rejected": -6.9861249923706055, + "step": 3336 + }, + { + "epoch": 0.52, + "learning_rate": 1.1699843352871e-05, + "logits/chosen": -3.1299471855163574, + "logits/rejected": -3.1115219593048096, + "logps/chosen": -590.5902709960938, + "logps/rejected": -562.0057373046875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.349128007888794, + "rewards/margins": 6.467524528503418, + "rewards/rejected": -5.118395805358887, + "step": 3337 + }, + { + "epoch": 0.52, + "learning_rate": 1.1699109912339852e-05, + "logits/chosen": -2.9966793060302734, + "logits/rejected": -2.923239231109619, + "logps/chosen": -197.87921142578125, + "logps/rejected": -124.69376373291016, + "loss": 3.1501, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9793801307678223, + "rewards/margins": -1.3226633071899414, + "rewards/rejected": -2.656716823577881, + "step": 3338 + }, + { + "epoch": 0.52, + "learning_rate": 1.1698376471808704e-05, + "logits/chosen": -2.788799524307251, + "logits/rejected": -2.895155668258667, + "logps/chosen": -124.19596099853516, + "logps/rejected": -257.56134033203125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7863577008247375, + "rewards/margins": 8.396448135375977, + "rewards/rejected": -9.182806015014648, + "step": 3339 + }, + { + "epoch": 0.52, + "learning_rate": 1.1697643031277556e-05, + "logits/chosen": -3.1633548736572266, + "logits/rejected": -2.3367483615875244, + "logps/chosen": -600.779052734375, + "logps/rejected": -422.51470947265625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17857059836387634, + "rewards/margins": 6.762537002563477, + "rewards/rejected": -6.941107273101807, + "step": 3340 + }, + { + "epoch": 0.52, + "learning_rate": 1.1696909590746408e-05, + "logits/chosen": -2.7147929668426514, + "logits/rejected": -3.1274595260620117, + "logps/chosen": -273.17022705078125, + "logps/rejected": -373.09130859375, + "loss": 0.0325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6632499694824219, + "rewards/margins": 3.7263827323913574, + "rewards/rejected": -4.389632701873779, + "step": 3341 + }, + { + "epoch": 0.52, + "learning_rate": 1.169617615021526e-05, + "logits/chosen": -3.0728468894958496, + "logits/rejected": -3.183870315551758, + "logps/chosen": -465.5716857910156, + "logps/rejected": -391.8028564453125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014001465402543545, + "rewards/margins": 6.956748962402344, + "rewards/rejected": -6.942747592926025, + "step": 3342 + }, + { + "epoch": 0.52, + "learning_rate": 1.1695442709684112e-05, + "logits/chosen": -2.7491185665130615, + "logits/rejected": -2.9655404090881348, + "logps/chosen": -234.48077392578125, + "logps/rejected": -202.87457275390625, + "loss": 0.034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9543426036834717, + "rewards/margins": 3.655433177947998, + "rewards/rejected": -5.609776020050049, + "step": 3343 + }, + { + "epoch": 0.52, + "learning_rate": 1.1694709269152964e-05, + "logits/chosen": -2.905625104904175, + "logits/rejected": -2.962707996368408, + "logps/chosen": -184.28997802734375, + "logps/rejected": -272.6534729003906, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4539348781108856, + "rewards/margins": 6.365880012512207, + "rewards/rejected": -6.819815158843994, + "step": 3344 + }, + { + "epoch": 0.52, + "learning_rate": 1.1693975828621817e-05, + "logits/chosen": -1.749950885772705, + "logits/rejected": -3.042340040206909, + "logps/chosen": -123.9564208984375, + "logps/rejected": -463.24298095703125, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3606042861938477, + "rewards/margins": 8.128626823425293, + "rewards/rejected": -9.48923110961914, + "step": 3345 + }, + { + "epoch": 0.52, + "learning_rate": 1.1693242388090669e-05, + "logits/chosen": -2.9050188064575195, + "logits/rejected": -2.7043566703796387, + "logps/chosen": -119.34974670410156, + "logps/rejected": -293.1888427734375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.937753438949585, + "rewards/margins": 5.30977725982666, + "rewards/rejected": -7.247530460357666, + "step": 3346 + }, + { + "epoch": 0.52, + "learning_rate": 1.1692508947559521e-05, + "logits/chosen": -3.144317626953125, + "logits/rejected": -2.5593512058258057, + "logps/chosen": -716.0484619140625, + "logps/rejected": -423.75506591796875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8307037949562073, + "rewards/margins": 6.783499240875244, + "rewards/rejected": -7.614202976226807, + "step": 3347 + }, + { + "epoch": 0.52, + "learning_rate": 1.1691775507028373e-05, + "logits/chosen": -3.178614616394043, + "logits/rejected": -3.053654432296753, + "logps/chosen": -621.32421875, + "logps/rejected": -950.71826171875, + "loss": 2.9228, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.454981327056885, + "rewards/margins": 0.4009404182434082, + "rewards/rejected": -4.855921745300293, + "step": 3348 + }, + { + "epoch": 0.52, + "learning_rate": 1.1691042066497225e-05, + "logits/chosen": -3.1521642208099365, + "logits/rejected": -2.2222700119018555, + "logps/chosen": -208.5398712158203, + "logps/rejected": -307.2031555175781, + "loss": 2.7609, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.887009620666504, + "rewards/margins": 3.038285732269287, + "rewards/rejected": -8.925294876098633, + "step": 3349 + }, + { + "epoch": 0.52, + "learning_rate": 1.1690308625966077e-05, + "logits/chosen": -2.3175675868988037, + "logits/rejected": -3.008291721343994, + "logps/chosen": -117.91312408447266, + "logps/rejected": -179.14138793945312, + "loss": 2.5218, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.050743579864502, + "rewards/margins": 1.6312150955200195, + "rewards/rejected": -4.681958198547363, + "step": 3350 + }, + { + "epoch": 0.52, + "learning_rate": 1.1689575185434928e-05, + "logits/chosen": -3.0338869094848633, + "logits/rejected": -1.4751272201538086, + "logps/chosen": -989.2171020507812, + "logps/rejected": -400.4268798828125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6395082473754883, + "rewards/margins": 5.795389175415039, + "rewards/rejected": -8.434897422790527, + "step": 3351 + }, + { + "epoch": 0.52, + "learning_rate": 1.168884174490378e-05, + "logits/chosen": -2.048804998397827, + "logits/rejected": -3.035691022872925, + "logps/chosen": -194.42489624023438, + "logps/rejected": -343.791015625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5343174934387207, + "rewards/margins": 5.831027984619141, + "rewards/rejected": -7.3653459548950195, + "step": 3352 + }, + { + "epoch": 0.52, + "learning_rate": 1.1688108304372632e-05, + "logits/chosen": -3.1921029090881348, + "logits/rejected": -2.9793102741241455, + "logps/chosen": -120.85218048095703, + "logps/rejected": -138.698974609375, + "loss": 0.7042, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.411818504333496, + "rewards/margins": 1.578371286392212, + "rewards/rejected": -3.990189790725708, + "step": 3353 + }, + { + "epoch": 0.52, + "learning_rate": 1.1687374863841486e-05, + "logits/chosen": -2.5841310024261475, + "logits/rejected": -3.024541139602661, + "logps/chosen": -415.3200988769531, + "logps/rejected": -444.5955505371094, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1527358889579773, + "rewards/margins": 7.570013046264648, + "rewards/rejected": -7.417276859283447, + "step": 3354 + }, + { + "epoch": 0.52, + "learning_rate": 1.1686641423310338e-05, + "logits/chosen": -2.6993236541748047, + "logits/rejected": -3.262171506881714, + "logps/chosen": -376.87445068359375, + "logps/rejected": -415.1788330078125, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6991363763809204, + "rewards/margins": 6.1506500244140625, + "rewards/rejected": -6.849786758422852, + "step": 3355 + }, + { + "epoch": 0.52, + "learning_rate": 1.168590798277919e-05, + "logits/chosen": -3.076648235321045, + "logits/rejected": -3.047508716583252, + "logps/chosen": -599.53759765625, + "logps/rejected": -504.40167236328125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26576995849609375, + "rewards/margins": 6.789309501647949, + "rewards/rejected": -6.523539066314697, + "step": 3356 + }, + { + "epoch": 0.52, + "learning_rate": 1.1685174542248041e-05, + "logits/chosen": -1.6710585355758667, + "logits/rejected": -3.2269504070281982, + "logps/chosen": -163.38833618164062, + "logps/rejected": -399.3829345703125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9865796566009521, + "rewards/margins": 7.17982816696167, + "rewards/rejected": -8.166407585144043, + "step": 3357 + }, + { + "epoch": 0.52, + "learning_rate": 1.1684441101716895e-05, + "logits/chosen": -3.0884060859680176, + "logits/rejected": -2.8477742671966553, + "logps/chosen": -158.1797637939453, + "logps/rejected": -261.79718017578125, + "loss": 2.0626, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.166140556335449, + "rewards/margins": 2.0288515090942383, + "rewards/rejected": -6.1949920654296875, + "step": 3358 + }, + { + "epoch": 0.52, + "learning_rate": 1.1683707661185747e-05, + "logits/chosen": -2.6985998153686523, + "logits/rejected": -3.081860065460205, + "logps/chosen": -252.92340087890625, + "logps/rejected": -292.7103271484375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2617652416229248, + "rewards/margins": 5.935413360595703, + "rewards/rejected": -7.197178840637207, + "step": 3359 + }, + { + "epoch": 0.52, + "learning_rate": 1.1682974220654599e-05, + "logits/chosen": -2.961806297302246, + "logits/rejected": -1.9912344217300415, + "logps/chosen": -394.67120361328125, + "logps/rejected": -265.1943359375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.347894310951233, + "rewards/margins": 5.138039588928223, + "rewards/rejected": -6.485933780670166, + "step": 3360 + }, + { + "epoch": 0.52, + "learning_rate": 1.168224078012345e-05, + "logits/chosen": -3.0339202880859375, + "logits/rejected": -1.1845601797103882, + "logps/chosen": -491.1498718261719, + "logps/rejected": -84.61415100097656, + "loss": 3.6786, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.664684295654297, + "rewards/margins": -3.5701189041137695, + "rewards/rejected": -2.0945653915405273, + "step": 3361 + }, + { + "epoch": 0.52, + "learning_rate": 1.1681507339592303e-05, + "logits/chosen": -3.2360174655914307, + "logits/rejected": -3.1672146320343018, + "logps/chosen": -134.00759887695312, + "logps/rejected": -116.22599792480469, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8794082999229431, + "rewards/margins": 3.308257818222046, + "rewards/rejected": -4.187665939331055, + "step": 3362 + }, + { + "epoch": 0.52, + "learning_rate": 1.1680773899061156e-05, + "logits/chosen": -3.0433359146118164, + "logits/rejected": -2.7752888202667236, + "logps/chosen": -494.3706970214844, + "logps/rejected": -375.93115234375, + "loss": 1.7626, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.095730781555176, + "rewards/margins": 1.636143445968628, + "rewards/rejected": -5.731874465942383, + "step": 3363 + }, + { + "epoch": 0.52, + "learning_rate": 1.1680040458530008e-05, + "logits/chosen": -3.20668888092041, + "logits/rejected": -2.9340596199035645, + "logps/chosen": -114.24998474121094, + "logps/rejected": -251.9494171142578, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8327910900115967, + "rewards/margins": 5.243209362030029, + "rewards/rejected": -7.076000213623047, + "step": 3364 + }, + { + "epoch": 0.52, + "learning_rate": 1.167930701799886e-05, + "logits/chosen": -2.422234058380127, + "logits/rejected": -3.0720677375793457, + "logps/chosen": -99.12117767333984, + "logps/rejected": -394.3105163574219, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.500883936882019, + "rewards/margins": 8.274330139160156, + "rewards/rejected": -9.775214195251465, + "step": 3365 + }, + { + "epoch": 0.52, + "learning_rate": 1.1678573577467712e-05, + "logits/chosen": -3.0337371826171875, + "logits/rejected": -3.2480039596557617, + "logps/chosen": -74.45581817626953, + "logps/rejected": -153.7396240234375, + "loss": 2.4021, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.755594253540039, + "rewards/margins": 0.7215414047241211, + "rewards/rejected": -4.47713565826416, + "step": 3366 + }, + { + "epoch": 0.52, + "learning_rate": 1.1677840136936564e-05, + "logits/chosen": -3.140885591506958, + "logits/rejected": -3.024568557739258, + "logps/chosen": -166.7987823486328, + "logps/rejected": -152.3489532470703, + "loss": 2.1626, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.757774591445923, + "rewards/margins": -0.5415627956390381, + "rewards/rejected": -3.2162117958068848, + "step": 3367 + }, + { + "epoch": 0.52, + "learning_rate": 1.1677106696405416e-05, + "logits/chosen": -2.485001564025879, + "logits/rejected": -3.0557987689971924, + "logps/chosen": -441.204833984375, + "logps/rejected": -442.58319091796875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0215851068496704, + "rewards/margins": 6.76938533782959, + "rewards/rejected": -7.790970802307129, + "step": 3368 + }, + { + "epoch": 0.52, + "learning_rate": 1.1676373255874267e-05, + "logits/chosen": -2.7455782890319824, + "logits/rejected": -3.135481119155884, + "logps/chosen": -280.75506591796875, + "logps/rejected": -242.62318420410156, + "loss": 0.0465, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8115371465682983, + "rewards/margins": 3.759129524230957, + "rewards/rejected": -5.570666313171387, + "step": 3369 + }, + { + "epoch": 0.52, + "learning_rate": 1.167563981534312e-05, + "logits/chosen": -1.364254117012024, + "logits/rejected": -1.8157302141189575, + "logps/chosen": -156.83860778808594, + "logps/rejected": -265.74822998046875, + "loss": 0.0413, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19620245695114136, + "rewards/margins": 5.698525428771973, + "rewards/rejected": -5.894728183746338, + "step": 3370 + }, + { + "epoch": 0.52, + "learning_rate": 1.1674906374811971e-05, + "logits/chosen": -1.4327900409698486, + "logits/rejected": -2.9850220680236816, + "logps/chosen": -113.2826156616211, + "logps/rejected": -400.9118347167969, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6215893030166626, + "rewards/margins": 4.981657028198242, + "rewards/rejected": -6.603246688842773, + "step": 3371 + }, + { + "epoch": 0.52, + "learning_rate": 1.1674172934280825e-05, + "logits/chosen": -2.883862257003784, + "logits/rejected": -3.107987642288208, + "logps/chosen": -66.74251556396484, + "logps/rejected": -191.05752563476562, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.518048882484436, + "rewards/margins": 4.655250072479248, + "rewards/rejected": -5.1732988357543945, + "step": 3372 + }, + { + "epoch": 0.52, + "learning_rate": 1.1673439493749677e-05, + "logits/chosen": -2.6262331008911133, + "logits/rejected": -2.806999444961548, + "logps/chosen": -290.13018798828125, + "logps/rejected": -424.0047607421875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9628891944885254, + "rewards/margins": 6.8469133377075195, + "rewards/rejected": -8.809802055358887, + "step": 3373 + }, + { + "epoch": 0.52, + "learning_rate": 1.1672706053218528e-05, + "logits/chosen": -2.447476625442505, + "logits/rejected": -2.993985414505005, + "logps/chosen": -162.68746948242188, + "logps/rejected": -251.65249633789062, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31050339341163635, + "rewards/margins": 6.9254560470581055, + "rewards/rejected": -7.235959529876709, + "step": 3374 + }, + { + "epoch": 0.52, + "learning_rate": 1.167197261268738e-05, + "logits/chosen": -3.0218303203582764, + "logits/rejected": -3.082731246948242, + "logps/chosen": -38.060611724853516, + "logps/rejected": -168.54664611816406, + "loss": 0.036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2357897758483887, + "rewards/margins": 4.515960216522217, + "rewards/rejected": -5.7517499923706055, + "step": 3375 + }, + { + "epoch": 0.53, + "learning_rate": 1.1671239172156232e-05, + "logits/chosen": -3.0733563899993896, + "logits/rejected": -2.827880859375, + "logps/chosen": -140.84397888183594, + "logps/rejected": -221.56500244140625, + "loss": 0.1192, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7254081964492798, + "rewards/margins": 4.174063682556152, + "rewards/rejected": -5.899471759796143, + "step": 3376 + }, + { + "epoch": 0.53, + "learning_rate": 1.1670505731625084e-05, + "logits/chosen": -2.97536039352417, + "logits/rejected": -3.206057548522949, + "logps/chosen": -76.31596374511719, + "logps/rejected": -149.11492919921875, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5970222353935242, + "rewards/margins": 4.147228240966797, + "rewards/rejected": -4.744250297546387, + "step": 3377 + }, + { + "epoch": 0.53, + "learning_rate": 1.1669772291093936e-05, + "logits/chosen": -2.055530548095703, + "logits/rejected": -2.820235252380371, + "logps/chosen": -124.7823486328125, + "logps/rejected": -312.5428466796875, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8455268740653992, + "rewards/margins": 4.462268829345703, + "rewards/rejected": -5.307795524597168, + "step": 3378 + }, + { + "epoch": 0.53, + "learning_rate": 1.1669038850562788e-05, + "logits/chosen": -2.044623851776123, + "logits/rejected": -2.9710748195648193, + "logps/chosen": -51.9677734375, + "logps/rejected": -226.9158172607422, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9676234722137451, + "rewards/margins": 4.281454086303711, + "rewards/rejected": -5.249077796936035, + "step": 3379 + }, + { + "epoch": 0.53, + "learning_rate": 1.166830541003164e-05, + "logits/chosen": -2.4527587890625, + "logits/rejected": -3.0758509635925293, + "logps/chosen": -169.0628662109375, + "logps/rejected": -215.6682586669922, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9377254843711853, + "rewards/margins": 4.421226501464844, + "rewards/rejected": -5.358951568603516, + "step": 3380 + }, + { + "epoch": 0.53, + "learning_rate": 1.1667571969500493e-05, + "logits/chosen": -1.826625943183899, + "logits/rejected": -3.0674588680267334, + "logps/chosen": -69.49465942382812, + "logps/rejected": -146.001953125, + "loss": 1.5856, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.446382999420166, + "rewards/margins": 0.6241761445999146, + "rewards/rejected": -3.070559024810791, + "step": 3381 + }, + { + "epoch": 0.53, + "learning_rate": 1.1666838528969345e-05, + "logits/chosen": -3.089144706726074, + "logits/rejected": -2.778697967529297, + "logps/chosen": -376.04681396484375, + "logps/rejected": -356.0187072753906, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4288597106933594, + "rewards/margins": 6.694787979125977, + "rewards/rejected": -6.265927791595459, + "step": 3382 + }, + { + "epoch": 0.53, + "learning_rate": 1.1666105088438197e-05, + "logits/chosen": -3.0729410648345947, + "logits/rejected": -1.3317583799362183, + "logps/chosen": -246.6719207763672, + "logps/rejected": -174.25054931640625, + "loss": 1.3463, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8374123573303223, + "rewards/margins": 1.3295087814331055, + "rewards/rejected": -4.166921138763428, + "step": 3383 + }, + { + "epoch": 0.53, + "learning_rate": 1.1665371647907049e-05, + "logits/chosen": -3.20719575881958, + "logits/rejected": -2.7164695262908936, + "logps/chosen": -1165.6763916015625, + "logps/rejected": -839.4601440429688, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.841644287109375, + "rewards/margins": 7.401059150695801, + "rewards/rejected": -9.242703437805176, + "step": 3384 + }, + { + "epoch": 0.53, + "learning_rate": 1.16646382073759e-05, + "logits/chosen": -2.041842222213745, + "logits/rejected": -3.191579818725586, + "logps/chosen": -53.714012145996094, + "logps/rejected": -390.59442138671875, + "loss": 0.0547, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7112460136413574, + "rewards/margins": 3.1109461784362793, + "rewards/rejected": -4.822192192077637, + "step": 3385 + }, + { + "epoch": 0.53, + "learning_rate": 1.1663904766844753e-05, + "logits/chosen": -2.833883285522461, + "logits/rejected": -3.048409938812256, + "logps/chosen": -628.8621215820312, + "logps/rejected": -536.7937622070312, + "loss": 0.0692, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2311432361602783, + "rewards/margins": 4.622071743011475, + "rewards/rejected": -5.853215217590332, + "step": 3386 + }, + { + "epoch": 0.53, + "learning_rate": 1.1663171326313605e-05, + "logits/chosen": -2.1099772453308105, + "logits/rejected": -2.655791759490967, + "logps/chosen": -129.0614013671875, + "logps/rejected": -280.04974365234375, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.321847915649414, + "rewards/margins": 7.089920520782471, + "rewards/rejected": -8.411767959594727, + "step": 3387 + }, + { + "epoch": 0.53, + "learning_rate": 1.1662437885782456e-05, + "logits/chosen": -3.0699079036712646, + "logits/rejected": -3.1605517864227295, + "logps/chosen": -254.7811279296875, + "logps/rejected": -298.71722412109375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43799781799316406, + "rewards/margins": 6.379519939422607, + "rewards/rejected": -6.8175177574157715, + "step": 3388 + }, + { + "epoch": 0.53, + "learning_rate": 1.1661704445251308e-05, + "logits/chosen": -2.7734270095825195, + "logits/rejected": -2.621424913406372, + "logps/chosen": -159.5262451171875, + "logps/rejected": -106.4539794921875, + "loss": 5.3326, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.7387847900390625, + "rewards/margins": -3.238231897354126, + "rewards/rejected": -3.5005528926849365, + "step": 3389 + }, + { + "epoch": 0.53, + "learning_rate": 1.1660971004720162e-05, + "logits/chosen": -2.492518424987793, + "logits/rejected": -3.134215831756592, + "logps/chosen": -165.750244140625, + "logps/rejected": -333.89599609375, + "loss": 1.1971, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.061948776245117, + "rewards/margins": 3.5146713256835938, + "rewards/rejected": -6.576620101928711, + "step": 3390 + }, + { + "epoch": 0.53, + "learning_rate": 1.1660237564189014e-05, + "logits/chosen": -2.6176741123199463, + "logits/rejected": -1.860994577407837, + "logps/chosen": -1881.4619140625, + "logps/rejected": -362.0745544433594, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5227630138397217, + "rewards/margins": 4.107402801513672, + "rewards/rejected": -6.630166053771973, + "step": 3391 + }, + { + "epoch": 0.53, + "learning_rate": 1.1659504123657867e-05, + "logits/chosen": -2.626077175140381, + "logits/rejected": -3.3531687259674072, + "logps/chosen": -40.559776306152344, + "logps/rejected": -263.30279541015625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2120187282562256, + "rewards/margins": 5.836694240570068, + "rewards/rejected": -7.048712730407715, + "step": 3392 + }, + { + "epoch": 0.53, + "learning_rate": 1.165877068312672e-05, + "logits/chosen": -1.8861669301986694, + "logits/rejected": -3.0019798278808594, + "logps/chosen": -167.04037475585938, + "logps/rejected": -357.7786865234375, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5165191888809204, + "rewards/margins": 5.56060791015625, + "rewards/rejected": -6.077127456665039, + "step": 3393 + }, + { + "epoch": 0.53, + "learning_rate": 1.1658037242595571e-05, + "logits/chosen": -1.693642497062683, + "logits/rejected": -3.0921504497528076, + "logps/chosen": -172.04908752441406, + "logps/rejected": -430.38299560546875, + "loss": 2.8403, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.471673011779785, + "rewards/margins": -1.3397445678710938, + "rewards/rejected": -2.1319284439086914, + "step": 3394 + }, + { + "epoch": 0.53, + "learning_rate": 1.1657303802064423e-05, + "logits/chosen": -2.9932007789611816, + "logits/rejected": -2.7757720947265625, + "logps/chosen": -119.5931167602539, + "logps/rejected": -191.61387634277344, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.359767198562622, + "rewards/margins": 7.435001373291016, + "rewards/rejected": -9.794768333435059, + "step": 3395 + }, + { + "epoch": 0.53, + "learning_rate": 1.1656570361533275e-05, + "logits/chosen": -2.679009437561035, + "logits/rejected": -2.6451144218444824, + "logps/chosen": -441.52435302734375, + "logps/rejected": -467.26239013671875, + "loss": 0.0684, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1177947521209717, + "rewards/margins": 3.2046310901641846, + "rewards/rejected": -4.322425842285156, + "step": 3396 + }, + { + "epoch": 0.53, + "learning_rate": 1.1655836921002127e-05, + "logits/chosen": -3.054569721221924, + "logits/rejected": -3.431889772415161, + "logps/chosen": -66.96080017089844, + "logps/rejected": -226.11941528320312, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5155384540557861, + "rewards/margins": 5.37644100189209, + "rewards/rejected": -5.891979217529297, + "step": 3397 + }, + { + "epoch": 0.53, + "learning_rate": 1.1655103480470979e-05, + "logits/chosen": -2.144512891769409, + "logits/rejected": -3.208378314971924, + "logps/chosen": -113.91417694091797, + "logps/rejected": -454.65936279296875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.63060462474823, + "rewards/margins": 7.966949462890625, + "rewards/rejected": -8.597554206848145, + "step": 3398 + }, + { + "epoch": 0.53, + "learning_rate": 1.1654370039939832e-05, + "logits/chosen": -1.9048575162887573, + "logits/rejected": -1.8332281112670898, + "logps/chosen": -311.83990478515625, + "logps/rejected": -390.9356384277344, + "loss": 2.2591, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.611150741577148, + "rewards/margins": 1.615389347076416, + "rewards/rejected": -6.226539611816406, + "step": 3399 + }, + { + "epoch": 0.53, + "learning_rate": 1.1653636599408684e-05, + "logits/chosen": -3.0521836280822754, + "logits/rejected": -3.1714675426483154, + "logps/chosen": -421.9328308105469, + "logps/rejected": -458.5731201171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4250274896621704, + "rewards/margins": 8.610589981079102, + "rewards/rejected": -8.185563087463379, + "step": 3400 + }, + { + "epoch": 0.53, + "learning_rate": 1.1652903158877536e-05, + "logits/chosen": -2.7445123195648193, + "logits/rejected": -2.7453737258911133, + "logps/chosen": -204.30870056152344, + "logps/rejected": -231.784423828125, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0810508728027344, + "rewards/margins": 5.74441385269165, + "rewards/rejected": -6.825464725494385, + "step": 3401 + }, + { + "epoch": 0.53, + "learning_rate": 1.1652169718346388e-05, + "logits/chosen": -2.242713212966919, + "logits/rejected": -3.115852117538452, + "logps/chosen": -167.16476440429688, + "logps/rejected": -365.8619689941406, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2396499514579773, + "rewards/margins": 6.276412010192871, + "rewards/rejected": -6.516061782836914, + "step": 3402 + }, + { + "epoch": 0.53, + "learning_rate": 1.165143627781524e-05, + "logits/chosen": -2.35227108001709, + "logits/rejected": -3.06219744682312, + "logps/chosen": -735.4976806640625, + "logps/rejected": -506.1493225097656, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8625213503837585, + "rewards/margins": 5.9014177322387695, + "rewards/rejected": -6.763938903808594, + "step": 3403 + }, + { + "epoch": 0.53, + "learning_rate": 1.1650702837284092e-05, + "logits/chosen": -2.4052138328552246, + "logits/rejected": -3.0132412910461426, + "logps/chosen": -61.05466842651367, + "logps/rejected": -234.61636352539062, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5190422534942627, + "rewards/margins": 3.6126272678375244, + "rewards/rejected": -7.131669521331787, + "step": 3404 + }, + { + "epoch": 0.53, + "learning_rate": 1.1649969396752943e-05, + "logits/chosen": -2.4519591331481934, + "logits/rejected": -2.963953971862793, + "logps/chosen": -205.44915771484375, + "logps/rejected": -116.3633804321289, + "loss": 3.6627, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.651965141296387, + "rewards/margins": -1.8863298892974854, + "rewards/rejected": -3.7656350135803223, + "step": 3405 + }, + { + "epoch": 0.53, + "learning_rate": 1.1649235956221795e-05, + "logits/chosen": -1.2374471426010132, + "logits/rejected": -2.6277029514312744, + "logps/chosen": -169.75535583496094, + "logps/rejected": -332.953125, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8551244735717773, + "rewards/margins": 4.685435771942139, + "rewards/rejected": -7.540560245513916, + "step": 3406 + }, + { + "epoch": 0.53, + "learning_rate": 1.1648502515690647e-05, + "logits/chosen": -2.256039619445801, + "logits/rejected": -2.9104995727539062, + "logps/chosen": -122.56521606445312, + "logps/rejected": -296.2627258300781, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.104466199874878, + "rewards/margins": 4.891468048095703, + "rewards/rejected": -6.995934009552002, + "step": 3407 + }, + { + "epoch": 0.53, + "learning_rate": 1.16477690751595e-05, + "logits/chosen": -2.5226998329162598, + "logits/rejected": -3.0954270362854004, + "logps/chosen": -152.70806884765625, + "logps/rejected": -404.33111572265625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42124825716018677, + "rewards/margins": 8.242405891418457, + "rewards/rejected": -8.663654327392578, + "step": 3408 + }, + { + "epoch": 0.53, + "learning_rate": 1.1647035634628353e-05, + "logits/chosen": -3.234034776687622, + "logits/rejected": -2.4922657012939453, + "logps/chosen": -477.6170349121094, + "logps/rejected": -423.52801513671875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3450813293457031, + "rewards/margins": 7.352591514587402, + "rewards/rejected": -7.6976728439331055, + "step": 3409 + }, + { + "epoch": 0.53, + "learning_rate": 1.1646302194097205e-05, + "logits/chosen": -1.5695123672485352, + "logits/rejected": -3.064664363861084, + "logps/chosen": -56.743778228759766, + "logps/rejected": -265.3808898925781, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2041096687316895, + "rewards/margins": 5.227754592895508, + "rewards/rejected": -7.431864261627197, + "step": 3410 + }, + { + "epoch": 0.53, + "learning_rate": 1.1645568753566056e-05, + "logits/chosen": -2.153646230697632, + "logits/rejected": -3.134887456893921, + "logps/chosen": -104.26815795898438, + "logps/rejected": -200.81085205078125, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0705946683883667, + "rewards/margins": 5.540187358856201, + "rewards/rejected": -6.610781669616699, + "step": 3411 + }, + { + "epoch": 0.53, + "learning_rate": 1.1644835313034908e-05, + "logits/chosen": -2.764042854309082, + "logits/rejected": -2.902674913406372, + "logps/chosen": -267.94873046875, + "logps/rejected": -224.83929443359375, + "loss": 1.9627, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.814530849456787, + "rewards/margins": 0.26238250732421875, + "rewards/rejected": -4.076913833618164, + "step": 3412 + }, + { + "epoch": 0.53, + "learning_rate": 1.164410187250376e-05, + "logits/chosen": -2.427637815475464, + "logits/rejected": -3.211090326309204, + "logps/chosen": -31.91754150390625, + "logps/rejected": -241.6633758544922, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8538944125175476, + "rewards/margins": 7.47509241104126, + "rewards/rejected": -8.328987121582031, + "step": 3413 + }, + { + "epoch": 0.53, + "learning_rate": 1.1643368431972612e-05, + "logits/chosen": -3.196225643157959, + "logits/rejected": -3.0391643047332764, + "logps/chosen": -133.61209106445312, + "logps/rejected": -154.5181121826172, + "loss": 2.6758, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.110410213470459, + "rewards/margins": -1.0308233499526978, + "rewards/rejected": -3.079586982727051, + "step": 3414 + }, + { + "epoch": 0.53, + "learning_rate": 1.1642634991441464e-05, + "logits/chosen": -2.6418111324310303, + "logits/rejected": -3.191939115524292, + "logps/chosen": -109.33219909667969, + "logps/rejected": -512.401611328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9339932203292847, + "rewards/margins": 8.161269187927246, + "rewards/rejected": -9.09526252746582, + "step": 3415 + }, + { + "epoch": 0.53, + "learning_rate": 1.1641901550910316e-05, + "logits/chosen": -2.695411205291748, + "logits/rejected": -2.9995408058166504, + "logps/chosen": -151.9547119140625, + "logps/rejected": -210.81332397460938, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14560888707637787, + "rewards/margins": 7.015934944152832, + "rewards/rejected": -7.161543846130371, + "step": 3416 + }, + { + "epoch": 0.53, + "learning_rate": 1.164116811037917e-05, + "logits/chosen": -3.2010464668273926, + "logits/rejected": -3.0545036792755127, + "logps/chosen": -463.322998046875, + "logps/rejected": -455.077392578125, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5268058776855469, + "rewards/margins": 5.853359222412109, + "rewards/rejected": -7.380165100097656, + "step": 3417 + }, + { + "epoch": 0.53, + "learning_rate": 1.1640434669848021e-05, + "logits/chosen": -2.1365716457366943, + "logits/rejected": -3.1698222160339355, + "logps/chosen": -50.50838088989258, + "logps/rejected": -263.70281982421875, + "loss": 0.6894, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.345015048980713, + "rewards/margins": 0.9551377296447754, + "rewards/rejected": -3.3001527786254883, + "step": 3418 + }, + { + "epoch": 0.53, + "learning_rate": 1.1639701229316873e-05, + "logits/chosen": -2.017627000808716, + "logits/rejected": -2.949887275695801, + "logps/chosen": -144.2680206298828, + "logps/rejected": -318.0380554199219, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41246509552001953, + "rewards/margins": 7.751822471618652, + "rewards/rejected": -8.164287567138672, + "step": 3419 + }, + { + "epoch": 0.53, + "learning_rate": 1.1638967788785725e-05, + "logits/chosen": -3.051708698272705, + "logits/rejected": -2.6297800540924072, + "logps/chosen": -262.7641906738281, + "logps/rejected": -291.06500244140625, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9024734497070312, + "rewards/margins": 4.2218523025512695, + "rewards/rejected": -5.124325752258301, + "step": 3420 + }, + { + "epoch": 0.53, + "learning_rate": 1.1638234348254577e-05, + "logits/chosen": -2.8447580337524414, + "logits/rejected": -3.2521097660064697, + "logps/chosen": -261.5599670410156, + "logps/rejected": -485.8760070800781, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08612976968288422, + "rewards/margins": 7.455072402954102, + "rewards/rejected": -7.541202068328857, + "step": 3421 + }, + { + "epoch": 0.53, + "learning_rate": 1.1637500907723429e-05, + "logits/chosen": -2.6806106567382812, + "logits/rejected": -3.0661838054656982, + "logps/chosen": -60.926822662353516, + "logps/rejected": -328.84149169921875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1370586156845093, + "rewards/margins": 6.963596343994141, + "rewards/rejected": -8.100654602050781, + "step": 3422 + }, + { + "epoch": 0.53, + "learning_rate": 1.163676746719228e-05, + "logits/chosen": -3.052414894104004, + "logits/rejected": -2.4000954627990723, + "logps/chosen": -505.3635559082031, + "logps/rejected": -452.77154541015625, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.044361889362335205, + "rewards/margins": 5.896859645843506, + "rewards/rejected": -5.941221714019775, + "step": 3423 + }, + { + "epoch": 0.53, + "learning_rate": 1.1636034026661133e-05, + "logits/chosen": -3.0057902336120605, + "logits/rejected": -2.411738157272339, + "logps/chosen": -612.1006469726562, + "logps/rejected": -387.83349609375, + "loss": 0.0534, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1156554222106934, + "rewards/margins": 3.899026393890381, + "rewards/rejected": -5.014681816101074, + "step": 3424 + }, + { + "epoch": 0.53, + "learning_rate": 1.1635300586129986e-05, + "logits/chosen": -2.3087637424468994, + "logits/rejected": -3.1266627311706543, + "logps/chosen": -224.7101593017578, + "logps/rejected": -334.8081970214844, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.907630205154419, + "rewards/margins": 6.729608535766602, + "rewards/rejected": -7.637238502502441, + "step": 3425 + }, + { + "epoch": 0.53, + "learning_rate": 1.1634567145598838e-05, + "logits/chosen": -2.2093679904937744, + "logits/rejected": -2.8647923469543457, + "logps/chosen": -50.68184280395508, + "logps/rejected": -226.8555145263672, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6176274418830872, + "rewards/margins": 6.13055419921875, + "rewards/rejected": -6.748181343078613, + "step": 3426 + }, + { + "epoch": 0.53, + "learning_rate": 1.1633833705067692e-05, + "logits/chosen": -2.9686033725738525, + "logits/rejected": -2.6942296028137207, + "logps/chosen": -342.7198181152344, + "logps/rejected": -269.47857666015625, + "loss": 0.0363, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6812076568603516, + "rewards/margins": 5.334884166717529, + "rewards/rejected": -7.016091823577881, + "step": 3427 + }, + { + "epoch": 0.53, + "learning_rate": 1.1633100264536543e-05, + "logits/chosen": -2.8709940910339355, + "logits/rejected": -3.2294304370880127, + "logps/chosen": -1092.1263427734375, + "logps/rejected": -789.2349243164062, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27442625164985657, + "rewards/margins": 7.6949462890625, + "rewards/rejected": -7.969372749328613, + "step": 3428 + }, + { + "epoch": 0.53, + "learning_rate": 1.1632366824005395e-05, + "logits/chosen": -3.101240396499634, + "logits/rejected": -3.1296932697296143, + "logps/chosen": -50.048851013183594, + "logps/rejected": -137.72647094726562, + "loss": 0.0293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5667266845703125, + "rewards/margins": 4.525463581085205, + "rewards/rejected": -5.092190265655518, + "step": 3429 + }, + { + "epoch": 0.53, + "learning_rate": 1.1631633383474247e-05, + "logits/chosen": -0.7554040551185608, + "logits/rejected": -2.747152805328369, + "logps/chosen": -101.9488525390625, + "logps/rejected": -368.3831481933594, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3724968433380127, + "rewards/margins": 5.715061187744141, + "rewards/rejected": -8.087557792663574, + "step": 3430 + }, + { + "epoch": 0.53, + "learning_rate": 1.1630899942943099e-05, + "logits/chosen": -2.5785322189331055, + "logits/rejected": -3.1627233028411865, + "logps/chosen": -275.73028564453125, + "logps/rejected": -394.1482238769531, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8869481086730957, + "rewards/margins": 4.026732444763184, + "rewards/rejected": -5.913680076599121, + "step": 3431 + }, + { + "epoch": 0.53, + "learning_rate": 1.1630166502411951e-05, + "logits/chosen": -3.1546943187713623, + "logits/rejected": -2.27740740776062, + "logps/chosen": -111.72130584716797, + "logps/rejected": -34.121891021728516, + "loss": 1.1333, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.9734432697296143, + "rewards/margins": -0.685238778591156, + "rewards/rejected": -2.2882044315338135, + "step": 3432 + }, + { + "epoch": 0.53, + "learning_rate": 1.1629433061880803e-05, + "logits/chosen": -2.609156847000122, + "logits/rejected": -3.2797629833221436, + "logps/chosen": -80.74925994873047, + "logps/rejected": -430.89910888671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43530482053756714, + "rewards/margins": 9.54195785522461, + "rewards/rejected": -9.977262496948242, + "step": 3433 + }, + { + "epoch": 0.53, + "learning_rate": 1.1628699621349656e-05, + "logits/chosen": -2.9190821647644043, + "logits/rejected": -2.4291954040527344, + "logps/chosen": -107.01261901855469, + "logps/rejected": -314.1393737792969, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7032708525657654, + "rewards/margins": 7.607430458068848, + "rewards/rejected": -8.310701370239258, + "step": 3434 + }, + { + "epoch": 0.53, + "learning_rate": 1.1627966180818508e-05, + "logits/chosen": -2.9730076789855957, + "logits/rejected": -2.1725754737854004, + "logps/chosen": -450.35443115234375, + "logps/rejected": -440.905029296875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.861770749092102, + "rewards/margins": 8.177162170410156, + "rewards/rejected": -9.038932800292969, + "step": 3435 + }, + { + "epoch": 0.53, + "learning_rate": 1.162723274028736e-05, + "logits/chosen": -2.3994808197021484, + "logits/rejected": -2.770141124725342, + "logps/chosen": -210.58737182617188, + "logps/rejected": -226.69204711914062, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7671467065811157, + "rewards/margins": 6.917471408843994, + "rewards/rejected": -7.68461799621582, + "step": 3436 + }, + { + "epoch": 0.53, + "learning_rate": 1.1626499299756212e-05, + "logits/chosen": -2.9624061584472656, + "logits/rejected": -3.217082977294922, + "logps/chosen": -98.92483520507812, + "logps/rejected": -308.9662170410156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2496238946914673, + "rewards/margins": 8.199172019958496, + "rewards/rejected": -9.448795318603516, + "step": 3437 + }, + { + "epoch": 0.53, + "learning_rate": 1.1625765859225064e-05, + "logits/chosen": -2.603635787963867, + "logits/rejected": -3.104107141494751, + "logps/chosen": -149.03257751464844, + "logps/rejected": -378.2819519042969, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7969444394111633, + "rewards/margins": 6.012758255004883, + "rewards/rejected": -6.809702396392822, + "step": 3438 + }, + { + "epoch": 0.53, + "learning_rate": 1.1625032418693916e-05, + "logits/chosen": -2.260430097579956, + "logits/rejected": -3.0315136909484863, + "logps/chosen": -543.1287841796875, + "logps/rejected": -516.7957763671875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3944412171840668, + "rewards/margins": 6.8431901931762695, + "rewards/rejected": -7.237631320953369, + "step": 3439 + }, + { + "epoch": 0.53, + "learning_rate": 1.1624298978162768e-05, + "logits/chosen": -2.997097969055176, + "logits/rejected": -3.1955039501190186, + "logps/chosen": -134.37998962402344, + "logps/rejected": -236.42559814453125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3849170207977295, + "rewards/margins": 6.771488189697266, + "rewards/rejected": -8.156404495239258, + "step": 3440 + }, + { + "epoch": 0.54, + "learning_rate": 1.162356553763162e-05, + "logits/chosen": -3.2829582691192627, + "logits/rejected": -2.735903263092041, + "logps/chosen": -542.2207641601562, + "logps/rejected": -338.8111572265625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5704481601715088, + "rewards/margins": 6.339840888977051, + "rewards/rejected": -6.910289287567139, + "step": 3441 + }, + { + "epoch": 0.54, + "learning_rate": 1.1622832097100471e-05, + "logits/chosen": -3.1112775802612305, + "logits/rejected": -3.005729913711548, + "logps/chosen": -170.63197326660156, + "logps/rejected": -261.38623046875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39403724670410156, + "rewards/margins": 7.69563627243042, + "rewards/rejected": -8.08967399597168, + "step": 3442 + }, + { + "epoch": 0.54, + "learning_rate": 1.1622098656569325e-05, + "logits/chosen": -3.1126046180725098, + "logits/rejected": -2.3960154056549072, + "logps/chosen": -266.4393615722656, + "logps/rejected": -224.68887329101562, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2075468301773071, + "rewards/margins": 8.203865051269531, + "rewards/rejected": -9.41141128540039, + "step": 3443 + }, + { + "epoch": 0.54, + "learning_rate": 1.1621365216038177e-05, + "logits/chosen": -1.7343573570251465, + "logits/rejected": -2.89760684967041, + "logps/chosen": -45.213768005371094, + "logps/rejected": -216.57452392578125, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9656239151954651, + "rewards/margins": 5.915496349334717, + "rewards/rejected": -6.881120204925537, + "step": 3444 + }, + { + "epoch": 0.54, + "learning_rate": 1.1620631775507029e-05, + "logits/chosen": -2.6670446395874023, + "logits/rejected": -3.072225332260132, + "logps/chosen": -130.025146484375, + "logps/rejected": -167.2705535888672, + "loss": 1.0417, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8570553064346313, + "rewards/margins": 1.7788982391357422, + "rewards/rejected": -3.635953426361084, + "step": 3445 + }, + { + "epoch": 0.54, + "learning_rate": 1.161989833497588e-05, + "logits/chosen": -1.7878139019012451, + "logits/rejected": -3.0812859535217285, + "logps/chosen": -36.999229431152344, + "logps/rejected": -250.27120971679688, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5802135467529297, + "rewards/margins": 7.040250778198242, + "rewards/rejected": -7.620463848114014, + "step": 3446 + }, + { + "epoch": 0.54, + "learning_rate": 1.1619164894444733e-05, + "logits/chosen": -2.4296159744262695, + "logits/rejected": -2.954902410507202, + "logps/chosen": -159.1925048828125, + "logps/rejected": -292.70379638671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4644668698310852, + "rewards/margins": 8.729288101196289, + "rewards/rejected": -9.193754196166992, + "step": 3447 + }, + { + "epoch": 0.54, + "learning_rate": 1.1618431453913584e-05, + "logits/chosen": -2.193624258041382, + "logits/rejected": -2.849789619445801, + "logps/chosen": -207.5390625, + "logps/rejected": -208.20089721679688, + "loss": 1.9781, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7088332176208496, + "rewards/margins": -0.12596464157104492, + "rewards/rejected": -2.5828685760498047, + "step": 3448 + }, + { + "epoch": 0.54, + "learning_rate": 1.1617698013382436e-05, + "logits/chosen": -3.1271026134490967, + "logits/rejected": -2.7708497047424316, + "logps/chosen": -194.57025146484375, + "logps/rejected": -59.50981521606445, + "loss": 2.6784, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.887094020843506, + "rewards/margins": -2.5876824855804443, + "rewards/rejected": -1.299411654472351, + "step": 3449 + }, + { + "epoch": 0.54, + "learning_rate": 1.1616964572851288e-05, + "logits/chosen": -1.7541264295578003, + "logits/rejected": -2.777329444885254, + "logps/chosen": -223.05126953125, + "logps/rejected": -407.466064453125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1384334564208984, + "rewards/margins": 8.089346885681152, + "rewards/rejected": -9.22778034210205, + "step": 3450 + }, + { + "epoch": 0.54, + "learning_rate": 1.161623113232014e-05, + "logits/chosen": -2.9874932765960693, + "logits/rejected": -2.9189724922180176, + "logps/chosen": -363.65509033203125, + "logps/rejected": -440.8035888671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.683641791343689, + "rewards/margins": 8.274092674255371, + "rewards/rejected": -8.957735061645508, + "step": 3451 + }, + { + "epoch": 0.54, + "learning_rate": 1.1615497691788994e-05, + "logits/chosen": -1.6250884532928467, + "logits/rejected": -2.9113478660583496, + "logps/chosen": -141.87503051757812, + "logps/rejected": -398.1039123535156, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.446587324142456, + "rewards/margins": 7.804920196533203, + "rewards/rejected": -9.251507759094238, + "step": 3452 + }, + { + "epoch": 0.54, + "learning_rate": 1.1614764251257845e-05, + "logits/chosen": -2.989677906036377, + "logits/rejected": -1.8466733694076538, + "logps/chosen": -717.7590942382812, + "logps/rejected": -449.169677734375, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9087675213813782, + "rewards/margins": 5.011414051055908, + "rewards/rejected": -5.9201812744140625, + "step": 3453 + }, + { + "epoch": 0.54, + "learning_rate": 1.1614030810726697e-05, + "logits/chosen": -2.702258586883545, + "logits/rejected": -3.1371424198150635, + "logps/chosen": -112.43279266357422, + "logps/rejected": -187.54055786132812, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9587721824645996, + "rewards/margins": 4.406756401062012, + "rewards/rejected": -6.365528583526611, + "step": 3454 + }, + { + "epoch": 0.54, + "learning_rate": 1.161329737019555e-05, + "logits/chosen": -3.142720937728882, + "logits/rejected": -2.0344338417053223, + "logps/chosen": -133.0487518310547, + "logps/rejected": -229.8490753173828, + "loss": 0.1561, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.133458137512207, + "rewards/margins": 5.772967338562012, + "rewards/rejected": -8.906425476074219, + "step": 3455 + }, + { + "epoch": 0.54, + "learning_rate": 1.1612563929664401e-05, + "logits/chosen": -2.8783435821533203, + "logits/rejected": -3.064018964767456, + "logps/chosen": -340.9245300292969, + "logps/rejected": -337.5218505859375, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.87798011302948, + "rewards/margins": 4.825849533081055, + "rewards/rejected": -6.703829765319824, + "step": 3456 + }, + { + "epoch": 0.54, + "learning_rate": 1.1611830489133253e-05, + "logits/chosen": -2.887139081954956, + "logits/rejected": -2.685189723968506, + "logps/chosen": -108.97113800048828, + "logps/rejected": -166.57327270507812, + "loss": 0.9163, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5110673904418945, + "rewards/margins": 2.7820682525634766, + "rewards/rejected": -5.293135643005371, + "step": 3457 + }, + { + "epoch": 0.54, + "learning_rate": 1.1611097048602105e-05, + "logits/chosen": -2.373894453048706, + "logits/rejected": -3.1565942764282227, + "logps/chosen": -96.8697509765625, + "logps/rejected": -367.6053466796875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7104034423828125, + "rewards/margins": 6.616171836853027, + "rewards/rejected": -9.326574325561523, + "step": 3458 + }, + { + "epoch": 0.54, + "learning_rate": 1.1610363608070958e-05, + "logits/chosen": -1.1954400539398193, + "logits/rejected": -2.920016050338745, + "logps/chosen": -48.245601654052734, + "logps/rejected": -266.87677001953125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1701931953430176, + "rewards/margins": 7.732008457183838, + "rewards/rejected": -8.902201652526855, + "step": 3459 + }, + { + "epoch": 0.54, + "learning_rate": 1.160963016753981e-05, + "logits/chosen": -1.6155149936676025, + "logits/rejected": -2.9296374320983887, + "logps/chosen": -60.00376510620117, + "logps/rejected": -351.8804931640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1234471797943115, + "rewards/margins": 9.678117752075195, + "rewards/rejected": -10.801565170288086, + "step": 3460 + }, + { + "epoch": 0.54, + "learning_rate": 1.1608896727008664e-05, + "logits/chosen": -2.8862197399139404, + "logits/rejected": -2.975182056427002, + "logps/chosen": -168.50424194335938, + "logps/rejected": -320.3420104980469, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.988472819328308, + "rewards/margins": 7.679495811462402, + "rewards/rejected": -9.66796875, + "step": 3461 + }, + { + "epoch": 0.54, + "learning_rate": 1.1608163286477516e-05, + "logits/chosen": -2.853513717651367, + "logits/rejected": -2.0813229084014893, + "logps/chosen": -97.1800537109375, + "logps/rejected": -142.89553833007812, + "loss": 0.0694, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.493532657623291, + "rewards/margins": 3.612278938293457, + "rewards/rejected": -6.105811595916748, + "step": 3462 + }, + { + "epoch": 0.54, + "learning_rate": 1.1607429845946368e-05, + "logits/chosen": -1.1765280961990356, + "logits/rejected": -2.842294216156006, + "logps/chosen": -88.59461212158203, + "logps/rejected": -381.7670593261719, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.402559518814087, + "rewards/margins": 6.03759241104126, + "rewards/rejected": -7.440152168273926, + "step": 3463 + }, + { + "epoch": 0.54, + "learning_rate": 1.160669640541522e-05, + "logits/chosen": -2.881047248840332, + "logits/rejected": -3.2307076454162598, + "logps/chosen": -101.72997283935547, + "logps/rejected": -301.3656005859375, + "loss": 2.6866, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.2427825927734375, + "rewards/margins": 0.46221399307250977, + "rewards/rejected": -4.704996585845947, + "step": 3464 + }, + { + "epoch": 0.54, + "learning_rate": 1.1605962964884071e-05, + "logits/chosen": -2.2148778438568115, + "logits/rejected": -3.0875210762023926, + "logps/chosen": -136.80401611328125, + "logps/rejected": -675.4296875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9911754727363586, + "rewards/margins": 11.697667121887207, + "rewards/rejected": -12.6888427734375, + "step": 3465 + }, + { + "epoch": 0.54, + "learning_rate": 1.1605229524352923e-05, + "logits/chosen": -2.173668384552002, + "logits/rejected": -2.982931613922119, + "logps/chosen": -208.10952758789062, + "logps/rejected": -351.89599609375, + "loss": 0.0463, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.748823881149292, + "rewards/margins": 6.070823669433594, + "rewards/rejected": -7.819647789001465, + "step": 3466 + }, + { + "epoch": 0.54, + "learning_rate": 1.1604496083821775e-05, + "logits/chosen": -2.6022703647613525, + "logits/rejected": -3.000149965286255, + "logps/chosen": -399.02545166015625, + "logps/rejected": -349.5186462402344, + "loss": 1.9301, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3019602298736572, + "rewards/margins": 2.2037835121154785, + "rewards/rejected": -4.505743980407715, + "step": 3467 + }, + { + "epoch": 0.54, + "learning_rate": 1.1603762643290627e-05, + "logits/chosen": -1.0285587310791016, + "logits/rejected": -2.6817617416381836, + "logps/chosen": -52.603904724121094, + "logps/rejected": -326.88427734375, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.853068470954895, + "rewards/margins": 5.336216926574707, + "rewards/rejected": -7.1892852783203125, + "step": 3468 + }, + { + "epoch": 0.54, + "learning_rate": 1.1603029202759479e-05, + "logits/chosen": -2.49753999710083, + "logits/rejected": -2.865934371948242, + "logps/chosen": -173.3043670654297, + "logps/rejected": -405.560302734375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2525566816329956, + "rewards/margins": 8.811331748962402, + "rewards/rejected": -10.063888549804688, + "step": 3469 + }, + { + "epoch": 0.54, + "learning_rate": 1.1602295762228332e-05, + "logits/chosen": -2.606020212173462, + "logits/rejected": -2.5452117919921875, + "logps/chosen": -241.1500244140625, + "logps/rejected": -234.79884338378906, + "loss": 0.923, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9293904304504395, + "rewards/margins": 3.6275339126586914, + "rewards/rejected": -6.556924343109131, + "step": 3470 + }, + { + "epoch": 0.54, + "learning_rate": 1.1601562321697184e-05, + "logits/chosen": -3.014543294906616, + "logits/rejected": -1.8389174938201904, + "logps/chosen": -185.8608856201172, + "logps/rejected": -207.81431579589844, + "loss": 1.7353, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.842961549758911, + "rewards/margins": 2.7820639610290527, + "rewards/rejected": -5.625025272369385, + "step": 3471 + }, + { + "epoch": 0.54, + "learning_rate": 1.1600828881166036e-05, + "logits/chosen": -3.0849549770355225, + "logits/rejected": -3.2631406784057617, + "logps/chosen": -378.2932434082031, + "logps/rejected": -315.7875671386719, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.38714951276779175, + "rewards/margins": 5.410258769989014, + "rewards/rejected": -5.797408103942871, + "step": 3472 + }, + { + "epoch": 0.54, + "learning_rate": 1.1600095440634888e-05, + "logits/chosen": -2.759904146194458, + "logits/rejected": -3.104301691055298, + "logps/chosen": -766.861328125, + "logps/rejected": -776.65185546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.109148383140564, + "rewards/margins": 8.447816848754883, + "rewards/rejected": -9.556965827941895, + "step": 3473 + }, + { + "epoch": 0.54, + "learning_rate": 1.159936200010374e-05, + "logits/chosen": -2.9553794860839844, + "logits/rejected": -1.53363037109375, + "logps/chosen": -705.7348022460938, + "logps/rejected": -344.081298828125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19553983211517334, + "rewards/margins": 8.470359802246094, + "rewards/rejected": -8.274819374084473, + "step": 3474 + }, + { + "epoch": 0.54, + "learning_rate": 1.1598628559572592e-05, + "logits/chosen": -2.4526820182800293, + "logits/rejected": -3.080209255218506, + "logps/chosen": -68.40432739257812, + "logps/rejected": -308.0743103027344, + "loss": 0.096, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9116243124008179, + "rewards/margins": 4.540735244750977, + "rewards/rejected": -6.452360153198242, + "step": 3475 + }, + { + "epoch": 0.54, + "learning_rate": 1.1597895119041444e-05, + "logits/chosen": -2.503626823425293, + "logits/rejected": -2.9889307022094727, + "logps/chosen": -260.003173828125, + "logps/rejected": -367.0810546875, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6233868598937988, + "rewards/margins": 5.791807174682617, + "rewards/rejected": -7.415194034576416, + "step": 3476 + }, + { + "epoch": 0.54, + "learning_rate": 1.1597161678510296e-05, + "logits/chosen": -1.5501651763916016, + "logits/rejected": -2.927595615386963, + "logps/chosen": -45.76224899291992, + "logps/rejected": -227.037841796875, + "loss": 0.0871, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.991896629333496, + "rewards/margins": 5.20255708694458, + "rewards/rejected": -7.194453239440918, + "step": 3477 + }, + { + "epoch": 0.54, + "learning_rate": 1.1596428237979148e-05, + "logits/chosen": -2.7782373428344727, + "logits/rejected": -2.0519678592681885, + "logps/chosen": -76.11048126220703, + "logps/rejected": -103.11843872070312, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.504565417766571, + "rewards/margins": 5.995386123657227, + "rewards/rejected": -6.4999518394470215, + "step": 3478 + }, + { + "epoch": 0.54, + "learning_rate": 1.1595694797448001e-05, + "logits/chosen": -1.6051790714263916, + "logits/rejected": -2.7053024768829346, + "logps/chosen": -49.97514343261719, + "logps/rejected": -327.1162109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.460050106048584, + "rewards/margins": 8.30897045135498, + "rewards/rejected": -9.769020080566406, + "step": 3479 + }, + { + "epoch": 0.54, + "learning_rate": 1.1594961356916853e-05, + "logits/chosen": -2.83882737159729, + "logits/rejected": -3.1125991344451904, + "logps/chosen": -225.73773193359375, + "logps/rejected": -305.3289794921875, + "loss": 0.3972, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8313610553741455, + "rewards/margins": 3.8583545684814453, + "rewards/rejected": -5.689715385437012, + "step": 3480 + }, + { + "epoch": 0.54, + "learning_rate": 1.1594227916385705e-05, + "logits/chosen": -1.8005539178848267, + "logits/rejected": -2.575997829437256, + "logps/chosen": -37.01507568359375, + "logps/rejected": -187.00942993164062, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.287600040435791, + "rewards/margins": 5.2016167640686035, + "rewards/rejected": -7.4892168045043945, + "step": 3481 + }, + { + "epoch": 0.54, + "learning_rate": 1.1593494475854557e-05, + "logits/chosen": -3.1021506786346436, + "logits/rejected": -3.083644151687622, + "logps/chosen": -101.11429595947266, + "logps/rejected": -84.69610595703125, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7701139450073242, + "rewards/margins": 4.73842716217041, + "rewards/rejected": -6.508541107177734, + "step": 3482 + }, + { + "epoch": 0.54, + "learning_rate": 1.1592761035323409e-05, + "logits/chosen": -2.929081916809082, + "logits/rejected": -2.444705009460449, + "logps/chosen": -372.14154052734375, + "logps/rejected": -225.98951721191406, + "loss": 0.0258, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0203933715820312, + "rewards/margins": 4.771315574645996, + "rewards/rejected": -6.791708946228027, + "step": 3483 + }, + { + "epoch": 0.54, + "learning_rate": 1.159202759479226e-05, + "logits/chosen": -1.7440980672836304, + "logits/rejected": -3.0990939140319824, + "logps/chosen": -153.9792938232422, + "logps/rejected": -255.63536071777344, + "loss": 2.0851, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.93098783493042, + "rewards/margins": -1.4536415338516235, + "rewards/rejected": -3.477346420288086, + "step": 3484 + }, + { + "epoch": 0.54, + "learning_rate": 1.1591294154261112e-05, + "logits/chosen": -2.7567505836486816, + "logits/rejected": -2.8875136375427246, + "logps/chosen": -179.99684143066406, + "logps/rejected": -240.01486206054688, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3267203569412231, + "rewards/margins": 7.642076015472412, + "rewards/rejected": -8.968796730041504, + "step": 3485 + }, + { + "epoch": 0.54, + "learning_rate": 1.1590560713729964e-05, + "logits/chosen": -2.0531740188598633, + "logits/rejected": -2.9633994102478027, + "logps/chosen": -76.29841613769531, + "logps/rejected": -238.9976043701172, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9486059546470642, + "rewards/margins": 6.9600324630737305, + "rewards/rejected": -7.908638000488281, + "step": 3486 + }, + { + "epoch": 0.54, + "learning_rate": 1.1589827273198816e-05, + "logits/chosen": -2.910972833633423, + "logits/rejected": -2.3710274696350098, + "logps/chosen": -281.1093444824219, + "logps/rejected": -263.9953918457031, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.495435357093811, + "rewards/margins": 5.553335189819336, + "rewards/rejected": -7.048769950866699, + "step": 3487 + }, + { + "epoch": 0.54, + "learning_rate": 1.158909383266767e-05, + "logits/chosen": -3.027560234069824, + "logits/rejected": -2.0208871364593506, + "logps/chosen": -208.79034423828125, + "logps/rejected": -183.17054748535156, + "loss": 1.1921, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7460341453552246, + "rewards/margins": 2.3565940856933594, + "rewards/rejected": -6.102628231048584, + "step": 3488 + }, + { + "epoch": 0.54, + "learning_rate": 1.1588360392136522e-05, + "logits/chosen": -3.067734479904175, + "logits/rejected": -2.9902288913726807, + "logps/chosen": -168.98004150390625, + "logps/rejected": -163.6572723388672, + "loss": 3.6482, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.723618507385254, + "rewards/margins": -0.9714500904083252, + "rewards/rejected": -3.7521684169769287, + "step": 3489 + }, + { + "epoch": 0.54, + "learning_rate": 1.1587626951605373e-05, + "logits/chosen": -2.501368284225464, + "logits/rejected": -2.773153305053711, + "logps/chosen": -39.83953094482422, + "logps/rejected": -240.5343017578125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9805073142051697, + "rewards/margins": 8.849591255187988, + "rewards/rejected": -9.830098152160645, + "step": 3490 + }, + { + "epoch": 0.54, + "learning_rate": 1.1586893511074225e-05, + "logits/chosen": -3.038498878479004, + "logits/rejected": -2.5399107933044434, + "logps/chosen": -185.720703125, + "logps/rejected": -301.31829833984375, + "loss": 1.8587, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.376623630523682, + "rewards/margins": 2.6400370597839355, + "rewards/rejected": -7.016660690307617, + "step": 3491 + }, + { + "epoch": 0.54, + "learning_rate": 1.1586160070543077e-05, + "logits/chosen": -2.3971903324127197, + "logits/rejected": -2.929349660873413, + "logps/chosen": -389.33514404296875, + "logps/rejected": -497.0250244140625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.989679217338562, + "rewards/margins": 6.646792411804199, + "rewards/rejected": -8.63647174835205, + "step": 3492 + }, + { + "epoch": 0.54, + "learning_rate": 1.158542663001193e-05, + "logits/chosen": -2.907580852508545, + "logits/rejected": -2.507108211517334, + "logps/chosen": -130.6936798095703, + "logps/rejected": -159.91458129882812, + "loss": 1.9873, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4446911811828613, + "rewards/margins": 1.7548840045928955, + "rewards/rejected": -5.199574947357178, + "step": 3493 + }, + { + "epoch": 0.54, + "learning_rate": 1.1584693189480783e-05, + "logits/chosen": -2.6232690811157227, + "logits/rejected": -2.8680036067962646, + "logps/chosen": -115.92400360107422, + "logps/rejected": -279.0418701171875, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2303037643432617, + "rewards/margins": 6.306511878967285, + "rewards/rejected": -8.536815643310547, + "step": 3494 + }, + { + "epoch": 0.54, + "learning_rate": 1.1583959748949635e-05, + "logits/chosen": -3.0688326358795166, + "logits/rejected": -2.7896783351898193, + "logps/chosen": -339.1613464355469, + "logps/rejected": -324.7158203125, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2189548015594482, + "rewards/margins": 7.2720723152160645, + "rewards/rejected": -8.49102783203125, + "step": 3495 + }, + { + "epoch": 0.54, + "learning_rate": 1.1583226308418486e-05, + "logits/chosen": -3.1207356452941895, + "logits/rejected": -2.938899278640747, + "logps/chosen": -77.26985168457031, + "logps/rejected": -127.78111267089844, + "loss": 0.0182, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0573129653930664, + "rewards/margins": 4.087131977081299, + "rewards/rejected": -7.144445419311523, + "step": 3496 + }, + { + "epoch": 0.54, + "learning_rate": 1.158249286788734e-05, + "logits/chosen": -1.9656082391738892, + "logits/rejected": -2.7462165355682373, + "logps/chosen": -158.31483459472656, + "logps/rejected": -272.218505859375, + "loss": 3.5242, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.97177267074585, + "rewards/margins": -1.2479705810546875, + "rewards/rejected": -3.723802089691162, + "step": 3497 + }, + { + "epoch": 0.54, + "learning_rate": 1.1581759427356192e-05, + "logits/chosen": -2.7644574642181396, + "logits/rejected": -2.9255032539367676, + "logps/chosen": -192.10662841796875, + "logps/rejected": -237.43505859375, + "loss": 0.8292, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.943775177001953, + "rewards/margins": 2.976121425628662, + "rewards/rejected": -5.919896602630615, + "step": 3498 + }, + { + "epoch": 0.54, + "learning_rate": 1.1581025986825044e-05, + "logits/chosen": -1.7146327495574951, + "logits/rejected": -2.779757499694824, + "logps/chosen": -256.2007141113281, + "logps/rejected": -359.48114013671875, + "loss": 4.6285, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.416041374206543, + "rewards/margins": -0.7104744911193848, + "rewards/rejected": -6.705566883087158, + "step": 3499 + }, + { + "epoch": 0.54, + "learning_rate": 1.1580292546293896e-05, + "logits/chosen": -2.865856885910034, + "logits/rejected": -3.2237823009490967, + "logps/chosen": -287.05206298828125, + "logps/rejected": -369.13043212890625, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3107345402240753, + "rewards/margins": 3.9197754859924316, + "rewards/rejected": -4.2305097579956055, + "step": 3500 + }, + { + "epoch": 0.54, + "learning_rate": 1.1579559105762748e-05, + "logits/chosen": -2.4053356647491455, + "logits/rejected": -2.992584705352783, + "logps/chosen": -108.58233642578125, + "logps/rejected": -249.60562133789062, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.756784439086914, + "rewards/margins": 5.402505397796631, + "rewards/rejected": -7.159289836883545, + "step": 3501 + }, + { + "epoch": 0.54, + "learning_rate": 1.15788256652316e-05, + "logits/chosen": -2.9876067638397217, + "logits/rejected": -2.114006996154785, + "logps/chosen": -201.56451416015625, + "logps/rejected": -131.60804748535156, + "loss": 2.6019, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.715030670166016, + "rewards/margins": -1.5044018030166626, + "rewards/rejected": -3.2106287479400635, + "step": 3502 + }, + { + "epoch": 0.54, + "learning_rate": 1.1578092224700451e-05, + "logits/chosen": -2.4124252796173096, + "logits/rejected": -2.9269051551818848, + "logps/chosen": -296.6513366699219, + "logps/rejected": -211.96548461914062, + "loss": 6.1851, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.120138645172119, + "rewards/margins": -6.179060459136963, + "rewards/rejected": -0.9410781860351562, + "step": 3503 + }, + { + "epoch": 0.54, + "learning_rate": 1.1577358784169303e-05, + "logits/chosen": -2.51855731010437, + "logits/rejected": -2.850163459777832, + "logps/chosen": -198.53781127929688, + "logps/rejected": -375.8765869140625, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.662616729736328, + "rewards/margins": 6.158485412597656, + "rewards/rejected": -8.821102142333984, + "step": 3504 + }, + { + "epoch": 0.55, + "learning_rate": 1.1576625343638155e-05, + "logits/chosen": -2.3596808910369873, + "logits/rejected": -3.101438283920288, + "logps/chosen": -410.33258056640625, + "logps/rejected": -517.3120727539062, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3829200267791748, + "rewards/margins": 6.202780246734619, + "rewards/rejected": -7.585700035095215, + "step": 3505 + }, + { + "epoch": 0.55, + "learning_rate": 1.1575891903107009e-05, + "logits/chosen": -3.011211395263672, + "logits/rejected": -3.380162477493286, + "logps/chosen": -81.06134033203125, + "logps/rejected": -193.19216918945312, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3197684288024902, + "rewards/margins": 5.903069972991943, + "rewards/rejected": -8.222838401794434, + "step": 3506 + }, + { + "epoch": 0.55, + "learning_rate": 1.157515846257586e-05, + "logits/chosen": -2.4438068866729736, + "logits/rejected": -3.044804811477661, + "logps/chosen": -301.69537353515625, + "logps/rejected": -434.6261901855469, + "loss": 0.0527, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5938944816589355, + "rewards/margins": 4.397458553314209, + "rewards/rejected": -5.9913530349731445, + "step": 3507 + }, + { + "epoch": 0.55, + "learning_rate": 1.1574425022044712e-05, + "logits/chosen": -2.994917154312134, + "logits/rejected": -3.014204263687134, + "logps/chosen": -125.42909240722656, + "logps/rejected": -350.4156799316406, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6908611059188843, + "rewards/margins": 7.222644805908203, + "rewards/rejected": -7.913505554199219, + "step": 3508 + }, + { + "epoch": 0.55, + "learning_rate": 1.1573691581513564e-05, + "logits/chosen": -2.7227609157562256, + "logits/rejected": -2.1074273586273193, + "logps/chosen": -555.8621215820312, + "logps/rejected": -242.8907012939453, + "loss": 5.9593, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.753660678863525, + "rewards/margins": -2.546095848083496, + "rewards/rejected": -5.207564830780029, + "step": 3509 + }, + { + "epoch": 0.55, + "learning_rate": 1.1572958140982416e-05, + "logits/chosen": -2.535623550415039, + "logits/rejected": -3.1907193660736084, + "logps/chosen": -179.30416870117188, + "logps/rejected": -292.47308349609375, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1646885871887207, + "rewards/margins": 4.684014320373535, + "rewards/rejected": -6.848703384399414, + "step": 3510 + }, + { + "epoch": 0.55, + "learning_rate": 1.1572224700451268e-05, + "logits/chosen": -0.9727683663368225, + "logits/rejected": -3.1326730251312256, + "logps/chosen": -189.64541625976562, + "logps/rejected": -542.8587646484375, + "loss": 3.2123, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.003784656524658, + "rewards/margins": 0.15422534942626953, + "rewards/rejected": -6.158010005950928, + "step": 3511 + }, + { + "epoch": 0.55, + "learning_rate": 1.157149125992012e-05, + "logits/chosen": -1.474509835243225, + "logits/rejected": -2.9686903953552246, + "logps/chosen": -230.7245635986328, + "logps/rejected": -395.95556640625, + "loss": 2.1927, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.197962284088135, + "rewards/margins": 1.1825675964355469, + "rewards/rejected": -5.380529880523682, + "step": 3512 + }, + { + "epoch": 0.55, + "learning_rate": 1.1570757819388972e-05, + "logits/chosen": -2.721323013305664, + "logits/rejected": -3.11124324798584, + "logps/chosen": -117.71493530273438, + "logps/rejected": -226.06224060058594, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5700092315673828, + "rewards/margins": 5.581653594970703, + "rewards/rejected": -7.151662826538086, + "step": 3513 + }, + { + "epoch": 0.55, + "learning_rate": 1.1570024378857824e-05, + "logits/chosen": -1.69745934009552, + "logits/rejected": -2.844430685043335, + "logps/chosen": -144.0745849609375, + "logps/rejected": -434.743896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4997093677520752, + "rewards/margins": 10.559621810913086, + "rewards/rejected": -12.059330940246582, + "step": 3514 + }, + { + "epoch": 0.55, + "learning_rate": 1.1569290938326677e-05, + "logits/chosen": -3.0440633296966553, + "logits/rejected": -2.6309993267059326, + "logps/chosen": -512.807861328125, + "logps/rejected": -546.4472045898438, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4719940423965454, + "rewards/margins": 6.097543716430664, + "rewards/rejected": -7.569537162780762, + "step": 3515 + }, + { + "epoch": 0.55, + "learning_rate": 1.1568557497795529e-05, + "logits/chosen": -2.132906198501587, + "logits/rejected": -3.050436496734619, + "logps/chosen": -257.3715515136719, + "logps/rejected": -389.0242004394531, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7605499029159546, + "rewards/margins": 4.860588073730469, + "rewards/rejected": -5.621138095855713, + "step": 3516 + }, + { + "epoch": 0.55, + "learning_rate": 1.1567824057264381e-05, + "logits/chosen": -2.6511595249176025, + "logits/rejected": -3.0142204761505127, + "logps/chosen": -286.93804931640625, + "logps/rejected": -407.10882568359375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5420265197753906, + "rewards/margins": 6.831809043884277, + "rewards/rejected": -9.373835563659668, + "step": 3517 + }, + { + "epoch": 0.55, + "learning_rate": 1.1567090616733233e-05, + "logits/chosen": -2.4715628623962402, + "logits/rejected": -3.20619797706604, + "logps/chosen": -151.57891845703125, + "logps/rejected": -254.75543212890625, + "loss": 0.1578, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.307593822479248, + "rewards/margins": 1.8272511959075928, + "rewards/rejected": -4.13484525680542, + "step": 3518 + }, + { + "epoch": 0.55, + "learning_rate": 1.1566357176202085e-05, + "logits/chosen": -2.101895809173584, + "logits/rejected": -2.837756633758545, + "logps/chosen": -93.29147338867188, + "logps/rejected": -233.7544403076172, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2585651874542236, + "rewards/margins": 6.78964900970459, + "rewards/rejected": -8.048213958740234, + "step": 3519 + }, + { + "epoch": 0.55, + "learning_rate": 1.1565623735670937e-05, + "logits/chosen": -3.032827138900757, + "logits/rejected": -2.1909899711608887, + "logps/chosen": -277.0013732910156, + "logps/rejected": -208.26361083984375, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2827541828155518, + "rewards/margins": 6.627347946166992, + "rewards/rejected": -8.910101890563965, + "step": 3520 + }, + { + "epoch": 0.55, + "learning_rate": 1.1564890295139788e-05, + "logits/chosen": -2.9944701194763184, + "logits/rejected": -2.7440054416656494, + "logps/chosen": -143.54847717285156, + "logps/rejected": -422.6020812988281, + "loss": 0.2149, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.463914155960083, + "rewards/margins": 6.142475128173828, + "rewards/rejected": -8.606389045715332, + "step": 3521 + }, + { + "epoch": 0.55, + "learning_rate": 1.156415685460864e-05, + "logits/chosen": -2.0743050575256348, + "logits/rejected": -2.6839048862457275, + "logps/chosen": -376.3429870605469, + "logps/rejected": -311.1156005859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6477584838867188, + "rewards/margins": 8.246228218078613, + "rewards/rejected": -8.893986701965332, + "step": 3522 + }, + { + "epoch": 0.55, + "learning_rate": 1.1563423414077494e-05, + "logits/chosen": -3.114621639251709, + "logits/rejected": -2.4028756618499756, + "logps/chosen": -113.59226989746094, + "logps/rejected": -128.7820281982422, + "loss": 1.9234, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.7035980224609375, + "rewards/margins": -0.05700075626373291, + "rewards/rejected": -5.646597385406494, + "step": 3523 + }, + { + "epoch": 0.55, + "learning_rate": 1.1562689973546346e-05, + "logits/chosen": -2.854708671569824, + "logits/rejected": -3.099137306213379, + "logps/chosen": -112.01209259033203, + "logps/rejected": -233.93991088867188, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7658535242080688, + "rewards/margins": 7.0001420974731445, + "rewards/rejected": -8.765995979309082, + "step": 3524 + }, + { + "epoch": 0.55, + "learning_rate": 1.1561956533015198e-05, + "logits/chosen": -3.1225392818450928, + "logits/rejected": -3.0200278759002686, + "logps/chosen": -56.13136291503906, + "logps/rejected": -124.77459716796875, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2658926248550415, + "rewards/margins": 4.681831359863281, + "rewards/rejected": -5.947723865509033, + "step": 3525 + }, + { + "epoch": 0.55, + "learning_rate": 1.156122309248405e-05, + "logits/chosen": -1.6743234395980835, + "logits/rejected": -2.3584020137786865, + "logps/chosen": -79.16047668457031, + "logps/rejected": -342.24755859375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.655625581741333, + "rewards/margins": 9.320728302001953, + "rewards/rejected": -10.976353645324707, + "step": 3526 + }, + { + "epoch": 0.55, + "learning_rate": 1.1560489651952903e-05, + "logits/chosen": -1.4006465673446655, + "logits/rejected": -2.772522449493408, + "logps/chosen": -44.9925651550293, + "logps/rejected": -279.17431640625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0683977603912354, + "rewards/margins": 7.6163129806518555, + "rewards/rejected": -9.684710502624512, + "step": 3527 + }, + { + "epoch": 0.55, + "learning_rate": 1.1559756211421755e-05, + "logits/chosen": -2.8110125064849854, + "logits/rejected": -3.103524923324585, + "logps/chosen": -27.414554595947266, + "logps/rejected": -164.375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.110065221786499, + "rewards/margins": 6.6129913330078125, + "rewards/rejected": -7.723056316375732, + "step": 3528 + }, + { + "epoch": 0.55, + "learning_rate": 1.1559022770890607e-05, + "logits/chosen": -3.0402798652648926, + "logits/rejected": -3.173506021499634, + "logps/chosen": -68.93758392333984, + "logps/rejected": -170.3491668701172, + "loss": 0.0308, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0407252311706543, + "rewards/margins": 3.7105236053466797, + "rewards/rejected": -5.751248836517334, + "step": 3529 + }, + { + "epoch": 0.55, + "learning_rate": 1.1558289330359459e-05, + "logits/chosen": -2.8765408992767334, + "logits/rejected": -2.7161927223205566, + "logps/chosen": -204.17782592773438, + "logps/rejected": -264.74053955078125, + "loss": 0.4056, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.3845436573028564, + "rewards/margins": 2.8099122047424316, + "rewards/rejected": -4.194455623626709, + "step": 3530 + }, + { + "epoch": 0.55, + "learning_rate": 1.155755588982831e-05, + "logits/chosen": -3.005638599395752, + "logits/rejected": -3.099207639694214, + "logps/chosen": -82.00094604492188, + "logps/rejected": -215.8951416015625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.231073021888733, + "rewards/margins": 5.569228172302246, + "rewards/rejected": -6.800301551818848, + "step": 3531 + }, + { + "epoch": 0.55, + "learning_rate": 1.1556822449297164e-05, + "logits/chosen": -2.328843832015991, + "logits/rejected": -2.725519895553589, + "logps/chosen": -215.20591735839844, + "logps/rejected": -260.6921081542969, + "loss": 1.4259, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1246016025543213, + "rewards/margins": 3.0254321098327637, + "rewards/rejected": -6.150033473968506, + "step": 3532 + }, + { + "epoch": 0.55, + "learning_rate": 1.1556089008766016e-05, + "logits/chosen": -2.8182406425476074, + "logits/rejected": -3.0231192111968994, + "logps/chosen": -128.96617126464844, + "logps/rejected": -316.09466552734375, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7874268293380737, + "rewards/margins": 5.8606743812561035, + "rewards/rejected": -6.648100852966309, + "step": 3533 + }, + { + "epoch": 0.55, + "learning_rate": 1.1555355568234868e-05, + "logits/chosen": -2.367002487182617, + "logits/rejected": -2.911367893218994, + "logps/chosen": -174.86300659179688, + "logps/rejected": -200.32345581054688, + "loss": 1.3728, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6818430423736572, + "rewards/margins": 2.1422901153564453, + "rewards/rejected": -4.824133396148682, + "step": 3534 + }, + { + "epoch": 0.55, + "learning_rate": 1.155462212770372e-05, + "logits/chosen": -2.7240865230560303, + "logits/rejected": -2.7630527019500732, + "logps/chosen": -262.6368103027344, + "logps/rejected": -359.3942565917969, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0781959518790245, + "rewards/margins": 6.878631591796875, + "rewards/rejected": -6.9568281173706055, + "step": 3535 + }, + { + "epoch": 0.55, + "learning_rate": 1.1553888687172572e-05, + "logits/chosen": -2.980642795562744, + "logits/rejected": -3.1584744453430176, + "logps/chosen": -170.98385620117188, + "logps/rejected": -284.6086730957031, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0373380184173584, + "rewards/margins": 4.30919885635376, + "rewards/rejected": -5.346536636352539, + "step": 3536 + }, + { + "epoch": 0.55, + "learning_rate": 1.1553155246641424e-05, + "logits/chosen": -2.2732532024383545, + "logits/rejected": -2.916386842727661, + "logps/chosen": -59.86780548095703, + "logps/rejected": -196.30946350097656, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8673313856124878, + "rewards/margins": 4.217686176300049, + "rewards/rejected": -6.085017204284668, + "step": 3537 + }, + { + "epoch": 0.55, + "learning_rate": 1.1552421806110275e-05, + "logits/chosen": -2.9394991397857666, + "logits/rejected": -2.919128179550171, + "logps/chosen": -38.622039794921875, + "logps/rejected": -219.67625427246094, + "loss": 0.0406, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1604996919631958, + "rewards/margins": 5.918763160705566, + "rewards/rejected": -7.079262733459473, + "step": 3538 + }, + { + "epoch": 0.55, + "learning_rate": 1.1551688365579127e-05, + "logits/chosen": -2.865039825439453, + "logits/rejected": -2.569042444229126, + "logps/chosen": -120.93141174316406, + "logps/rejected": -261.1660461425781, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2229870557785034, + "rewards/margins": 7.016007900238037, + "rewards/rejected": -8.238994598388672, + "step": 3539 + }, + { + "epoch": 0.55, + "learning_rate": 1.155095492504798e-05, + "logits/chosen": -3.0252151489257812, + "logits/rejected": -1.9747031927108765, + "logps/chosen": -967.158203125, + "logps/rejected": -400.75506591796875, + "loss": 2.8295, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.325415134429932, + "rewards/margins": 0.34597158432006836, + "rewards/rejected": -4.671387195587158, + "step": 3540 + }, + { + "epoch": 0.55, + "learning_rate": 1.1550221484516833e-05, + "logits/chosen": -2.984407663345337, + "logits/rejected": -3.025184154510498, + "logps/chosen": -44.95494079589844, + "logps/rejected": -160.4644317626953, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1804358959198, + "rewards/margins": 4.325272560119629, + "rewards/rejected": -6.505708694458008, + "step": 3541 + }, + { + "epoch": 0.55, + "learning_rate": 1.1549488043985685e-05, + "logits/chosen": -3.005113124847412, + "logits/rejected": -3.0883948802948, + "logps/chosen": -364.51007080078125, + "logps/rejected": -533.7227783203125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.950640082359314, + "rewards/margins": 8.725147247314453, + "rewards/rejected": -9.675786972045898, + "step": 3542 + }, + { + "epoch": 0.55, + "learning_rate": 1.1548754603454537e-05, + "logits/chosen": -3.059860944747925, + "logits/rejected": -2.616713285446167, + "logps/chosen": -282.9765930175781, + "logps/rejected": -330.95098876953125, + "loss": 3.7291, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.814713478088379, + "rewards/margins": -3.5831661224365234, + "rewards/rejected": -2.2315475940704346, + "step": 3543 + }, + { + "epoch": 0.55, + "learning_rate": 1.1548021162923388e-05, + "logits/chosen": -1.4791176319122314, + "logits/rejected": -2.852689027786255, + "logps/chosen": -107.59829711914062, + "logps/rejected": -326.5513610839844, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7100345492362976, + "rewards/margins": 6.635593891143799, + "rewards/rejected": -7.34562873840332, + "step": 3544 + }, + { + "epoch": 0.55, + "learning_rate": 1.154728772239224e-05, + "logits/chosen": -3.015695810317993, + "logits/rejected": -2.867748975753784, + "logps/chosen": -747.30419921875, + "logps/rejected": -699.0343627929688, + "loss": 4.1347, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.840426921844482, + "rewards/margins": 0.45572471618652344, + "rewards/rejected": -5.296151638031006, + "step": 3545 + }, + { + "epoch": 0.55, + "learning_rate": 1.1546554281861092e-05, + "logits/chosen": -2.499403476715088, + "logits/rejected": -3.0310277938842773, + "logps/chosen": -104.73577117919922, + "logps/rejected": -254.8911590576172, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.477073907852173, + "rewards/margins": 5.4284257888793945, + "rewards/rejected": -7.9054999351501465, + "step": 3546 + }, + { + "epoch": 0.55, + "learning_rate": 1.1545820841329944e-05, + "logits/chosen": -3.0526278018951416, + "logits/rejected": -3.001328468322754, + "logps/chosen": -549.2844848632812, + "logps/rejected": -443.9786376953125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6533516049385071, + "rewards/margins": 8.418990135192871, + "rewards/rejected": -7.76563835144043, + "step": 3547 + }, + { + "epoch": 0.55, + "learning_rate": 1.1545087400798796e-05, + "logits/chosen": -2.5723156929016113, + "logits/rejected": -3.0050013065338135, + "logps/chosen": -205.41400146484375, + "logps/rejected": -285.004638671875, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0692815780639648, + "rewards/margins": 5.697267055511475, + "rewards/rejected": -6.7665486335754395, + "step": 3548 + }, + { + "epoch": 0.55, + "learning_rate": 1.1544353960267648e-05, + "logits/chosen": -1.925619125366211, + "logits/rejected": -3.045541286468506, + "logps/chosen": -105.30403900146484, + "logps/rejected": -512.47216796875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19065114855766296, + "rewards/margins": 8.404867172241211, + "rewards/rejected": -8.595519065856934, + "step": 3549 + }, + { + "epoch": 0.55, + "learning_rate": 1.1543620519736501e-05, + "logits/chosen": -2.694364309310913, + "logits/rejected": -3.193937301635742, + "logps/chosen": -144.2289276123047, + "logps/rejected": -260.92486572265625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40268269181251526, + "rewards/margins": 7.072022914886475, + "rewards/rejected": -7.474705696105957, + "step": 3550 + }, + { + "epoch": 0.55, + "learning_rate": 1.1542887079205353e-05, + "logits/chosen": -3.074733257293701, + "logits/rejected": -2.9342830181121826, + "logps/chosen": -146.53602600097656, + "logps/rejected": -231.51341247558594, + "loss": 2.6737, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3817429542541504, + "rewards/margins": 0.6436984539031982, + "rewards/rejected": -4.0254411697387695, + "step": 3551 + }, + { + "epoch": 0.55, + "learning_rate": 1.1542153638674205e-05, + "logits/chosen": -3.1505002975463867, + "logits/rejected": -2.2863547801971436, + "logps/chosen": -418.39434814453125, + "logps/rejected": -253.63705444335938, + "loss": 1.4794, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.0665388107299805, + "rewards/margins": 2.240813732147217, + "rewards/rejected": -6.3073530197143555, + "step": 3552 + }, + { + "epoch": 0.55, + "learning_rate": 1.1541420198143057e-05, + "logits/chosen": -2.895205020904541, + "logits/rejected": -3.066145658493042, + "logps/chosen": -118.26725769042969, + "logps/rejected": -262.0052795410156, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3378913402557373, + "rewards/margins": 5.246036529541016, + "rewards/rejected": -7.583928108215332, + "step": 3553 + }, + { + "epoch": 0.55, + "learning_rate": 1.1540686757611909e-05, + "logits/chosen": -2.863638162612915, + "logits/rejected": -2.9528980255126953, + "logps/chosen": -310.7798156738281, + "logps/rejected": -433.110595703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8521476984024048, + "rewards/margins": 8.693904876708984, + "rewards/rejected": -9.546052932739258, + "step": 3554 + }, + { + "epoch": 0.55, + "learning_rate": 1.153995331708076e-05, + "logits/chosen": -1.7914646863937378, + "logits/rejected": -2.8963980674743652, + "logps/chosen": -275.04315185546875, + "logps/rejected": -308.1217956542969, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35933151841163635, + "rewards/margins": 8.211434364318848, + "rewards/rejected": -8.570765495300293, + "step": 3555 + }, + { + "epoch": 0.55, + "learning_rate": 1.1539219876549613e-05, + "logits/chosen": -1.415170669555664, + "logits/rejected": -3.1457247734069824, + "logps/chosen": -163.7313690185547, + "logps/rejected": -311.0374755859375, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0180084705352783, + "rewards/margins": 4.658321857452393, + "rewards/rejected": -6.67633056640625, + "step": 3556 + }, + { + "epoch": 0.55, + "learning_rate": 1.1538486436018465e-05, + "logits/chosen": -3.0895578861236572, + "logits/rejected": -1.863341212272644, + "logps/chosen": -258.8587341308594, + "logps/rejected": -111.6336669921875, + "loss": 0.1858, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2447121143341064, + "rewards/margins": 3.026109218597412, + "rewards/rejected": -4.270821571350098, + "step": 3557 + }, + { + "epoch": 0.55, + "learning_rate": 1.1537752995487316e-05, + "logits/chosen": -2.7048122882843018, + "logits/rejected": -3.161118268966675, + "logps/chosen": -444.9435119628906, + "logps/rejected": -438.51837158203125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08183901011943817, + "rewards/margins": 7.31595516204834, + "rewards/rejected": -7.234116554260254, + "step": 3558 + }, + { + "epoch": 0.55, + "learning_rate": 1.153701955495617e-05, + "logits/chosen": -1.838546872138977, + "logits/rejected": -2.890753984451294, + "logps/chosen": -86.70880889892578, + "logps/rejected": -316.97052001953125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.399035096168518, + "rewards/margins": 7.666170120239258, + "rewards/rejected": -9.065205574035645, + "step": 3559 + }, + { + "epoch": 0.55, + "learning_rate": 1.1536286114425022e-05, + "logits/chosen": -3.0937376022338867, + "logits/rejected": -1.5666667222976685, + "logps/chosen": -401.9353332519531, + "logps/rejected": -132.6412811279297, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6197696924209595, + "rewards/margins": 4.201911926269531, + "rewards/rejected": -5.821681499481201, + "step": 3560 + }, + { + "epoch": 0.55, + "learning_rate": 1.1535552673893875e-05, + "logits/chosen": -2.282839298248291, + "logits/rejected": -2.6356232166290283, + "logps/chosen": -509.1559143066406, + "logps/rejected": -653.753173828125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2649683952331543, + "rewards/margins": 7.379053592681885, + "rewards/rejected": -7.1140851974487305, + "step": 3561 + }, + { + "epoch": 0.55, + "learning_rate": 1.1534819233362727e-05, + "logits/chosen": -2.417342185974121, + "logits/rejected": -3.087575912475586, + "logps/chosen": -134.512451171875, + "logps/rejected": -342.6451416015625, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5947880744934082, + "rewards/margins": 5.370128631591797, + "rewards/rejected": -6.964916229248047, + "step": 3562 + }, + { + "epoch": 0.55, + "learning_rate": 1.153408579283158e-05, + "logits/chosen": -2.812991142272949, + "logits/rejected": -3.0014431476593018, + "logps/chosen": -183.6214141845703, + "logps/rejected": -342.17303466796875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.171942114830017, + "rewards/margins": 7.6968255043029785, + "rewards/rejected": -8.868767738342285, + "step": 3563 + }, + { + "epoch": 0.55, + "learning_rate": 1.1533352352300431e-05, + "logits/chosen": -2.9238040447235107, + "logits/rejected": -2.3829448223114014, + "logps/chosen": -228.81727600097656, + "logps/rejected": -379.63232421875, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.295902967453003, + "rewards/margins": 7.212049961090088, + "rewards/rejected": -9.507952690124512, + "step": 3564 + }, + { + "epoch": 0.55, + "learning_rate": 1.1532618911769283e-05, + "logits/chosen": -3.080747604370117, + "logits/rejected": -2.3017942905426025, + "logps/chosen": -686.061767578125, + "logps/rejected": -383.30255126953125, + "loss": 1.3666, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.078462600708008, + "rewards/margins": 1.581268548965454, + "rewards/rejected": -4.659730911254883, + "step": 3565 + }, + { + "epoch": 0.55, + "learning_rate": 1.1531885471238135e-05, + "logits/chosen": -3.0800106525421143, + "logits/rejected": -2.491711139678955, + "logps/chosen": -159.04824829101562, + "logps/rejected": -183.68458557128906, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6012626886367798, + "rewards/margins": 4.950813293457031, + "rewards/rejected": -5.55207633972168, + "step": 3566 + }, + { + "epoch": 0.55, + "learning_rate": 1.1531152030706987e-05, + "logits/chosen": -3.065991163253784, + "logits/rejected": -3.117612838745117, + "logps/chosen": -198.18170166015625, + "logps/rejected": -336.80499267578125, + "loss": 2.2809, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.108718156814575, + "rewards/margins": 0.6704292297363281, + "rewards/rejected": -3.7791473865509033, + "step": 3567 + }, + { + "epoch": 0.55, + "learning_rate": 1.153041859017584e-05, + "logits/chosen": -3.059260845184326, + "logits/rejected": -2.3255677223205566, + "logps/chosen": -263.3843688964844, + "logps/rejected": -312.0648498535156, + "loss": 3.7737, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.895219326019287, + "rewards/margins": -0.7393965721130371, + "rewards/rejected": -4.15582275390625, + "step": 3568 + }, + { + "epoch": 0.56, + "learning_rate": 1.1529685149644692e-05, + "logits/chosen": -1.885390043258667, + "logits/rejected": -3.100879669189453, + "logps/chosen": -179.48641967773438, + "logps/rejected": -522.4485473632812, + "loss": 2.632, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.2352190017700195, + "rewards/margins": -0.16480112075805664, + "rewards/rejected": -4.070417881011963, + "step": 3569 + }, + { + "epoch": 0.56, + "learning_rate": 1.1528951709113544e-05, + "logits/chosen": -2.296921491622925, + "logits/rejected": -3.084770441055298, + "logps/chosen": -244.24664306640625, + "logps/rejected": -298.5039978027344, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.143360137939453, + "rewards/margins": 4.680458068847656, + "rewards/rejected": -6.823818206787109, + "step": 3570 + }, + { + "epoch": 0.56, + "learning_rate": 1.1528218268582396e-05, + "logits/chosen": -3.093071460723877, + "logits/rejected": -3.071558713912964, + "logps/chosen": -168.54380798339844, + "logps/rejected": -312.617919921875, + "loss": 2.2261, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3397789001464844, + "rewards/margins": 1.0535645484924316, + "rewards/rejected": -4.393343448638916, + "step": 3571 + }, + { + "epoch": 0.56, + "learning_rate": 1.1527484828051248e-05, + "logits/chosen": -2.9200069904327393, + "logits/rejected": -2.381443738937378, + "logps/chosen": -601.32421875, + "logps/rejected": -499.3393859863281, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3807739317417145, + "rewards/margins": 7.074562072753906, + "rewards/rejected": -6.693788051605225, + "step": 3572 + }, + { + "epoch": 0.56, + "learning_rate": 1.15267513875201e-05, + "logits/chosen": -2.2218985557556152, + "logits/rejected": -2.790727138519287, + "logps/chosen": -144.30433654785156, + "logps/rejected": -380.0753173828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18707847595214844, + "rewards/margins": 9.302830696105957, + "rewards/rejected": -9.489909172058105, + "step": 3573 + }, + { + "epoch": 0.56, + "learning_rate": 1.1526017946988952e-05, + "logits/chosen": -3.1349780559539795, + "logits/rejected": -3.0722861289978027, + "logps/chosen": -286.4056701660156, + "logps/rejected": -409.92626953125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.013896942138672, + "rewards/margins": 6.656632900238037, + "rewards/rejected": -8.670530319213867, + "step": 3574 + }, + { + "epoch": 0.56, + "learning_rate": 1.1525284506457803e-05, + "logits/chosen": -2.522881031036377, + "logits/rejected": -3.1612985134124756, + "logps/chosen": -30.60025405883789, + "logps/rejected": -254.3243408203125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4757976531982422, + "rewards/margins": 5.564584255218506, + "rewards/rejected": -7.040381908416748, + "step": 3575 + }, + { + "epoch": 0.56, + "learning_rate": 1.1524551065926655e-05, + "logits/chosen": -2.7726690769195557, + "logits/rejected": -3.2714858055114746, + "logps/chosen": -349.8232116699219, + "logps/rejected": -402.4542236328125, + "loss": 0.0378, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.113144636154175, + "rewards/margins": 3.4859261512756348, + "rewards/rejected": -5.599071025848389, + "step": 3576 + }, + { + "epoch": 0.56, + "learning_rate": 1.1523817625395509e-05, + "logits/chosen": -2.891798496246338, + "logits/rejected": -3.3546302318573, + "logps/chosen": -116.66156768798828, + "logps/rejected": -220.43417358398438, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2521421909332275, + "rewards/margins": 5.276872158050537, + "rewards/rejected": -6.529014587402344, + "step": 3577 + }, + { + "epoch": 0.56, + "learning_rate": 1.152308418486436e-05, + "logits/chosen": -2.368229389190674, + "logits/rejected": -3.1744132041931152, + "logps/chosen": -185.3365936279297, + "logps/rejected": -299.5144348144531, + "loss": 0.3302, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7377620935440063, + "rewards/margins": 3.9883241653442383, + "rewards/rejected": -5.726086616516113, + "step": 3578 + }, + { + "epoch": 0.56, + "learning_rate": 1.1522350744333213e-05, + "logits/chosen": -1.5583940744400024, + "logits/rejected": -3.0053741931915283, + "logps/chosen": -108.98931884765625, + "logps/rejected": -284.37579345703125, + "loss": 0.1143, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0448946952819824, + "rewards/margins": 2.4311656951904297, + "rewards/rejected": -3.476060390472412, + "step": 3579 + }, + { + "epoch": 0.56, + "learning_rate": 1.1521617303802065e-05, + "logits/chosen": -2.1905438899993896, + "logits/rejected": -3.1205079555511475, + "logps/chosen": -145.49853515625, + "logps/rejected": -326.731201171875, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6937000751495361, + "rewards/margins": 4.743500709533691, + "rewards/rejected": -6.437201023101807, + "step": 3580 + }, + { + "epoch": 0.56, + "learning_rate": 1.1520883863270916e-05, + "logits/chosen": -2.903109073638916, + "logits/rejected": -3.0509421825408936, + "logps/chosen": -190.15774536132812, + "logps/rejected": -271.3052062988281, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.471015214920044, + "rewards/margins": 5.194608688354492, + "rewards/rejected": -6.665624618530273, + "step": 3581 + }, + { + "epoch": 0.56, + "learning_rate": 1.1520150422739768e-05, + "logits/chosen": -3.1037590503692627, + "logits/rejected": -2.1918838024139404, + "logps/chosen": -252.19496154785156, + "logps/rejected": -171.04893493652344, + "loss": 0.1641, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.640718460083008, + "rewards/margins": 3.834202289581299, + "rewards/rejected": -6.474920749664307, + "step": 3582 + }, + { + "epoch": 0.56, + "learning_rate": 1.151941698220862e-05, + "logits/chosen": -2.7883386611938477, + "logits/rejected": -1.420882225036621, + "logps/chosen": -281.41290283203125, + "logps/rejected": -275.3775939941406, + "loss": 4.1892, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.600029468536377, + "rewards/margins": 0.12315034866333008, + "rewards/rejected": -5.723179817199707, + "step": 3583 + }, + { + "epoch": 0.56, + "learning_rate": 1.1518683541677472e-05, + "logits/chosen": -2.3531501293182373, + "logits/rejected": -2.4922845363616943, + "logps/chosen": -189.8090057373047, + "logps/rejected": -390.82513427734375, + "loss": 0.1059, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.534464716911316, + "rewards/margins": 2.9310457706451416, + "rewards/rejected": -4.465510845184326, + "step": 3584 + }, + { + "epoch": 0.56, + "learning_rate": 1.1517950101146324e-05, + "logits/chosen": -2.8708271980285645, + "logits/rejected": -2.794161081314087, + "logps/chosen": -209.53652954101562, + "logps/rejected": -399.81439208984375, + "loss": 0.0422, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9952046871185303, + "rewards/margins": 6.77370023727417, + "rewards/rejected": -8.768905639648438, + "step": 3585 + }, + { + "epoch": 0.56, + "learning_rate": 1.1517216660615177e-05, + "logits/chosen": -3.116917133331299, + "logits/rejected": -2.472036123275757, + "logps/chosen": -663.3177490234375, + "logps/rejected": -331.29803466796875, + "loss": 3.2478, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.346304416656494, + "rewards/margins": -0.14315128326416016, + "rewards/rejected": -4.203153610229492, + "step": 3586 + }, + { + "epoch": 0.56, + "learning_rate": 1.151648322008403e-05, + "logits/chosen": -2.954439640045166, + "logits/rejected": -2.3225595951080322, + "logps/chosen": -402.99017333984375, + "logps/rejected": -376.46771240234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.618090033531189, + "rewards/margins": 8.469926834106445, + "rewards/rejected": -9.088017463684082, + "step": 3587 + }, + { + "epoch": 0.56, + "learning_rate": 1.1515749779552881e-05, + "logits/chosen": -3.1398918628692627, + "logits/rejected": -3.0414013862609863, + "logps/chosen": -59.355133056640625, + "logps/rejected": -110.76786804199219, + "loss": 2.1465, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1495463848114014, + "rewards/margins": 2.5241196155548096, + "rewards/rejected": -5.673666477203369, + "step": 3588 + }, + { + "epoch": 0.56, + "learning_rate": 1.1515016339021733e-05, + "logits/chosen": -0.7214903235435486, + "logits/rejected": -2.6062211990356445, + "logps/chosen": -131.014404296875, + "logps/rejected": -624.1611938476562, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.081445574760437, + "rewards/margins": 10.819613456726074, + "rewards/rejected": -11.9010591506958, + "step": 3589 + }, + { + "epoch": 0.56, + "learning_rate": 1.1514282898490585e-05, + "logits/chosen": -2.9972567558288574, + "logits/rejected": -2.0942790508270264, + "logps/chosen": -99.23615264892578, + "logps/rejected": -245.071533203125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7009606957435608, + "rewards/margins": 8.054595947265625, + "rewards/rejected": -8.7555570602417, + "step": 3590 + }, + { + "epoch": 0.56, + "learning_rate": 1.1513549457959437e-05, + "logits/chosen": -2.260765552520752, + "logits/rejected": -2.885484218597412, + "logps/chosen": -265.7063293457031, + "logps/rejected": -238.91903686523438, + "loss": 2.9258, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.211552619934082, + "rewards/margins": -2.8652138710021973, + "rewards/rejected": -3.3463387489318848, + "step": 3591 + }, + { + "epoch": 0.56, + "learning_rate": 1.1512816017428289e-05, + "logits/chosen": -2.7661592960357666, + "logits/rejected": -2.6262857913970947, + "logps/chosen": -287.60107421875, + "logps/rejected": -276.89300537109375, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.842694878578186, + "rewards/margins": 4.617789268493652, + "rewards/rejected": -5.460484504699707, + "step": 3592 + }, + { + "epoch": 0.56, + "learning_rate": 1.1512082576897142e-05, + "logits/chosen": -2.4424874782562256, + "logits/rejected": -3.16119384765625, + "logps/chosen": -62.69676208496094, + "logps/rejected": -287.327880859375, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5806885957717896, + "rewards/margins": 5.505903720855713, + "rewards/rejected": -7.086592197418213, + "step": 3593 + }, + { + "epoch": 0.56, + "learning_rate": 1.1511349136365994e-05, + "logits/chosen": -2.7476940155029297, + "logits/rejected": -3.136167287826538, + "logps/chosen": -190.6682586669922, + "logps/rejected": -286.94366455078125, + "loss": 0.0714, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7602287530899048, + "rewards/margins": 4.976513862609863, + "rewards/rejected": -6.7367424964904785, + "step": 3594 + }, + { + "epoch": 0.56, + "learning_rate": 1.1510615695834848e-05, + "logits/chosen": -3.0020298957824707, + "logits/rejected": -3.036956548690796, + "logps/chosen": -123.90442657470703, + "logps/rejected": -236.72418212890625, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7418813705444336, + "rewards/margins": 5.127378940582275, + "rewards/rejected": -6.869260311126709, + "step": 3595 + }, + { + "epoch": 0.56, + "learning_rate": 1.15098822553037e-05, + "logits/chosen": -2.9001030921936035, + "logits/rejected": -1.2841054201126099, + "logps/chosen": -407.91900634765625, + "logps/rejected": -269.7786865234375, + "loss": 3.1953, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.384394884109497, + "rewards/margins": 0.6282885074615479, + "rewards/rejected": -4.012683391571045, + "step": 3596 + }, + { + "epoch": 0.56, + "learning_rate": 1.1509148814772552e-05, + "logits/chosen": -2.320917844772339, + "logits/rejected": -3.1460702419281006, + "logps/chosen": -92.71371459960938, + "logps/rejected": -243.02027893066406, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0194590091705322, + "rewards/margins": 5.701366424560547, + "rewards/rejected": -7.720826148986816, + "step": 3597 + }, + { + "epoch": 0.56, + "learning_rate": 1.1508415374241403e-05, + "logits/chosen": -2.729487895965576, + "logits/rejected": -3.1718156337738037, + "logps/chosen": -246.12826538085938, + "logps/rejected": -390.70452880859375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.451697587966919, + "rewards/margins": 7.742396354675293, + "rewards/rejected": -9.194093704223633, + "step": 3598 + }, + { + "epoch": 0.56, + "learning_rate": 1.1507681933710255e-05, + "logits/chosen": -2.4343607425689697, + "logits/rejected": -2.991021156311035, + "logps/chosen": -105.76931762695312, + "logps/rejected": -221.992431640625, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6108154058456421, + "rewards/margins": 7.775016784667969, + "rewards/rejected": -7.164201736450195, + "step": 3599 + }, + { + "epoch": 0.56, + "learning_rate": 1.1506948493179107e-05, + "logits/chosen": -1.6096899509429932, + "logits/rejected": -3.167300224304199, + "logps/chosen": -83.9081039428711, + "logps/rejected": -424.67822265625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5786356925964355, + "rewards/margins": 7.0347089767456055, + "rewards/rejected": -8.6133451461792, + "step": 3600 + }, + { + "epoch": 0.56, + "learning_rate": 1.1506215052647959e-05, + "logits/chosen": -1.93429696559906, + "logits/rejected": -2.8464550971984863, + "logps/chosen": -61.82941818237305, + "logps/rejected": -324.43212890625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7723137140274048, + "rewards/margins": 7.393884658813477, + "rewards/rejected": -8.16619873046875, + "step": 3601 + }, + { + "epoch": 0.56, + "learning_rate": 1.1505481612116811e-05, + "logits/chosen": -3.23100209236145, + "logits/rejected": -2.7763733863830566, + "logps/chosen": -170.63455200195312, + "logps/rejected": -189.21273803710938, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5026306509971619, + "rewards/margins": 7.060813903808594, + "rewards/rejected": -7.5634446144104, + "step": 3602 + }, + { + "epoch": 0.56, + "learning_rate": 1.1504748171585663e-05, + "logits/chosen": -2.825035572052002, + "logits/rejected": -2.100297451019287, + "logps/chosen": -200.12442016601562, + "logps/rejected": -196.9855194091797, + "loss": 3.0601, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.889188766479492, + "rewards/margins": -0.5218415260314941, + "rewards/rejected": -4.367347240447998, + "step": 3603 + }, + { + "epoch": 0.56, + "learning_rate": 1.1504014731054516e-05, + "logits/chosen": -2.4502055644989014, + "logits/rejected": -3.027484893798828, + "logps/chosen": -167.67913818359375, + "logps/rejected": -365.4040832519531, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5806710720062256, + "rewards/margins": 5.035796165466309, + "rewards/rejected": -7.616467475891113, + "step": 3604 + }, + { + "epoch": 0.56, + "learning_rate": 1.1503281290523368e-05, + "logits/chosen": -2.215662717819214, + "logits/rejected": -2.938621759414673, + "logps/chosen": -164.9420166015625, + "logps/rejected": -349.55078125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01302184909582138, + "rewards/margins": 7.884457588195801, + "rewards/rejected": -7.89747953414917, + "step": 3605 + }, + { + "epoch": 0.56, + "learning_rate": 1.150254784999222e-05, + "logits/chosen": -2.8162293434143066, + "logits/rejected": -2.2027785778045654, + "logps/chosen": -142.04946899414062, + "logps/rejected": -51.14408493041992, + "loss": 4.6683, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.689610481262207, + "rewards/margins": -4.6265549659729, + "rewards/rejected": -2.0630552768707275, + "step": 3606 + }, + { + "epoch": 0.56, + "learning_rate": 1.1501814409461072e-05, + "logits/chosen": -1.6608586311340332, + "logits/rejected": -3.1044042110443115, + "logps/chosen": -455.9305419921875, + "logps/rejected": -644.6092529296875, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3699219226837158, + "rewards/margins": 6.153601169586182, + "rewards/rejected": -7.523523330688477, + "step": 3607 + }, + { + "epoch": 0.56, + "learning_rate": 1.1501080968929924e-05, + "logits/chosen": -1.7210570573806763, + "logits/rejected": -3.0856175422668457, + "logps/chosen": -227.6019744873047, + "logps/rejected": -307.6502990722656, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.025778889656067, + "rewards/margins": 5.411338806152344, + "rewards/rejected": -6.437117576599121, + "step": 3608 + }, + { + "epoch": 0.56, + "learning_rate": 1.1500347528398776e-05, + "logits/chosen": -2.9426841735839844, + "logits/rejected": -3.3307743072509766, + "logps/chosen": -34.64860534667969, + "logps/rejected": -281.85467529296875, + "loss": 0.0302, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0481982231140137, + "rewards/margins": 3.8385915756225586, + "rewards/rejected": -5.886789798736572, + "step": 3609 + }, + { + "epoch": 0.56, + "learning_rate": 1.1499614087867628e-05, + "logits/chosen": -2.5824148654937744, + "logits/rejected": -3.028430223464966, + "logps/chosen": -97.95274353027344, + "logps/rejected": -262.3138732910156, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1396617889404297, + "rewards/margins": 5.350569725036621, + "rewards/rejected": -7.490231990814209, + "step": 3610 + }, + { + "epoch": 0.56, + "learning_rate": 1.149888064733648e-05, + "logits/chosen": -2.2291502952575684, + "logits/rejected": -3.091782331466675, + "logps/chosen": -195.1978302001953, + "logps/rejected": -358.89892578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7901575565338135, + "rewards/margins": 8.381332397460938, + "rewards/rejected": -9.171490669250488, + "step": 3611 + }, + { + "epoch": 0.56, + "learning_rate": 1.1498147206805331e-05, + "logits/chosen": -2.52516770362854, + "logits/rejected": -2.7755796909332275, + "logps/chosen": -254.0008544921875, + "logps/rejected": -390.75885009765625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4206719398498535, + "rewards/margins": 7.139666557312012, + "rewards/rejected": -10.560338020324707, + "step": 3612 + }, + { + "epoch": 0.56, + "learning_rate": 1.1497413766274185e-05, + "logits/chosen": -3.0184600353240967, + "logits/rejected": -2.0816586017608643, + "logps/chosen": -138.29299926757812, + "logps/rejected": -207.6859588623047, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7906047701835632, + "rewards/margins": 6.086377143859863, + "rewards/rejected": -6.87698221206665, + "step": 3613 + }, + { + "epoch": 0.56, + "learning_rate": 1.1496680325743037e-05, + "logits/chosen": -2.8591902256011963, + "logits/rejected": -3.1981282234191895, + "logps/chosen": -38.67218780517578, + "logps/rejected": -329.66204833984375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.924670934677124, + "rewards/margins": 9.157926559448242, + "rewards/rejected": -10.082597732543945, + "step": 3614 + }, + { + "epoch": 0.56, + "learning_rate": 1.1495946885211889e-05, + "logits/chosen": -2.099282741546631, + "logits/rejected": -3.1884939670562744, + "logps/chosen": -87.29412078857422, + "logps/rejected": -198.93264770507812, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1481456756591797, + "rewards/margins": 5.797802925109863, + "rewards/rejected": -6.945948600769043, + "step": 3615 + }, + { + "epoch": 0.56, + "learning_rate": 1.149521344468074e-05, + "logits/chosen": -2.859879732131958, + "logits/rejected": -3.005127429962158, + "logps/chosen": -336.65985107421875, + "logps/rejected": -497.017333984375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40832290053367615, + "rewards/margins": 7.266590118408203, + "rewards/rejected": -7.67491340637207, + "step": 3616 + }, + { + "epoch": 0.56, + "learning_rate": 1.1494480004149592e-05, + "logits/chosen": -3.1797420978546143, + "logits/rejected": -2.8955700397491455, + "logps/chosen": -222.32418823242188, + "logps/rejected": -141.7109375, + "loss": 0.8977, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8979082107543945, + "rewards/margins": 2.8458664417266846, + "rewards/rejected": -4.7437744140625, + "step": 3617 + }, + { + "epoch": 0.56, + "learning_rate": 1.1493746563618444e-05, + "logits/chosen": -2.711327314376831, + "logits/rejected": -2.6339356899261475, + "logps/chosen": -252.86737060546875, + "logps/rejected": -386.27935791015625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.353290557861328, + "rewards/margins": 6.211770057678223, + "rewards/rejected": -8.56506061553955, + "step": 3618 + }, + { + "epoch": 0.56, + "learning_rate": 1.1493013123087296e-05, + "logits/chosen": -3.216092824935913, + "logits/rejected": -2.204030990600586, + "logps/chosen": -205.78125, + "logps/rejected": -82.44346618652344, + "loss": 0.1054, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.115067481994629, + "rewards/margins": 2.2731807231903076, + "rewards/rejected": -5.388248443603516, + "step": 3619 + }, + { + "epoch": 0.56, + "learning_rate": 1.1492279682556148e-05, + "logits/chosen": -1.242431402206421, + "logits/rejected": -2.945246934890747, + "logps/chosen": -112.15351104736328, + "logps/rejected": -352.36920166015625, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.067903518676758, + "rewards/margins": 5.643001556396484, + "rewards/rejected": -8.710905075073242, + "step": 3620 + }, + { + "epoch": 0.56, + "learning_rate": 1.1491546242025002e-05, + "logits/chosen": -2.9565796852111816, + "logits/rejected": -3.039818286895752, + "logps/chosen": -154.37147521972656, + "logps/rejected": -129.06552124023438, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8971221446990967, + "rewards/margins": 4.388241767883301, + "rewards/rejected": -6.285364151000977, + "step": 3621 + }, + { + "epoch": 0.56, + "learning_rate": 1.1490812801493854e-05, + "logits/chosen": -1.5308839082717896, + "logits/rejected": -3.1950225830078125, + "logps/chosen": -220.88124084472656, + "logps/rejected": -488.0755615234375, + "loss": 0.0984, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6036525964736938, + "rewards/margins": 5.981601238250732, + "rewards/rejected": -7.585253715515137, + "step": 3622 + }, + { + "epoch": 0.56, + "learning_rate": 1.1490079360962705e-05, + "logits/chosen": -3.1826987266540527, + "logits/rejected": -2.7940850257873535, + "logps/chosen": -602.6459350585938, + "logps/rejected": -387.5638427734375, + "loss": 0.0779, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1276969909667969, + "rewards/margins": 4.4534406661987305, + "rewards/rejected": -5.581137657165527, + "step": 3623 + }, + { + "epoch": 0.56, + "learning_rate": 1.1489345920431557e-05, + "logits/chosen": -2.125335216522217, + "logits/rejected": -2.8783209323883057, + "logps/chosen": -91.68601989746094, + "logps/rejected": -151.26217651367188, + "loss": 0.26, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.763330459594727, + "rewards/margins": 1.4591732025146484, + "rewards/rejected": -6.222503662109375, + "step": 3624 + }, + { + "epoch": 0.56, + "learning_rate": 1.148861247990041e-05, + "logits/chosen": -2.9970996379852295, + "logits/rejected": -2.5327963829040527, + "logps/chosen": -321.389892578125, + "logps/rejected": -342.578125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6074234247207642, + "rewards/margins": 7.108721733093262, + "rewards/rejected": -7.7161455154418945, + "step": 3625 + }, + { + "epoch": 0.56, + "learning_rate": 1.1487879039369261e-05, + "logits/chosen": -3.1069722175598145, + "logits/rejected": -3.1163623332977295, + "logps/chosen": -342.16717529296875, + "logps/rejected": -240.36610412597656, + "loss": 1.5807, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.26837158203125, + "rewards/margins": 0.3490631580352783, + "rewards/rejected": -1.6174347400665283, + "step": 3626 + }, + { + "epoch": 0.56, + "learning_rate": 1.1487145598838115e-05, + "logits/chosen": -3.208059072494507, + "logits/rejected": -2.1002609729766846, + "logps/chosen": -219.93084716796875, + "logps/rejected": -148.9287872314453, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.48596662282943726, + "rewards/margins": 6.324336051940918, + "rewards/rejected": -6.810302734375, + "step": 3627 + }, + { + "epoch": 0.56, + "learning_rate": 1.1486412158306967e-05, + "logits/chosen": -2.814103603363037, + "logits/rejected": -2.3428964614868164, + "logps/chosen": -170.0819854736328, + "logps/rejected": -236.16000366210938, + "loss": 2.9666, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.66633677482605, + "rewards/margins": 2.172800064086914, + "rewards/rejected": -5.839136600494385, + "step": 3628 + }, + { + "epoch": 0.56, + "learning_rate": 1.1485678717775818e-05, + "logits/chosen": -2.6308670043945312, + "logits/rejected": -2.7573859691619873, + "logps/chosen": -558.105712890625, + "logps/rejected": -670.739990234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.444937229156494, + "rewards/margins": 8.898855209350586, + "rewards/rejected": -12.343792915344238, + "step": 3629 + }, + { + "epoch": 0.56, + "learning_rate": 1.1484945277244672e-05, + "logits/chosen": -3.236623764038086, + "logits/rejected": -3.261658191680908, + "logps/chosen": -234.9283905029297, + "logps/rejected": -238.43045043945312, + "loss": 0.0476, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2135238647460938, + "rewards/margins": 4.852969646453857, + "rewards/rejected": -6.066493511199951, + "step": 3630 + }, + { + "epoch": 0.56, + "learning_rate": 1.1484211836713524e-05, + "logits/chosen": -3.263338804244995, + "logits/rejected": -2.2514548301696777, + "logps/chosen": -308.5094909667969, + "logps/rejected": -157.1531524658203, + "loss": 5.1483, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.9951982498168945, + "rewards/margins": -5.1423773765563965, + "rewards/rejected": -0.8528209924697876, + "step": 3631 + }, + { + "epoch": 0.56, + "learning_rate": 1.1483478396182376e-05, + "logits/chosen": -3.173692226409912, + "logits/rejected": -2.9160044193267822, + "logps/chosen": -339.3102111816406, + "logps/rejected": -106.49092102050781, + "loss": 1.3429, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.533213138580322, + "rewards/margins": -0.1766108274459839, + "rewards/rejected": -5.356602191925049, + "step": 3632 + }, + { + "epoch": 0.57, + "learning_rate": 1.1482744955651228e-05, + "logits/chosen": -2.849595069885254, + "logits/rejected": -2.9495768547058105, + "logps/chosen": -104.21659851074219, + "logps/rejected": -178.94937133789062, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8505425453186035, + "rewards/margins": 5.1612114906311035, + "rewards/rejected": -9.011754035949707, + "step": 3633 + }, + { + "epoch": 0.57, + "learning_rate": 1.148201151512008e-05, + "logits/chosen": -2.1875340938568115, + "logits/rejected": -3.0848546028137207, + "logps/chosen": -48.59901809692383, + "logps/rejected": -241.11830139160156, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3607256412506104, + "rewards/margins": 5.941912651062012, + "rewards/rejected": -7.302638053894043, + "step": 3634 + }, + { + "epoch": 0.57, + "learning_rate": 1.1481278074588931e-05, + "logits/chosen": -2.4847042560577393, + "logits/rejected": -3.044919967651367, + "logps/chosen": -516.0269775390625, + "logps/rejected": -508.853271484375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.987626314163208, + "rewards/margins": 8.645021438598633, + "rewards/rejected": -10.632647514343262, + "step": 3635 + }, + { + "epoch": 0.57, + "learning_rate": 1.1480544634057783e-05, + "logits/chosen": -1.1484450101852417, + "logits/rejected": -2.580639600753784, + "logps/chosen": -609.4758911132812, + "logps/rejected": -399.6487731933594, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2449901103973389, + "rewards/margins": 7.142427444458008, + "rewards/rejected": -8.387417793273926, + "step": 3636 + }, + { + "epoch": 0.57, + "learning_rate": 1.1479811193526635e-05, + "logits/chosen": -3.011667251586914, + "logits/rejected": -3.1395840644836426, + "logps/chosen": -68.06758117675781, + "logps/rejected": -173.21914672851562, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.084580421447754, + "rewards/margins": 4.75055456161499, + "rewards/rejected": -6.835134983062744, + "step": 3637 + }, + { + "epoch": 0.57, + "learning_rate": 1.1479077752995487e-05, + "logits/chosen": -2.0356109142303467, + "logits/rejected": -2.8414154052734375, + "logps/chosen": -146.67355346679688, + "logps/rejected": -183.2642822265625, + "loss": 0.1974, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0139291286468506, + "rewards/margins": 3.05303955078125, + "rewards/rejected": -6.06696891784668, + "step": 3638 + }, + { + "epoch": 0.57, + "learning_rate": 1.147834431246434e-05, + "logits/chosen": -2.551506996154785, + "logits/rejected": -3.1342580318450928, + "logps/chosen": -84.9289779663086, + "logps/rejected": -192.82261657714844, + "loss": 0.0765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9582855701446533, + "rewards/margins": 4.5152482986450195, + "rewards/rejected": -5.473534107208252, + "step": 3639 + }, + { + "epoch": 0.57, + "learning_rate": 1.1477610871933192e-05, + "logits/chosen": -2.1145710945129395, + "logits/rejected": -3.0079874992370605, + "logps/chosen": -310.70867919921875, + "logps/rejected": -471.8994140625, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.697509765625, + "rewards/margins": 5.505148410797119, + "rewards/rejected": -6.202658176422119, + "step": 3640 + }, + { + "epoch": 0.57, + "learning_rate": 1.1476877431402044e-05, + "logits/chosen": -2.9493398666381836, + "logits/rejected": -2.2548041343688965, + "logps/chosen": -429.32421875, + "logps/rejected": -367.31689453125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1971328258514404, + "rewards/margins": 6.702680587768555, + "rewards/rejected": -5.505547523498535, + "step": 3641 + }, + { + "epoch": 0.57, + "learning_rate": 1.1476143990870896e-05, + "logits/chosen": -2.6371254920959473, + "logits/rejected": -2.8466572761535645, + "logps/chosen": -89.26152801513672, + "logps/rejected": -178.63888549804688, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1949784755706787, + "rewards/margins": 6.490411758422852, + "rewards/rejected": -8.68539047241211, + "step": 3642 + }, + { + "epoch": 0.57, + "learning_rate": 1.1475410550339748e-05, + "logits/chosen": -2.9533021450042725, + "logits/rejected": -2.1962151527404785, + "logps/chosen": -250.1421356201172, + "logps/rejected": -98.84536743164062, + "loss": 3.4499, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.276617050170898, + "rewards/margins": -2.0499298572540283, + "rewards/rejected": -5.226686954498291, + "step": 3643 + }, + { + "epoch": 0.57, + "learning_rate": 1.14746771098086e-05, + "logits/chosen": -1.811968207359314, + "logits/rejected": -2.985370397567749, + "logps/chosen": -121.89734649658203, + "logps/rejected": -266.6850891113281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41206473112106323, + "rewards/margins": 8.62413215637207, + "rewards/rejected": -9.036195755004883, + "step": 3644 + }, + { + "epoch": 0.57, + "learning_rate": 1.1473943669277452e-05, + "logits/chosen": -2.8328535556793213, + "logits/rejected": -3.194857597351074, + "logps/chosen": -53.82703399658203, + "logps/rejected": -179.85659790039062, + "loss": 0.0699, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5069222450256348, + "rewards/margins": 4.444899559020996, + "rewards/rejected": -6.951821327209473, + "step": 3645 + }, + { + "epoch": 0.57, + "learning_rate": 1.1473210228746304e-05, + "logits/chosen": -2.8327231407165527, + "logits/rejected": -2.4653356075286865, + "logps/chosen": -77.0600357055664, + "logps/rejected": -105.0724868774414, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.236440181732178, + "rewards/margins": 4.3395538330078125, + "rewards/rejected": -8.575993537902832, + "step": 3646 + }, + { + "epoch": 0.57, + "learning_rate": 1.1472476788215156e-05, + "logits/chosen": -1.356241226196289, + "logits/rejected": -3.14760684967041, + "logps/chosen": -159.15980529785156, + "logps/rejected": -792.947265625, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5886585712432861, + "rewards/margins": 7.734055519104004, + "rewards/rejected": -9.322713851928711, + "step": 3647 + }, + { + "epoch": 0.57, + "learning_rate": 1.147174334768401e-05, + "logits/chosen": -2.579765558242798, + "logits/rejected": -3.2008774280548096, + "logps/chosen": -367.0605773925781, + "logps/rejected": -432.8064270019531, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6114834547042847, + "rewards/margins": 7.071838855743408, + "rewards/rejected": -8.68332290649414, + "step": 3648 + }, + { + "epoch": 0.57, + "learning_rate": 1.1471009907152861e-05, + "logits/chosen": -2.7769668102264404, + "logits/rejected": -3.184298276901245, + "logps/chosen": -258.2101135253906, + "logps/rejected": -331.9529724121094, + "loss": 0.7004, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.056646704673767, + "rewards/margins": 1.5514497756958008, + "rewards/rejected": -2.6080963611602783, + "step": 3649 + }, + { + "epoch": 0.57, + "learning_rate": 1.1470276466621713e-05, + "logits/chosen": -2.9914591312408447, + "logits/rejected": -3.2889792919158936, + "logps/chosen": -570.571044921875, + "logps/rejected": -294.795166015625, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7716295123100281, + "rewards/margins": 5.410423278808594, + "rewards/rejected": -6.1820526123046875, + "step": 3650 + }, + { + "epoch": 0.57, + "learning_rate": 1.1469543026090565e-05, + "logits/chosen": -2.6168534755706787, + "logits/rejected": -3.226858139038086, + "logps/chosen": -115.35052490234375, + "logps/rejected": -197.3204345703125, + "loss": 2.3784, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.122349739074707, + "rewards/margins": -0.520942211151123, + "rewards/rejected": -4.601408004760742, + "step": 3651 + }, + { + "epoch": 0.57, + "learning_rate": 1.1468809585559417e-05, + "logits/chosen": -3.043715715408325, + "logits/rejected": -3.214879274368286, + "logps/chosen": -534.4449462890625, + "logps/rejected": -578.923095703125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0178848505020142, + "rewards/margins": 5.6881561279296875, + "rewards/rejected": -6.706040859222412, + "step": 3652 + }, + { + "epoch": 0.57, + "learning_rate": 1.1468076145028269e-05, + "logits/chosen": -2.1573538780212402, + "logits/rejected": -2.856849193572998, + "logps/chosen": -125.48477935791016, + "logps/rejected": -353.72283935546875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5488603711128235, + "rewards/margins": 7.570979118347168, + "rewards/rejected": -8.119839668273926, + "step": 3653 + }, + { + "epoch": 0.57, + "learning_rate": 1.146734270449712e-05, + "logits/chosen": -2.8802123069763184, + "logits/rejected": -1.8577483892440796, + "logps/chosen": -307.7071533203125, + "logps/rejected": -322.1135559082031, + "loss": 1.4077, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0611984729766846, + "rewards/margins": 3.7871787548065186, + "rewards/rejected": -6.848377227783203, + "step": 3654 + }, + { + "epoch": 0.57, + "learning_rate": 1.1466609263965972e-05, + "logits/chosen": -2.98891019821167, + "logits/rejected": -2.3459649085998535, + "logps/chosen": -570.3302001953125, + "logps/rejected": -416.028564453125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2884063720703125, + "rewards/margins": 7.172399997711182, + "rewards/rejected": -7.460806369781494, + "step": 3655 + }, + { + "epoch": 0.57, + "learning_rate": 1.1465875823434824e-05, + "logits/chosen": -1.797544240951538, + "logits/rejected": -3.1757054328918457, + "logps/chosen": -107.00079345703125, + "logps/rejected": -406.4725646972656, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8413459658622742, + "rewards/margins": 9.368897438049316, + "rewards/rejected": -10.210243225097656, + "step": 3656 + }, + { + "epoch": 0.57, + "learning_rate": 1.1465142382903678e-05, + "logits/chosen": -2.0774590969085693, + "logits/rejected": -3.14475154876709, + "logps/chosen": -43.91870880126953, + "logps/rejected": -337.9157409667969, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.914348602294922, + "rewards/margins": 8.240312576293945, + "rewards/rejected": -11.154661178588867, + "step": 3657 + }, + { + "epoch": 0.57, + "learning_rate": 1.146440894237253e-05, + "logits/chosen": -2.8705391883850098, + "logits/rejected": -2.48140549659729, + "logps/chosen": -336.43035888671875, + "logps/rejected": -288.4793701171875, + "loss": 0.135, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.323751449584961, + "rewards/margins": 3.9289121627807617, + "rewards/rejected": -7.252663612365723, + "step": 3658 + }, + { + "epoch": 0.57, + "learning_rate": 1.1463675501841382e-05, + "logits/chosen": -2.7811944484710693, + "logits/rejected": -3.037365436553955, + "logps/chosen": -268.4913635253906, + "logps/rejected": -203.10577392578125, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9820533990859985, + "rewards/margins": 4.938419342041016, + "rewards/rejected": -5.920472145080566, + "step": 3659 + }, + { + "epoch": 0.57, + "learning_rate": 1.1462942061310233e-05, + "logits/chosen": -2.9981908798217773, + "logits/rejected": -2.2366232872009277, + "logps/chosen": -432.564697265625, + "logps/rejected": -251.67556762695312, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2688134908676147, + "rewards/margins": 4.204100608825684, + "rewards/rejected": -5.47291374206543, + "step": 3660 + }, + { + "epoch": 0.57, + "learning_rate": 1.1462208620779087e-05, + "logits/chosen": -2.3418257236480713, + "logits/rejected": -2.938009738922119, + "logps/chosen": -143.01773071289062, + "logps/rejected": -238.83062744140625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1379696130752563, + "rewards/margins": 6.21336030960083, + "rewards/rejected": -7.351329803466797, + "step": 3661 + }, + { + "epoch": 0.57, + "learning_rate": 1.1461475180247939e-05, + "logits/chosen": -2.9861886501312256, + "logits/rejected": -1.1859771013259888, + "logps/chosen": -227.9916534423828, + "logps/rejected": -275.04632568359375, + "loss": 0.0641, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2909011840820312, + "rewards/margins": 5.750766754150391, + "rewards/rejected": -9.041667938232422, + "step": 3662 + }, + { + "epoch": 0.57, + "learning_rate": 1.146074173971679e-05, + "logits/chosen": -2.047065258026123, + "logits/rejected": -2.7970588207244873, + "logps/chosen": -34.53630828857422, + "logps/rejected": -276.4222412109375, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.595984935760498, + "rewards/margins": 4.70964241027832, + "rewards/rejected": -6.305627346038818, + "step": 3663 + }, + { + "epoch": 0.57, + "learning_rate": 1.1460008299185643e-05, + "logits/chosen": -3.0839312076568604, + "logits/rejected": -2.272970199584961, + "logps/chosen": -746.0296630859375, + "logps/rejected": -578.3305053710938, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2099563628435135, + "rewards/margins": 8.785476684570312, + "rewards/rejected": -8.575520515441895, + "step": 3664 + }, + { + "epoch": 0.57, + "learning_rate": 1.1459274858654495e-05, + "logits/chosen": -1.756179928779602, + "logits/rejected": -2.813420295715332, + "logps/chosen": -163.15261840820312, + "logps/rejected": -514.1674194335938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7821294069290161, + "rewards/margins": 12.835494995117188, + "rewards/rejected": -14.617624282836914, + "step": 3665 + }, + { + "epoch": 0.57, + "learning_rate": 1.1458541418123348e-05, + "logits/chosen": -2.8781871795654297, + "logits/rejected": -3.2100515365600586, + "logps/chosen": -71.4327621459961, + "logps/rejected": -219.29864501953125, + "loss": 0.0526, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3874526023864746, + "rewards/margins": 3.4911580085754395, + "rewards/rejected": -6.878610610961914, + "step": 3666 + }, + { + "epoch": 0.57, + "learning_rate": 1.14578079775922e-05, + "logits/chosen": -3.2179129123687744, + "logits/rejected": -3.1066792011260986, + "logps/chosen": -527.7446899414062, + "logps/rejected": -440.05242919921875, + "loss": 3.5404, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.991504669189453, + "rewards/margins": -0.2825767993927002, + "rewards/rejected": -4.708928108215332, + "step": 3667 + }, + { + "epoch": 0.57, + "learning_rate": 1.1457074537061052e-05, + "logits/chosen": -2.6934397220611572, + "logits/rejected": -3.1456711292266846, + "logps/chosen": -39.24347686767578, + "logps/rejected": -170.598388671875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7799830436706543, + "rewards/margins": 6.329067230224609, + "rewards/rejected": -9.109050750732422, + "step": 3668 + }, + { + "epoch": 0.57, + "learning_rate": 1.1456341096529904e-05, + "logits/chosen": -3.0414180755615234, + "logits/rejected": -1.2054073810577393, + "logps/chosen": -248.64892578125, + "logps/rejected": -201.49478149414062, + "loss": 3.4233, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.328224182128906, + "rewards/margins": -0.5257675647735596, + "rewards/rejected": -5.802456855773926, + "step": 3669 + }, + { + "epoch": 0.57, + "learning_rate": 1.1455607655998756e-05, + "logits/chosen": -2.038318157196045, + "logits/rejected": -2.8378562927246094, + "logps/chosen": -107.10215759277344, + "logps/rejected": -203.45223999023438, + "loss": 0.3143, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0447964668273926, + "rewards/margins": 4.859287261962891, + "rewards/rejected": -7.904084205627441, + "step": 3670 + }, + { + "epoch": 0.57, + "learning_rate": 1.1454874215467607e-05, + "logits/chosen": -2.989392042160034, + "logits/rejected": -2.7794554233551025, + "logps/chosen": -92.286376953125, + "logps/rejected": -249.90516662597656, + "loss": 0.0808, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9302618503570557, + "rewards/margins": 3.9470694065093994, + "rewards/rejected": -6.877331256866455, + "step": 3671 + }, + { + "epoch": 0.57, + "learning_rate": 1.145414077493646e-05, + "logits/chosen": -3.088792085647583, + "logits/rejected": -3.199218988418579, + "logps/chosen": -796.2674560546875, + "logps/rejected": -479.7364501953125, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2097744941711426, + "rewards/margins": 4.6312994956970215, + "rewards/rejected": -7.841073989868164, + "step": 3672 + }, + { + "epoch": 0.57, + "learning_rate": 1.1453407334405311e-05, + "logits/chosen": -2.5643646717071533, + "logits/rejected": -3.040574073791504, + "logps/chosen": -90.71502685546875, + "logps/rejected": -310.62298583984375, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6390961408615112, + "rewards/margins": 5.842782974243164, + "rewards/rejected": -7.481878757476807, + "step": 3673 + }, + { + "epoch": 0.57, + "learning_rate": 1.1452673893874163e-05, + "logits/chosen": -2.4610049724578857, + "logits/rejected": -2.997678756713867, + "logps/chosen": -204.15199279785156, + "logps/rejected": -274.7986145019531, + "loss": 0.1597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8928737640380859, + "rewards/margins": 4.341580867767334, + "rewards/rejected": -5.23445463180542, + "step": 3674 + }, + { + "epoch": 0.57, + "learning_rate": 1.1451940453343017e-05, + "logits/chosen": -3.268598794937134, + "logits/rejected": -3.058119535446167, + "logps/chosen": -164.73036193847656, + "logps/rejected": -166.14096069335938, + "loss": 3.0765, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.04386043548584, + "rewards/margins": -0.017031431198120117, + "rewards/rejected": -7.026828765869141, + "step": 3675 + }, + { + "epoch": 0.57, + "learning_rate": 1.1451207012811869e-05, + "logits/chosen": -2.5791773796081543, + "logits/rejected": -2.8982558250427246, + "logps/chosen": -161.57577514648438, + "logps/rejected": -271.1610107421875, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3359196186065674, + "rewards/margins": 5.868624210357666, + "rewards/rejected": -8.204544067382812, + "step": 3676 + }, + { + "epoch": 0.57, + "learning_rate": 1.145047357228072e-05, + "logits/chosen": -2.710171699523926, + "logits/rejected": -2.9334661960601807, + "logps/chosen": -66.36144256591797, + "logps/rejected": -257.1005859375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.24178147315979, + "rewards/margins": 6.831748962402344, + "rewards/rejected": -9.073530197143555, + "step": 3677 + }, + { + "epoch": 0.57, + "learning_rate": 1.1449740131749572e-05, + "logits/chosen": -1.805279016494751, + "logits/rejected": -2.2161879539489746, + "logps/chosen": -146.646728515625, + "logps/rejected": -317.2138671875, + "loss": 0.1037, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.477661371231079, + "rewards/margins": 6.580533981323242, + "rewards/rejected": -10.058195114135742, + "step": 3678 + }, + { + "epoch": 0.57, + "learning_rate": 1.1449006691218424e-05, + "logits/chosen": -2.291991949081421, + "logits/rejected": -3.1086723804473877, + "logps/chosen": -499.55633544921875, + "logps/rejected": -531.7821044921875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7188736200332642, + "rewards/margins": 7.491605758666992, + "rewards/rejected": -6.772732734680176, + "step": 3679 + }, + { + "epoch": 0.57, + "learning_rate": 1.1448273250687276e-05, + "logits/chosen": -2.6029281616210938, + "logits/rejected": -3.058393955230713, + "logps/chosen": -97.2270278930664, + "logps/rejected": -241.0570831298828, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5335423946380615, + "rewards/margins": 7.325941562652588, + "rewards/rejected": -9.85948371887207, + "step": 3680 + }, + { + "epoch": 0.57, + "learning_rate": 1.1447539810156128e-05, + "logits/chosen": -2.2812838554382324, + "logits/rejected": -3.1255133152008057, + "logps/chosen": -77.58477783203125, + "logps/rejected": -360.3933410644531, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0668985843658447, + "rewards/margins": 8.07984733581543, + "rewards/rejected": -10.146745681762695, + "step": 3681 + }, + { + "epoch": 0.57, + "learning_rate": 1.144680636962498e-05, + "logits/chosen": -2.7036526203155518, + "logits/rejected": -3.3437843322753906, + "logps/chosen": -295.51873779296875, + "logps/rejected": -259.0963439941406, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6137089133262634, + "rewards/margins": 9.037760734558105, + "rewards/rejected": -8.424052238464355, + "step": 3682 + }, + { + "epoch": 0.57, + "learning_rate": 1.1446072929093832e-05, + "logits/chosen": -3.0189156532287598, + "logits/rejected": -2.8723785877227783, + "logps/chosen": -41.09486770629883, + "logps/rejected": -334.1087341308594, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3711771965026855, + "rewards/margins": 7.738117218017578, + "rewards/rejected": -9.109294891357422, + "step": 3683 + }, + { + "epoch": 0.57, + "learning_rate": 1.1445339488562685e-05, + "logits/chosen": -2.7079098224639893, + "logits/rejected": -2.8713302612304688, + "logps/chosen": -61.40924072265625, + "logps/rejected": -214.4592742919922, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3290936946868896, + "rewards/margins": 6.974367141723633, + "rewards/rejected": -9.303461074829102, + "step": 3684 + }, + { + "epoch": 0.57, + "learning_rate": 1.1444606048031537e-05, + "logits/chosen": -1.3253659009933472, + "logits/rejected": -3.0040764808654785, + "logps/chosen": -79.83100891113281, + "logps/rejected": -405.56280517578125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3345417976379395, + "rewards/margins": 7.535571098327637, + "rewards/rejected": -9.870113372802734, + "step": 3685 + }, + { + "epoch": 0.57, + "learning_rate": 1.1443872607500389e-05, + "logits/chosen": -1.822775959968567, + "logits/rejected": -2.8345205783843994, + "logps/chosen": -127.80596923828125, + "logps/rejected": -188.1705780029297, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.563800096511841, + "rewards/margins": 5.4258131980896, + "rewards/rejected": -7.9896135330200195, + "step": 3686 + }, + { + "epoch": 0.57, + "learning_rate": 1.1443139166969241e-05, + "logits/chosen": -2.4871370792388916, + "logits/rejected": -2.9890236854553223, + "logps/chosen": -219.63818359375, + "logps/rejected": -249.1486358642578, + "loss": 2.8805, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.478459358215332, + "rewards/margins": 2.4369945526123047, + "rewards/rejected": -6.915453910827637, + "step": 3687 + }, + { + "epoch": 0.57, + "learning_rate": 1.1442405726438093e-05, + "logits/chosen": -2.429112195968628, + "logits/rejected": -2.8645358085632324, + "logps/chosen": -132.07110595703125, + "logps/rejected": -154.8109893798828, + "loss": 1.6139, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.337163209915161, + "rewards/margins": 1.6186612844467163, + "rewards/rejected": -4.955824375152588, + "step": 3688 + }, + { + "epoch": 0.57, + "learning_rate": 1.1441672285906945e-05, + "logits/chosen": -3.055251359939575, + "logits/rejected": -2.02116060256958, + "logps/chosen": -289.3518371582031, + "logps/rejected": -144.20448303222656, + "loss": 4.2749, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.521474361419678, + "rewards/margins": -0.1638956069946289, + "rewards/rejected": -6.357578754425049, + "step": 3689 + }, + { + "epoch": 0.57, + "learning_rate": 1.1440938845375797e-05, + "logits/chosen": -2.9599030017852783, + "logits/rejected": -3.305598020553589, + "logps/chosen": -84.18894958496094, + "logps/rejected": -215.77685546875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3035197257995605, + "rewards/margins": 7.105216026306152, + "rewards/rejected": -8.408735275268555, + "step": 3690 + }, + { + "epoch": 0.57, + "learning_rate": 1.1440205404844648e-05, + "logits/chosen": -2.9913852214813232, + "logits/rejected": -2.7291507720947266, + "logps/chosen": -226.27066040039062, + "logps/rejected": -483.56060791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4462936222553253, + "rewards/margins": 11.307905197143555, + "rewards/rejected": -11.75419807434082, + "step": 3691 + }, + { + "epoch": 0.57, + "learning_rate": 1.14394719643135e-05, + "logits/chosen": -2.073928117752075, + "logits/rejected": -2.908238410949707, + "logps/chosen": -179.1873321533203, + "logps/rejected": -270.1817626953125, + "loss": 1.8444, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.956501483917236, + "rewards/margins": 2.982560396194458, + "rewards/rejected": -7.939062118530273, + "step": 3692 + }, + { + "epoch": 0.57, + "learning_rate": 1.1438738523782354e-05, + "logits/chosen": -3.1194772720336914, + "logits/rejected": -2.8914637565612793, + "logps/chosen": -94.74302673339844, + "logps/rejected": -194.90135192871094, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9957301616668701, + "rewards/margins": 5.898398399353027, + "rewards/rejected": -7.894128799438477, + "step": 3693 + }, + { + "epoch": 0.57, + "learning_rate": 1.1438005083251206e-05, + "logits/chosen": -2.9070065021514893, + "logits/rejected": -2.4867100715637207, + "logps/chosen": -272.0149230957031, + "logps/rejected": -424.522216796875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1089874505996704, + "rewards/margins": 6.584378242492676, + "rewards/rejected": -7.693365573883057, + "step": 3694 + }, + { + "epoch": 0.57, + "learning_rate": 1.143727164272006e-05, + "logits/chosen": -3.1441192626953125, + "logits/rejected": -2.379040479660034, + "logps/chosen": -375.8748474121094, + "logps/rejected": -403.5294189453125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0407730340957642, + "rewards/margins": 8.021039962768555, + "rewards/rejected": -9.061813354492188, + "step": 3695 + }, + { + "epoch": 0.57, + "learning_rate": 1.1436538202188911e-05, + "logits/chosen": -2.091701030731201, + "logits/rejected": -2.7155330181121826, + "logps/chosen": -74.4316635131836, + "logps/rejected": -351.6912841796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.906740427017212, + "rewards/margins": 8.96700382232666, + "rewards/rejected": -10.87374496459961, + "step": 3696 + }, + { + "epoch": 0.57, + "learning_rate": 1.1435804761657763e-05, + "logits/chosen": -2.3913776874542236, + "logits/rejected": -3.110766649246216, + "logps/chosen": -57.84916687011719, + "logps/rejected": -187.35302734375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5309263467788696, + "rewards/margins": 5.715455055236816, + "rewards/rejected": -7.246380805969238, + "step": 3697 + }, + { + "epoch": 0.58, + "learning_rate": 1.1435071321126615e-05, + "logits/chosen": -2.4927420616149902, + "logits/rejected": -2.9825451374053955, + "logps/chosen": -227.80064392089844, + "logps/rejected": -425.348388671875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3033370971679688, + "rewards/margins": 6.531848907470703, + "rewards/rejected": -7.835186004638672, + "step": 3698 + }, + { + "epoch": 0.58, + "learning_rate": 1.1434337880595467e-05, + "logits/chosen": -2.939183473587036, + "logits/rejected": -3.074599027633667, + "logps/chosen": -166.4315643310547, + "logps/rejected": -266.8128662109375, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4268466830253601, + "rewards/margins": 5.793957710266113, + "rewards/rejected": -6.220804214477539, + "step": 3699 + }, + { + "epoch": 0.58, + "learning_rate": 1.1433604440064319e-05, + "logits/chosen": -2.84731125831604, + "logits/rejected": -3.111168146133423, + "logps/chosen": -206.424560546875, + "logps/rejected": -208.87818908691406, + "loss": 0.8207, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4734458923339844, + "rewards/margins": 2.1102983951568604, + "rewards/rejected": -4.583744049072266, + "step": 3700 + }, + { + "epoch": 0.58, + "learning_rate": 1.143287099953317e-05, + "logits/chosen": -1.9138386249542236, + "logits/rejected": -3.081120491027832, + "logps/chosen": -241.0424346923828, + "logps/rejected": -381.17437744140625, + "loss": 2.6286, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9937782287597656, + "rewards/margins": 0.21140289306640625, + "rewards/rejected": -4.205181121826172, + "step": 3701 + }, + { + "epoch": 0.58, + "learning_rate": 1.1432137559002024e-05, + "logits/chosen": -2.9556286334991455, + "logits/rejected": -2.583585023880005, + "logps/chosen": -298.3879089355469, + "logps/rejected": -358.325927734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07972371578216553, + "rewards/margins": 8.941696166992188, + "rewards/rejected": -9.021419525146484, + "step": 3702 + }, + { + "epoch": 0.58, + "learning_rate": 1.1431404118470876e-05, + "logits/chosen": -2.2194230556488037, + "logits/rejected": -2.8945577144622803, + "logps/chosen": -145.4133758544922, + "logps/rejected": -316.2989501953125, + "loss": 0.0326, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0450454950332642, + "rewards/margins": 7.390116214752197, + "rewards/rejected": -8.435161590576172, + "step": 3703 + }, + { + "epoch": 0.58, + "learning_rate": 1.1430670677939728e-05, + "logits/chosen": -1.1807998418807983, + "logits/rejected": -2.398752450942993, + "logps/chosen": -91.85873413085938, + "logps/rejected": -345.94842529296875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9797532558441162, + "rewards/margins": 6.897089958190918, + "rewards/rejected": -8.876842498779297, + "step": 3704 + }, + { + "epoch": 0.58, + "learning_rate": 1.142993723740858e-05, + "logits/chosen": -2.305773973464966, + "logits/rejected": -3.1881582736968994, + "logps/chosen": -99.81040954589844, + "logps/rejected": -267.42974853515625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5979382991790771, + "rewards/margins": 5.444550514221191, + "rewards/rejected": -7.042489051818848, + "step": 3705 + }, + { + "epoch": 0.58, + "learning_rate": 1.1429203796877432e-05, + "logits/chosen": -1.9681154489517212, + "logits/rejected": -3.0654797554016113, + "logps/chosen": -160.7779083251953, + "logps/rejected": -347.5738220214844, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8061288595199585, + "rewards/margins": 8.521646499633789, + "rewards/rejected": -10.327775955200195, + "step": 3706 + }, + { + "epoch": 0.58, + "learning_rate": 1.1428470356346284e-05, + "logits/chosen": -3.2100131511688232, + "logits/rejected": -2.4415791034698486, + "logps/chosen": -175.29034423828125, + "logps/rejected": -228.02101135253906, + "loss": 0.3282, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3525569438934326, + "rewards/margins": 1.5549626350402832, + "rewards/rejected": -3.907519578933716, + "step": 3707 + }, + { + "epoch": 0.58, + "learning_rate": 1.1427736915815135e-05, + "logits/chosen": -3.069549560546875, + "logits/rejected": -3.2457590103149414, + "logps/chosen": -81.32191467285156, + "logps/rejected": -269.248779296875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02296333760023117, + "rewards/margins": 7.603985786437988, + "rewards/rejected": -7.581022262573242, + "step": 3708 + }, + { + "epoch": 0.58, + "learning_rate": 1.1427003475283987e-05, + "logits/chosen": -2.0728206634521484, + "logits/rejected": -2.994025468826294, + "logps/chosen": -202.58004760742188, + "logps/rejected": -318.10467529296875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7964470386505127, + "rewards/margins": 5.756089210510254, + "rewards/rejected": -7.5525360107421875, + "step": 3709 + }, + { + "epoch": 0.58, + "learning_rate": 1.1426270034752841e-05, + "logits/chosen": -2.2344727516174316, + "logits/rejected": -3.315061330795288, + "logps/chosen": -191.8931884765625, + "logps/rejected": -407.1960754394531, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0251823663711548, + "rewards/margins": 6.860810279846191, + "rewards/rejected": -7.885993003845215, + "step": 3710 + }, + { + "epoch": 0.58, + "learning_rate": 1.1425536594221693e-05, + "logits/chosen": -3.07751202583313, + "logits/rejected": -2.1888997554779053, + "logps/chosen": -341.50970458984375, + "logps/rejected": -286.30230712890625, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1355741024017334, + "rewards/margins": 5.9896650314331055, + "rewards/rejected": -7.12523889541626, + "step": 3711 + }, + { + "epoch": 0.58, + "learning_rate": 1.1424803153690545e-05, + "logits/chosen": -2.044738292694092, + "logits/rejected": -3.1558382511138916, + "logps/chosen": -188.2537841796875, + "logps/rejected": -289.6141357421875, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22838973999023438, + "rewards/margins": 4.7093400955200195, + "rewards/rejected": -4.937729835510254, + "step": 3712 + }, + { + "epoch": 0.58, + "learning_rate": 1.1424069713159397e-05, + "logits/chosen": -2.944566249847412, + "logits/rejected": -1.5020368099212646, + "logps/chosen": -337.2545166015625, + "logps/rejected": -131.13143920898438, + "loss": 1.5807, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.722360849380493, + "rewards/margins": 1.6767346858978271, + "rewards/rejected": -4.39909553527832, + "step": 3713 + }, + { + "epoch": 0.58, + "learning_rate": 1.1423336272628248e-05, + "logits/chosen": -1.6144769191741943, + "logits/rejected": -2.8662681579589844, + "logps/chosen": -143.90603637695312, + "logps/rejected": -593.9848022460938, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8301620483398438, + "rewards/margins": 6.8607330322265625, + "rewards/rejected": -9.690895080566406, + "step": 3714 + }, + { + "epoch": 0.58, + "learning_rate": 1.14226028320971e-05, + "logits/chosen": -2.255772113800049, + "logits/rejected": -3.03682017326355, + "logps/chosen": -284.2929992675781, + "logps/rejected": -304.98486328125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2947704493999481, + "rewards/margins": 6.520095348358154, + "rewards/rejected": -6.814866065979004, + "step": 3715 + }, + { + "epoch": 0.58, + "learning_rate": 1.1421869391565952e-05, + "logits/chosen": -3.2825279235839844, + "logits/rejected": -2.2995901107788086, + "logps/chosen": -302.58544921875, + "logps/rejected": -103.95222473144531, + "loss": 2.7925, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.135446310043335, + "rewards/margins": 0.36058545112609863, + "rewards/rejected": -3.4960317611694336, + "step": 3716 + }, + { + "epoch": 0.58, + "learning_rate": 1.1421135951034804e-05, + "logits/chosen": -1.8693664073944092, + "logits/rejected": -3.0869433879852295, + "logps/chosen": -111.2110366821289, + "logps/rejected": -298.25921630859375, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.555798351764679, + "rewards/margins": 6.507162094116211, + "rewards/rejected": -7.062960624694824, + "step": 3717 + }, + { + "epoch": 0.58, + "learning_rate": 1.1420402510503656e-05, + "logits/chosen": -2.1449384689331055, + "logits/rejected": -2.735041856765747, + "logps/chosen": -125.89987182617188, + "logps/rejected": -420.08349609375, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2752540111541748, + "rewards/margins": 5.2721123695373535, + "rewards/rejected": -6.547366142272949, + "step": 3718 + }, + { + "epoch": 0.58, + "learning_rate": 1.141966906997251e-05, + "logits/chosen": -3.0780951976776123, + "logits/rejected": -2.5635125637054443, + "logps/chosen": -784.4546508789062, + "logps/rejected": -621.788818359375, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2342033386230469, + "rewards/margins": 6.451040744781494, + "rewards/rejected": -7.685244083404541, + "step": 3719 + }, + { + "epoch": 0.58, + "learning_rate": 1.1418935629441361e-05, + "logits/chosen": -2.4133946895599365, + "logits/rejected": -3.1300101280212402, + "logps/chosen": -263.3374328613281, + "logps/rejected": -461.64678955078125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9477920532226562, + "rewards/margins": 7.817852020263672, + "rewards/rejected": -8.765644073486328, + "step": 3720 + }, + { + "epoch": 0.58, + "learning_rate": 1.1418202188910213e-05, + "logits/chosen": -2.3632900714874268, + "logits/rejected": -3.154433250427246, + "logps/chosen": -336.3020935058594, + "logps/rejected": -471.4904479980469, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6943695545196533, + "rewards/margins": 7.1407575607299805, + "rewards/rejected": -7.835127353668213, + "step": 3721 + }, + { + "epoch": 0.58, + "learning_rate": 1.1417468748379065e-05, + "logits/chosen": -2.592982530593872, + "logits/rejected": -3.0480897426605225, + "logps/chosen": -196.69546508789062, + "logps/rejected": -261.7543640136719, + "loss": 0.0584, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6064910888671875, + "rewards/margins": 4.999802589416504, + "rewards/rejected": -7.606293678283691, + "step": 3722 + }, + { + "epoch": 0.58, + "learning_rate": 1.1416735307847917e-05, + "logits/chosen": -2.088045835494995, + "logits/rejected": -2.7302236557006836, + "logps/chosen": -277.89984130859375, + "logps/rejected": -419.81109619140625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4593154788017273, + "rewards/margins": 6.312716484069824, + "rewards/rejected": -6.772032260894775, + "step": 3723 + }, + { + "epoch": 0.58, + "learning_rate": 1.1416001867316769e-05, + "logits/chosen": -3.060485363006592, + "logits/rejected": -3.156111001968384, + "logps/chosen": -93.08476257324219, + "logps/rejected": -242.54066467285156, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3218410015106201, + "rewards/margins": 6.271522045135498, + "rewards/rejected": -7.593362808227539, + "step": 3724 + }, + { + "epoch": 0.58, + "learning_rate": 1.141526842678562e-05, + "logits/chosen": -2.9205000400543213, + "logits/rejected": -1.8688322305679321, + "logps/chosen": -410.5843200683594, + "logps/rejected": -299.94036865234375, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8306515216827393, + "rewards/margins": 6.007743835449219, + "rewards/rejected": -8.838395118713379, + "step": 3725 + }, + { + "epoch": 0.58, + "learning_rate": 1.1414534986254473e-05, + "logits/chosen": -1.743443250656128, + "logits/rejected": -2.7630794048309326, + "logps/chosen": -90.45813751220703, + "logps/rejected": -296.8332824707031, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3880078792572021, + "rewards/margins": 6.478583812713623, + "rewards/rejected": -7.866591453552246, + "step": 3726 + }, + { + "epoch": 0.58, + "learning_rate": 1.1413801545723326e-05, + "logits/chosen": -3.0298213958740234, + "logits/rejected": -3.118191719055176, + "logps/chosen": -39.20393371582031, + "logps/rejected": -209.96435546875, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1485323905944824, + "rewards/margins": 5.119924545288086, + "rewards/rejected": -7.268457412719727, + "step": 3727 + }, + { + "epoch": 0.58, + "learning_rate": 1.1413068105192178e-05, + "logits/chosen": -1.4905081987380981, + "logits/rejected": -2.9783003330230713, + "logps/chosen": -170.56353759765625, + "logps/rejected": -176.31320190429688, + "loss": 5.1889, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.63132381439209, + "rewards/margins": -4.032387733459473, + "rewards/rejected": -4.598936080932617, + "step": 3728 + }, + { + "epoch": 0.58, + "learning_rate": 1.1412334664661032e-05, + "logits/chosen": -1.915126085281372, + "logits/rejected": -3.094376564025879, + "logps/chosen": -109.4820556640625, + "logps/rejected": -324.7679748535156, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.23038649559021, + "rewards/margins": 6.250070571899414, + "rewards/rejected": -9.480457305908203, + "step": 3729 + }, + { + "epoch": 0.58, + "learning_rate": 1.1411601224129884e-05, + "logits/chosen": -2.8210413455963135, + "logits/rejected": -3.088531255722046, + "logps/chosen": -223.87860107421875, + "logps/rejected": -349.8506164550781, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9885263442993164, + "rewards/margins": 7.6715240478515625, + "rewards/rejected": -11.660050392150879, + "step": 3730 + }, + { + "epoch": 0.58, + "learning_rate": 1.1410867783598735e-05, + "logits/chosen": -3.099965810775757, + "logits/rejected": -3.1732964515686035, + "logps/chosen": -121.78704071044922, + "logps/rejected": -194.6613006591797, + "loss": 3.9459, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.8007965087890625, + "rewards/margins": -0.6025867462158203, + "rewards/rejected": -5.198209762573242, + "step": 3731 + }, + { + "epoch": 0.58, + "learning_rate": 1.1410134343067587e-05, + "logits/chosen": -2.5526015758514404, + "logits/rejected": -3.0989840030670166, + "logps/chosen": -286.0181884765625, + "logps/rejected": -341.13360595703125, + "loss": 2.6923, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.404426574707031, + "rewards/margins": -0.7116774320602417, + "rewards/rejected": -3.6927490234375, + "step": 3732 + }, + { + "epoch": 0.58, + "learning_rate": 1.140940090253644e-05, + "logits/chosen": -2.1428041458129883, + "logits/rejected": -3.062044620513916, + "logps/chosen": -86.66172790527344, + "logps/rejected": -243.6536865234375, + "loss": 0.6669, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.515340805053711, + "rewards/margins": 2.863807201385498, + "rewards/rejected": -8.379148483276367, + "step": 3733 + }, + { + "epoch": 0.58, + "learning_rate": 1.1408667462005291e-05, + "logits/chosen": -2.8475148677825928, + "logits/rejected": -2.958620548248291, + "logps/chosen": -286.5552978515625, + "logps/rejected": -375.2843322753906, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1837143898010254, + "rewards/margins": 7.867323875427246, + "rewards/rejected": -10.05103874206543, + "step": 3734 + }, + { + "epoch": 0.58, + "learning_rate": 1.1407934021474143e-05, + "logits/chosen": -3.190420150756836, + "logits/rejected": -2.8710639476776123, + "logps/chosen": -503.29656982421875, + "logps/rejected": -335.93341064453125, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5568339824676514, + "rewards/margins": 5.439291954040527, + "rewards/rejected": -6.996126174926758, + "step": 3735 + }, + { + "epoch": 0.58, + "learning_rate": 1.1407200580942995e-05, + "logits/chosen": -3.0651116371154785, + "logits/rejected": -1.3003190755844116, + "logps/chosen": -262.2867431640625, + "logps/rejected": -112.15657043457031, + "loss": 0.5309, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.667750120162964, + "rewards/margins": 2.255014657974243, + "rewards/rejected": -5.922764778137207, + "step": 3736 + }, + { + "epoch": 0.58, + "learning_rate": 1.1406467140411848e-05, + "logits/chosen": -3.0316288471221924, + "logits/rejected": -3.0337939262390137, + "logps/chosen": -63.46925735473633, + "logps/rejected": -157.1438751220703, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5919995307922363, + "rewards/margins": 5.9224958419799805, + "rewards/rejected": -8.514495849609375, + "step": 3737 + }, + { + "epoch": 0.58, + "learning_rate": 1.14057336998807e-05, + "logits/chosen": -2.6986289024353027, + "logits/rejected": -2.899374485015869, + "logps/chosen": -196.07192993164062, + "logps/rejected": -313.94256591796875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4365333616733551, + "rewards/margins": 6.051767349243164, + "rewards/rejected": -6.488300323486328, + "step": 3738 + }, + { + "epoch": 0.58, + "learning_rate": 1.1405000259349552e-05, + "logits/chosen": -2.389110803604126, + "logits/rejected": -3.0780842304229736, + "logps/chosen": -387.82281494140625, + "logps/rejected": -428.7838439941406, + "loss": 2.3762, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.976593017578125, + "rewards/margins": 1.8467791080474854, + "rewards/rejected": -6.823371887207031, + "step": 3739 + }, + { + "epoch": 0.58, + "learning_rate": 1.1404266818818404e-05, + "logits/chosen": -3.0160436630249023, + "logits/rejected": -2.507429361343384, + "logps/chosen": -483.86572265625, + "logps/rejected": -393.0363464355469, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5959198474884033, + "rewards/margins": 7.033841133117676, + "rewards/rejected": -7.6297607421875, + "step": 3740 + }, + { + "epoch": 0.58, + "learning_rate": 1.1403533378287256e-05, + "logits/chosen": -2.3652851581573486, + "logits/rejected": -3.172511100769043, + "logps/chosen": -91.198974609375, + "logps/rejected": -141.8336639404297, + "loss": 0.0925, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3216912746429443, + "rewards/margins": 2.5614211559295654, + "rewards/rejected": -5.88311243057251, + "step": 3741 + }, + { + "epoch": 0.58, + "learning_rate": 1.1402799937756108e-05, + "logits/chosen": -3.0824170112609863, + "logits/rejected": -1.702720046043396, + "logps/chosen": -309.29150390625, + "logps/rejected": -242.1681671142578, + "loss": 0.8159, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7265594005584717, + "rewards/margins": 4.200287818908691, + "rewards/rejected": -6.926846981048584, + "step": 3742 + }, + { + "epoch": 0.58, + "learning_rate": 1.140206649722496e-05, + "logits/chosen": -2.406146764755249, + "logits/rejected": -3.0747547149658203, + "logps/chosen": -201.02044677734375, + "logps/rejected": -191.494384765625, + "loss": 2.0608, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.331406593322754, + "rewards/margins": -0.635649561882019, + "rewards/rejected": -4.695756912231445, + "step": 3743 + }, + { + "epoch": 0.58, + "learning_rate": 1.1401333056693812e-05, + "logits/chosen": -2.408576250076294, + "logits/rejected": -2.9554264545440674, + "logps/chosen": -344.61810302734375, + "logps/rejected": -381.70660400390625, + "loss": 4.0725, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.413443088531494, + "rewards/margins": -1.9050993919372559, + "rewards/rejected": -3.508343458175659, + "step": 3744 + }, + { + "epoch": 0.58, + "learning_rate": 1.1400599616162663e-05, + "logits/chosen": -1.7884541749954224, + "logits/rejected": -3.0112948417663574, + "logps/chosen": -187.3073272705078, + "logps/rejected": -267.22137451171875, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.201582908630371, + "rewards/margins": 5.6700592041015625, + "rewards/rejected": -7.871642112731934, + "step": 3745 + }, + { + "epoch": 0.58, + "learning_rate": 1.1399866175631517e-05, + "logits/chosen": -2.9552712440490723, + "logits/rejected": -2.0697951316833496, + "logps/chosen": -316.1715393066406, + "logps/rejected": -292.2504577636719, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4004976749420166, + "rewards/margins": 4.633194923400879, + "rewards/rejected": -7.033692359924316, + "step": 3746 + }, + { + "epoch": 0.58, + "learning_rate": 1.1399132735100369e-05, + "logits/chosen": -2.287548542022705, + "logits/rejected": -3.092794418334961, + "logps/chosen": -76.94650268554688, + "logps/rejected": -149.6789093017578, + "loss": 0.6285, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9670073986053467, + "rewards/margins": 2.3713366985321045, + "rewards/rejected": -6.338344097137451, + "step": 3747 + }, + { + "epoch": 0.58, + "learning_rate": 1.139839929456922e-05, + "logits/chosen": -1.9098635911941528, + "logits/rejected": -3.058305263519287, + "logps/chosen": -270.80572509765625, + "logps/rejected": -526.7966918945312, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3070557117462158, + "rewards/margins": 6.607383728027344, + "rewards/rejected": -7.9144392013549805, + "step": 3748 + }, + { + "epoch": 0.58, + "learning_rate": 1.1397665854038073e-05, + "logits/chosen": -1.790993571281433, + "logits/rejected": -2.7843122482299805, + "logps/chosen": -233.42210388183594, + "logps/rejected": -499.010009765625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4852661192417145, + "rewards/margins": 9.693013191223145, + "rewards/rejected": -10.178279876708984, + "step": 3749 + }, + { + "epoch": 0.58, + "learning_rate": 1.1396932413506924e-05, + "logits/chosen": -2.8594987392425537, + "logits/rejected": -3.1026782989501953, + "logps/chosen": -124.37030029296875, + "logps/rejected": -126.11426544189453, + "loss": 3.1497, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.625391006469727, + "rewards/margins": -0.04527616500854492, + "rewards/rejected": -6.580114841461182, + "step": 3750 + }, + { + "epoch": 0.58, + "learning_rate": 1.1396198972975776e-05, + "logits/chosen": -3.0106914043426514, + "logits/rejected": -3.015808582305908, + "logps/chosen": -308.8331298828125, + "logps/rejected": -216.7109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2916489839553833, + "rewards/margins": 7.424864768981934, + "rewards/rejected": -8.716514587402344, + "step": 3751 + }, + { + "epoch": 0.58, + "learning_rate": 1.1395465532444628e-05, + "logits/chosen": -3.07098388671875, + "logits/rejected": -2.613812208175659, + "logps/chosen": -762.8740844726562, + "logps/rejected": -511.7774353027344, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8929199576377869, + "rewards/margins": 7.851686477661133, + "rewards/rejected": -8.744606018066406, + "step": 3752 + }, + { + "epoch": 0.58, + "learning_rate": 1.139473209191348e-05, + "logits/chosen": -3.122070789337158, + "logits/rejected": -2.4722063541412354, + "logps/chosen": -312.7135009765625, + "logps/rejected": -281.5108642578125, + "loss": 0.0395, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.866722822189331, + "rewards/margins": 4.232672691345215, + "rewards/rejected": -6.099395751953125, + "step": 3753 + }, + { + "epoch": 0.58, + "learning_rate": 1.1393998651382332e-05, + "logits/chosen": -2.66310453414917, + "logits/rejected": -3.146131992340088, + "logps/chosen": -197.78668212890625, + "logps/rejected": -188.67620849609375, + "loss": 2.3254, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.441810607910156, + "rewards/margins": 1.726226806640625, + "rewards/rejected": -6.168037414550781, + "step": 3754 + }, + { + "epoch": 0.58, + "learning_rate": 1.1393265210851186e-05, + "logits/chosen": -1.8476696014404297, + "logits/rejected": -2.9769272804260254, + "logps/chosen": -86.68096160888672, + "logps/rejected": -238.11715698242188, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7842304706573486, + "rewards/margins": 4.993631839752197, + "rewards/rejected": -6.777862548828125, + "step": 3755 + }, + { + "epoch": 0.58, + "learning_rate": 1.1392531770320037e-05, + "logits/chosen": -1.691265344619751, + "logits/rejected": -3.041100025177002, + "logps/chosen": -177.57611083984375, + "logps/rejected": -417.4342041015625, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3833928108215332, + "rewards/margins": 6.113300800323486, + "rewards/rejected": -7.4966936111450195, + "step": 3756 + }, + { + "epoch": 0.58, + "learning_rate": 1.139179832978889e-05, + "logits/chosen": -2.762006998062134, + "logits/rejected": -2.8996241092681885, + "logps/chosen": -610.872802734375, + "logps/rejected": -589.10791015625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43362122774124146, + "rewards/margins": 7.321512222290039, + "rewards/rejected": -7.755133152008057, + "step": 3757 + }, + { + "epoch": 0.58, + "learning_rate": 1.1391064889257741e-05, + "logits/chosen": -2.1353204250335693, + "logits/rejected": -3.212100028991699, + "logps/chosen": -48.90284729003906, + "logps/rejected": -232.52383422851562, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7755370140075684, + "rewards/margins": 5.291078567504883, + "rewards/rejected": -9.06661605834961, + "step": 3758 + }, + { + "epoch": 0.58, + "learning_rate": 1.1390331448726593e-05, + "logits/chosen": -3.017925262451172, + "logits/rejected": -3.0121047496795654, + "logps/chosen": -61.703147888183594, + "logps/rejected": -158.17178344726562, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.684478998184204, + "rewards/margins": 5.0380682945251465, + "rewards/rejected": -6.72254753112793, + "step": 3759 + }, + { + "epoch": 0.58, + "learning_rate": 1.1389598008195445e-05, + "logits/chosen": -2.937842845916748, + "logits/rejected": -2.355710744857788, + "logps/chosen": -531.62548828125, + "logps/rejected": -379.88592529296875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9725525379180908, + "rewards/margins": 7.478571891784668, + "rewards/rejected": -9.45112419128418, + "step": 3760 + }, + { + "epoch": 0.58, + "learning_rate": 1.1388864567664299e-05, + "logits/chosen": -2.4536468982696533, + "logits/rejected": -2.963214635848999, + "logps/chosen": -180.56011962890625, + "logps/rejected": -291.36865234375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.815425157546997, + "rewards/margins": 5.885694980621338, + "rewards/rejected": -8.701120376586914, + "step": 3761 + }, + { + "epoch": 0.59, + "learning_rate": 1.138813112713315e-05, + "logits/chosen": -2.941426992416382, + "logits/rejected": -3.035353660583496, + "logps/chosen": -391.38446044921875, + "logps/rejected": -422.4303283691406, + "loss": 0.0586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6994308233261108, + "rewards/margins": 4.759280204772949, + "rewards/rejected": -5.458711624145508, + "step": 3762 + }, + { + "epoch": 0.59, + "learning_rate": 1.1387397686602002e-05, + "logits/chosen": -2.245138645172119, + "logits/rejected": -2.8668596744537354, + "logps/chosen": -72.11148071289062, + "logps/rejected": -199.5767822265625, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.525832176208496, + "rewards/margins": 3.420409679412842, + "rewards/rejected": -5.946242332458496, + "step": 3763 + }, + { + "epoch": 0.59, + "learning_rate": 1.1386664246070856e-05, + "logits/chosen": -1.7371784448623657, + "logits/rejected": -3.072288751602173, + "logps/chosen": -513.79345703125, + "logps/rejected": -680.2732543945312, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5809386968612671, + "rewards/margins": 6.531511306762695, + "rewards/rejected": -7.112449645996094, + "step": 3764 + }, + { + "epoch": 0.59, + "learning_rate": 1.1385930805539708e-05, + "logits/chosen": -2.9391329288482666, + "logits/rejected": -3.1169066429138184, + "logps/chosen": -262.8294372558594, + "logps/rejected": -153.67630004882812, + "loss": 2.2311, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.536207675933838, + "rewards/margins": -1.084562063217163, + "rewards/rejected": -3.4516453742980957, + "step": 3765 + }, + { + "epoch": 0.59, + "learning_rate": 1.138519736500856e-05, + "logits/chosen": -3.108490467071533, + "logits/rejected": -2.6619229316711426, + "logps/chosen": -454.99688720703125, + "logps/rejected": -405.0234375, + "loss": 1.7148, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.531055450439453, + "rewards/margins": 0.2935839891433716, + "rewards/rejected": -3.824639320373535, + "step": 3766 + }, + { + "epoch": 0.59, + "learning_rate": 1.1384463924477412e-05, + "logits/chosen": -2.0653445720672607, + "logits/rejected": -2.998579502105713, + "logps/chosen": -362.1794738769531, + "logps/rejected": -528.13720703125, + "loss": 0.7026, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6843032836914062, + "rewards/margins": 4.158448696136475, + "rewards/rejected": -6.842751979827881, + "step": 3767 + }, + { + "epoch": 0.59, + "learning_rate": 1.1383730483946263e-05, + "logits/chosen": -2.713031530380249, + "logits/rejected": -1.7731777429580688, + "logps/chosen": -284.2060546875, + "logps/rejected": -297.1426696777344, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2353534698486328, + "rewards/margins": 5.513665676116943, + "rewards/rejected": -6.749018669128418, + "step": 3768 + }, + { + "epoch": 0.59, + "learning_rate": 1.1382997043415115e-05, + "logits/chosen": -3.076808214187622, + "logits/rejected": -1.7646652460098267, + "logps/chosen": -467.38458251953125, + "logps/rejected": -229.1182861328125, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0757341384887695, + "rewards/margins": 4.414078235626221, + "rewards/rejected": -6.48981237411499, + "step": 3769 + }, + { + "epoch": 0.59, + "learning_rate": 1.1382263602883967e-05, + "logits/chosen": -2.976501703262329, + "logits/rejected": -1.7628352642059326, + "logps/chosen": -486.3306579589844, + "logps/rejected": -284.8875732421875, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7549285888671875, + "rewards/margins": 6.140988349914551, + "rewards/rejected": -8.895916938781738, + "step": 3770 + }, + { + "epoch": 0.59, + "learning_rate": 1.1381530162352819e-05, + "logits/chosen": -2.2613205909729004, + "logits/rejected": -3.018566131591797, + "logps/chosen": -121.36914825439453, + "logps/rejected": -296.12347412109375, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5629608631134033, + "rewards/margins": 5.571394443511963, + "rewards/rejected": -8.134355545043945, + "step": 3771 + }, + { + "epoch": 0.59, + "learning_rate": 1.1380796721821671e-05, + "logits/chosen": -3.079702854156494, + "logits/rejected": -2.4164443016052246, + "logps/chosen": -127.75222778320312, + "logps/rejected": -228.67758178710938, + "loss": 3.2878, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.970962047576904, + "rewards/margins": 0.1266646385192871, + "rewards/rejected": -5.097626686096191, + "step": 3772 + }, + { + "epoch": 0.59, + "learning_rate": 1.1380063281290524e-05, + "logits/chosen": -3.138706684112549, + "logits/rejected": -2.6685824394226074, + "logps/chosen": -113.27356719970703, + "logps/rejected": -195.36520385742188, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2727935314178467, + "rewards/margins": 7.009495258331299, + "rewards/rejected": -8.282288551330566, + "step": 3773 + }, + { + "epoch": 0.59, + "learning_rate": 1.1379329840759376e-05, + "logits/chosen": -2.8843023777008057, + "logits/rejected": -3.1391685009002686, + "logps/chosen": -88.96661376953125, + "logps/rejected": -200.39154052734375, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4787838459014893, + "rewards/margins": 4.431884765625, + "rewards/rejected": -6.91066837310791, + "step": 3774 + }, + { + "epoch": 0.59, + "learning_rate": 1.1378596400228228e-05, + "logits/chosen": -2.9186220169067383, + "logits/rejected": -2.2768869400024414, + "logps/chosen": -161.89422607421875, + "logps/rejected": -220.75900268554688, + "loss": 2.4095, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.878993988037109, + "rewards/margins": 1.4743170738220215, + "rewards/rejected": -6.353311538696289, + "step": 3775 + }, + { + "epoch": 0.59, + "learning_rate": 1.137786295969708e-05, + "logits/chosen": -2.0163979530334473, + "logits/rejected": -3.160609245300293, + "logps/chosen": -192.21763610839844, + "logps/rejected": -307.336181640625, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.436677932739258, + "rewards/margins": 5.2476019859313965, + "rewards/rejected": -7.684279918670654, + "step": 3776 + }, + { + "epoch": 0.59, + "learning_rate": 1.1377129519165932e-05, + "logits/chosen": -3.0896546840667725, + "logits/rejected": -2.924311637878418, + "logps/chosen": -327.42608642578125, + "logps/rejected": -450.2162780761719, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3636810779571533, + "rewards/margins": 8.390403747558594, + "rewards/rejected": -9.754085540771484, + "step": 3777 + }, + { + "epoch": 0.59, + "learning_rate": 1.1376396078634784e-05, + "logits/chosen": -3.1298837661743164, + "logits/rejected": -2.8057878017425537, + "logps/chosen": -361.9836730957031, + "logps/rejected": -185.8458251953125, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0588998794555664, + "rewards/margins": 4.797740936279297, + "rewards/rejected": -6.856640815734863, + "step": 3778 + }, + { + "epoch": 0.59, + "learning_rate": 1.1375662638103636e-05, + "logits/chosen": -3.1245970726013184, + "logits/rejected": -2.4319143295288086, + "logps/chosen": -178.78546142578125, + "logps/rejected": -203.54257202148438, + "loss": 4.6443, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.372776031494141, + "rewards/margins": -0.6087102890014648, + "rewards/rejected": -5.764065742492676, + "step": 3779 + }, + { + "epoch": 0.59, + "learning_rate": 1.1374929197572488e-05, + "logits/chosen": -3.172168493270874, + "logits/rejected": -2.745927095413208, + "logps/chosen": -538.945556640625, + "logps/rejected": -457.432373046875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5414459109306335, + "rewards/margins": 7.18474006652832, + "rewards/rejected": -7.7261857986450195, + "step": 3780 + }, + { + "epoch": 0.59, + "learning_rate": 1.137419575704134e-05, + "logits/chosen": -2.9269115924835205, + "logits/rejected": -3.290393352508545, + "logps/chosen": -89.02839660644531, + "logps/rejected": -326.18359375, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.188028335571289, + "rewards/margins": 5.3601155281066895, + "rewards/rejected": -8.54814338684082, + "step": 3781 + }, + { + "epoch": 0.59, + "learning_rate": 1.1373462316510193e-05, + "logits/chosen": -2.8847999572753906, + "logits/rejected": -2.094726800918579, + "logps/chosen": -173.7277069091797, + "logps/rejected": -205.87689208984375, + "loss": 1.6645, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.1626057624816895, + "rewards/margins": 1.0440887212753296, + "rewards/rejected": -5.206694602966309, + "step": 3782 + }, + { + "epoch": 0.59, + "learning_rate": 1.1372728875979045e-05, + "logits/chosen": -3.1384596824645996, + "logits/rejected": -2.0057668685913086, + "logps/chosen": -459.2462158203125, + "logps/rejected": -381.77447509765625, + "loss": 2.7714, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.603778839111328, + "rewards/margins": 0.20804834365844727, + "rewards/rejected": -3.8118271827697754, + "step": 3783 + }, + { + "epoch": 0.59, + "learning_rate": 1.1371995435447897e-05, + "logits/chosen": -3.0271472930908203, + "logits/rejected": -2.8048653602600098, + "logps/chosen": -94.56112670898438, + "logps/rejected": -255.00405883789062, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.142951011657715, + "rewards/margins": 5.3910980224609375, + "rewards/rejected": -8.534049034118652, + "step": 3784 + }, + { + "epoch": 0.59, + "learning_rate": 1.1371261994916749e-05, + "logits/chosen": -2.414008140563965, + "logits/rejected": -3.034554958343506, + "logps/chosen": -143.6761016845703, + "logps/rejected": -336.7781066894531, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.55576491355896, + "rewards/margins": 5.366023063659668, + "rewards/rejected": -6.921788215637207, + "step": 3785 + }, + { + "epoch": 0.59, + "learning_rate": 1.13705285543856e-05, + "logits/chosen": -2.4522600173950195, + "logits/rejected": -2.9805941581726074, + "logps/chosen": -37.113346099853516, + "logps/rejected": -288.7417297363281, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.459646224975586, + "rewards/margins": 7.973842620849609, + "rewards/rejected": -10.433488845825195, + "step": 3786 + }, + { + "epoch": 0.59, + "learning_rate": 1.1369795113854452e-05, + "logits/chosen": -2.394704818725586, + "logits/rejected": -2.9220128059387207, + "logps/chosen": -154.98095703125, + "logps/rejected": -336.2529296875, + "loss": 0.1753, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.103746175765991, + "rewards/margins": 3.213552951812744, + "rewards/rejected": -6.317298889160156, + "step": 3787 + }, + { + "epoch": 0.59, + "learning_rate": 1.1369061673323304e-05, + "logits/chosen": -1.0647494792938232, + "logits/rejected": -2.7812275886535645, + "logps/chosen": -140.31866455078125, + "logps/rejected": -506.0675354003906, + "loss": 0.4062, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.097163200378418, + "rewards/margins": 2.964566707611084, + "rewards/rejected": -7.061729431152344, + "step": 3788 + }, + { + "epoch": 0.59, + "learning_rate": 1.1368328232792156e-05, + "logits/chosen": -2.9327964782714844, + "logits/rejected": -2.5520780086517334, + "logps/chosen": -265.88525390625, + "logps/rejected": -83.54588317871094, + "loss": 5.4454, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.635947227478027, + "rewards/margins": -5.430246829986572, + "rewards/rejected": -2.205700397491455, + "step": 3789 + }, + { + "epoch": 0.59, + "learning_rate": 1.1367594792261008e-05, + "logits/chosen": -3.1399707794189453, + "logits/rejected": -2.7081351280212402, + "logps/chosen": -378.4805908203125, + "logps/rejected": -376.51080322265625, + "loss": 4.1008, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.326101303100586, + "rewards/margins": -4.047876358032227, + "rewards/rejected": -3.2782251834869385, + "step": 3790 + }, + { + "epoch": 0.59, + "learning_rate": 1.1366861351729862e-05, + "logits/chosen": -2.8372080326080322, + "logits/rejected": -3.2476449012756348, + "logps/chosen": -56.620208740234375, + "logps/rejected": -144.26763916015625, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.362121820449829, + "rewards/margins": 4.61097526550293, + "rewards/rejected": -6.97309684753418, + "step": 3791 + }, + { + "epoch": 0.59, + "learning_rate": 1.1366127911198714e-05, + "logits/chosen": -3.126004695892334, + "logits/rejected": -3.351943254470825, + "logps/chosen": -491.8207702636719, + "logps/rejected": -542.6751708984375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8732759952545166, + "rewards/margins": 6.438375473022461, + "rewards/rejected": -7.311651229858398, + "step": 3792 + }, + { + "epoch": 0.59, + "learning_rate": 1.1365394470667565e-05, + "logits/chosen": -2.6388072967529297, + "logits/rejected": -3.15462064743042, + "logps/chosen": -436.73785400390625, + "logps/rejected": -423.9708251953125, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3661487102508545, + "rewards/margins": 6.081233978271484, + "rewards/rejected": -7.447382926940918, + "step": 3793 + }, + { + "epoch": 0.59, + "learning_rate": 1.1364661030136417e-05, + "logits/chosen": -3.1842947006225586, + "logits/rejected": -3.0372605323791504, + "logps/chosen": -89.69476318359375, + "logps/rejected": -122.67938232421875, + "loss": 1.5081, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.844637632369995, + "rewards/margins": 1.3303688764572144, + "rewards/rejected": -5.17500638961792, + "step": 3794 + }, + { + "epoch": 0.59, + "learning_rate": 1.136392758960527e-05, + "logits/chosen": -3.120070695877075, + "logits/rejected": -2.972755193710327, + "logps/chosen": -286.6922302246094, + "logps/rejected": -312.83770751953125, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9130253791809082, + "rewards/margins": 5.704530715942383, + "rewards/rejected": -7.617556095123291, + "step": 3795 + }, + { + "epoch": 0.59, + "learning_rate": 1.1363194149074123e-05, + "logits/chosen": -2.023987054824829, + "logits/rejected": -3.0571205615997314, + "logps/chosen": -410.9918518066406, + "logps/rejected": -606.3640747070312, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7124825119972229, + "rewards/margins": 6.733071327209473, + "rewards/rejected": -7.445553779602051, + "step": 3796 + }, + { + "epoch": 0.59, + "learning_rate": 1.1362460708542975e-05, + "logits/chosen": -3.019953966140747, + "logits/rejected": -2.9979493618011475, + "logps/chosen": -90.87837219238281, + "logps/rejected": -102.7071762084961, + "loss": 1.3152, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.2379961013793945, + "rewards/margins": 0.5290563106536865, + "rewards/rejected": -4.767052173614502, + "step": 3797 + }, + { + "epoch": 0.59, + "learning_rate": 1.1361727268011827e-05, + "logits/chosen": -2.387970209121704, + "logits/rejected": -3.079495668411255, + "logps/chosen": -135.3221893310547, + "logps/rejected": -290.53515625, + "loss": 0.3251, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.946641206741333, + "rewards/margins": 1.0299427509307861, + "rewards/rejected": -3.976583957672119, + "step": 3798 + }, + { + "epoch": 0.59, + "learning_rate": 1.1360993827480678e-05, + "logits/chosen": -3.078671455383301, + "logits/rejected": -2.5618817806243896, + "logps/chosen": -548.8582763671875, + "logps/rejected": -430.4756774902344, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5733366012573242, + "rewards/margins": 6.237112522125244, + "rewards/rejected": -7.810449123382568, + "step": 3799 + }, + { + "epoch": 0.59, + "learning_rate": 1.1360260386949532e-05, + "logits/chosen": -2.559823751449585, + "logits/rejected": -3.0101473331451416, + "logps/chosen": -79.27029418945312, + "logps/rejected": -259.85028076171875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8112783432006836, + "rewards/margins": 6.825210094451904, + "rewards/rejected": -9.636488914489746, + "step": 3800 + }, + { + "epoch": 0.59, + "learning_rate": 1.1359526946418384e-05, + "logits/chosen": -2.562056541442871, + "logits/rejected": -3.1881625652313232, + "logps/chosen": -804.294677734375, + "logps/rejected": -717.2000732421875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.641143798828125, + "rewards/margins": 6.719911575317383, + "rewards/rejected": -6.078767776489258, + "step": 3801 + }, + { + "epoch": 0.59, + "learning_rate": 1.1358793505887236e-05, + "logits/chosen": -1.641737461090088, + "logits/rejected": -2.9180777072906494, + "logps/chosen": -75.64749908447266, + "logps/rejected": -258.71356201171875, + "loss": 0.1115, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.104508876800537, + "rewards/margins": 3.5575411319732666, + "rewards/rejected": -6.662050247192383, + "step": 3802 + }, + { + "epoch": 0.59, + "learning_rate": 1.1358060065356088e-05, + "logits/chosen": -3.2796945571899414, + "logits/rejected": -3.09501314163208, + "logps/chosen": -216.9652557373047, + "logps/rejected": -182.34974670410156, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.609663724899292, + "rewards/margins": 4.532090187072754, + "rewards/rejected": -7.141754150390625, + "step": 3803 + }, + { + "epoch": 0.59, + "learning_rate": 1.135732662482494e-05, + "logits/chosen": -3.1251437664031982, + "logits/rejected": -1.7233591079711914, + "logps/chosen": -300.7721862792969, + "logps/rejected": -210.61282348632812, + "loss": 2.3293, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.169699668884277, + "rewards/margins": 1.8776259422302246, + "rewards/rejected": -6.047325611114502, + "step": 3804 + }, + { + "epoch": 0.59, + "learning_rate": 1.1356593184293791e-05, + "logits/chosen": -2.0632846355438232, + "logits/rejected": -3.136350154876709, + "logps/chosen": -259.1851501464844, + "logps/rejected": -396.8221435546875, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.509955406188965, + "rewards/margins": 4.492110252380371, + "rewards/rejected": -7.002065658569336, + "step": 3805 + }, + { + "epoch": 0.59, + "learning_rate": 1.1355859743762643e-05, + "logits/chosen": -2.6862881183624268, + "logits/rejected": -2.343778371810913, + "logps/chosen": -164.41256713867188, + "logps/rejected": -124.08702087402344, + "loss": 0.9484, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.812760353088379, + "rewards/margins": 1.1282191276550293, + "rewards/rejected": -4.940979480743408, + "step": 3806 + }, + { + "epoch": 0.59, + "learning_rate": 1.1355126303231495e-05, + "logits/chosen": -1.7249375581741333, + "logits/rejected": -2.5165481567382812, + "logps/chosen": -231.0886688232422, + "logps/rejected": -383.3602294921875, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.026094913482666, + "rewards/margins": 6.04584264755249, + "rewards/rejected": -8.071937561035156, + "step": 3807 + }, + { + "epoch": 0.59, + "learning_rate": 1.1354392862700349e-05, + "logits/chosen": -2.2671847343444824, + "logits/rejected": -3.1089928150177, + "logps/chosen": -475.8455810546875, + "logps/rejected": -656.4097900390625, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2628452777862549, + "rewards/margins": 3.9105160236358643, + "rewards/rejected": -5.173361301422119, + "step": 3808 + }, + { + "epoch": 0.59, + "learning_rate": 1.13536594221692e-05, + "logits/chosen": -2.9361555576324463, + "logits/rejected": -3.119187116622925, + "logps/chosen": -106.4435806274414, + "logps/rejected": -147.8745574951172, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1514172554016113, + "rewards/margins": 4.020125389099121, + "rewards/rejected": -6.171543121337891, + "step": 3809 + }, + { + "epoch": 0.59, + "learning_rate": 1.1352925981638052e-05, + "logits/chosen": -2.9907124042510986, + "logits/rejected": -2.7919795513153076, + "logps/chosen": -151.49346923828125, + "logps/rejected": -322.862548828125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.504824161529541, + "rewards/margins": 7.895759582519531, + "rewards/rejected": -10.400583267211914, + "step": 3810 + }, + { + "epoch": 0.59, + "learning_rate": 1.1352192541106904e-05, + "logits/chosen": -3.146920680999756, + "logits/rejected": -2.7751855850219727, + "logps/chosen": -261.4927978515625, + "logps/rejected": -81.6883316040039, + "loss": 2.6258, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.456606864929199, + "rewards/margins": -2.3734140396118164, + "rewards/rejected": -3.083193063735962, + "step": 3811 + }, + { + "epoch": 0.59, + "learning_rate": 1.1351459100575756e-05, + "logits/chosen": -2.0449588298797607, + "logits/rejected": -2.662060499191284, + "logps/chosen": -87.69668579101562, + "logps/rejected": -266.8807373046875, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8438572883605957, + "rewards/margins": 6.726264476776123, + "rewards/rejected": -10.570121765136719, + "step": 3812 + }, + { + "epoch": 0.59, + "learning_rate": 1.1350725660044608e-05, + "logits/chosen": -1.991835355758667, + "logits/rejected": -2.776237726211548, + "logps/chosen": -180.97103881835938, + "logps/rejected": -256.5119934082031, + "loss": 2.6518, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.586794853210449, + "rewards/margins": 1.1541659832000732, + "rewards/rejected": -5.740961074829102, + "step": 3813 + }, + { + "epoch": 0.59, + "learning_rate": 1.134999221951346e-05, + "logits/chosen": -2.98783016204834, + "logits/rejected": -3.1514828205108643, + "logps/chosen": -341.736572265625, + "logps/rejected": -277.8502502441406, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6778030395507812, + "rewards/margins": 5.581282138824463, + "rewards/rejected": -7.259085178375244, + "step": 3814 + }, + { + "epoch": 0.59, + "learning_rate": 1.1349258778982312e-05, + "logits/chosen": -2.102168083190918, + "logits/rejected": -3.065232515335083, + "logps/chosen": -200.061279296875, + "logps/rejected": -373.9407958984375, + "loss": 0.0218, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.581214189529419, + "rewards/margins": 4.11721134185791, + "rewards/rejected": -5.69842529296875, + "step": 3815 + }, + { + "epoch": 0.59, + "learning_rate": 1.1348525338451164e-05, + "logits/chosen": -2.998039960861206, + "logits/rejected": -2.1011366844177246, + "logps/chosen": -165.514892578125, + "logps/rejected": -310.4486083984375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3832788467407227, + "rewards/margins": 7.942597389221191, + "rewards/rejected": -9.325876235961914, + "step": 3816 + }, + { + "epoch": 0.59, + "learning_rate": 1.1347791897920017e-05, + "logits/chosen": -2.2996370792388916, + "logits/rejected": -2.881457805633545, + "logps/chosen": -138.701416015625, + "logps/rejected": -382.09893798828125, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.728445529937744, + "rewards/margins": 5.585799694061279, + "rewards/rejected": -8.314245223999023, + "step": 3817 + }, + { + "epoch": 0.59, + "learning_rate": 1.1347058457388869e-05, + "logits/chosen": -1.7964999675750732, + "logits/rejected": -2.240149736404419, + "logps/chosen": -223.35316467285156, + "logps/rejected": -155.3406524658203, + "loss": 3.6145, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.323765277862549, + "rewards/margins": -1.7044206857681274, + "rewards/rejected": -3.619344711303711, + "step": 3818 + }, + { + "epoch": 0.59, + "learning_rate": 1.1346325016857721e-05, + "logits/chosen": -2.8372058868408203, + "logits/rejected": -1.8624972105026245, + "logps/chosen": -467.6766052246094, + "logps/rejected": -309.4350280761719, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0572540760040283, + "rewards/margins": 7.142709255218506, + "rewards/rejected": -9.199963569641113, + "step": 3819 + }, + { + "epoch": 0.59, + "learning_rate": 1.1345591576326573e-05, + "logits/chosen": -3.1023471355438232, + "logits/rejected": -3.3177127838134766, + "logps/chosen": -73.04512023925781, + "logps/rejected": -236.11825561523438, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2023582458496094, + "rewards/margins": 4.135087966918945, + "rewards/rejected": -7.337446212768555, + "step": 3820 + }, + { + "epoch": 0.59, + "learning_rate": 1.1344858135795425e-05, + "logits/chosen": -3.0095863342285156, + "logits/rejected": -3.059103012084961, + "logps/chosen": -111.53244018554688, + "logps/rejected": -91.759521484375, + "loss": 0.94, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.279022216796875, + "rewards/margins": 1.293179988861084, + "rewards/rejected": -5.572201728820801, + "step": 3821 + }, + { + "epoch": 0.59, + "learning_rate": 1.1344124695264277e-05, + "logits/chosen": -2.4545905590057373, + "logits/rejected": -3.1849021911621094, + "logps/chosen": -280.765869140625, + "logps/rejected": -369.5987243652344, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8797394037246704, + "rewards/margins": 4.159872531890869, + "rewards/rejected": -6.03961181640625, + "step": 3822 + }, + { + "epoch": 0.59, + "learning_rate": 1.1343391254733129e-05, + "logits/chosen": -1.9626222848892212, + "logits/rejected": -3.0393803119659424, + "logps/chosen": -95.34765625, + "logps/rejected": -538.7517700195312, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7990925312042236, + "rewards/margins": 9.441864013671875, + "rewards/rejected": -12.240957260131836, + "step": 3823 + }, + { + "epoch": 0.59, + "learning_rate": 1.134265781420198e-05, + "logits/chosen": -1.8752905130386353, + "logits/rejected": -3.0220139026641846, + "logps/chosen": -177.1505126953125, + "logps/rejected": -369.5250244140625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3477160930633545, + "rewards/margins": 7.289987564086914, + "rewards/rejected": -8.637703895568848, + "step": 3824 + }, + { + "epoch": 0.59, + "learning_rate": 1.1341924373670832e-05, + "logits/chosen": -2.5353240966796875, + "logits/rejected": -3.267840623855591, + "logps/chosen": -504.71063232421875, + "logps/rejected": -521.8453369140625, + "loss": 0.0858, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2239339351654053, + "rewards/margins": 5.554834365844727, + "rewards/rejected": -7.778768539428711, + "step": 3825 + }, + { + "epoch": 0.6, + "learning_rate": 1.1341190933139686e-05, + "logits/chosen": -3.260488986968994, + "logits/rejected": -1.1544358730316162, + "logps/chosen": -431.999755859375, + "logps/rejected": -147.66656494140625, + "loss": 5.1166, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.199241638183594, + "rewards/margins": -5.109528541564941, + "rewards/rejected": -2.0897133350372314, + "step": 3826 + }, + { + "epoch": 0.6, + "learning_rate": 1.1340457492608538e-05, + "logits/chosen": -2.883273124694824, + "logits/rejected": -1.2813869714736938, + "logps/chosen": -410.0849914550781, + "logps/rejected": -259.9609375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41795119643211365, + "rewards/margins": 7.067962169647217, + "rewards/rejected": -7.485913276672363, + "step": 3827 + }, + { + "epoch": 0.6, + "learning_rate": 1.133972405207739e-05, + "logits/chosen": -3.019770383834839, + "logits/rejected": -3.14631986618042, + "logps/chosen": -78.42790222167969, + "logps/rejected": -233.27896118164062, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4237380027770996, + "rewards/margins": 3.1815080642700195, + "rewards/rejected": -5.605246067047119, + "step": 3828 + }, + { + "epoch": 0.6, + "learning_rate": 1.1338990611546242e-05, + "logits/chosen": -3.1588549613952637, + "logits/rejected": -3.1229140758514404, + "logps/chosen": -425.635986328125, + "logps/rejected": -434.815185546875, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3018217086791992, + "rewards/margins": 5.090342998504639, + "rewards/rejected": -6.392164707183838, + "step": 3829 + }, + { + "epoch": 0.6, + "learning_rate": 1.1338257171015095e-05, + "logits/chosen": -3.3529231548309326, + "logits/rejected": -3.0475118160247803, + "logps/chosen": -216.60610961914062, + "logps/rejected": -115.1056137084961, + "loss": 3.6501, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.686617374420166, + "rewards/margins": -3.606052875518799, + "rewards/rejected": -1.0805644989013672, + "step": 3830 + }, + { + "epoch": 0.6, + "learning_rate": 1.1337523730483947e-05, + "logits/chosen": -2.9960317611694336, + "logits/rejected": -1.3930957317352295, + "logps/chosen": -148.9945526123047, + "logps/rejected": -161.66156005859375, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9707841873168945, + "rewards/margins": 4.936147212982178, + "rewards/rejected": -8.906930923461914, + "step": 3831 + }, + { + "epoch": 0.6, + "learning_rate": 1.1336790289952799e-05, + "logits/chosen": -3.1916098594665527, + "logits/rejected": -2.4103758335113525, + "logps/chosen": -183.95643615722656, + "logps/rejected": -155.85455322265625, + "loss": 0.2081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.934993028640747, + "rewards/margins": 2.5212008953094482, + "rewards/rejected": -4.456193923950195, + "step": 3832 + }, + { + "epoch": 0.6, + "learning_rate": 1.133605684942165e-05, + "logits/chosen": -1.4701924324035645, + "logits/rejected": -3.07122540473938, + "logps/chosen": -88.9070053100586, + "logps/rejected": -438.6034240722656, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9987571239471436, + "rewards/margins": 6.216994762420654, + "rewards/rejected": -9.215751647949219, + "step": 3833 + }, + { + "epoch": 0.6, + "learning_rate": 1.1335323408890503e-05, + "logits/chosen": -1.4060271978378296, + "logits/rejected": -3.1116325855255127, + "logps/chosen": -107.74675750732422, + "logps/rejected": -332.675048828125, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5073039531707764, + "rewards/margins": 5.906193256378174, + "rewards/rejected": -7.413496971130371, + "step": 3834 + }, + { + "epoch": 0.6, + "learning_rate": 1.1334589968359356e-05, + "logits/chosen": -1.0655165910720825, + "logits/rejected": -2.9529807567596436, + "logps/chosen": -141.91046142578125, + "logps/rejected": -379.0450439453125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3693618774414062, + "rewards/margins": 5.945606708526611, + "rewards/rejected": -7.314969062805176, + "step": 3835 + }, + { + "epoch": 0.6, + "learning_rate": 1.1333856527828208e-05, + "logits/chosen": -2.3661062717437744, + "logits/rejected": -3.24420428276062, + "logps/chosen": -231.15203857421875, + "logps/rejected": -411.11907958984375, + "loss": 1.974, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.502429246902466, + "rewards/margins": 0.7331253290176392, + "rewards/rejected": -4.2355546951293945, + "step": 3836 + }, + { + "epoch": 0.6, + "learning_rate": 1.133312308729706e-05, + "logits/chosen": -3.2132582664489746, + "logits/rejected": -2.194664716720581, + "logps/chosen": -200.50827026367188, + "logps/rejected": -65.4835433959961, + "loss": 0.3061, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.01500940322876, + "rewards/margins": 1.0409799814224243, + "rewards/rejected": -5.0559892654418945, + "step": 3837 + }, + { + "epoch": 0.6, + "learning_rate": 1.1332389646765912e-05, + "logits/chosen": -2.9828789234161377, + "logits/rejected": -2.6199631690979004, + "logps/chosen": -208.8623046875, + "logps/rejected": -359.725830078125, + "loss": 1.5007, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4235169887542725, + "rewards/margins": 3.94425368309021, + "rewards/rejected": -7.367770671844482, + "step": 3838 + }, + { + "epoch": 0.6, + "learning_rate": 1.1331656206234764e-05, + "logits/chosen": -2.581770658493042, + "logits/rejected": -3.0214498043060303, + "logps/chosen": -175.58084106445312, + "logps/rejected": -236.8284912109375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3157196044921875, + "rewards/margins": 6.465068817138672, + "rewards/rejected": -7.780788421630859, + "step": 3839 + }, + { + "epoch": 0.6, + "learning_rate": 1.1330922765703616e-05, + "logits/chosen": -3.0356497764587402, + "logits/rejected": -2.5858168601989746, + "logps/chosen": -855.2562255859375, + "logps/rejected": -559.83251953125, + "loss": 0.6521, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9406425952911377, + "rewards/margins": 2.668489694595337, + "rewards/rejected": -6.609132289886475, + "step": 3840 + }, + { + "epoch": 0.6, + "learning_rate": 1.1330189325172467e-05, + "logits/chosen": -2.340826988220215, + "logits/rejected": -3.0828564167022705, + "logps/chosen": -234.01388549804688, + "logps/rejected": -218.91851806640625, + "loss": 2.1416, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.702993154525757, + "rewards/margins": 1.7971184253692627, + "rewards/rejected": -5.5001115798950195, + "step": 3841 + }, + { + "epoch": 0.6, + "learning_rate": 1.132945588464132e-05, + "logits/chosen": -3.1468923091888428, + "logits/rejected": -1.7484228610992432, + "logps/chosen": -503.98980712890625, + "logps/rejected": -200.62782287597656, + "loss": 4.0557, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.245344638824463, + "rewards/margins": -4.034146308898926, + "rewards/rejected": -1.211198091506958, + "step": 3842 + }, + { + "epoch": 0.6, + "learning_rate": 1.1328722444110171e-05, + "logits/chosen": -2.0018694400787354, + "logits/rejected": -3.232550859451294, + "logps/chosen": -157.0059814453125, + "logps/rejected": -338.63482666015625, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.47405242919921875, + "rewards/margins": 6.037905693054199, + "rewards/rejected": -6.511958122253418, + "step": 3843 + }, + { + "epoch": 0.6, + "learning_rate": 1.1327989003579025e-05, + "logits/chosen": -2.884852886199951, + "logits/rejected": -2.974282741546631, + "logps/chosen": -494.40692138671875, + "logps/rejected": -421.512451171875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.356269121170044, + "rewards/margins": 7.501058101654053, + "rewards/rejected": -8.857327461242676, + "step": 3844 + }, + { + "epoch": 0.6, + "learning_rate": 1.1327255563047877e-05, + "logits/chosen": -3.031829833984375, + "logits/rejected": -3.290205240249634, + "logps/chosen": -102.45204162597656, + "logps/rejected": -261.2061767578125, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.291322112083435, + "rewards/margins": 3.8369717597961426, + "rewards/rejected": -5.128293991088867, + "step": 3845 + }, + { + "epoch": 0.6, + "learning_rate": 1.1326522122516729e-05, + "logits/chosen": -3.0970067977905273, + "logits/rejected": -0.7484630346298218, + "logps/chosen": -325.2731018066406, + "logps/rejected": -190.40658569335938, + "loss": 0.6426, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3757762908935547, + "rewards/margins": 3.4856653213500977, + "rewards/rejected": -6.8614420890808105, + "step": 3846 + }, + { + "epoch": 0.6, + "learning_rate": 1.132578868198558e-05, + "logits/chosen": -3.2069265842437744, + "logits/rejected": -2.8561065196990967, + "logps/chosen": -155.68658447265625, + "logps/rejected": -191.18768310546875, + "loss": 2.444, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.513594627380371, + "rewards/margins": -0.45343995094299316, + "rewards/rejected": -5.060154438018799, + "step": 3847 + }, + { + "epoch": 0.6, + "learning_rate": 1.1325055241454432e-05, + "logits/chosen": -3.277819871902466, + "logits/rejected": -2.796785593032837, + "logps/chosen": -824.3106689453125, + "logps/rejected": -342.83929443359375, + "loss": 1.8146, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9912140369415283, + "rewards/margins": -0.8054632544517517, + "rewards/rejected": -3.185750961303711, + "step": 3848 + }, + { + "epoch": 0.6, + "learning_rate": 1.1324321800923284e-05, + "logits/chosen": -1.6124885082244873, + "logits/rejected": -2.8953304290771484, + "logps/chosen": -120.16358184814453, + "logps/rejected": -415.41156005859375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7173007726669312, + "rewards/margins": 6.845356464385986, + "rewards/rejected": -8.562657356262207, + "step": 3849 + }, + { + "epoch": 0.6, + "learning_rate": 1.1323588360392136e-05, + "logits/chosen": -2.968472957611084, + "logits/rejected": -3.025500774383545, + "logps/chosen": -338.7628173828125, + "logps/rejected": -475.0483703613281, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8132461309432983, + "rewards/margins": 5.622088432312012, + "rewards/rejected": -4.808842658996582, + "step": 3850 + }, + { + "epoch": 0.6, + "learning_rate": 1.1322854919860988e-05, + "logits/chosen": -2.2851812839508057, + "logits/rejected": -2.8572938442230225, + "logps/chosen": -150.23602294921875, + "logps/rejected": -298.7715148925781, + "loss": 1.3134, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.143905162811279, + "rewards/margins": 2.709794521331787, + "rewards/rejected": -7.853699684143066, + "step": 3851 + }, + { + "epoch": 0.6, + "learning_rate": 1.132212147932984e-05, + "logits/chosen": -3.1794755458831787, + "logits/rejected": -2.7239744663238525, + "logps/chosen": -126.12174224853516, + "logps/rejected": -219.6663818359375, + "loss": 1.2798, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4361042976379395, + "rewards/margins": 4.264309883117676, + "rewards/rejected": -6.700414180755615, + "step": 3852 + }, + { + "epoch": 0.6, + "learning_rate": 1.1321388038798693e-05, + "logits/chosen": -2.0276589393615723, + "logits/rejected": -3.2912063598632812, + "logps/chosen": -410.29583740234375, + "logps/rejected": -479.5848083496094, + "loss": 1.9415, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3770551681518555, + "rewards/margins": 1.3232340812683105, + "rewards/rejected": -3.700289249420166, + "step": 3853 + }, + { + "epoch": 0.6, + "learning_rate": 1.1320654598267545e-05, + "logits/chosen": -3.1450910568237305, + "logits/rejected": -3.12658953666687, + "logps/chosen": -335.787109375, + "logps/rejected": -355.65850830078125, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6311533451080322, + "rewards/margins": 4.303606033325195, + "rewards/rejected": -4.934759616851807, + "step": 3854 + }, + { + "epoch": 0.6, + "learning_rate": 1.1319921157736397e-05, + "logits/chosen": -2.9231410026550293, + "logits/rejected": -3.000044345855713, + "logps/chosen": -158.88107299804688, + "logps/rejected": -221.6419677734375, + "loss": 1.7247, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6912970542907715, + "rewards/margins": -0.9918798208236694, + "rewards/rejected": -2.6994171142578125, + "step": 3855 + }, + { + "epoch": 0.6, + "learning_rate": 1.1319187717205249e-05, + "logits/chosen": -3.0039286613464355, + "logits/rejected": -3.264848232269287, + "logps/chosen": -223.39309692382812, + "logps/rejected": -221.5491180419922, + "loss": 2.2468, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4370710849761963, + "rewards/margins": -0.4359595775604248, + "rewards/rejected": -3.0011115074157715, + "step": 3856 + }, + { + "epoch": 0.6, + "learning_rate": 1.1318454276674101e-05, + "logits/chosen": -3.032728433609009, + "logits/rejected": -3.120351791381836, + "logps/chosen": -321.4175720214844, + "logps/rejected": -476.76190185546875, + "loss": 1.9465, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7575881481170654, + "rewards/margins": -0.20631110668182373, + "rewards/rejected": -2.5512771606445312, + "step": 3857 + }, + { + "epoch": 0.6, + "learning_rate": 1.1317720836142953e-05, + "logits/chosen": -3.0753848552703857, + "logits/rejected": -2.9537174701690674, + "logps/chosen": -249.4521484375, + "logps/rejected": -265.47259521484375, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3986191749572754, + "rewards/margins": 4.606381416320801, + "rewards/rejected": -6.005000591278076, + "step": 3858 + }, + { + "epoch": 0.6, + "learning_rate": 1.1316987395611805e-05, + "logits/chosen": -2.951206922531128, + "logits/rejected": -3.1673901081085205, + "logps/chosen": -109.87255859375, + "logps/rejected": -185.0720977783203, + "loss": 0.0425, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5656039714813232, + "rewards/margins": 3.2536795139312744, + "rewards/rejected": -5.819283485412598, + "step": 3859 + }, + { + "epoch": 0.6, + "learning_rate": 1.1316253955080657e-05, + "logits/chosen": -3.213428258895874, + "logits/rejected": -3.320796012878418, + "logps/chosen": -32.40888595581055, + "logps/rejected": -117.51864624023438, + "loss": 0.0251, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0930333137512207, + "rewards/margins": 3.955303430557251, + "rewards/rejected": -6.048336505889893, + "step": 3860 + }, + { + "epoch": 0.6, + "learning_rate": 1.1315520514549508e-05, + "logits/chosen": -2.4522545337677, + "logits/rejected": -3.209719657897949, + "logps/chosen": -109.4429702758789, + "logps/rejected": -150.70127868652344, + "loss": 2.6208, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.101240634918213, + "rewards/margins": -0.9721719026565552, + "rewards/rejected": -4.129068851470947, + "step": 3861 + }, + { + "epoch": 0.6, + "learning_rate": 1.1314787074018362e-05, + "logits/chosen": -2.323357343673706, + "logits/rejected": -3.1320266723632812, + "logps/chosen": -536.2970581054688, + "logps/rejected": -674.1311645507812, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01689453423023224, + "rewards/margins": 7.076590061187744, + "rewards/rejected": -7.093484878540039, + "step": 3862 + }, + { + "epoch": 0.6, + "learning_rate": 1.1314053633487214e-05, + "logits/chosen": -2.464390516281128, + "logits/rejected": -3.2084975242614746, + "logps/chosen": -622.0516967773438, + "logps/rejected": -542.0411376953125, + "loss": 0.4041, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.9267487525939941, + "rewards/margins": 1.1606563329696655, + "rewards/rejected": -3.08740496635437, + "step": 3863 + }, + { + "epoch": 0.6, + "learning_rate": 1.1313320192956067e-05, + "logits/chosen": -2.265289545059204, + "logits/rejected": -3.2143054008483887, + "logps/chosen": -151.28152465820312, + "logps/rejected": -329.9674072265625, + "loss": 0.169, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6631804704666138, + "rewards/margins": 2.8843584060668945, + "rewards/rejected": -4.547538757324219, + "step": 3864 + }, + { + "epoch": 0.6, + "learning_rate": 1.131258675242492e-05, + "logits/chosen": -1.5286470651626587, + "logits/rejected": -3.1790335178375244, + "logps/chosen": -222.11122131347656, + "logps/rejected": -267.8401794433594, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6218093633651733, + "rewards/margins": 5.3653364181518555, + "rewards/rejected": -5.987145900726318, + "step": 3865 + }, + { + "epoch": 0.6, + "learning_rate": 1.1311853311893771e-05, + "logits/chosen": -2.2683494091033936, + "logits/rejected": -3.2431914806365967, + "logps/chosen": -83.31829833984375, + "logps/rejected": -339.2803955078125, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.163703203201294, + "rewards/margins": 4.191516876220703, + "rewards/rejected": -6.355219841003418, + "step": 3866 + }, + { + "epoch": 0.6, + "learning_rate": 1.1311119871362623e-05, + "logits/chosen": -2.114488124847412, + "logits/rejected": -2.9458205699920654, + "logps/chosen": -124.64590454101562, + "logps/rejected": -116.60261535644531, + "loss": 1.4983, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.4865312576293945, + "rewards/margins": -0.3364948034286499, + "rewards/rejected": -5.150036334991455, + "step": 3867 + }, + { + "epoch": 0.6, + "learning_rate": 1.1310386430831475e-05, + "logits/chosen": -3.1461360454559326, + "logits/rejected": -2.5767953395843506, + "logps/chosen": -355.21282958984375, + "logps/rejected": -491.52850341796875, + "loss": 1.565, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.403575897216797, + "rewards/margins": 0.07907545566558838, + "rewards/rejected": -2.482651472091675, + "step": 3868 + }, + { + "epoch": 0.6, + "learning_rate": 1.1309652990300327e-05, + "logits/chosen": -2.963718891143799, + "logits/rejected": -3.251044273376465, + "logps/chosen": -155.30935668945312, + "logps/rejected": -284.1723327636719, + "loss": 3.0256, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.982741355895996, + "rewards/margins": -2.9652509689331055, + "rewards/rejected": -2.0174903869628906, + "step": 3869 + }, + { + "epoch": 0.6, + "learning_rate": 1.1308919549769179e-05, + "logits/chosen": -1.9342408180236816, + "logits/rejected": -2.6141836643218994, + "logps/chosen": -157.78668212890625, + "logps/rejected": -219.82180786132812, + "loss": 2.9, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.735188007354736, + "rewards/margins": -2.770446538925171, + "rewards/rejected": -2.9647414684295654, + "step": 3870 + }, + { + "epoch": 0.6, + "learning_rate": 1.1308186109238032e-05, + "logits/chosen": -2.835820436477661, + "logits/rejected": -3.207242250442505, + "logps/chosen": -159.6322479248047, + "logps/rejected": -303.262451171875, + "loss": 0.1441, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.416370391845703, + "rewards/margins": 2.5027079582214355, + "rewards/rejected": -4.919078350067139, + "step": 3871 + }, + { + "epoch": 0.6, + "learning_rate": 1.1307452668706884e-05, + "logits/chosen": -3.02648663520813, + "logits/rejected": -3.178640365600586, + "logps/chosen": -240.59133911132812, + "logps/rejected": -180.23428344726562, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3942551016807556, + "rewards/margins": 4.68720817565918, + "rewards/rejected": -5.08146333694458, + "step": 3872 + }, + { + "epoch": 0.6, + "learning_rate": 1.1306719228175736e-05, + "logits/chosen": -2.9803521633148193, + "logits/rejected": -3.206609010696411, + "logps/chosen": -112.19363403320312, + "logps/rejected": -119.51307678222656, + "loss": 0.8856, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0103671550750732, + "rewards/margins": 1.8781535625457764, + "rewards/rejected": -4.88852071762085, + "step": 3873 + }, + { + "epoch": 0.6, + "learning_rate": 1.1305985787644588e-05, + "logits/chosen": -1.2337287664413452, + "logits/rejected": -3.0654118061065674, + "logps/chosen": -156.3901824951172, + "logps/rejected": -301.3959045410156, + "loss": 1.9882, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8728199005126953, + "rewards/margins": -1.4238934516906738, + "rewards/rejected": -2.4489264488220215, + "step": 3874 + }, + { + "epoch": 0.6, + "learning_rate": 1.130525234711344e-05, + "logits/chosen": -2.6141817569732666, + "logits/rejected": -3.099510669708252, + "logps/chosen": -196.98739624023438, + "logps/rejected": -362.64398193359375, + "loss": 0.0949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7641021609306335, + "rewards/margins": 3.8180313110351562, + "rewards/rejected": -4.5821332931518555, + "step": 3875 + }, + { + "epoch": 0.6, + "learning_rate": 1.1304518906582292e-05, + "logits/chosen": -2.944720506668091, + "logits/rejected": -3.3649981021881104, + "logps/chosen": -63.013484954833984, + "logps/rejected": -172.0924835205078, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.053173303604126, + "rewards/margins": 4.066876411437988, + "rewards/rejected": -6.120049476623535, + "step": 3876 + }, + { + "epoch": 0.6, + "learning_rate": 1.1303785466051144e-05, + "logits/chosen": -1.4723163843154907, + "logits/rejected": -3.091393232345581, + "logps/chosen": -205.43519592285156, + "logps/rejected": -318.1528015136719, + "loss": 0.0447, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1449403762817383, + "rewards/margins": 4.3417067527771, + "rewards/rejected": -6.48664665222168, + "step": 3877 + }, + { + "epoch": 0.6, + "learning_rate": 1.1303052025519995e-05, + "logits/chosen": -3.117360830307007, + "logits/rejected": -2.8576583862304688, + "logps/chosen": -216.92083740234375, + "logps/rejected": -98.02040100097656, + "loss": 3.8185, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.7727508544921875, + "rewards/margins": -2.0303468704223633, + "rewards/rejected": -3.742403984069824, + "step": 3878 + }, + { + "epoch": 0.6, + "learning_rate": 1.1302318584988847e-05, + "logits/chosen": -2.603060007095337, + "logits/rejected": -2.837324380874634, + "logps/chosen": -62.01219940185547, + "logps/rejected": -236.07737731933594, + "loss": 0.0735, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0786349773406982, + "rewards/margins": 2.6873085498809814, + "rewards/rejected": -5.76594352722168, + "step": 3879 + }, + { + "epoch": 0.6, + "learning_rate": 1.1301585144457701e-05, + "logits/chosen": -3.0839715003967285, + "logits/rejected": -3.133247137069702, + "logps/chosen": -412.3887023925781, + "logps/rejected": -362.06976318359375, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1529357433319092, + "rewards/margins": 5.2257208824157715, + "rewards/rejected": -6.378656387329102, + "step": 3880 + }, + { + "epoch": 0.6, + "learning_rate": 1.1300851703926553e-05, + "logits/chosen": -2.6209301948547363, + "logits/rejected": -3.097507953643799, + "logps/chosen": -104.8455810546875, + "logps/rejected": -252.00064086914062, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9215705394744873, + "rewards/margins": 5.283202171325684, + "rewards/rejected": -7.20477294921875, + "step": 3881 + }, + { + "epoch": 0.6, + "learning_rate": 1.1300118263395405e-05, + "logits/chosen": -2.576777458190918, + "logits/rejected": -1.981386661529541, + "logps/chosen": -571.6568603515625, + "logps/rejected": -520.61767578125, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3565406799316406, + "rewards/margins": 4.310131072998047, + "rewards/rejected": -5.6666717529296875, + "step": 3882 + }, + { + "epoch": 0.6, + "learning_rate": 1.1299384822864256e-05, + "logits/chosen": -2.3707683086395264, + "logits/rejected": -3.2829432487487793, + "logps/chosen": -122.2470932006836, + "logps/rejected": -287.67919921875, + "loss": 0.032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6592323780059814, + "rewards/margins": 5.016488075256348, + "rewards/rejected": -6.67572021484375, + "step": 3883 + }, + { + "epoch": 0.6, + "learning_rate": 1.1298651382333108e-05, + "logits/chosen": -2.370168685913086, + "logits/rejected": -2.546844005584717, + "logps/chosen": -151.7229766845703, + "logps/rejected": -297.2734680175781, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0021175146102905, + "rewards/margins": 7.581404209136963, + "rewards/rejected": -8.583521842956543, + "step": 3884 + }, + { + "epoch": 0.6, + "learning_rate": 1.129791794180196e-05, + "logits/chosen": -3.1628267765045166, + "logits/rejected": -3.011085033416748, + "logps/chosen": -146.80126953125, + "logps/rejected": -45.20037841796875, + "loss": 2.3912, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.529482364654541, + "rewards/margins": -2.2215614318847656, + "rewards/rejected": -2.3079206943511963, + "step": 3885 + }, + { + "epoch": 0.6, + "learning_rate": 1.1297184501270812e-05, + "logits/chosen": -2.982456684112549, + "logits/rejected": -3.252471685409546, + "logps/chosen": -258.81781005859375, + "logps/rejected": -231.86566162109375, + "loss": 1.6828, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7549338340759277, + "rewards/margins": -0.4868415594100952, + "rewards/rejected": -2.268092393875122, + "step": 3886 + }, + { + "epoch": 0.6, + "learning_rate": 1.1296451060739664e-05, + "logits/chosen": -1.83830726146698, + "logits/rejected": -2.606330156326294, + "logps/chosen": -319.66265869140625, + "logps/rejected": -508.4228515625, + "loss": 3.3994, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3380441665649414, + "rewards/margins": 2.575789451599121, + "rewards/rejected": -5.9138336181640625, + "step": 3887 + }, + { + "epoch": 0.6, + "learning_rate": 1.1295717620208516e-05, + "logits/chosen": -3.013918876647949, + "logits/rejected": -3.1329469680786133, + "logps/chosen": -37.33116912841797, + "logps/rejected": -114.50202941894531, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.787947416305542, + "rewards/margins": 3.887894868850708, + "rewards/rejected": -5.67584228515625, + "step": 3888 + }, + { + "epoch": 0.6, + "learning_rate": 1.129498417967737e-05, + "logits/chosen": -3.2685890197753906, + "logits/rejected": -2.9918370246887207, + "logps/chosen": -192.99884033203125, + "logps/rejected": -252.44000244140625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.378365397453308, + "rewards/margins": 7.333889961242676, + "rewards/rejected": -8.712255477905273, + "step": 3889 + }, + { + "epoch": 0.6, + "learning_rate": 1.1294250739146221e-05, + "logits/chosen": -3.150381088256836, + "logits/rejected": -3.057241678237915, + "logps/chosen": -92.9898681640625, + "logps/rejected": -121.82974243164062, + "loss": 2.2144, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4568376541137695, + "rewards/margins": -0.7992907762527466, + "rewards/rejected": -2.6575467586517334, + "step": 3890 + }, + { + "epoch": 0.61, + "learning_rate": 1.1293517298615073e-05, + "logits/chosen": -1.901335597038269, + "logits/rejected": -3.112886428833008, + "logps/chosen": -112.11103820800781, + "logps/rejected": -197.8376922607422, + "loss": 1.2747, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.002598285675049, + "rewards/margins": 1.152821660041809, + "rewards/rejected": -4.155419826507568, + "step": 3891 + }, + { + "epoch": 0.61, + "learning_rate": 1.1292783858083925e-05, + "logits/chosen": -1.789623737335205, + "logits/rejected": -2.113574504852295, + "logps/chosen": -256.3450927734375, + "logps/rejected": -400.0832824707031, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8926964402198792, + "rewards/margins": 5.264204025268555, + "rewards/rejected": -6.156900405883789, + "step": 3892 + }, + { + "epoch": 0.61, + "learning_rate": 1.1292050417552777e-05, + "logits/chosen": -1.356848120689392, + "logits/rejected": -2.9761250019073486, + "logps/chosen": -112.57872009277344, + "logps/rejected": -361.33843994140625, + "loss": 0.3811, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7829232215881348, + "rewards/margins": 1.3364105224609375, + "rewards/rejected": -4.119333744049072, + "step": 3893 + }, + { + "epoch": 0.61, + "learning_rate": 1.1291316977021629e-05, + "logits/chosen": -1.2995365858078003, + "logits/rejected": -2.9498343467712402, + "logps/chosen": -142.06336975097656, + "logps/rejected": -284.0243835449219, + "loss": 3.096, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.577859878540039, + "rewards/margins": -2.4455130100250244, + "rewards/rejected": -3.1323471069335938, + "step": 3894 + }, + { + "epoch": 0.61, + "learning_rate": 1.129058353649048e-05, + "logits/chosen": -3.0578811168670654, + "logits/rejected": -2.414008140563965, + "logps/chosen": -410.66748046875, + "logps/rejected": -208.8895721435547, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4725296199321747, + "rewards/margins": 4.236906051635742, + "rewards/rejected": -4.70943546295166, + "step": 3895 + }, + { + "epoch": 0.61, + "learning_rate": 1.1289850095959334e-05, + "logits/chosen": -1.8195806741714478, + "logits/rejected": -3.1997721195220947, + "logps/chosen": -134.61849975585938, + "logps/rejected": -435.3106689453125, + "loss": 0.0274, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8817260265350342, + "rewards/margins": 4.241935729980469, + "rewards/rejected": -6.123661994934082, + "step": 3896 + }, + { + "epoch": 0.61, + "learning_rate": 1.1289116655428186e-05, + "logits/chosen": -3.142472743988037, + "logits/rejected": -2.6830692291259766, + "logps/chosen": -192.36935424804688, + "logps/rejected": -233.1968536376953, + "loss": 0.3081, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.535017490386963, + "rewards/margins": 3.9580183029174805, + "rewards/rejected": -6.493035793304443, + "step": 3897 + }, + { + "epoch": 0.61, + "learning_rate": 1.128838321489704e-05, + "logits/chosen": -3.2547147274017334, + "logits/rejected": -3.0864312648773193, + "logps/chosen": -232.61627197265625, + "logps/rejected": -76.5836181640625, + "loss": 4.7036, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.9570488929748535, + "rewards/margins": -3.7948248386383057, + "rewards/rejected": -1.1622236967086792, + "step": 3898 + }, + { + "epoch": 0.61, + "learning_rate": 1.1287649774365892e-05, + "logits/chosen": -2.2104125022888184, + "logits/rejected": -3.207702398300171, + "logps/chosen": -93.37903594970703, + "logps/rejected": -135.40121459960938, + "loss": 1.6236, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5780420303344727, + "rewards/margins": 0.24212408065795898, + "rewards/rejected": -3.8201661109924316, + "step": 3899 + }, + { + "epoch": 0.61, + "learning_rate": 1.1286916333834744e-05, + "logits/chosen": -2.8523061275482178, + "logits/rejected": -3.0431618690490723, + "logps/chosen": -209.2153778076172, + "logps/rejected": -188.1805419921875, + "loss": 0.0888, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8055145740509033, + "rewards/margins": 3.0022263526916504, + "rewards/rejected": -4.807741165161133, + "step": 3900 + }, + { + "epoch": 0.61, + "learning_rate": 1.1286182893303595e-05, + "logits/chosen": -2.069451332092285, + "logits/rejected": -3.2064497470855713, + "logps/chosen": -144.97360229492188, + "logps/rejected": -379.5577697753906, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.623572587966919, + "rewards/margins": 8.778632164001465, + "rewards/rejected": -10.402204513549805, + "step": 3901 + }, + { + "epoch": 0.61, + "learning_rate": 1.1285449452772447e-05, + "logits/chosen": -3.172919750213623, + "logits/rejected": -2.7408599853515625, + "logps/chosen": -375.22125244140625, + "logps/rejected": -302.4430236816406, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4523628056049347, + "rewards/margins": 5.186763763427734, + "rewards/rejected": -5.639126777648926, + "step": 3902 + }, + { + "epoch": 0.61, + "learning_rate": 1.1284716012241299e-05, + "logits/chosen": -2.0663135051727295, + "logits/rejected": -2.9894118309020996, + "logps/chosen": -173.65805053710938, + "logps/rejected": -433.7037353515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.837212324142456, + "rewards/margins": 7.964733123779297, + "rewards/rejected": -9.801945686340332, + "step": 3903 + }, + { + "epoch": 0.61, + "learning_rate": 1.1283982571710151e-05, + "logits/chosen": -3.064887762069702, + "logits/rejected": -1.9931893348693848, + "logps/chosen": -277.79302978515625, + "logps/rejected": -192.91177368164062, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2768471240997314, + "rewards/margins": 4.241063594818115, + "rewards/rejected": -5.517910957336426, + "step": 3904 + }, + { + "epoch": 0.61, + "learning_rate": 1.1283249131179003e-05, + "logits/chosen": -2.069888114929199, + "logits/rejected": -3.1958811283111572, + "logps/chosen": -273.0550842285156, + "logps/rejected": -294.614990234375, + "loss": 0.8142, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3923981189727783, + "rewards/margins": -0.011404991149902344, + "rewards/rejected": -2.380993127822876, + "step": 3905 + }, + { + "epoch": 0.61, + "learning_rate": 1.1282515690647856e-05, + "logits/chosen": -2.8102030754089355, + "logits/rejected": -2.934471845626831, + "logps/chosen": -108.25821685791016, + "logps/rejected": -180.5901641845703, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.175951361656189, + "rewards/margins": 3.816696882247925, + "rewards/rejected": -4.992648124694824, + "step": 3906 + }, + { + "epoch": 0.61, + "learning_rate": 1.1281782250116708e-05, + "logits/chosen": -2.9953317642211914, + "logits/rejected": -3.3131113052368164, + "logps/chosen": -105.38951110839844, + "logps/rejected": -231.52561950683594, + "loss": 0.3484, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2197837829589844, + "rewards/margins": 1.9214093685150146, + "rewards/rejected": -5.141193389892578, + "step": 3907 + }, + { + "epoch": 0.61, + "learning_rate": 1.128104880958556e-05, + "logits/chosen": -2.8334121704101562, + "logits/rejected": -1.7087785005569458, + "logps/chosen": -197.35643005371094, + "logps/rejected": -101.73489379882812, + "loss": 2.093, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.06111478805542, + "rewards/margins": -0.05706477165222168, + "rewards/rejected": -4.004050254821777, + "step": 3908 + }, + { + "epoch": 0.61, + "learning_rate": 1.1280315369054412e-05, + "logits/chosen": -2.540776014328003, + "logits/rejected": -2.9005985260009766, + "logps/chosen": -301.6378479003906, + "logps/rejected": -369.40594482421875, + "loss": 1.7699, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.181899309158325, + "rewards/margins": 1.120481014251709, + "rewards/rejected": -4.302380561828613, + "step": 3909 + }, + { + "epoch": 0.61, + "learning_rate": 1.1279581928523264e-05, + "logits/chosen": -2.8238449096679688, + "logits/rejected": -3.1314358711242676, + "logps/chosen": -52.206382751464844, + "logps/rejected": -143.83554077148438, + "loss": 0.1054, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7625679969787598, + "rewards/margins": 2.212581157684326, + "rewards/rejected": -4.975149154663086, + "step": 3910 + }, + { + "epoch": 0.61, + "learning_rate": 1.1278848487992116e-05, + "logits/chosen": -3.094595432281494, + "logits/rejected": -2.4419424533843994, + "logps/chosen": -243.4374237060547, + "logps/rejected": -208.215576171875, + "loss": 1.6181, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6149940490722656, + "rewards/margins": 0.7085622549057007, + "rewards/rejected": -3.323556423187256, + "step": 3911 + }, + { + "epoch": 0.61, + "learning_rate": 1.1278115047460968e-05, + "logits/chosen": -3.1405162811279297, + "logits/rejected": -2.878404140472412, + "logps/chosen": -731.5660400390625, + "logps/rejected": -514.9208984375, + "loss": 1.6627, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.879538059234619, + "rewards/margins": 1.5108871459960938, + "rewards/rejected": -4.390425205230713, + "step": 3912 + }, + { + "epoch": 0.61, + "learning_rate": 1.127738160692982e-05, + "logits/chosen": -3.1508774757385254, + "logits/rejected": -2.6487200260162354, + "logps/chosen": -285.1561279296875, + "logps/rejected": -209.62628173828125, + "loss": 0.0659, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1462814807891846, + "rewards/margins": 4.230111598968506, + "rewards/rejected": -5.376392841339111, + "step": 3913 + }, + { + "epoch": 0.61, + "learning_rate": 1.1276648166398671e-05, + "logits/chosen": -3.3683955669403076, + "logits/rejected": -3.18876314163208, + "logps/chosen": -162.00018310546875, + "logps/rejected": -244.09426879882812, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7850542068481445, + "rewards/margins": 5.724392890930176, + "rewards/rejected": -7.50944709777832, + "step": 3914 + }, + { + "epoch": 0.61, + "learning_rate": 1.1275914725867525e-05, + "logits/chosen": -3.32087779045105, + "logits/rejected": -3.069598436355591, + "logps/chosen": -302.4647216796875, + "logps/rejected": -211.44602966308594, + "loss": 2.8054, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8610215187072754, + "rewards/margins": -1.0930324792861938, + "rewards/rejected": -2.767988920211792, + "step": 3915 + }, + { + "epoch": 0.61, + "learning_rate": 1.1275181285336377e-05, + "logits/chosen": -1.9161344766616821, + "logits/rejected": -3.0324485301971436, + "logps/chosen": -53.09386444091797, + "logps/rejected": -210.04995727539062, + "loss": 0.0655, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.142367362976074, + "rewards/margins": 3.1019763946533203, + "rewards/rejected": -6.2443437576293945, + "step": 3916 + }, + { + "epoch": 0.61, + "learning_rate": 1.1274447844805229e-05, + "logits/chosen": -1.9845728874206543, + "logits/rejected": -3.2382073402404785, + "logps/chosen": -202.65452575683594, + "logps/rejected": -559.8617553710938, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3205413818359375, + "rewards/margins": 4.340127944946289, + "rewards/rejected": -5.660669326782227, + "step": 3917 + }, + { + "epoch": 0.61, + "learning_rate": 1.127371440427408e-05, + "logits/chosen": -3.057912588119507, + "logits/rejected": -2.71329927444458, + "logps/chosen": -134.1057891845703, + "logps/rejected": -185.86045837402344, + "loss": 1.978, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.831207752227783, + "rewards/margins": 1.5275152921676636, + "rewards/rejected": -5.358722686767578, + "step": 3918 + }, + { + "epoch": 0.61, + "learning_rate": 1.1272980963742933e-05, + "logits/chosen": -3.2247347831726074, + "logits/rejected": -3.3222341537475586, + "logps/chosen": -309.10595703125, + "logps/rejected": -472.5005798339844, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5476856231689453, + "rewards/margins": 6.155622482299805, + "rewards/rejected": -6.70330810546875, + "step": 3919 + }, + { + "epoch": 0.61, + "learning_rate": 1.1272247523211784e-05, + "logits/chosen": -3.220287799835205, + "logits/rejected": -2.4219024181365967, + "logps/chosen": -512.69140625, + "logps/rejected": -342.299560546875, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6412109136581421, + "rewards/margins": 4.335439205169678, + "rewards/rejected": -4.976650238037109, + "step": 3920 + }, + { + "epoch": 0.61, + "learning_rate": 1.1271514082680636e-05, + "logits/chosen": -2.9978208541870117, + "logits/rejected": -3.080817461013794, + "logps/chosen": -89.60275268554688, + "logps/rejected": -268.61663818359375, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7611727714538574, + "rewards/margins": 6.810307502746582, + "rewards/rejected": -8.571479797363281, + "step": 3921 + }, + { + "epoch": 0.61, + "learning_rate": 1.1270780642149488e-05, + "logits/chosen": -3.165647029876709, + "logits/rejected": -2.2998080253601074, + "logps/chosen": -156.5771026611328, + "logps/rejected": -228.49571228027344, + "loss": 0.2453, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.028026103973389, + "rewards/margins": 3.0893495082855225, + "rewards/rejected": -7.11737585067749, + "step": 3922 + }, + { + "epoch": 0.61, + "learning_rate": 1.127004720161834e-05, + "logits/chosen": -3.3287532329559326, + "logits/rejected": -2.1542818546295166, + "logps/chosen": -286.3052978515625, + "logps/rejected": -56.005226135253906, + "loss": 2.7277, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6178529262542725, + "rewards/margins": -0.2830507755279541, + "rewards/rejected": -3.3348021507263184, + "step": 3923 + }, + { + "epoch": 0.61, + "learning_rate": 1.1269313761087194e-05, + "logits/chosen": -2.8145840167999268, + "logits/rejected": -3.2399401664733887, + "logps/chosen": -590.2584228515625, + "logps/rejected": -603.9953002929688, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2543472051620483, + "rewards/margins": 4.235905647277832, + "rewards/rejected": -5.49025297164917, + "step": 3924 + }, + { + "epoch": 0.61, + "learning_rate": 1.1268580320556046e-05, + "logits/chosen": -3.002119779586792, + "logits/rejected": -3.191265106201172, + "logps/chosen": -448.545654296875, + "logps/rejected": -457.02947998046875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1915847659111023, + "rewards/margins": 6.579649925231934, + "rewards/rejected": -6.771234512329102, + "step": 3925 + }, + { + "epoch": 0.61, + "learning_rate": 1.1267846880024897e-05, + "logits/chosen": -2.4482600688934326, + "logits/rejected": -3.225705623626709, + "logps/chosen": -72.31380462646484, + "logps/rejected": -185.98922729492188, + "loss": 0.1149, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9836089611053467, + "rewards/margins": 3.053896188735962, + "rewards/rejected": -5.037505149841309, + "step": 3926 + }, + { + "epoch": 0.61, + "learning_rate": 1.126711343949375e-05, + "logits/chosen": -2.8258042335510254, + "logits/rejected": -3.261079788208008, + "logps/chosen": -212.24459838867188, + "logps/rejected": -428.6764221191406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4515807628631592, + "rewards/margins": 8.20214557647705, + "rewards/rejected": -9.653726577758789, + "step": 3927 + }, + { + "epoch": 0.61, + "learning_rate": 1.1266379998962601e-05, + "logits/chosen": -3.155168056488037, + "logits/rejected": -3.137732982635498, + "logps/chosen": -98.89424896240234, + "logps/rejected": -180.18478393554688, + "loss": 0.0384, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.460786819458008, + "rewards/margins": 3.8389315605163574, + "rewards/rejected": -6.299718379974365, + "step": 3928 + }, + { + "epoch": 0.61, + "learning_rate": 1.1265646558431453e-05, + "logits/chosen": -3.2285349369049072, + "logits/rejected": -3.145416259765625, + "logps/chosen": -351.1978759765625, + "logps/rejected": -181.11952209472656, + "loss": 2.1775, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.044917106628418, + "rewards/margins": 0.16112422943115234, + "rewards/rejected": -4.20604133605957, + "step": 3929 + }, + { + "epoch": 0.61, + "learning_rate": 1.1264913117900307e-05, + "logits/chosen": -2.9444386959075928, + "logits/rejected": -2.5301125049591064, + "logps/chosen": -173.5108642578125, + "logps/rejected": -118.1866683959961, + "loss": 1.0434, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1753876209259033, + "rewards/margins": 2.1742749214172363, + "rewards/rejected": -4.3496623039245605, + "step": 3930 + }, + { + "epoch": 0.61, + "learning_rate": 1.1264179677369159e-05, + "logits/chosen": -3.169747829437256, + "logits/rejected": -3.1932456493377686, + "logps/chosen": -168.4527130126953, + "logps/rejected": -222.81219482421875, + "loss": 0.0284, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6850191354751587, + "rewards/margins": 3.660386800765991, + "rewards/rejected": -5.3454060554504395, + "step": 3931 + }, + { + "epoch": 0.61, + "learning_rate": 1.126344623683801e-05, + "logits/chosen": -2.695910930633545, + "logits/rejected": -2.9454686641693115, + "logps/chosen": -117.58999633789062, + "logps/rejected": -203.13693237304688, + "loss": 0.5417, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7375606298446655, + "rewards/margins": 2.9855594635009766, + "rewards/rejected": -4.723120212554932, + "step": 3932 + }, + { + "epoch": 0.61, + "learning_rate": 1.1262712796306864e-05, + "logits/chosen": -3.3222734928131104, + "logits/rejected": -2.5740885734558105, + "logps/chosen": -614.627685546875, + "logps/rejected": -72.3291015625, + "loss": 1.7663, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4072396755218506, + "rewards/margins": 0.4914206266403198, + "rewards/rejected": -1.8986603021621704, + "step": 3933 + }, + { + "epoch": 0.61, + "learning_rate": 1.1261979355775716e-05, + "logits/chosen": -3.038684368133545, + "logits/rejected": -2.1464860439300537, + "logps/chosen": -152.8272247314453, + "logps/rejected": -105.50959777832031, + "loss": 0.2016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.552816390991211, + "rewards/margins": 1.6548370122909546, + "rewards/rejected": -3.207653522491455, + "step": 3934 + }, + { + "epoch": 0.61, + "learning_rate": 1.1261245915244568e-05, + "logits/chosen": -2.7521824836730957, + "logits/rejected": -3.239496946334839, + "logps/chosen": -511.4392395019531, + "logps/rejected": -502.74517822265625, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5198546648025513, + "rewards/margins": 3.7872262001037598, + "rewards/rejected": -5.3070807456970215, + "step": 3935 + }, + { + "epoch": 0.61, + "learning_rate": 1.126051247471342e-05, + "logits/chosen": -1.9925912618637085, + "logits/rejected": -3.0192677974700928, + "logps/chosen": -242.60986328125, + "logps/rejected": -340.969482421875, + "loss": 1.9686, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.669175148010254, + "rewards/margins": 2.4268202781677246, + "rewards/rejected": -6.09599494934082, + "step": 3936 + }, + { + "epoch": 0.61, + "learning_rate": 1.1259779034182271e-05, + "logits/chosen": -3.3751354217529297, + "logits/rejected": -2.8308606147766113, + "logps/chosen": -309.6779479980469, + "logps/rejected": -308.3880920410156, + "loss": 1.6932, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.704550266265869, + "rewards/margins": -0.43592262268066406, + "rewards/rejected": -3.268627643585205, + "step": 3937 + }, + { + "epoch": 0.61, + "learning_rate": 1.1259045593651123e-05, + "logits/chosen": -2.913625478744507, + "logits/rejected": -3.1053802967071533, + "logps/chosen": -626.6900024414062, + "logps/rejected": -527.3980712890625, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7790725827217102, + "rewards/margins": 5.413992404937744, + "rewards/rejected": -6.1930646896362305, + "step": 3938 + }, + { + "epoch": 0.61, + "learning_rate": 1.1258312153119975e-05, + "logits/chosen": -3.194467306137085, + "logits/rejected": -3.0788064002990723, + "logps/chosen": -509.3960266113281, + "logps/rejected": -365.18182373046875, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8845977783203125, + "rewards/margins": 4.044003486633301, + "rewards/rejected": -4.928601264953613, + "step": 3939 + }, + { + "epoch": 0.61, + "learning_rate": 1.1257578712588827e-05, + "logits/chosen": -1.872025489807129, + "logits/rejected": -3.10219407081604, + "logps/chosen": -123.57948303222656, + "logps/rejected": -297.74847412109375, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5363445281982422, + "rewards/margins": 4.480647087097168, + "rewards/rejected": -5.01699161529541, + "step": 3940 + }, + { + "epoch": 0.61, + "learning_rate": 1.1256845272057679e-05, + "logits/chosen": -2.8995635509490967, + "logits/rejected": -3.316692590713501, + "logps/chosen": -167.1862335205078, + "logps/rejected": -261.951416015625, + "loss": 0.1445, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1768593788146973, + "rewards/margins": 3.0170774459838867, + "rewards/rejected": -5.193936824798584, + "step": 3941 + }, + { + "epoch": 0.61, + "learning_rate": 1.1256111831526533e-05, + "logits/chosen": -3.2015538215637207, + "logits/rejected": -2.9841926097869873, + "logps/chosen": -184.1430206298828, + "logps/rejected": -174.96908569335938, + "loss": 0.9171, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2463722229003906, + "rewards/margins": 0.47060996294021606, + "rewards/rejected": -3.716982364654541, + "step": 3942 + }, + { + "epoch": 0.61, + "learning_rate": 1.1255378390995384e-05, + "logits/chosen": -2.1444547176361084, + "logits/rejected": -2.5244343280792236, + "logps/chosen": -201.45309448242188, + "logps/rejected": -338.7101745605469, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6715476512908936, + "rewards/margins": 5.987287998199463, + "rewards/rejected": -7.658835411071777, + "step": 3943 + }, + { + "epoch": 0.61, + "learning_rate": 1.1254644950464236e-05, + "logits/chosen": -3.2274179458618164, + "logits/rejected": -2.618480682373047, + "logps/chosen": -377.4862060546875, + "logps/rejected": -490.615478515625, + "loss": 1.6066, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5461418628692627, + "rewards/margins": 1.4622199535369873, + "rewards/rejected": -4.00836181640625, + "step": 3944 + }, + { + "epoch": 0.61, + "learning_rate": 1.1253911509933088e-05, + "logits/chosen": -2.853872299194336, + "logits/rejected": -3.0486903190612793, + "logps/chosen": -574.4193115234375, + "logps/rejected": -633.801025390625, + "loss": 0.6948, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.8101150393486023, + "rewards/margins": 1.571559190750122, + "rewards/rejected": -2.381674289703369, + "step": 3945 + }, + { + "epoch": 0.61, + "learning_rate": 1.125317806940194e-05, + "logits/chosen": -2.5095102787017822, + "logits/rejected": -2.958416700363159, + "logps/chosen": -85.16978454589844, + "logps/rejected": -229.6205291748047, + "loss": 0.0457, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1554811000823975, + "rewards/margins": 3.552359104156494, + "rewards/rejected": -6.7078399658203125, + "step": 3946 + }, + { + "epoch": 0.61, + "learning_rate": 1.1252444628870792e-05, + "logits/chosen": -3.1461148262023926, + "logits/rejected": -3.0245630741119385, + "logps/chosen": -325.7459716796875, + "logps/rejected": -285.80560302734375, + "loss": 0.0741, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.264883041381836, + "rewards/margins": 2.8095293045043945, + "rewards/rejected": -5.0744123458862305, + "step": 3947 + }, + { + "epoch": 0.61, + "learning_rate": 1.1251711188339644e-05, + "logits/chosen": -2.4726595878601074, + "logits/rejected": -2.8513576984405518, + "logps/chosen": -130.54966735839844, + "logps/rejected": -320.848876953125, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7009248733520508, + "rewards/margins": 8.377063751220703, + "rewards/rejected": -10.077988624572754, + "step": 3948 + }, + { + "epoch": 0.61, + "learning_rate": 1.1250977747808496e-05, + "logits/chosen": -3.1052818298339844, + "logits/rejected": -3.1520464420318604, + "logps/chosen": -292.75689697265625, + "logps/rejected": -412.0491943359375, + "loss": 2.4178, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.355700492858887, + "rewards/margins": -2.197604179382324, + "rewards/rejected": -2.1580963134765625, + "step": 3949 + }, + { + "epoch": 0.61, + "learning_rate": 1.1250244307277348e-05, + "logits/chosen": -2.5923542976379395, + "logits/rejected": -3.2396819591522217, + "logps/chosen": -53.4219970703125, + "logps/rejected": -273.528076171875, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.212796688079834, + "rewards/margins": 6.280926704406738, + "rewards/rejected": -8.493722915649414, + "step": 3950 + }, + { + "epoch": 0.61, + "learning_rate": 1.1249510866746201e-05, + "logits/chosen": -2.7598073482513428, + "logits/rejected": -3.1360087394714355, + "logps/chosen": -84.1538314819336, + "logps/rejected": -343.3573303222656, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.615891695022583, + "rewards/margins": 6.687439918518066, + "rewards/rejected": -8.30333137512207, + "step": 3951 + }, + { + "epoch": 0.61, + "learning_rate": 1.1248777426215053e-05, + "logits/chosen": -3.21341872215271, + "logits/rejected": -2.905601739883423, + "logps/chosen": -449.09454345703125, + "logps/rejected": -548.2963256835938, + "loss": 1.2637, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6513481140136719, + "rewards/margins": 1.3696281909942627, + "rewards/rejected": -3.0209763050079346, + "step": 3952 + }, + { + "epoch": 0.61, + "learning_rate": 1.1248043985683905e-05, + "logits/chosen": -2.7550480365753174, + "logits/rejected": -3.3057045936584473, + "logps/chosen": -749.4661254882812, + "logps/rejected": -1038.96630859375, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7359627485275269, + "rewards/margins": 4.874258041381836, + "rewards/rejected": -6.610220432281494, + "step": 3953 + }, + { + "epoch": 0.61, + "learning_rate": 1.1247310545152757e-05, + "logits/chosen": -2.026602268218994, + "logits/rejected": -3.086714506149292, + "logps/chosen": -92.72532653808594, + "logps/rejected": -206.55953979492188, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7908267974853516, + "rewards/margins": 3.963623523712158, + "rewards/rejected": -5.75445032119751, + "step": 3954 + }, + { + "epoch": 0.62, + "learning_rate": 1.1246577104621609e-05, + "logits/chosen": -3.265491485595703, + "logits/rejected": -3.114370822906494, + "logps/chosen": -164.7435302734375, + "logps/rejected": -292.7374572753906, + "loss": 0.0805, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.208031177520752, + "rewards/margins": 2.4919567108154297, + "rewards/rejected": -4.699987888336182, + "step": 3955 + }, + { + "epoch": 0.62, + "learning_rate": 1.124584366409046e-05, + "logits/chosen": -3.3628978729248047, + "logits/rejected": -3.3001210689544678, + "logps/chosen": -189.63031005859375, + "logps/rejected": -196.82606506347656, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4424777030944824, + "rewards/margins": 5.602746963500977, + "rewards/rejected": -8.045225143432617, + "step": 3956 + }, + { + "epoch": 0.62, + "learning_rate": 1.1245110223559312e-05, + "logits/chosen": -2.03115177154541, + "logits/rejected": -3.191856861114502, + "logps/chosen": -128.98631286621094, + "logps/rejected": -423.501220703125, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8375316858291626, + "rewards/margins": 6.754231929779053, + "rewards/rejected": -8.591763496398926, + "step": 3957 + }, + { + "epoch": 0.62, + "learning_rate": 1.1244376783028164e-05, + "logits/chosen": -2.81191086769104, + "logits/rejected": -3.0255491733551025, + "logps/chosen": -115.86808776855469, + "logps/rejected": -351.05029296875, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.308309316635132, + "rewards/margins": 5.865499973297119, + "rewards/rejected": -8.173809051513672, + "step": 3958 + }, + { + "epoch": 0.62, + "learning_rate": 1.1243643342497016e-05, + "logits/chosen": -2.092630386352539, + "logits/rejected": -3.1004741191864014, + "logps/chosen": -158.27871704101562, + "logps/rejected": -315.77392578125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8433758020401001, + "rewards/margins": 5.4261884689331055, + "rewards/rejected": -6.269564151763916, + "step": 3959 + }, + { + "epoch": 0.62, + "learning_rate": 1.124290990196587e-05, + "logits/chosen": -3.3721976280212402, + "logits/rejected": -2.5874855518341064, + "logps/chosen": -440.9321594238281, + "logps/rejected": -165.46768188476562, + "loss": 2.2505, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.163970947265625, + "rewards/margins": -1.0679699182510376, + "rewards/rejected": -3.096001148223877, + "step": 3960 + }, + { + "epoch": 0.62, + "learning_rate": 1.1242176461434722e-05, + "logits/chosen": -2.734684467315674, + "logits/rejected": -3.1530563831329346, + "logps/chosen": -129.1830596923828, + "logps/rejected": -78.89312744140625, + "loss": 0.4862, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.374022960662842, + "rewards/margins": 0.9324550628662109, + "rewards/rejected": -4.306478023529053, + "step": 3961 + }, + { + "epoch": 0.62, + "learning_rate": 1.1241443020903574e-05, + "logits/chosen": -3.30094575881958, + "logits/rejected": -3.466858386993408, + "logps/chosen": -342.2569580078125, + "logps/rejected": -294.73468017578125, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006994530558586121, + "rewards/margins": 4.782454490661621, + "rewards/rejected": -4.775460243225098, + "step": 3962 + }, + { + "epoch": 0.62, + "learning_rate": 1.1240709580372425e-05, + "logits/chosen": -2.4628899097442627, + "logits/rejected": -3.0672767162323, + "logps/chosen": -140.73165893554688, + "logps/rejected": -336.26025390625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2994155883789062, + "rewards/margins": 8.74313735961914, + "rewards/rejected": -10.042552947998047, + "step": 3963 + }, + { + "epoch": 0.62, + "learning_rate": 1.1239976139841279e-05, + "logits/chosen": -3.240037441253662, + "logits/rejected": -2.9805543422698975, + "logps/chosen": -154.82601928710938, + "logps/rejected": -179.80255126953125, + "loss": 2.2289, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3108932971954346, + "rewards/margins": 0.3435790538787842, + "rewards/rejected": -3.6544723510742188, + "step": 3964 + }, + { + "epoch": 0.62, + "learning_rate": 1.123924269931013e-05, + "logits/chosen": -2.993809938430786, + "logits/rejected": -3.2952733039855957, + "logps/chosen": -96.33847045898438, + "logps/rejected": -199.58697509765625, + "loss": 0.775, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.0425658226013184, + "rewards/margins": 1.5569819211959839, + "rewards/rejected": -3.5995476245880127, + "step": 3965 + }, + { + "epoch": 0.62, + "learning_rate": 1.1238509258778983e-05, + "logits/chosen": -3.1762099266052246, + "logits/rejected": -3.067899703979492, + "logps/chosen": -211.42269897460938, + "logps/rejected": -361.70220947265625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.522967576980591, + "rewards/margins": 6.248441696166992, + "rewards/rejected": -8.77140998840332, + "step": 3966 + }, + { + "epoch": 0.62, + "learning_rate": 1.1237775818247835e-05, + "logits/chosen": -2.426201105117798, + "logits/rejected": -3.207880735397339, + "logps/chosen": -86.96460723876953, + "logps/rejected": -213.63246154785156, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5300228595733643, + "rewards/margins": 4.105254650115967, + "rewards/rejected": -5.63527774810791, + "step": 3967 + }, + { + "epoch": 0.62, + "learning_rate": 1.1237042377716686e-05, + "logits/chosen": -2.9745116233825684, + "logits/rejected": -3.1497766971588135, + "logps/chosen": -119.92497253417969, + "logps/rejected": -147.10009765625, + "loss": 0.8503, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2048699855804443, + "rewards/margins": 1.9573432207107544, + "rewards/rejected": -4.162213325500488, + "step": 3968 + }, + { + "epoch": 0.62, + "learning_rate": 1.123630893718554e-05, + "logits/chosen": -2.097036361694336, + "logits/rejected": -3.0243358612060547, + "logps/chosen": -285.83673095703125, + "logps/rejected": -403.119140625, + "loss": 0.1197, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0497925281524658, + "rewards/margins": 2.87298583984375, + "rewards/rejected": -3.922778367996216, + "step": 3969 + }, + { + "epoch": 0.62, + "learning_rate": 1.1235575496654392e-05, + "logits/chosen": -2.714606285095215, + "logits/rejected": -3.1396045684814453, + "logps/chosen": -100.75299072265625, + "logps/rejected": -130.48008728027344, + "loss": 0.5634, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.696258783340454, + "rewards/margins": 1.9969017505645752, + "rewards/rejected": -3.6931605339050293, + "step": 3970 + }, + { + "epoch": 0.62, + "learning_rate": 1.1234842056123244e-05, + "logits/chosen": -3.018472194671631, + "logits/rejected": -2.5959370136260986, + "logps/chosen": -165.30548095703125, + "logps/rejected": -247.9859161376953, + "loss": 2.5813, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.770271301269531, + "rewards/margins": 0.8558142185211182, + "rewards/rejected": -6.62608528137207, + "step": 3971 + }, + { + "epoch": 0.62, + "learning_rate": 1.1234108615592096e-05, + "logits/chosen": -3.433426856994629, + "logits/rejected": -3.0066864490509033, + "logps/chosen": -267.7474365234375, + "logps/rejected": -164.29786682128906, + "loss": 1.3161, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0647149085998535, + "rewards/margins": 0.9320534467697144, + "rewards/rejected": -3.9967684745788574, + "step": 3972 + }, + { + "epoch": 0.62, + "learning_rate": 1.1233375175060948e-05, + "logits/chosen": -3.145298480987549, + "logits/rejected": -2.2506375312805176, + "logps/chosen": -409.00103759765625, + "logps/rejected": -351.1177062988281, + "loss": 0.1118, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2704663276672363, + "rewards/margins": 3.489811897277832, + "rewards/rejected": -4.760278224945068, + "step": 3973 + }, + { + "epoch": 0.62, + "learning_rate": 1.12326417345298e-05, + "logits/chosen": -3.177823781967163, + "logits/rejected": -2.2056796550750732, + "logps/chosen": -181.64381408691406, + "logps/rejected": -81.5977783203125, + "loss": 3.4802, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.945596694946289, + "rewards/margins": -3.3743879795074463, + "rewards/rejected": -3.5712084770202637, + "step": 3974 + }, + { + "epoch": 0.62, + "learning_rate": 1.1231908293998651e-05, + "logits/chosen": -2.720667600631714, + "logits/rejected": -3.4426286220550537, + "logps/chosen": -67.17928314208984, + "logps/rejected": -290.82080078125, + "loss": 0.047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3377797603607178, + "rewards/margins": 3.9533803462982178, + "rewards/rejected": -5.2911601066589355, + "step": 3975 + }, + { + "epoch": 0.62, + "learning_rate": 1.1231174853467503e-05, + "logits/chosen": -0.6695396304130554, + "logits/rejected": -2.7207529544830322, + "logps/chosen": -72.08383178710938, + "logps/rejected": -462.6741027832031, + "loss": 0.475, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0330774784088135, + "rewards/margins": 0.7507559061050415, + "rewards/rejected": -3.7838335037231445, + "step": 3976 + }, + { + "epoch": 0.62, + "learning_rate": 1.1230441412936355e-05, + "logits/chosen": -3.2024452686309814, + "logits/rejected": -2.6164910793304443, + "logps/chosen": -550.2298583984375, + "logps/rejected": -507.05279541015625, + "loss": 1.4006, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2731399536132812, + "rewards/margins": 1.2260832786560059, + "rewards/rejected": -3.499223232269287, + "step": 3977 + }, + { + "epoch": 0.62, + "learning_rate": 1.1229707972405209e-05, + "logits/chosen": -2.7475359439849854, + "logits/rejected": -3.167006254196167, + "logps/chosen": -46.43705749511719, + "logps/rejected": -186.3590087890625, + "loss": 1.0137, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6197502613067627, + "rewards/margins": -0.1504303216934204, + "rewards/rejected": -3.4693198204040527, + "step": 3978 + }, + { + "epoch": 0.62, + "learning_rate": 1.122897453187406e-05, + "logits/chosen": -3.099778175354004, + "logits/rejected": -2.6196484565734863, + "logps/chosen": -447.1676940917969, + "logps/rejected": -398.80682373046875, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5179939270019531, + "rewards/margins": 4.878879547119141, + "rewards/rejected": -4.3608856201171875, + "step": 3979 + }, + { + "epoch": 0.62, + "learning_rate": 1.1228241091342912e-05, + "logits/chosen": -2.9400179386138916, + "logits/rejected": -3.422917366027832, + "logps/chosen": -84.9666519165039, + "logps/rejected": -166.57275390625, + "loss": 0.1557, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1020851135253906, + "rewards/margins": 2.501317262649536, + "rewards/rejected": -3.603402614593506, + "step": 3980 + }, + { + "epoch": 0.62, + "learning_rate": 1.1227507650811764e-05, + "logits/chosen": -1.8653180599212646, + "logits/rejected": -3.055436134338379, + "logps/chosen": -125.13864135742188, + "logps/rejected": -430.6966247558594, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6202781796455383, + "rewards/margins": 7.382281303405762, + "rewards/rejected": -8.002559661865234, + "step": 3981 + }, + { + "epoch": 0.62, + "learning_rate": 1.1226774210280616e-05, + "logits/chosen": -2.808905601501465, + "logits/rejected": -3.3146893978118896, + "logps/chosen": -484.16326904296875, + "logps/rejected": -500.7738037109375, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0525360107421875, + "rewards/margins": 5.808967590332031, + "rewards/rejected": -6.861503601074219, + "step": 3982 + }, + { + "epoch": 0.62, + "learning_rate": 1.1226040769749468e-05, + "logits/chosen": -2.3600974082946777, + "logits/rejected": -2.9380080699920654, + "logps/chosen": -100.81565856933594, + "logps/rejected": -424.3753662109375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1447120904922485, + "rewards/margins": 6.182778358459473, + "rewards/rejected": -7.32749080657959, + "step": 3983 + }, + { + "epoch": 0.62, + "learning_rate": 1.122530732921832e-05, + "logits/chosen": -2.039116382598877, + "logits/rejected": -2.847620725631714, + "logps/chosen": -145.29331970214844, + "logps/rejected": -247.92160034179688, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1306545734405518, + "rewards/margins": 4.366827964782715, + "rewards/rejected": -6.4974822998046875, + "step": 3984 + }, + { + "epoch": 0.62, + "learning_rate": 1.1224573888687172e-05, + "logits/chosen": -3.038282632827759, + "logits/rejected": -3.320470094680786, + "logps/chosen": -126.11268615722656, + "logps/rejected": -243.03268432617188, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8408050537109375, + "rewards/margins": 4.433242321014404, + "rewards/rejected": -5.274047374725342, + "step": 3985 + }, + { + "epoch": 0.62, + "learning_rate": 1.1223840448156024e-05, + "logits/chosen": -2.756558895111084, + "logits/rejected": -3.3404977321624756, + "logps/chosen": -84.62940216064453, + "logps/rejected": -287.9864501953125, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.595487594604492, + "rewards/margins": 4.333513259887695, + "rewards/rejected": -6.9290008544921875, + "step": 3986 + }, + { + "epoch": 0.62, + "learning_rate": 1.1223107007624877e-05, + "logits/chosen": -2.6830060482025146, + "logits/rejected": -3.210726499557495, + "logps/chosen": -274.0638427734375, + "logps/rejected": -383.492919921875, + "loss": 0.0353, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0723671913146973, + "rewards/margins": 3.757261037826538, + "rewards/rejected": -4.829627990722656, + "step": 3987 + }, + { + "epoch": 0.62, + "learning_rate": 1.1222373567093729e-05, + "logits/chosen": -3.246227264404297, + "logits/rejected": -3.193307638168335, + "logps/chosen": -305.9050598144531, + "logps/rejected": -174.47613525390625, + "loss": 1.8665, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.129270076751709, + "rewards/margins": -0.16664659976959229, + "rewards/rejected": -3.9626235961914062, + "step": 3988 + }, + { + "epoch": 0.62, + "learning_rate": 1.1221640126562581e-05, + "logits/chosen": -3.1678466796875, + "logits/rejected": -2.592914342880249, + "logps/chosen": -249.4317169189453, + "logps/rejected": -272.2870788574219, + "loss": 0.0235, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8343749046325684, + "rewards/margins": 5.172070503234863, + "rewards/rejected": -7.006445407867432, + "step": 3989 + }, + { + "epoch": 0.62, + "learning_rate": 1.1220906686031433e-05, + "logits/chosen": -2.6364166736602783, + "logits/rejected": -3.2818148136138916, + "logps/chosen": -201.83375549316406, + "logps/rejected": -428.0818176269531, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.174103021621704, + "rewards/margins": 5.2595672607421875, + "rewards/rejected": -7.4336700439453125, + "step": 3990 + }, + { + "epoch": 0.62, + "learning_rate": 1.1220173245500285e-05, + "logits/chosen": -3.018857955932617, + "logits/rejected": -1.869865894317627, + "logps/chosen": -482.67816162109375, + "logps/rejected": -468.87213134765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0525566339492798, + "rewards/margins": 9.605973243713379, + "rewards/rejected": -10.658529281616211, + "step": 3991 + }, + { + "epoch": 0.62, + "learning_rate": 1.1219439804969137e-05, + "logits/chosen": -1.5353739261627197, + "logits/rejected": -3.21732759475708, + "logps/chosen": -39.763282775878906, + "logps/rejected": -386.5334167480469, + "loss": 0.9569, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9579944610595703, + "rewards/margins": 0.961993932723999, + "rewards/rejected": -3.9199883937835693, + "step": 3992 + }, + { + "epoch": 0.62, + "learning_rate": 1.1218706364437989e-05, + "logits/chosen": -2.8450865745544434, + "logits/rejected": -2.9751639366149902, + "logps/chosen": -78.8765869140625, + "logps/rejected": -335.762939453125, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.845086932182312, + "rewards/margins": 7.263343811035156, + "rewards/rejected": -9.108430862426758, + "step": 3993 + }, + { + "epoch": 0.62, + "learning_rate": 1.121797292390684e-05, + "logits/chosen": -1.7786118984222412, + "logits/rejected": -3.1828131675720215, + "logps/chosen": -118.97174835205078, + "logps/rejected": -366.52874755859375, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3170719146728516, + "rewards/margins": 4.612555503845215, + "rewards/rejected": -6.929627418518066, + "step": 3994 + }, + { + "epoch": 0.62, + "learning_rate": 1.1217239483375694e-05, + "logits/chosen": -2.848036289215088, + "logits/rejected": -3.3233115673065186, + "logps/chosen": -29.390167236328125, + "logps/rejected": -266.2943420410156, + "loss": 0.041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6230716705322266, + "rewards/margins": 4.645219802856445, + "rewards/rejected": -6.268290996551514, + "step": 3995 + }, + { + "epoch": 0.62, + "learning_rate": 1.1216506042844546e-05, + "logits/chosen": -1.981856107711792, + "logits/rejected": -3.195354700088501, + "logps/chosen": -36.979190826416016, + "logps/rejected": -403.5688171386719, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2438838481903076, + "rewards/margins": 6.510663986206055, + "rewards/rejected": -8.754548072814941, + "step": 3996 + }, + { + "epoch": 0.62, + "learning_rate": 1.1215772602313398e-05, + "logits/chosen": -2.0575788021087646, + "logits/rejected": -3.0533559322357178, + "logps/chosen": -153.34033203125, + "logps/rejected": -363.82867431640625, + "loss": 2.7672, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.962935447692871, + "rewards/margins": 2.3348729610443115, + "rewards/rejected": -8.297808647155762, + "step": 3997 + }, + { + "epoch": 0.62, + "learning_rate": 1.1215039161782251e-05, + "logits/chosen": -2.303619861602783, + "logits/rejected": -2.9928126335144043, + "logps/chosen": -39.22062301635742, + "logps/rejected": -126.5726318359375, + "loss": 0.8228, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7821385860443115, + "rewards/margins": 0.536500871181488, + "rewards/rejected": -3.3186395168304443, + "step": 3998 + }, + { + "epoch": 0.62, + "learning_rate": 1.1214305721251103e-05, + "logits/chosen": -1.7593073844909668, + "logits/rejected": -2.8169801235198975, + "logps/chosen": -182.5164337158203, + "logps/rejected": -182.74755859375, + "loss": 2.2249, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9671473503112793, + "rewards/margins": -0.4881531000137329, + "rewards/rejected": -3.478994369506836, + "step": 3999 + }, + { + "epoch": 0.62, + "learning_rate": 1.1213572280719955e-05, + "logits/chosen": -1.4770212173461914, + "logits/rejected": -3.2162954807281494, + "logps/chosen": -147.33792114257812, + "logps/rejected": -227.82923889160156, + "loss": 0.0587, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.705984592437744, + "rewards/margins": 3.346956253051758, + "rewards/rejected": -6.052940845489502, + "step": 4000 + }, + { + "epoch": 0.62, + "learning_rate": 1.1212838840188807e-05, + "logits/chosen": -2.5884604454040527, + "logits/rejected": -3.209595203399658, + "logps/chosen": -36.23874282836914, + "logps/rejected": -270.2354736328125, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.454446315765381, + "rewards/margins": 5.410244941711426, + "rewards/rejected": -7.864690780639648, + "step": 4001 + }, + { + "epoch": 0.62, + "learning_rate": 1.1212105399657659e-05, + "logits/chosen": -3.1574482917785645, + "logits/rejected": -3.3308045864105225, + "logps/chosen": -100.60020446777344, + "logps/rejected": -282.30908203125, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4572887420654297, + "rewards/margins": 6.567807674407959, + "rewards/rejected": -8.025096893310547, + "step": 4002 + }, + { + "epoch": 0.62, + "learning_rate": 1.121137195912651e-05, + "logits/chosen": -2.7059788703918457, + "logits/rejected": -3.0598201751708984, + "logps/chosen": -245.24227905273438, + "logps/rejected": -249.17251586914062, + "loss": 1.9461, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7629942893981934, + "rewards/margins": 0.85727858543396, + "rewards/rejected": -3.6202728748321533, + "step": 4003 + }, + { + "epoch": 0.62, + "learning_rate": 1.1210638518595364e-05, + "logits/chosen": -1.0995677709579468, + "logits/rejected": -2.753610610961914, + "logps/chosen": -193.53192138671875, + "logps/rejected": -520.2669677734375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5057930946350098, + "rewards/margins": 7.738438606262207, + "rewards/rejected": -9.244231224060059, + "step": 4004 + }, + { + "epoch": 0.62, + "learning_rate": 1.1209905078064216e-05, + "logits/chosen": -3.2773420810699463, + "logits/rejected": -2.800398826599121, + "logps/chosen": -280.4759521484375, + "logps/rejected": -292.9292297363281, + "loss": 1.5599, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.353447437286377, + "rewards/margins": 2.841423988342285, + "rewards/rejected": -6.194871425628662, + "step": 4005 + }, + { + "epoch": 0.62, + "learning_rate": 1.1209171637533068e-05, + "logits/chosen": -3.1027886867523193, + "logits/rejected": -3.1567306518554688, + "logps/chosen": -403.37841796875, + "logps/rejected": -369.5496826171875, + "loss": 0.1056, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.99083411693573, + "rewards/margins": 2.553553819656372, + "rewards/rejected": -3.5443878173828125, + "step": 4006 + }, + { + "epoch": 0.62, + "learning_rate": 1.120843819700192e-05, + "logits/chosen": -2.9773223400115967, + "logits/rejected": -2.515986442565918, + "logps/chosen": -355.4434509277344, + "logps/rejected": -272.8609924316406, + "loss": 0.0592, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5834171772003174, + "rewards/margins": 2.807678699493408, + "rewards/rejected": -4.3910956382751465, + "step": 4007 + }, + { + "epoch": 0.62, + "learning_rate": 1.1207704756470772e-05, + "logits/chosen": -3.2379281520843506, + "logits/rejected": -3.177307605743408, + "logps/chosen": -449.90167236328125, + "logps/rejected": -520.9446411132812, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9250202178955078, + "rewards/margins": 5.778059005737305, + "rewards/rejected": -7.7030792236328125, + "step": 4008 + }, + { + "epoch": 0.62, + "learning_rate": 1.1206971315939624e-05, + "logits/chosen": -2.1314408779144287, + "logits/rejected": -2.846529245376587, + "logps/chosen": -243.2570343017578, + "logps/rejected": -452.29156494140625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18481940031051636, + "rewards/margins": 7.831320285797119, + "rewards/rejected": -8.01613998413086, + "step": 4009 + }, + { + "epoch": 0.62, + "learning_rate": 1.1206237875408476e-05, + "logits/chosen": -3.0317952632904053, + "logits/rejected": -1.5839660167694092, + "logps/chosen": -265.8885498046875, + "logps/rejected": -196.6639404296875, + "loss": 2.0041, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4199981689453125, + "rewards/margins": 0.9947264194488525, + "rewards/rejected": -4.414724826812744, + "step": 4010 + }, + { + "epoch": 0.62, + "learning_rate": 1.1205504434877327e-05, + "logits/chosen": -3.029707670211792, + "logits/rejected": -2.734243392944336, + "logps/chosen": -450.0087890625, + "logps/rejected": -361.18292236328125, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.885215401649475, + "rewards/margins": 3.9104108810424805, + "rewards/rejected": -5.795626163482666, + "step": 4011 + }, + { + "epoch": 0.62, + "learning_rate": 1.120477099434618e-05, + "logits/chosen": -2.917008876800537, + "logits/rejected": -2.9813315868377686, + "logps/chosen": -134.2371368408203, + "logps/rejected": -364.9664306640625, + "loss": 1.1781, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6327555179595947, + "rewards/margins": 2.278341293334961, + "rewards/rejected": -4.911097049713135, + "step": 4012 + }, + { + "epoch": 0.62, + "learning_rate": 1.1204037553815033e-05, + "logits/chosen": -2.776261806488037, + "logits/rejected": -3.1518166065216064, + "logps/chosen": -440.2326354980469, + "logps/rejected": -400.3595886230469, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.582355499267578, + "rewards/margins": 5.898653984069824, + "rewards/rejected": -8.481009483337402, + "step": 4013 + }, + { + "epoch": 0.62, + "learning_rate": 1.1203304113283885e-05, + "logits/chosen": -3.0842158794403076, + "logits/rejected": -3.0041418075561523, + "logps/chosen": -347.41571044921875, + "logps/rejected": -323.20233154296875, + "loss": 0.6185, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.896297454833984, + "rewards/margins": 1.2103383541107178, + "rewards/rejected": -6.106635570526123, + "step": 4014 + }, + { + "epoch": 0.62, + "learning_rate": 1.1202570672752737e-05, + "logits/chosen": -2.6124939918518066, + "logits/rejected": -3.0509252548217773, + "logps/chosen": -87.58846282958984, + "logps/rejected": -220.71603393554688, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.312530517578125, + "rewards/margins": 5.997757911682129, + "rewards/rejected": -8.310288429260254, + "step": 4015 + }, + { + "epoch": 0.62, + "learning_rate": 1.1201837232221588e-05, + "logits/chosen": -1.852007508277893, + "logits/rejected": -3.068387031555176, + "logps/chosen": -181.25901794433594, + "logps/rejected": -340.6169128417969, + "loss": 2.374, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8347890377044678, + "rewards/margins": -0.3299565315246582, + "rewards/rejected": -3.5048325061798096, + "step": 4016 + }, + { + "epoch": 0.62, + "learning_rate": 1.120110379169044e-05, + "logits/chosen": -3.0730888843536377, + "logits/rejected": -3.1092941761016846, + "logps/chosen": -153.72903442382812, + "logps/rejected": -231.1923828125, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.382162094116211, + "rewards/margins": 5.268305778503418, + "rewards/rejected": -6.650467872619629, + "step": 4017 + }, + { + "epoch": 0.62, + "learning_rate": 1.1200370351159292e-05, + "logits/chosen": -2.659616470336914, + "logits/rejected": -2.9109585285186768, + "logps/chosen": -156.373291015625, + "logps/rejected": -251.8431854248047, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1044583320617676, + "rewards/margins": 6.579988956451416, + "rewards/rejected": -8.684447288513184, + "step": 4018 + }, + { + "epoch": 0.63, + "learning_rate": 1.1199636910628144e-05, + "logits/chosen": -3.2381985187530518, + "logits/rejected": -2.808887243270874, + "logps/chosen": -133.68548583984375, + "logps/rejected": -174.19247436523438, + "loss": 1.1786, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.004537582397461, + "rewards/margins": 2.6885507106781006, + "rewards/rejected": -5.693088054656982, + "step": 4019 + }, + { + "epoch": 0.63, + "learning_rate": 1.1198903470096996e-05, + "logits/chosen": -2.6794233322143555, + "logits/rejected": -3.136352300643921, + "logps/chosen": -99.62014770507812, + "logps/rejected": -107.67477416992188, + "loss": 2.128, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.045226097106934, + "rewards/margins": 0.4594156742095947, + "rewards/rejected": -4.504642009735107, + "step": 4020 + }, + { + "epoch": 0.63, + "learning_rate": 1.1198170029565848e-05, + "logits/chosen": -3.0603036880493164, + "logits/rejected": -3.2455639839172363, + "logps/chosen": -70.1724624633789, + "logps/rejected": -288.64923095703125, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1447525024414062, + "rewards/margins": 6.616110324859619, + "rewards/rejected": -7.760863304138184, + "step": 4021 + }, + { + "epoch": 0.63, + "learning_rate": 1.1197436589034701e-05, + "logits/chosen": -2.1636455059051514, + "logits/rejected": -3.022291421890259, + "logps/chosen": -68.98784637451172, + "logps/rejected": -294.4815673828125, + "loss": 0.0338, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8700549602508545, + "rewards/margins": 4.687591552734375, + "rewards/rejected": -6.55764627456665, + "step": 4022 + }, + { + "epoch": 0.63, + "learning_rate": 1.1196703148503553e-05, + "logits/chosen": -2.048689603805542, + "logits/rejected": -3.090351104736328, + "logps/chosen": -145.94308471679688, + "logps/rejected": -471.8355407714844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3285629749298096, + "rewards/margins": 8.823994636535645, + "rewards/rejected": -11.152557373046875, + "step": 4023 + }, + { + "epoch": 0.63, + "learning_rate": 1.1195969707972405e-05, + "logits/chosen": -3.154371738433838, + "logits/rejected": -2.9459989070892334, + "logps/chosen": -522.1674194335938, + "logps/rejected": -272.7623596191406, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.690319836139679, + "rewards/margins": 7.685368537902832, + "rewards/rejected": -8.375688552856445, + "step": 4024 + }, + { + "epoch": 0.63, + "learning_rate": 1.1195236267441257e-05, + "logits/chosen": -2.1305224895477295, + "logits/rejected": -3.2042691707611084, + "logps/chosen": -182.0247039794922, + "logps/rejected": -404.6582336425781, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4469826221466064, + "rewards/margins": 7.136806488037109, + "rewards/rejected": -8.583788871765137, + "step": 4025 + }, + { + "epoch": 0.63, + "learning_rate": 1.1194502826910109e-05, + "logits/chosen": -3.2468559741973877, + "logits/rejected": -1.911537528038025, + "logps/chosen": -413.5613708496094, + "logps/rejected": -299.921142578125, + "loss": 2.0953, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.979212999343872, + "rewards/margins": 0.17667150497436523, + "rewards/rejected": -3.1558845043182373, + "step": 4026 + }, + { + "epoch": 0.63, + "learning_rate": 1.119376938637896e-05, + "logits/chosen": -2.0900304317474365, + "logits/rejected": -3.038881301879883, + "logps/chosen": -137.5762939453125, + "logps/rejected": -255.55987548828125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4399592876434326, + "rewards/margins": 6.264996528625488, + "rewards/rejected": -8.7049560546875, + "step": 4027 + }, + { + "epoch": 0.63, + "learning_rate": 1.1193035945847813e-05, + "logits/chosen": -1.600522518157959, + "logits/rejected": -2.800279140472412, + "logps/chosen": -239.26055908203125, + "logps/rejected": -467.459228515625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7537529468536377, + "rewards/margins": 8.499652862548828, + "rewards/rejected": -11.253405570983887, + "step": 4028 + }, + { + "epoch": 0.63, + "learning_rate": 1.1192302505316665e-05, + "logits/chosen": -2.8606443405151367, + "logits/rejected": -3.1283156871795654, + "logps/chosen": -72.97993469238281, + "logps/rejected": -111.697509765625, + "loss": 0.0558, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.800126075744629, + "rewards/margins": 3.9206061363220215, + "rewards/rejected": -6.72073221206665, + "step": 4029 + }, + { + "epoch": 0.63, + "learning_rate": 1.1191569064785518e-05, + "logits/chosen": -3.079789876937866, + "logits/rejected": -3.170539140701294, + "logps/chosen": -68.79203796386719, + "logps/rejected": -176.0184326171875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6010825634002686, + "rewards/margins": 6.192953586578369, + "rewards/rejected": -7.794035911560059, + "step": 4030 + }, + { + "epoch": 0.63, + "learning_rate": 1.119083562425437e-05, + "logits/chosen": -1.7434285879135132, + "logits/rejected": -2.760507822036743, + "logps/chosen": -77.06360626220703, + "logps/rejected": -337.37017822265625, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7444238662719727, + "rewards/margins": 7.347917079925537, + "rewards/rejected": -10.092340469360352, + "step": 4031 + }, + { + "epoch": 0.63, + "learning_rate": 1.1190102183723224e-05, + "logits/chosen": -2.4234721660614014, + "logits/rejected": -3.2460215091705322, + "logps/chosen": -260.8444519042969, + "logps/rejected": -326.3660583496094, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1005405187606812, + "rewards/margins": 7.946100234985352, + "rewards/rejected": -9.046640396118164, + "step": 4032 + }, + { + "epoch": 0.63, + "learning_rate": 1.1189368743192075e-05, + "logits/chosen": -1.7989575862884521, + "logits/rejected": -3.1179444789886475, + "logps/chosen": -305.8819274902344, + "logps/rejected": -538.0818481445312, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1864956617355347, + "rewards/margins": 7.469878196716309, + "rewards/rejected": -8.656373977661133, + "step": 4033 + }, + { + "epoch": 0.63, + "learning_rate": 1.1188635302660927e-05, + "logits/chosen": -2.6702075004577637, + "logits/rejected": -3.278029441833496, + "logps/chosen": -67.73837280273438, + "logps/rejected": -304.4271240234375, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7701646685600281, + "rewards/margins": 4.69503927230835, + "rewards/rejected": -5.465204238891602, + "step": 4034 + }, + { + "epoch": 0.63, + "learning_rate": 1.118790186212978e-05, + "logits/chosen": -1.7536656856536865, + "logits/rejected": -2.973041296005249, + "logps/chosen": -159.9005126953125, + "logps/rejected": -363.53656005859375, + "loss": 0.0366, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7545719146728516, + "rewards/margins": 4.936126708984375, + "rewards/rejected": -6.690698623657227, + "step": 4035 + }, + { + "epoch": 0.63, + "learning_rate": 1.1187168421598631e-05, + "logits/chosen": -2.8079636096954346, + "logits/rejected": -3.1720705032348633, + "logps/chosen": -99.6775131225586, + "logps/rejected": -258.39630126953125, + "loss": 0.0257, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5706813335418701, + "rewards/margins": 3.84623384475708, + "rewards/rejected": -5.416914939880371, + "step": 4036 + }, + { + "epoch": 0.63, + "learning_rate": 1.1186434981067483e-05, + "logits/chosen": -2.829536199569702, + "logits/rejected": -2.557030200958252, + "logps/chosen": -240.24429321289062, + "logps/rejected": -248.36361694335938, + "loss": 2.3927, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.423513412475586, + "rewards/margins": -0.9452106952667236, + "rewards/rejected": -3.478302478790283, + "step": 4037 + }, + { + "epoch": 0.63, + "learning_rate": 1.1185701540536335e-05, + "logits/chosen": -2.483424186706543, + "logits/rejected": -3.170599937438965, + "logps/chosen": -92.33770751953125, + "logps/rejected": -250.5438232421875, + "loss": 0.0497, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8919124603271484, + "rewards/margins": 5.537805557250977, + "rewards/rejected": -7.429718017578125, + "step": 4038 + }, + { + "epoch": 0.63, + "learning_rate": 1.1184968100005187e-05, + "logits/chosen": -2.8251140117645264, + "logits/rejected": -3.1648759841918945, + "logps/chosen": -111.88220977783203, + "logps/rejected": -169.90248107910156, + "loss": 2.7762, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.546597957611084, + "rewards/margins": 2.074744701385498, + "rewards/rejected": -6.621342658996582, + "step": 4039 + }, + { + "epoch": 0.63, + "learning_rate": 1.118423465947404e-05, + "logits/chosen": -0.8371939659118652, + "logits/rejected": -2.9963788986206055, + "logps/chosen": -70.85012817382812, + "logps/rejected": -513.32568359375, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9911441802978516, + "rewards/margins": 4.807201385498047, + "rewards/rejected": -7.798345565795898, + "step": 4040 + }, + { + "epoch": 0.63, + "learning_rate": 1.1183501218942892e-05, + "logits/chosen": -2.0681405067443848, + "logits/rejected": -3.2634172439575195, + "logps/chosen": -385.2271423339844, + "logps/rejected": -534.0869750976562, + "loss": 0.082, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4400253295898438, + "rewards/margins": 5.377386093139648, + "rewards/rejected": -6.817411422729492, + "step": 4041 + }, + { + "epoch": 0.63, + "learning_rate": 1.1182767778411744e-05, + "logits/chosen": -3.0005106925964355, + "logits/rejected": -2.007404327392578, + "logps/chosen": -340.8254089355469, + "logps/rejected": -236.27029418945312, + "loss": 3.1802, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.969641208648682, + "rewards/margins": -0.21861815452575684, + "rewards/rejected": -4.751023292541504, + "step": 4042 + }, + { + "epoch": 0.63, + "learning_rate": 1.1182034337880596e-05, + "logits/chosen": -2.9353339672088623, + "logits/rejected": -3.208353281021118, + "logps/chosen": -357.02392578125, + "logps/rejected": -340.6476135253906, + "loss": 3.1849, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.929733753204346, + "rewards/margins": 0.2870810031890869, + "rewards/rejected": -5.2168145179748535, + "step": 4043 + }, + { + "epoch": 0.63, + "learning_rate": 1.1181300897349448e-05, + "logits/chosen": -2.1177594661712646, + "logits/rejected": -3.157773733139038, + "logps/chosen": -242.4502410888672, + "logps/rejected": -368.7861633300781, + "loss": 0.0923, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8948800563812256, + "rewards/margins": 5.466108322143555, + "rewards/rejected": -8.36098861694336, + "step": 4044 + }, + { + "epoch": 0.63, + "learning_rate": 1.11805674568183e-05, + "logits/chosen": -2.8366475105285645, + "logits/rejected": -3.026637077331543, + "logps/chosen": -293.3455810546875, + "logps/rejected": -414.8797912597656, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.896664619445801, + "rewards/margins": 6.002204895019531, + "rewards/rejected": -8.898869514465332, + "step": 4045 + }, + { + "epoch": 0.63, + "learning_rate": 1.1179834016287152e-05, + "logits/chosen": -2.1066927909851074, + "logits/rejected": -3.033648729324341, + "logps/chosen": -78.25125885009766, + "logps/rejected": -341.26849365234375, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.32065486907959, + "rewards/margins": 6.457939147949219, + "rewards/rejected": -8.778594017028809, + "step": 4046 + }, + { + "epoch": 0.63, + "learning_rate": 1.1179100575756003e-05, + "logits/chosen": -3.150082588195801, + "logits/rejected": -3.2078258991241455, + "logps/chosen": -147.28721618652344, + "logps/rejected": -211.50428771972656, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4447094202041626, + "rewards/margins": 5.240509033203125, + "rewards/rejected": -6.685218811035156, + "step": 4047 + }, + { + "epoch": 0.63, + "learning_rate": 1.1178367135224855e-05, + "logits/chosen": -0.9231566786766052, + "logits/rejected": -2.605785846710205, + "logps/chosen": -48.245731353759766, + "logps/rejected": -453.75823974609375, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7759513854980469, + "rewards/margins": 7.809336185455322, + "rewards/rejected": -9.585287094116211, + "step": 4048 + }, + { + "epoch": 0.63, + "learning_rate": 1.1177633694693709e-05, + "logits/chosen": -2.6738741397857666, + "logits/rejected": -2.9112188816070557, + "logps/chosen": -48.13172149658203, + "logps/rejected": -143.388427734375, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.856844186782837, + "rewards/margins": 5.187640190124512, + "rewards/rejected": -7.0444841384887695, + "step": 4049 + }, + { + "epoch": 0.63, + "learning_rate": 1.117690025416256e-05, + "logits/chosen": -2.762709140777588, + "logits/rejected": -3.0022881031036377, + "logps/chosen": -72.65325927734375, + "logps/rejected": -131.45298767089844, + "loss": 0.086, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8269201517105103, + "rewards/margins": 4.176501750946045, + "rewards/rejected": -5.003421783447266, + "step": 4050 + }, + { + "epoch": 0.63, + "learning_rate": 1.1176166813631413e-05, + "logits/chosen": -0.8822939395904541, + "logits/rejected": -2.6777327060699463, + "logps/chosen": -69.86325073242188, + "logps/rejected": -296.5887451171875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.989749789237976, + "rewards/margins": 6.08328914642334, + "rewards/rejected": -8.073039054870605, + "step": 4051 + }, + { + "epoch": 0.63, + "learning_rate": 1.1175433373100265e-05, + "logits/chosen": -2.9659993648529053, + "logits/rejected": -3.1027259826660156, + "logps/chosen": -102.61849975585938, + "logps/rejected": -247.1663360595703, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7187583446502686, + "rewards/margins": 5.379918098449707, + "rewards/rejected": -7.098676681518555, + "step": 4052 + }, + { + "epoch": 0.63, + "learning_rate": 1.1174699932569116e-05, + "logits/chosen": -3.1241097450256348, + "logits/rejected": -3.175999879837036, + "logps/chosen": -21.923404693603516, + "logps/rejected": -243.79867553710938, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9286819696426392, + "rewards/margins": 8.706754684448242, + "rewards/rejected": -9.63543701171875, + "step": 4053 + }, + { + "epoch": 0.63, + "learning_rate": 1.1173966492037968e-05, + "logits/chosen": -2.9232139587402344, + "logits/rejected": -2.820377826690674, + "logps/chosen": -87.0300521850586, + "logps/rejected": -184.7461700439453, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7321715354919434, + "rewards/margins": 3.95033597946167, + "rewards/rejected": -5.682507514953613, + "step": 4054 + }, + { + "epoch": 0.63, + "learning_rate": 1.117323305150682e-05, + "logits/chosen": -2.9805402755737305, + "logits/rejected": -3.248781681060791, + "logps/chosen": -146.95765686035156, + "logps/rejected": -205.41224670410156, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8513126373291016, + "rewards/margins": 4.807607650756836, + "rewards/rejected": -6.6589202880859375, + "step": 4055 + }, + { + "epoch": 0.63, + "learning_rate": 1.1172499610975672e-05, + "logits/chosen": -2.9180681705474854, + "logits/rejected": -2.97012996673584, + "logps/chosen": -131.91558837890625, + "logps/rejected": -294.7469177246094, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8419291973114014, + "rewards/margins": 5.818471908569336, + "rewards/rejected": -6.660401344299316, + "step": 4056 + }, + { + "epoch": 0.63, + "learning_rate": 1.1171766170444524e-05, + "logits/chosen": -3.156437635421753, + "logits/rejected": -2.3360400199890137, + "logps/chosen": -240.9590301513672, + "logps/rejected": -221.77349853515625, + "loss": 1.4245, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9902405738830566, + "rewards/margins": 3.1583218574523926, + "rewards/rejected": -7.148562431335449, + "step": 4057 + }, + { + "epoch": 0.63, + "learning_rate": 1.1171032729913378e-05, + "logits/chosen": -2.927342176437378, + "logits/rejected": -1.8617161512374878, + "logps/chosen": -115.35099792480469, + "logps/rejected": -74.62439727783203, + "loss": 2.3985, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.977748155593872, + "rewards/margins": -0.9436618089675903, + "rewards/rejected": -3.034086227416992, + "step": 4058 + }, + { + "epoch": 0.63, + "learning_rate": 1.117029928938223e-05, + "logits/chosen": -3.0673866271972656, + "logits/rejected": -2.7391788959503174, + "logps/chosen": -361.25897216796875, + "logps/rejected": -347.8146057128906, + "loss": 3.4322, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.223740577697754, + "rewards/margins": 2.403258800506592, + "rewards/rejected": -7.626999378204346, + "step": 4059 + }, + { + "epoch": 0.63, + "learning_rate": 1.1169565848851081e-05, + "logits/chosen": -2.463503360748291, + "logits/rejected": -2.9151501655578613, + "logps/chosen": -153.6352081298828, + "logps/rejected": -407.7918395996094, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.48054394125938416, + "rewards/margins": 7.107051849365234, + "rewards/rejected": -7.587595462799072, + "step": 4060 + }, + { + "epoch": 0.63, + "learning_rate": 1.1168832408319933e-05, + "logits/chosen": -3.0163064002990723, + "logits/rejected": -2.706974983215332, + "logps/chosen": -309.7120361328125, + "logps/rejected": -355.1600341796875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0101475715637207, + "rewards/margins": 7.968199253082275, + "rewards/rejected": -9.978346824645996, + "step": 4061 + }, + { + "epoch": 0.63, + "learning_rate": 1.1168098967788785e-05, + "logits/chosen": -2.039376735687256, + "logits/rejected": -2.8622941970825195, + "logps/chosen": -131.54000854492188, + "logps/rejected": -342.6953430175781, + "loss": 0.0905, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2954111099243164, + "rewards/margins": 5.822080612182617, + "rewards/rejected": -7.117491722106934, + "step": 4062 + }, + { + "epoch": 0.63, + "learning_rate": 1.1167365527257637e-05, + "logits/chosen": -2.014416456222534, + "logits/rejected": -3.1313118934631348, + "logps/chosen": -54.89193344116211, + "logps/rejected": -240.75643920898438, + "loss": 0.0615, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7878835201263428, + "rewards/margins": 4.324451923370361, + "rewards/rejected": -6.112335205078125, + "step": 4063 + }, + { + "epoch": 0.63, + "learning_rate": 1.116663208672649e-05, + "logits/chosen": -2.839297294616699, + "logits/rejected": -2.5292375087738037, + "logps/chosen": -116.58796691894531, + "logps/rejected": -167.735107421875, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5726053714752197, + "rewards/margins": 4.620950222015381, + "rewards/rejected": -6.19355583190918, + "step": 4064 + }, + { + "epoch": 0.63, + "learning_rate": 1.1165898646195342e-05, + "logits/chosen": -2.2860913276672363, + "logits/rejected": -2.3133676052093506, + "logps/chosen": -194.75277709960938, + "logps/rejected": -402.2408447265625, + "loss": 0.0362, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8981307744979858, + "rewards/margins": 4.654393196105957, + "rewards/rejected": -6.552523612976074, + "step": 4065 + }, + { + "epoch": 0.63, + "learning_rate": 1.1165165205664194e-05, + "logits/chosen": -2.826622247695923, + "logits/rejected": -2.9995017051696777, + "logps/chosen": -199.9322509765625, + "logps/rejected": -249.6480255126953, + "loss": 0.0919, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.459688663482666, + "rewards/margins": 2.534728527069092, + "rewards/rejected": -3.9944169521331787, + "step": 4066 + }, + { + "epoch": 0.63, + "learning_rate": 1.1164431765133048e-05, + "logits/chosen": -2.734302043914795, + "logits/rejected": -3.0792057514190674, + "logps/chosen": -405.6386413574219, + "logps/rejected": -811.8402099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3155858516693115, + "rewards/margins": 10.188070297241211, + "rewards/rejected": -12.503656387329102, + "step": 4067 + }, + { + "epoch": 0.63, + "learning_rate": 1.11636983246019e-05, + "logits/chosen": -2.979311466217041, + "logits/rejected": -3.166377544403076, + "logps/chosen": -113.56778717041016, + "logps/rejected": -198.7606201171875, + "loss": 2.1369, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.067742109298706, + "rewards/margins": 0.304412841796875, + "rewards/rejected": -3.372154951095581, + "step": 4068 + }, + { + "epoch": 0.63, + "learning_rate": 1.1162964884070752e-05, + "logits/chosen": -3.0435855388641357, + "logits/rejected": -3.2255797386169434, + "logps/chosen": -23.351673126220703, + "logps/rejected": -161.5249481201172, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8154304027557373, + "rewards/margins": 6.788827896118164, + "rewards/rejected": -7.604258060455322, + "step": 4069 + }, + { + "epoch": 0.63, + "learning_rate": 1.1162231443539603e-05, + "logits/chosen": -0.8100531697273254, + "logits/rejected": -2.396191120147705, + "logps/chosen": -144.5531005859375, + "logps/rejected": -379.30706787109375, + "loss": 0.4367, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6448252201080322, + "rewards/margins": 4.36354923248291, + "rewards/rejected": -7.008374214172363, + "step": 4070 + }, + { + "epoch": 0.63, + "learning_rate": 1.1161498003008455e-05, + "logits/chosen": -3.0583372116088867, + "logits/rejected": -3.069491147994995, + "logps/chosen": -360.0066833496094, + "logps/rejected": -140.30328369140625, + "loss": 5.7518, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.974090576171875, + "rewards/margins": -3.123412847518921, + "rewards/rejected": -3.850677490234375, + "step": 4071 + }, + { + "epoch": 0.63, + "learning_rate": 1.1160764562477307e-05, + "logits/chosen": -3.1330654621124268, + "logits/rejected": -2.7393932342529297, + "logps/chosen": -113.29495239257812, + "logps/rejected": -171.46368408203125, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.356318712234497, + "rewards/margins": 4.077729225158691, + "rewards/rejected": -5.434047698974609, + "step": 4072 + }, + { + "epoch": 0.63, + "learning_rate": 1.1160031121946159e-05, + "logits/chosen": -2.292487859725952, + "logits/rejected": -3.2695817947387695, + "logps/chosen": -32.99064254760742, + "logps/rejected": -311.629638671875, + "loss": 0.2191, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1974728107452393, + "rewards/margins": 3.834047555923462, + "rewards/rejected": -6.031520366668701, + "step": 4073 + }, + { + "epoch": 0.63, + "learning_rate": 1.1159297681415011e-05, + "logits/chosen": -2.371185302734375, + "logits/rejected": -2.8514256477355957, + "logps/chosen": -94.39248657226562, + "logps/rejected": -128.7566375732422, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.975728988647461, + "rewards/margins": 4.056481838226318, + "rewards/rejected": -6.032210826873779, + "step": 4074 + }, + { + "epoch": 0.63, + "learning_rate": 1.1158564240883863e-05, + "logits/chosen": -2.3259050846099854, + "logits/rejected": -3.087785005569458, + "logps/chosen": -109.28972625732422, + "logps/rejected": -465.72906494140625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9198329448699951, + "rewards/margins": 8.645668029785156, + "rewards/rejected": -10.56550121307373, + "step": 4075 + }, + { + "epoch": 0.63, + "learning_rate": 1.1157830800352716e-05, + "logits/chosen": -2.1164276599884033, + "logits/rejected": -3.1810576915740967, + "logps/chosen": -44.06544494628906, + "logps/rejected": -546.2659912109375, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6775881052017212, + "rewards/margins": 5.107611179351807, + "rewards/rejected": -6.785199165344238, + "step": 4076 + }, + { + "epoch": 0.63, + "learning_rate": 1.1157097359821568e-05, + "logits/chosen": -2.077371120452881, + "logits/rejected": -3.131268262863159, + "logps/chosen": -155.59156799316406, + "logps/rejected": -424.7514953613281, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7507102489471436, + "rewards/margins": 5.756714820861816, + "rewards/rejected": -8.507425308227539, + "step": 4077 + }, + { + "epoch": 0.63, + "learning_rate": 1.115636391929042e-05, + "logits/chosen": -2.4011456966400146, + "logits/rejected": -2.7120392322540283, + "logps/chosen": -259.21368408203125, + "logps/rejected": -384.91790771484375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0880417823791504, + "rewards/margins": 6.262212753295898, + "rewards/rejected": -7.350255012512207, + "step": 4078 + }, + { + "epoch": 0.63, + "learning_rate": 1.1155630478759272e-05, + "logits/chosen": -2.332195520401001, + "logits/rejected": -2.9662744998931885, + "logps/chosen": -116.2685546875, + "logps/rejected": -350.35491943359375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8936203718185425, + "rewards/margins": 7.406861782073975, + "rewards/rejected": -9.300481796264648, + "step": 4079 + }, + { + "epoch": 0.63, + "learning_rate": 1.1154897038228124e-05, + "logits/chosen": -3.213646411895752, + "logits/rejected": -2.212033748626709, + "logps/chosen": -259.9530944824219, + "logps/rejected": -182.99798583984375, + "loss": 3.2083, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.268103122711182, + "rewards/margins": 0.2541632652282715, + "rewards/rejected": -4.522266864776611, + "step": 4080 + }, + { + "epoch": 0.63, + "learning_rate": 1.1154163597696976e-05, + "logits/chosen": -1.3397870063781738, + "logits/rejected": -3.22947359085083, + "logps/chosen": -152.72525024414062, + "logps/rejected": -538.399658203125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9102299213409424, + "rewards/margins": 6.5200419425964355, + "rewards/rejected": -7.430272102355957, + "step": 4081 + }, + { + "epoch": 0.63, + "learning_rate": 1.1153430157165828e-05, + "logits/chosen": -2.94262433052063, + "logits/rejected": -3.186779499053955, + "logps/chosen": -449.2366638183594, + "logps/rejected": -280.2093200683594, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1520965099334717, + "rewards/margins": 4.856274604797363, + "rewards/rejected": -6.008370876312256, + "step": 4082 + }, + { + "epoch": 0.63, + "learning_rate": 1.115269671663468e-05, + "logits/chosen": -2.9533658027648926, + "logits/rejected": -3.127408266067505, + "logps/chosen": -24.736696243286133, + "logps/rejected": -97.11138153076172, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8955081105232239, + "rewards/margins": 5.480396270751953, + "rewards/rejected": -6.375904083251953, + "step": 4083 + }, + { + "epoch": 0.64, + "learning_rate": 1.1151963276103533e-05, + "logits/chosen": -2.673630952835083, + "logits/rejected": -3.0730228424072266, + "logps/chosen": -50.07898712158203, + "logps/rejected": -322.2217712402344, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6844704747200012, + "rewards/margins": 8.259272575378418, + "rewards/rejected": -8.943743705749512, + "step": 4084 + }, + { + "epoch": 0.64, + "learning_rate": 1.1151229835572385e-05, + "logits/chosen": -2.5574042797088623, + "logits/rejected": -3.1599903106689453, + "logps/chosen": -197.11666870117188, + "logps/rejected": -232.0139617919922, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8952629566192627, + "rewards/margins": 4.475224494934082, + "rewards/rejected": -7.370487213134766, + "step": 4085 + }, + { + "epoch": 0.64, + "learning_rate": 1.1150496395041237e-05, + "logits/chosen": -3.232318639755249, + "logits/rejected": -2.5941340923309326, + "logps/chosen": -410.5166015625, + "logps/rejected": -274.15948486328125, + "loss": 3.3228, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4946296215057373, + "rewards/margins": -0.02395319938659668, + "rewards/rejected": -3.4706764221191406, + "step": 4086 + }, + { + "epoch": 0.64, + "learning_rate": 1.1149762954510089e-05, + "logits/chosen": -2.713413715362549, + "logits/rejected": -3.1003034114837646, + "logps/chosen": -124.83756256103516, + "logps/rejected": -270.7470397949219, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3351703882217407, + "rewards/margins": 5.645429611206055, + "rewards/rejected": -6.980600357055664, + "step": 4087 + }, + { + "epoch": 0.64, + "learning_rate": 1.114902951397894e-05, + "logits/chosen": -2.4132325649261475, + "logits/rejected": -3.0609688758850098, + "logps/chosen": -93.55517578125, + "logps/rejected": -366.2193908691406, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3778553009033203, + "rewards/margins": 7.948480129241943, + "rewards/rejected": -11.326335906982422, + "step": 4088 + }, + { + "epoch": 0.64, + "learning_rate": 1.1148296073447793e-05, + "logits/chosen": -2.993483543395996, + "logits/rejected": -1.5074716806411743, + "logps/chosen": -233.41519165039062, + "logps/rejected": -221.5428009033203, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6715000867843628, + "rewards/margins": 3.935270309448242, + "rewards/rejected": -4.6067705154418945, + "step": 4089 + }, + { + "epoch": 0.64, + "learning_rate": 1.1147562632916644e-05, + "logits/chosen": -0.9078343510627747, + "logits/rejected": -2.980623722076416, + "logps/chosen": -69.7061767578125, + "logps/rejected": -308.8534240722656, + "loss": 0.0822, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6612634658813477, + "rewards/margins": 3.447585105895996, + "rewards/rejected": -6.108848571777344, + "step": 4090 + }, + { + "epoch": 0.64, + "learning_rate": 1.1146829192385496e-05, + "logits/chosen": -1.9843363761901855, + "logits/rejected": -3.1790294647216797, + "logps/chosen": -315.6466064453125, + "logps/rejected": -401.57073974609375, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.514430284500122, + "rewards/margins": 4.095983982086182, + "rewards/rejected": -5.610414505004883, + "step": 4091 + }, + { + "epoch": 0.64, + "learning_rate": 1.1146095751854348e-05, + "logits/chosen": -3.057863235473633, + "logits/rejected": -1.7599083185195923, + "logps/chosen": -326.29425048828125, + "logps/rejected": -180.75360107421875, + "loss": 2.0303, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7463676929473877, + "rewards/margins": 0.19801068305969238, + "rewards/rejected": -2.94437837600708, + "step": 4092 + }, + { + "epoch": 0.64, + "learning_rate": 1.1145362311323202e-05, + "logits/chosen": -2.4850399494171143, + "logits/rejected": -2.8011462688446045, + "logps/chosen": -152.28773498535156, + "logps/rejected": -331.8819885253906, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.554919481277466, + "rewards/margins": 5.8650407791137695, + "rewards/rejected": -8.419960021972656, + "step": 4093 + }, + { + "epoch": 0.64, + "learning_rate": 1.1144628870792054e-05, + "logits/chosen": -2.9769904613494873, + "logits/rejected": -2.8219711780548096, + "logps/chosen": -172.00827026367188, + "logps/rejected": -302.4910888671875, + "loss": 2.0588, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.669023036956787, + "rewards/margins": 3.9145123958587646, + "rewards/rejected": -9.583535194396973, + "step": 4094 + }, + { + "epoch": 0.64, + "learning_rate": 1.1143895430260906e-05, + "logits/chosen": -1.0604645013809204, + "logits/rejected": -3.000767946243286, + "logps/chosen": -43.727115631103516, + "logps/rejected": -341.2225646972656, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6065192222595215, + "rewards/margins": 6.152468681335449, + "rewards/rejected": -7.758988380432129, + "step": 4095 + }, + { + "epoch": 0.64, + "learning_rate": 1.1143161989729757e-05, + "logits/chosen": -3.073519229888916, + "logits/rejected": -3.0980026721954346, + "logps/chosen": -333.48382568359375, + "logps/rejected": -404.04449462890625, + "loss": 0.0753, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9005165100097656, + "rewards/margins": 5.297849655151367, + "rewards/rejected": -6.198366165161133, + "step": 4096 + }, + { + "epoch": 0.64, + "learning_rate": 1.114242854919861e-05, + "logits/chosen": -3.0371546745300293, + "logits/rejected": -3.0178489685058594, + "logps/chosen": -297.7040710449219, + "logps/rejected": -280.1343078613281, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2216918468475342, + "rewards/margins": 6.817788124084473, + "rewards/rejected": -8.039480209350586, + "step": 4097 + }, + { + "epoch": 0.64, + "learning_rate": 1.1141695108667463e-05, + "logits/chosen": -2.9174249172210693, + "logits/rejected": -2.332012414932251, + "logps/chosen": -233.91146850585938, + "logps/rejected": -313.645751953125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8510277271270752, + "rewards/margins": 5.808351039886475, + "rewards/rejected": -7.659379005432129, + "step": 4098 + }, + { + "epoch": 0.64, + "learning_rate": 1.1140961668136315e-05, + "logits/chosen": -3.1627960205078125, + "logits/rejected": -2.8686769008636475, + "logps/chosen": -192.45651245117188, + "logps/rejected": -165.2989959716797, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5868568420410156, + "rewards/margins": 3.849885940551758, + "rewards/rejected": -5.436742782592773, + "step": 4099 + }, + { + "epoch": 0.64, + "learning_rate": 1.1140228227605167e-05, + "logits/chosen": -2.9454426765441895, + "logits/rejected": -3.123194456100464, + "logps/chosen": -48.8270263671875, + "logps/rejected": -351.4872741699219, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.313524842262268, + "rewards/margins": 8.68095874786377, + "rewards/rejected": -9.994483947753906, + "step": 4100 + }, + { + "epoch": 0.64, + "learning_rate": 1.1139494787074018e-05, + "logits/chosen": -3.0296595096588135, + "logits/rejected": -3.1453938484191895, + "logps/chosen": -112.68028259277344, + "logps/rejected": -218.84910583496094, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.320343494415283, + "rewards/margins": 4.985721588134766, + "rewards/rejected": -7.306065559387207, + "step": 4101 + }, + { + "epoch": 0.64, + "learning_rate": 1.1138761346542872e-05, + "logits/chosen": -3.3231258392333984, + "logits/rejected": -3.339048385620117, + "logps/chosen": -257.8311462402344, + "logps/rejected": -303.5120849609375, + "loss": 0.032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7107246518135071, + "rewards/margins": 4.597894668579102, + "rewards/rejected": -5.308619022369385, + "step": 4102 + }, + { + "epoch": 0.64, + "learning_rate": 1.1138027906011724e-05, + "logits/chosen": -2.646594285964966, + "logits/rejected": -2.771831512451172, + "logps/chosen": -83.8414306640625, + "logps/rejected": -335.0250549316406, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.215479850769043, + "rewards/margins": 9.095779418945312, + "rewards/rejected": -11.311260223388672, + "step": 4103 + }, + { + "epoch": 0.64, + "learning_rate": 1.1137294465480576e-05, + "logits/chosen": -2.9009366035461426, + "logits/rejected": -3.1411774158477783, + "logps/chosen": -23.075481414794922, + "logps/rejected": -192.42431640625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9974558353424072, + "rewards/margins": 7.031520843505859, + "rewards/rejected": -8.028976440429688, + "step": 4104 + }, + { + "epoch": 0.64, + "learning_rate": 1.1136561024949428e-05, + "logits/chosen": -2.85024094581604, + "logits/rejected": -3.1731464862823486, + "logps/chosen": -425.6995849609375, + "logps/rejected": -847.7384033203125, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1789710521698, + "rewards/margins": 4.946392059326172, + "rewards/rejected": -7.125363349914551, + "step": 4105 + }, + { + "epoch": 0.64, + "learning_rate": 1.113582758441828e-05, + "logits/chosen": -2.1586432456970215, + "logits/rejected": -2.9708192348480225, + "logps/chosen": -95.11868286132812, + "logps/rejected": -185.6549072265625, + "loss": 0.0364, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.65749454498291, + "rewards/margins": 4.0358662605285645, + "rewards/rejected": -6.693360805511475, + "step": 4106 + }, + { + "epoch": 0.64, + "learning_rate": 1.1135094143887131e-05, + "logits/chosen": -2.961216688156128, + "logits/rejected": -3.1541268825531006, + "logps/chosen": -86.29481506347656, + "logps/rejected": -278.921630859375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1452815532684326, + "rewards/margins": 6.7326340675354, + "rewards/rejected": -8.87791633605957, + "step": 4107 + }, + { + "epoch": 0.64, + "learning_rate": 1.1134360703355983e-05, + "logits/chosen": -1.4263359308242798, + "logits/rejected": -2.905449867248535, + "logps/chosen": -55.07799530029297, + "logps/rejected": -433.81719970703125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7658258676528931, + "rewards/margins": 6.703154563903809, + "rewards/rejected": -7.46898078918457, + "step": 4108 + }, + { + "epoch": 0.64, + "learning_rate": 1.1133627262824835e-05, + "logits/chosen": -3.057307481765747, + "logits/rejected": -2.9866816997528076, + "logps/chosen": -139.17630004882812, + "logps/rejected": -322.96051025390625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34967708587646484, + "rewards/margins": 6.080016613006592, + "rewards/rejected": -6.429693698883057, + "step": 4109 + }, + { + "epoch": 0.64, + "learning_rate": 1.1132893822293687e-05, + "logits/chosen": -3.056107759475708, + "logits/rejected": -2.409149408340454, + "logps/chosen": -124.79010009765625, + "logps/rejected": -221.49240112304688, + "loss": 0.0786, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5418481826782227, + "rewards/margins": 6.072686195373535, + "rewards/rejected": -8.614534378051758, + "step": 4110 + }, + { + "epoch": 0.64, + "learning_rate": 1.113216038176254e-05, + "logits/chosen": -2.0850226879119873, + "logits/rejected": -3.1376593112945557, + "logps/chosen": -343.9091796875, + "logps/rejected": -526.581787109375, + "loss": 0.0366, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4258666038513184, + "rewards/margins": 5.466826915740967, + "rewards/rejected": -7.892693519592285, + "step": 4111 + }, + { + "epoch": 0.64, + "learning_rate": 1.1131426941231393e-05, + "logits/chosen": -3.056432008743286, + "logits/rejected": -2.629051685333252, + "logps/chosen": -178.39910888671875, + "logps/rejected": -187.35195922851562, + "loss": 0.6364, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7462966442108154, + "rewards/margins": 2.1012187004089355, + "rewards/rejected": -4.847515106201172, + "step": 4112 + }, + { + "epoch": 0.64, + "learning_rate": 1.1130693500700244e-05, + "logits/chosen": -2.993722438812256, + "logits/rejected": -2.3096916675567627, + "logps/chosen": -311.54644775390625, + "logps/rejected": -318.0287170410156, + "loss": 0.1901, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2608580589294434, + "rewards/margins": 2.7591629028320312, + "rewards/rejected": -5.020021438598633, + "step": 4113 + }, + { + "epoch": 0.64, + "learning_rate": 1.1129960060169096e-05, + "logits/chosen": -2.475282907485962, + "logits/rejected": -3.088881254196167, + "logps/chosen": -81.98974609375, + "logps/rejected": -458.509765625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.300877332687378, + "rewards/margins": 7.151761054992676, + "rewards/rejected": -8.452638626098633, + "step": 4114 + }, + { + "epoch": 0.64, + "learning_rate": 1.1129226619637948e-05, + "logits/chosen": -2.585681438446045, + "logits/rejected": -2.7112367153167725, + "logps/chosen": -402.91558837890625, + "logps/rejected": -384.030517578125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.504696011543274, + "rewards/margins": 7.634123802185059, + "rewards/rejected": -9.13882064819336, + "step": 4115 + }, + { + "epoch": 0.64, + "learning_rate": 1.11284931791068e-05, + "logits/chosen": -2.937823534011841, + "logits/rejected": -2.9280447959899902, + "logps/chosen": -444.0908203125, + "logps/rejected": -503.4217529296875, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.708046793937683, + "rewards/margins": 5.658994197845459, + "rewards/rejected": -7.367041110992432, + "step": 4116 + }, + { + "epoch": 0.64, + "learning_rate": 1.1127759738575652e-05, + "logits/chosen": -2.727397918701172, + "logits/rejected": -3.131558895111084, + "logps/chosen": -68.60202026367188, + "logps/rejected": -220.58639526367188, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5151355266571045, + "rewards/margins": 6.221518516540527, + "rewards/rejected": -7.736654281616211, + "step": 4117 + }, + { + "epoch": 0.64, + "learning_rate": 1.1127026298044504e-05, + "logits/chosen": -2.984077215194702, + "logits/rejected": -2.573394298553467, + "logps/chosen": -236.13734436035156, + "logps/rejected": -198.46485900878906, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.19517982006073, + "rewards/margins": 5.589885234832764, + "rewards/rejected": -6.785065174102783, + "step": 4118 + }, + { + "epoch": 0.64, + "learning_rate": 1.1126292857513356e-05, + "logits/chosen": -1.0271862745285034, + "logits/rejected": -2.3362793922424316, + "logps/chosen": -189.1959228515625, + "logps/rejected": -571.4119873046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3011505603790283, + "rewards/margins": 8.60736083984375, + "rewards/rejected": -9.9085111618042, + "step": 4119 + }, + { + "epoch": 0.64, + "learning_rate": 1.112555941698221e-05, + "logits/chosen": -3.0535242557525635, + "logits/rejected": -3.027208089828491, + "logps/chosen": -152.32350158691406, + "logps/rejected": -110.07362365722656, + "loss": 3.0046, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.120823860168457, + "rewards/margins": -1.4116435050964355, + "rewards/rejected": -2.7091803550720215, + "step": 4120 + }, + { + "epoch": 0.64, + "learning_rate": 1.1124825976451061e-05, + "logits/chosen": -2.9838204383850098, + "logits/rejected": -2.434947967529297, + "logps/chosen": -157.67535400390625, + "logps/rejected": -283.258544921875, + "loss": 2.1192, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.517561435699463, + "rewards/margins": 3.028022289276123, + "rewards/rejected": -6.545583248138428, + "step": 4121 + }, + { + "epoch": 0.64, + "learning_rate": 1.1124092535919913e-05, + "logits/chosen": -3.0308926105499268, + "logits/rejected": -1.4231678247451782, + "logps/chosen": -500.254150390625, + "logps/rejected": -359.8621520996094, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0722179412841797, + "rewards/margins": 7.253360748291016, + "rewards/rejected": -10.325578689575195, + "step": 4122 + }, + { + "epoch": 0.64, + "learning_rate": 1.1123359095388765e-05, + "logits/chosen": -2.7510907649993896, + "logits/rejected": -3.117128610610962, + "logps/chosen": -281.1895751953125, + "logps/rejected": -292.30047607421875, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0457978248596191, + "rewards/margins": 5.918649673461914, + "rewards/rejected": -6.964447975158691, + "step": 4123 + }, + { + "epoch": 0.64, + "learning_rate": 1.1122625654857617e-05, + "logits/chosen": -2.685593843460083, + "logits/rejected": -3.199889898300171, + "logps/chosen": -898.0943603515625, + "logps/rejected": -722.0931396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8884888291358948, + "rewards/margins": 9.947555541992188, + "rewards/rejected": -10.836044311523438, + "step": 4124 + }, + { + "epoch": 0.64, + "learning_rate": 1.1121892214326469e-05, + "logits/chosen": -2.976100206375122, + "logits/rejected": -2.818556785583496, + "logps/chosen": -132.40045166015625, + "logps/rejected": -237.4778594970703, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.278163194656372, + "rewards/margins": 6.693184852600098, + "rewards/rejected": -8.97134780883789, + "step": 4125 + }, + { + "epoch": 0.64, + "learning_rate": 1.112115877379532e-05, + "logits/chosen": -2.966587543487549, + "logits/rejected": -3.062689781188965, + "logps/chosen": -401.2615661621094, + "logps/rejected": -445.6236572265625, + "loss": 0.8447, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.9688419699668884, + "rewards/margins": 2.6718273162841797, + "rewards/rejected": -3.640669345855713, + "step": 4126 + }, + { + "epoch": 0.64, + "learning_rate": 1.1120425333264172e-05, + "logits/chosen": -3.18794322013855, + "logits/rejected": -2.673656702041626, + "logps/chosen": -249.84523010253906, + "logps/rejected": -361.880615234375, + "loss": 0.937, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.599853515625, + "rewards/margins": 2.6266109943389893, + "rewards/rejected": -5.22646427154541, + "step": 4127 + }, + { + "epoch": 0.64, + "learning_rate": 1.1119691892733024e-05, + "logits/chosen": -2.7714779376983643, + "logits/rejected": -2.2442164421081543, + "logps/chosen": -297.7554016113281, + "logps/rejected": -407.8721923828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8420631885528564, + "rewards/margins": 10.433009147644043, + "rewards/rejected": -13.27507209777832, + "step": 4128 + }, + { + "epoch": 0.64, + "learning_rate": 1.1118958452201878e-05, + "logits/chosen": -2.9865880012512207, + "logits/rejected": -3.1218247413635254, + "logps/chosen": -332.9310302734375, + "logps/rejected": -455.45721435546875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.781247615814209, + "rewards/margins": 11.694478034973145, + "rewards/rejected": -13.475726127624512, + "step": 4129 + }, + { + "epoch": 0.64, + "learning_rate": 1.111822501167073e-05, + "logits/chosen": -3.0972280502319336, + "logits/rejected": -2.904047966003418, + "logps/chosen": -293.6678466796875, + "logps/rejected": -589.09912109375, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.284877061843872, + "rewards/margins": 9.171629905700684, + "rewards/rejected": -10.456506729125977, + "step": 4130 + }, + { + "epoch": 0.64, + "learning_rate": 1.1117491571139582e-05, + "logits/chosen": -2.581472873687744, + "logits/rejected": -3.2105281352996826, + "logps/chosen": -164.80661010742188, + "logps/rejected": -203.73019409179688, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2647063732147217, + "rewards/margins": 5.6855363845825195, + "rewards/rejected": -6.950242519378662, + "step": 4131 + }, + { + "epoch": 0.64, + "learning_rate": 1.1116758130608435e-05, + "logits/chosen": -2.2380831241607666, + "logits/rejected": -2.5959415435791016, + "logps/chosen": -184.76991271972656, + "logps/rejected": -297.22900390625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.482258915901184, + "rewards/margins": 6.846385955810547, + "rewards/rejected": -8.328644752502441, + "step": 4132 + }, + { + "epoch": 0.64, + "learning_rate": 1.1116024690077287e-05, + "logits/chosen": -2.8513739109039307, + "logits/rejected": -3.2493183612823486, + "logps/chosen": -66.31659698486328, + "logps/rejected": -206.34262084960938, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7762037515640259, + "rewards/margins": 4.408415794372559, + "rewards/rejected": -6.184619426727295, + "step": 4133 + }, + { + "epoch": 0.64, + "learning_rate": 1.1115291249546139e-05, + "logits/chosen": -2.762251138687134, + "logits/rejected": -2.9624640941619873, + "logps/chosen": -380.31976318359375, + "logps/rejected": -593.2093505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2962069511413574, + "rewards/margins": 11.260217666625977, + "rewards/rejected": -13.556424140930176, + "step": 4134 + }, + { + "epoch": 0.64, + "learning_rate": 1.111455780901499e-05, + "logits/chosen": -3.0889058113098145, + "logits/rejected": -2.7550840377807617, + "logps/chosen": -198.80067443847656, + "logps/rejected": -222.75033569335938, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.100271701812744, + "rewards/margins": 5.651648044586182, + "rewards/rejected": -7.751919746398926, + "step": 4135 + }, + { + "epoch": 0.64, + "learning_rate": 1.1113824368483843e-05, + "logits/chosen": -2.778160333633423, + "logits/rejected": -3.295356273651123, + "logps/chosen": -60.000144958496094, + "logps/rejected": -301.0589599609375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5133780837059021, + "rewards/margins": 7.537456512451172, + "rewards/rejected": -8.050834655761719, + "step": 4136 + }, + { + "epoch": 0.64, + "learning_rate": 1.1113090927952695e-05, + "logits/chosen": -2.1454343795776367, + "logits/rejected": -3.118497371673584, + "logps/chosen": -595.27490234375, + "logps/rejected": -432.3189392089844, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.92425537109375, + "rewards/margins": 7.962950706481934, + "rewards/rejected": -9.887205123901367, + "step": 4137 + }, + { + "epoch": 0.64, + "learning_rate": 1.1112357487421548e-05, + "logits/chosen": -3.03192138671875, + "logits/rejected": -2.5790963172912598, + "logps/chosen": -445.396728515625, + "logps/rejected": -393.07342529296875, + "loss": 0.0536, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5982155799865723, + "rewards/margins": 3.6253859996795654, + "rewards/rejected": -6.223601341247559, + "step": 4138 + }, + { + "epoch": 0.64, + "learning_rate": 1.11116240468904e-05, + "logits/chosen": -2.880627393722534, + "logits/rejected": -3.216714382171631, + "logps/chosen": -130.24867248535156, + "logps/rejected": -201.5282745361328, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3006463944911957, + "rewards/margins": 6.274728775024414, + "rewards/rejected": -6.575375080108643, + "step": 4139 + }, + { + "epoch": 0.64, + "learning_rate": 1.1110890606359252e-05, + "logits/chosen": -2.752716541290283, + "logits/rejected": -2.2791268825531006, + "logps/chosen": -1221.8636474609375, + "logps/rejected": -639.027587890625, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4229156970977783, + "rewards/margins": 5.304635047912598, + "rewards/rejected": -6.727551460266113, + "step": 4140 + }, + { + "epoch": 0.64, + "learning_rate": 1.1110157165828104e-05, + "logits/chosen": -3.1350784301757812, + "logits/rejected": -3.164781093597412, + "logps/chosen": -147.2962188720703, + "logps/rejected": -384.2980041503906, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5790951251983643, + "rewards/margins": 5.895744800567627, + "rewards/rejected": -7.47484016418457, + "step": 4141 + }, + { + "epoch": 0.64, + "learning_rate": 1.1109423725296956e-05, + "logits/chosen": -1.6985362768173218, + "logits/rejected": -3.0742251873016357, + "logps/chosen": -172.19869995117188, + "logps/rejected": -467.62969970703125, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9827768206596375, + "rewards/margins": 3.2483158111572266, + "rewards/rejected": -4.231092929840088, + "step": 4142 + }, + { + "epoch": 0.64, + "learning_rate": 1.1108690284765808e-05, + "logits/chosen": -1.8558005094528198, + "logits/rejected": -2.9969215393066406, + "logps/chosen": -168.15969848632812, + "logps/rejected": -385.38824462890625, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2099661827087402, + "rewards/margins": 5.225892066955566, + "rewards/rejected": -7.435858726501465, + "step": 4143 + }, + { + "epoch": 0.64, + "learning_rate": 1.110795684423466e-05, + "logits/chosen": -2.7714486122131348, + "logits/rejected": -3.02152156829834, + "logps/chosen": -30.925567626953125, + "logps/rejected": -150.52923583984375, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0569648742675781, + "rewards/margins": 4.891109943389893, + "rewards/rejected": -5.948074817657471, + "step": 4144 + }, + { + "epoch": 0.64, + "learning_rate": 1.1107223403703511e-05, + "logits/chosen": -2.6200037002563477, + "logits/rejected": -3.0249149799346924, + "logps/chosen": -97.1345443725586, + "logps/rejected": -196.13636779785156, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0424692630767822, + "rewards/margins": 4.386248588562012, + "rewards/rejected": -5.428717613220215, + "step": 4145 + }, + { + "epoch": 0.64, + "learning_rate": 1.1106489963172363e-05, + "logits/chosen": -2.6901097297668457, + "logits/rejected": -2.7647922039031982, + "logps/chosen": -137.50274658203125, + "logps/rejected": -198.09571838378906, + "loss": 0.0477, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.31054949760437, + "rewards/margins": 4.575357437133789, + "rewards/rejected": -6.885907173156738, + "step": 4146 + }, + { + "epoch": 0.64, + "learning_rate": 1.1105756522641217e-05, + "logits/chosen": -1.6260371208190918, + "logits/rejected": -2.6079137325286865, + "logps/chosen": -204.62823486328125, + "logps/rejected": -350.4973449707031, + "loss": 0.8414, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.325978755950928, + "rewards/margins": 3.471008539199829, + "rewards/rejected": -7.796987533569336, + "step": 4147 + }, + { + "epoch": 0.65, + "learning_rate": 1.1105023082110069e-05, + "logits/chosen": -2.926121234893799, + "logits/rejected": -3.151221513748169, + "logps/chosen": -40.96346664428711, + "logps/rejected": -148.62103271484375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5844284296035767, + "rewards/margins": 7.422703742980957, + "rewards/rejected": -9.007131576538086, + "step": 4148 + }, + { + "epoch": 0.65, + "learning_rate": 1.110428964157892e-05, + "logits/chosen": -2.1200385093688965, + "logits/rejected": -3.0127718448638916, + "logps/chosen": -271.93048095703125, + "logps/rejected": -583.0521240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5271646976470947, + "rewards/margins": 9.45029354095459, + "rewards/rejected": -10.977458000183105, + "step": 4149 + }, + { + "epoch": 0.65, + "learning_rate": 1.1103556201047772e-05, + "logits/chosen": -1.6800638437271118, + "logits/rejected": -2.8685567378997803, + "logps/chosen": -212.34640502929688, + "logps/rejected": -397.0973815917969, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2740355730056763, + "rewards/margins": 7.1997575759887695, + "rewards/rejected": -8.473793029785156, + "step": 4150 + }, + { + "epoch": 0.65, + "learning_rate": 1.1102822760516624e-05, + "logits/chosen": -2.1911494731903076, + "logits/rejected": -3.012761116027832, + "logps/chosen": -519.8065795898438, + "logps/rejected": -647.4432373046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3888230323791504, + "rewards/margins": 9.428037643432617, + "rewards/rejected": -10.816861152648926, + "step": 4151 + }, + { + "epoch": 0.65, + "learning_rate": 1.1102089319985476e-05, + "logits/chosen": -2.0529685020446777, + "logits/rejected": -2.950204849243164, + "logps/chosen": -393.28521728515625, + "logps/rejected": -797.6759033203125, + "loss": 3.9362, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.965504169464111, + "rewards/margins": 2.847194194793701, + "rewards/rejected": -7.8126983642578125, + "step": 4152 + }, + { + "epoch": 0.65, + "learning_rate": 1.1101355879454328e-05, + "logits/chosen": -2.882719039916992, + "logits/rejected": -2.7763891220092773, + "logps/chosen": -173.39785766601562, + "logps/rejected": -155.99899291992188, + "loss": 0.0769, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3625662326812744, + "rewards/margins": 3.28615140914917, + "rewards/rejected": -6.648717880249023, + "step": 4153 + }, + { + "epoch": 0.65, + "learning_rate": 1.110062243892318e-05, + "logits/chosen": -3.056041955947876, + "logits/rejected": -2.29616641998291, + "logps/chosen": -819.183349609375, + "logps/rejected": -459.1463928222656, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7104389667510986, + "rewards/margins": 6.264235973358154, + "rewards/rejected": -7.974675178527832, + "step": 4154 + }, + { + "epoch": 0.65, + "learning_rate": 1.1099888998392032e-05, + "logits/chosen": -1.055199146270752, + "logits/rejected": -2.1717276573181152, + "logps/chosen": -517.7642211914062, + "logps/rejected": -925.88427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.78934645652771, + "rewards/margins": 11.0037841796875, + "rewards/rejected": -12.793130874633789, + "step": 4155 + }, + { + "epoch": 0.65, + "learning_rate": 1.1099155557860885e-05, + "logits/chosen": -2.8622798919677734, + "logits/rejected": -2.279465913772583, + "logps/chosen": -239.13214111328125, + "logps/rejected": -171.56190490722656, + "loss": 3.595, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.233487606048584, + "rewards/margins": 0.5395524501800537, + "rewards/rejected": -6.773039817810059, + "step": 4156 + }, + { + "epoch": 0.65, + "learning_rate": 1.1098422117329737e-05, + "logits/chosen": -2.1724135875701904, + "logits/rejected": -2.833677291870117, + "logps/chosen": -146.15927124023438, + "logps/rejected": -460.7110595703125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8933732509613037, + "rewards/margins": 6.552567481994629, + "rewards/rejected": -8.445940017700195, + "step": 4157 + }, + { + "epoch": 0.65, + "learning_rate": 1.1097688676798589e-05, + "logits/chosen": -2.9376211166381836, + "logits/rejected": -2.926027297973633, + "logps/chosen": -341.081787109375, + "logps/rejected": -576.9519653320312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5930191278457642, + "rewards/margins": 9.700520515441895, + "rewards/rejected": -10.293539047241211, + "step": 4158 + }, + { + "epoch": 0.65, + "learning_rate": 1.1096955236267441e-05, + "logits/chosen": -1.7657822370529175, + "logits/rejected": -3.1218221187591553, + "logps/chosen": -204.6395263671875, + "logps/rejected": -523.984130859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0792953968048096, + "rewards/margins": 7.796404838562012, + "rewards/rejected": -9.875699996948242, + "step": 4159 + }, + { + "epoch": 0.65, + "learning_rate": 1.1096221795736293e-05, + "logits/chosen": -2.9133365154266357, + "logits/rejected": -1.4420431852340698, + "logps/chosen": -913.23095703125, + "logps/rejected": -432.49395751953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4074462950229645, + "rewards/margins": 8.0914306640625, + "rewards/rejected": -8.49887752532959, + "step": 4160 + }, + { + "epoch": 0.65, + "learning_rate": 1.1095488355205145e-05, + "logits/chosen": -2.4661152362823486, + "logits/rejected": -3.2591471672058105, + "logps/chosen": -479.66253662109375, + "logps/rejected": -716.8005981445312, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8999011516571045, + "rewards/margins": 8.151250839233398, + "rewards/rejected": -9.051152229309082, + "step": 4161 + }, + { + "epoch": 0.65, + "learning_rate": 1.1094754914673997e-05, + "logits/chosen": -2.7236907482147217, + "logits/rejected": -2.651324987411499, + "logps/chosen": -84.76188659667969, + "logps/rejected": -330.02838134765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.527489185333252, + "rewards/margins": 10.096273422241211, + "rewards/rejected": -12.623762130737305, + "step": 4162 + }, + { + "epoch": 0.65, + "learning_rate": 1.1094021474142848e-05, + "logits/chosen": -2.8462278842926025, + "logits/rejected": -3.057537794113159, + "logps/chosen": -113.41398620605469, + "logps/rejected": -194.13540649414062, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.18391752243042, + "rewards/margins": 6.4832563400268555, + "rewards/rejected": -8.667173385620117, + "step": 4163 + }, + { + "epoch": 0.65, + "learning_rate": 1.10932880336117e-05, + "logits/chosen": -2.684844493865967, + "logits/rejected": -3.2186458110809326, + "logps/chosen": -201.42864990234375, + "logps/rejected": -481.3153076171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.483389675617218, + "rewards/margins": 9.955850601196289, + "rewards/rejected": -10.439239501953125, + "step": 4164 + }, + { + "epoch": 0.65, + "learning_rate": 1.1092554593080554e-05, + "logits/chosen": -2.9054436683654785, + "logits/rejected": -2.6758553981781006, + "logps/chosen": -217.5707550048828, + "logps/rejected": -259.5384521484375, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0751701593399048, + "rewards/margins": 7.208385944366455, + "rewards/rejected": -8.28355598449707, + "step": 4165 + }, + { + "epoch": 0.65, + "learning_rate": 1.1091821152549407e-05, + "logits/chosen": -1.1987577676773071, + "logits/rejected": -2.8287487030029297, + "logps/chosen": -30.99102020263672, + "logps/rejected": -366.1635437011719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0629329681396484, + "rewards/margins": 10.464829444885254, + "rewards/rejected": -12.527762413024902, + "step": 4166 + }, + { + "epoch": 0.65, + "learning_rate": 1.109108771201826e-05, + "logits/chosen": -3.167287826538086, + "logits/rejected": -1.7829859256744385, + "logps/chosen": -237.29815673828125, + "logps/rejected": -182.29452514648438, + "loss": 1.9867, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5929927825927734, + "rewards/margins": 0.2608436346054077, + "rewards/rejected": -3.8538362979888916, + "step": 4167 + }, + { + "epoch": 0.65, + "learning_rate": 1.1090354271487111e-05, + "logits/chosen": -2.125673770904541, + "logits/rejected": -2.9546403884887695, + "logps/chosen": -113.92527770996094, + "logps/rejected": -393.90185546875, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9660451412200928, + "rewards/margins": 7.552394866943359, + "rewards/rejected": -9.518440246582031, + "step": 4168 + }, + { + "epoch": 0.65, + "learning_rate": 1.1089620830955963e-05, + "logits/chosen": -2.257664680480957, + "logits/rejected": -2.6932756900787354, + "logps/chosen": -277.9175720214844, + "logps/rejected": -518.0582275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7434868812561035, + "rewards/margins": 11.894343376159668, + "rewards/rejected": -14.63783073425293, + "step": 4169 + }, + { + "epoch": 0.65, + "learning_rate": 1.1088887390424815e-05, + "logits/chosen": -2.7661445140838623, + "logits/rejected": -2.2062339782714844, + "logps/chosen": -144.75115966796875, + "logps/rejected": -141.9287109375, + "loss": 2.5034, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.67309045791626, + "rewards/margins": 0.18708181381225586, + "rewards/rejected": -4.860171794891357, + "step": 4170 + }, + { + "epoch": 0.65, + "learning_rate": 1.1088153949893667e-05, + "logits/chosen": -2.6584150791168213, + "logits/rejected": -3.1709561347961426, + "logps/chosen": -57.29196548461914, + "logps/rejected": -324.40692138671875, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.097407341003418, + "rewards/margins": 8.02550983428955, + "rewards/rejected": -10.122917175292969, + "step": 4171 + }, + { + "epoch": 0.65, + "learning_rate": 1.1087420509362519e-05, + "logits/chosen": -2.9957971572875977, + "logits/rejected": -1.9345983266830444, + "logps/chosen": -393.5487365722656, + "logps/rejected": -232.55276489257812, + "loss": 4.245, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.096094131469727, + "rewards/margins": -0.6062054634094238, + "rewards/rejected": -4.489888668060303, + "step": 4172 + }, + { + "epoch": 0.65, + "learning_rate": 1.108668706883137e-05, + "logits/chosen": -1.9150127172470093, + "logits/rejected": -2.9591002464294434, + "logps/chosen": -23.78119659423828, + "logps/rejected": -228.58505249023438, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8137086033821106, + "rewards/margins": 7.79190731048584, + "rewards/rejected": -8.605615615844727, + "step": 4173 + }, + { + "epoch": 0.65, + "learning_rate": 1.1085953628300224e-05, + "logits/chosen": -2.6348876953125, + "logits/rejected": -3.110506296157837, + "logps/chosen": -653.4566650390625, + "logps/rejected": -526.50830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5055534839630127, + "rewards/margins": 10.086593627929688, + "rewards/rejected": -11.592145919799805, + "step": 4174 + }, + { + "epoch": 0.65, + "learning_rate": 1.1085220187769076e-05, + "logits/chosen": -2.648013114929199, + "logits/rejected": -2.851835250854492, + "logps/chosen": -193.68734741210938, + "logps/rejected": -305.5753479003906, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.399930477142334, + "rewards/margins": 6.304500579833984, + "rewards/rejected": -8.704431533813477, + "step": 4175 + }, + { + "epoch": 0.65, + "learning_rate": 1.1084486747237928e-05, + "logits/chosen": -3.1828184127807617, + "logits/rejected": -2.8654818534851074, + "logps/chosen": -628.5148315429688, + "logps/rejected": -471.64788818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6711761951446533, + "rewards/margins": 10.836637496948242, + "rewards/rejected": -12.507814407348633, + "step": 4176 + }, + { + "epoch": 0.65, + "learning_rate": 1.108375330670678e-05, + "logits/chosen": -3.137007713317871, + "logits/rejected": -1.7553259134292603, + "logps/chosen": -365.99310302734375, + "logps/rejected": -69.47713470458984, + "loss": 2.6943, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.0649943351745605, + "rewards/margins": -0.9816086292266846, + "rewards/rejected": -3.083385467529297, + "step": 4177 + }, + { + "epoch": 0.65, + "learning_rate": 1.1083019866175632e-05, + "logits/chosen": -2.652724504470825, + "logits/rejected": -2.824986696243286, + "logps/chosen": -298.8143005371094, + "logps/rejected": -355.71990966796875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2789908647537231, + "rewards/margins": 6.672213077545166, + "rewards/rejected": -7.951204299926758, + "step": 4178 + }, + { + "epoch": 0.65, + "learning_rate": 1.1082286425644484e-05, + "logits/chosen": -2.4353606700897217, + "logits/rejected": -2.810148239135742, + "logps/chosen": -111.5572509765625, + "logps/rejected": -287.7445983886719, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1224342584609985, + "rewards/margins": 7.9012675285339355, + "rewards/rejected": -9.023701667785645, + "step": 4179 + }, + { + "epoch": 0.65, + "learning_rate": 1.1081552985113335e-05, + "logits/chosen": -2.315485954284668, + "logits/rejected": -2.9297831058502197, + "logps/chosen": -202.068603515625, + "logps/rejected": -336.10693359375, + "loss": 0.0389, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.002327799797058, + "rewards/margins": 5.395285606384277, + "rewards/rejected": -6.397613048553467, + "step": 4180 + }, + { + "epoch": 0.65, + "learning_rate": 1.1080819544582187e-05, + "logits/chosen": -2.9778788089752197, + "logits/rejected": -3.205467939376831, + "logps/chosen": -188.97598266601562, + "logps/rejected": -198.63612365722656, + "loss": 3.6001, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.2690324783325195, + "rewards/margins": -1.8117449283599854, + "rewards/rejected": -4.457287311553955, + "step": 4181 + }, + { + "epoch": 0.65, + "learning_rate": 1.1080086104051041e-05, + "logits/chosen": -2.1693944931030273, + "logits/rejected": -3.187756299972534, + "logps/chosen": -175.43081665039062, + "logps/rejected": -433.02716064453125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.780869483947754, + "rewards/margins": 7.161551475524902, + "rewards/rejected": -9.942420959472656, + "step": 4182 + }, + { + "epoch": 0.65, + "learning_rate": 1.1079352663519893e-05, + "logits/chosen": -0.9008700847625732, + "logits/rejected": -2.6396749019622803, + "logps/chosen": -99.07766723632812, + "logps/rejected": -546.21044921875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1212596744298935, + "rewards/margins": 11.186899185180664, + "rewards/rejected": -11.308158874511719, + "step": 4183 + }, + { + "epoch": 0.65, + "learning_rate": 1.1078619222988745e-05, + "logits/chosen": -2.472811698913574, + "logits/rejected": -2.837989330291748, + "logps/chosen": -57.34219741821289, + "logps/rejected": -310.8668212890625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6241282224655151, + "rewards/margins": 8.895798683166504, + "rewards/rejected": -10.519927024841309, + "step": 4184 + }, + { + "epoch": 0.65, + "learning_rate": 1.1077885782457597e-05, + "logits/chosen": -0.7086736559867859, + "logits/rejected": -2.414141893386841, + "logps/chosen": -92.9720458984375, + "logps/rejected": -596.3727416992188, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.20257306098938, + "rewards/margins": 10.458821296691895, + "rewards/rejected": -12.661394119262695, + "step": 4185 + }, + { + "epoch": 0.65, + "learning_rate": 1.1077152341926448e-05, + "logits/chosen": -1.186694622039795, + "logits/rejected": -2.940997362136841, + "logps/chosen": -108.67302703857422, + "logps/rejected": -350.7149658203125, + "loss": 0.1051, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.439523458480835, + "rewards/margins": 5.313385963439941, + "rewards/rejected": -8.752908706665039, + "step": 4186 + }, + { + "epoch": 0.65, + "learning_rate": 1.10764189013953e-05, + "logits/chosen": -2.2301716804504395, + "logits/rejected": -2.9004149436950684, + "logps/chosen": -120.33228302001953, + "logps/rejected": -233.64112854003906, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7845582962036133, + "rewards/margins": 6.559035301208496, + "rewards/rejected": -9.34359359741211, + "step": 4187 + }, + { + "epoch": 0.65, + "learning_rate": 1.1075685460864152e-05, + "logits/chosen": -2.9909493923187256, + "logits/rejected": -2.9790515899658203, + "logps/chosen": -283.91473388671875, + "logps/rejected": -340.3675842285156, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9930992126464844, + "rewards/margins": 4.5515594482421875, + "rewards/rejected": -7.544658660888672, + "step": 4188 + }, + { + "epoch": 0.65, + "learning_rate": 1.1074952020333004e-05, + "logits/chosen": -1.3095782995224, + "logits/rejected": -2.876281976699829, + "logps/chosen": -186.19876098632812, + "logps/rejected": -528.82177734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1940548419952393, + "rewards/margins": 10.229217529296875, + "rewards/rejected": -11.423273086547852, + "step": 4189 + }, + { + "epoch": 0.65, + "learning_rate": 1.1074218579801856e-05, + "logits/chosen": -3.111691951751709, + "logits/rejected": -2.8787448406219482, + "logps/chosen": -487.4718322753906, + "logps/rejected": -423.5507507324219, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.366315484046936, + "rewards/margins": 6.606200695037842, + "rewards/rejected": -7.972516059875488, + "step": 4190 + }, + { + "epoch": 0.65, + "learning_rate": 1.107348513927071e-05, + "logits/chosen": -3.235685110092163, + "logits/rejected": -2.388259172439575, + "logps/chosen": -280.6181335449219, + "logps/rejected": -214.608642578125, + "loss": 5.0031, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.67095422744751, + "rewards/margins": -0.5451231002807617, + "rewards/rejected": -5.125831127166748, + "step": 4191 + }, + { + "epoch": 0.65, + "learning_rate": 1.1072751698739561e-05, + "logits/chosen": -3.081617832183838, + "logits/rejected": -3.1777114868164062, + "logps/chosen": -134.4417724609375, + "logps/rejected": -310.0171813964844, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.190658092498779, + "rewards/margins": 5.473167896270752, + "rewards/rejected": -9.663825988769531, + "step": 4192 + }, + { + "epoch": 0.65, + "learning_rate": 1.1072018258208413e-05, + "logits/chosen": -2.277564525604248, + "logits/rejected": -2.5652518272399902, + "logps/chosen": -196.8355255126953, + "logps/rejected": -308.1889343261719, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3708882331848145, + "rewards/margins": 6.799371719360352, + "rewards/rejected": -9.170260429382324, + "step": 4193 + }, + { + "epoch": 0.65, + "learning_rate": 1.1071284817677265e-05, + "logits/chosen": -1.925276517868042, + "logits/rejected": -2.96128511428833, + "logps/chosen": -158.53915405273438, + "logps/rejected": -200.19871520996094, + "loss": 0.0336, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.509859561920166, + "rewards/margins": 4.175233840942383, + "rewards/rejected": -6.685092926025391, + "step": 4194 + }, + { + "epoch": 0.65, + "learning_rate": 1.1070551377146117e-05, + "logits/chosen": -3.200331926345825, + "logits/rejected": -2.592379331588745, + "logps/chosen": -143.86273193359375, + "logps/rejected": -171.94253540039062, + "loss": 1.041, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2367594242095947, + "rewards/margins": 2.2103750705718994, + "rewards/rejected": -5.447134494781494, + "step": 4195 + }, + { + "epoch": 0.65, + "learning_rate": 1.1069817936614969e-05, + "logits/chosen": -2.843324661254883, + "logits/rejected": -2.662437915802002, + "logps/chosen": -250.110107421875, + "logps/rejected": -287.7413330078125, + "loss": 0.3652, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2157182693481445, + "rewards/margins": 2.7575249671936035, + "rewards/rejected": -5.973243236541748, + "step": 4196 + }, + { + "epoch": 0.65, + "learning_rate": 1.106908449608382e-05, + "logits/chosen": -1.5307737588882446, + "logits/rejected": -2.8497438430786133, + "logps/chosen": -90.11426544189453, + "logps/rejected": -305.15594482421875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2927939891815186, + "rewards/margins": 7.4784393310546875, + "rewards/rejected": -9.771233558654785, + "step": 4197 + }, + { + "epoch": 0.65, + "learning_rate": 1.1068351055552673e-05, + "logits/chosen": -2.8482370376586914, + "logits/rejected": -3.1075212955474854, + "logps/chosen": -348.3254089355469, + "logps/rejected": -445.1514587402344, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.264807939529419, + "rewards/margins": 7.145779132843018, + "rewards/rejected": -9.410587310791016, + "step": 4198 + }, + { + "epoch": 0.65, + "learning_rate": 1.1067617615021526e-05, + "logits/chosen": -2.4058494567871094, + "logits/rejected": -2.456132411956787, + "logps/chosen": -453.49169921875, + "logps/rejected": -455.2490234375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6830345392227173, + "rewards/margins": 7.853758811950684, + "rewards/rejected": -9.53679370880127, + "step": 4199 + }, + { + "epoch": 0.65, + "learning_rate": 1.1066884174490378e-05, + "logits/chosen": -3.0218873023986816, + "logits/rejected": -2.8827078342437744, + "logps/chosen": -609.052978515625, + "logps/rejected": -579.8812255859375, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.991558849811554, + "rewards/margins": 6.150158882141113, + "rewards/rejected": -7.141717910766602, + "step": 4200 + }, + { + "epoch": 0.65, + "learning_rate": 1.1066150733959232e-05, + "logits/chosen": -2.6904451847076416, + "logits/rejected": -2.92979097366333, + "logps/chosen": -360.0599365234375, + "logps/rejected": -438.3830871582031, + "loss": 2.3951, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.592848300933838, + "rewards/margins": 2.975830078125, + "rewards/rejected": -6.568678855895996, + "step": 4201 + }, + { + "epoch": 0.65, + "learning_rate": 1.1065417293428084e-05, + "logits/chosen": -2.6933047771453857, + "logits/rejected": -3.033987283706665, + "logps/chosen": -56.901302337646484, + "logps/rejected": -207.8798828125, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0278172492980957, + "rewards/margins": 4.692486763000488, + "rewards/rejected": -6.720303535461426, + "step": 4202 + }, + { + "epoch": 0.65, + "learning_rate": 1.1064683852896935e-05, + "logits/chosen": -0.8243155479431152, + "logits/rejected": -2.8289108276367188, + "logps/chosen": -43.41573715209961, + "logps/rejected": -399.04241943359375, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.281114101409912, + "rewards/margins": 6.543998718261719, + "rewards/rejected": -8.825113296508789, + "step": 4203 + }, + { + "epoch": 0.65, + "learning_rate": 1.1063950412365787e-05, + "logits/chosen": -3.0937459468841553, + "logits/rejected": -3.1232306957244873, + "logps/chosen": -153.5824432373047, + "logps/rejected": -114.23709106445312, + "loss": 4.5046, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.603619575500488, + "rewards/margins": -2.568491220474243, + "rewards/rejected": -3.035127878189087, + "step": 4204 + }, + { + "epoch": 0.65, + "learning_rate": 1.106321697183464e-05, + "logits/chosen": -2.9512763023376465, + "logits/rejected": -1.4914816617965698, + "logps/chosen": -293.69281005859375, + "logps/rejected": -124.12071990966797, + "loss": 0.1698, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8144981861114502, + "rewards/margins": 3.9365742206573486, + "rewards/rejected": -5.751072406768799, + "step": 4205 + }, + { + "epoch": 0.65, + "learning_rate": 1.1062483531303491e-05, + "logits/chosen": -2.2392942905426025, + "logits/rejected": -3.2003157138824463, + "logps/chosen": -54.079132080078125, + "logps/rejected": -295.221923828125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.302854061126709, + "rewards/margins": 7.813290596008301, + "rewards/rejected": -10.116144180297852, + "step": 4206 + }, + { + "epoch": 0.65, + "learning_rate": 1.1061750090772343e-05, + "logits/chosen": -2.646090030670166, + "logits/rejected": -2.8671152591705322, + "logps/chosen": -108.65888977050781, + "logps/rejected": -368.9093322753906, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.500927448272705, + "rewards/margins": 7.049388408660889, + "rewards/rejected": -9.550315856933594, + "step": 4207 + }, + { + "epoch": 0.65, + "learning_rate": 1.1061016650241195e-05, + "logits/chosen": -2.920987844467163, + "logits/rejected": -3.1900923252105713, + "logps/chosen": -420.97357177734375, + "logps/rejected": -524.8334350585938, + "loss": 5.5678, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.702647686004639, + "rewards/margins": -2.6975631713867188, + "rewards/rejected": -5.00508451461792, + "step": 4208 + }, + { + "epoch": 0.65, + "learning_rate": 1.1060283209710048e-05, + "logits/chosen": -1.7953420877456665, + "logits/rejected": -3.051431894302368, + "logps/chosen": -96.22479248046875, + "logps/rejected": -383.88861083984375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5428012609481812, + "rewards/margins": 9.298558235168457, + "rewards/rejected": -9.84135913848877, + "step": 4209 + }, + { + "epoch": 0.65, + "learning_rate": 1.10595497691789e-05, + "logits/chosen": -3.196652412414551, + "logits/rejected": -3.0282952785491943, + "logps/chosen": -312.782958984375, + "logps/rejected": -137.50799560546875, + "loss": 0.2845, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.367245674133301, + "rewards/margins": 1.4192471504211426, + "rewards/rejected": -3.7864928245544434, + "step": 4210 + }, + { + "epoch": 0.65, + "learning_rate": 1.1058816328647752e-05, + "logits/chosen": -2.5055630207061768, + "logits/rejected": -2.822558879852295, + "logps/chosen": -184.5318603515625, + "logps/rejected": -289.3075866699219, + "loss": 0.9524, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.807592868804932, + "rewards/margins": 2.922412395477295, + "rewards/rejected": -7.730005264282227, + "step": 4211 + }, + { + "epoch": 0.66, + "learning_rate": 1.1058082888116604e-05, + "logits/chosen": -2.9853413105010986, + "logits/rejected": -3.091825008392334, + "logps/chosen": -160.27191162109375, + "logps/rejected": -299.3145751953125, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0190455913543701, + "rewards/margins": 6.263607978820801, + "rewards/rejected": -7.28265380859375, + "step": 4212 + }, + { + "epoch": 0.66, + "learning_rate": 1.1057349447585456e-05, + "logits/chosen": -3.0807316303253174, + "logits/rejected": -2.403130531311035, + "logps/chosen": -398.8123779296875, + "logps/rejected": -373.12518310546875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4299750328063965, + "rewards/margins": 7.05840539932251, + "rewards/rejected": -9.488380432128906, + "step": 4213 + }, + { + "epoch": 0.66, + "learning_rate": 1.1056616007054308e-05, + "logits/chosen": -2.7628285884857178, + "logits/rejected": -3.0874826908111572, + "logps/chosen": -162.34698486328125, + "logps/rejected": -269.7391662597656, + "loss": 0.0465, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6193597316741943, + "rewards/margins": 4.614448070526123, + "rewards/rejected": -7.233807563781738, + "step": 4214 + }, + { + "epoch": 0.66, + "learning_rate": 1.105588256652316e-05, + "logits/chosen": -2.7577414512634277, + "logits/rejected": -2.9141228199005127, + "logps/chosen": -103.38088989257812, + "logps/rejected": -425.8796691894531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1651368141174316, + "rewards/margins": 9.885515213012695, + "rewards/rejected": -11.050651550292969, + "step": 4215 + }, + { + "epoch": 0.66, + "learning_rate": 1.1055149125992012e-05, + "logits/chosen": -1.147507667541504, + "logits/rejected": -2.497694730758667, + "logps/chosen": -139.58290100097656, + "logps/rejected": -346.54583740234375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3927757740020752, + "rewards/margins": 6.354872703552246, + "rewards/rejected": -7.747648239135742, + "step": 4216 + }, + { + "epoch": 0.66, + "learning_rate": 1.1054415685460863e-05, + "logits/chosen": -2.44960355758667, + "logits/rejected": -3.0512804985046387, + "logps/chosen": -91.53392028808594, + "logps/rejected": -155.46624755859375, + "loss": 1.9125, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5253849029541016, + "rewards/margins": -0.12411892414093018, + "rewards/rejected": -3.401265859603882, + "step": 4217 + }, + { + "epoch": 0.66, + "learning_rate": 1.1053682244929717e-05, + "logits/chosen": -3.1745853424072266, + "logits/rejected": -2.871520519256592, + "logps/chosen": -384.8074035644531, + "logps/rejected": -289.751220703125, + "loss": 4.8343, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.433815002441406, + "rewards/margins": -0.38645124435424805, + "rewards/rejected": -4.04736328125, + "step": 4218 + }, + { + "epoch": 0.66, + "learning_rate": 1.1052948804398569e-05, + "logits/chosen": -3.036060094833374, + "logits/rejected": -2.6181812286376953, + "logps/chosen": -448.6377868652344, + "logps/rejected": -273.486328125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8104568719863892, + "rewards/margins": 7.837428092956543, + "rewards/rejected": -7.026970863342285, + "step": 4219 + }, + { + "epoch": 0.66, + "learning_rate": 1.105221536386742e-05, + "logits/chosen": -3.0296475887298584, + "logits/rejected": -2.697086811065674, + "logps/chosen": -111.64510345458984, + "logps/rejected": -94.71611022949219, + "loss": 3.3394, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.038362503051758, + "rewards/margins": -0.8176867961883545, + "rewards/rejected": -3.2206757068634033, + "step": 4220 + }, + { + "epoch": 0.66, + "learning_rate": 1.1051481923336273e-05, + "logits/chosen": -1.7422116994857788, + "logits/rejected": -3.0688650608062744, + "logps/chosen": -274.3197326660156, + "logps/rejected": -320.8229675292969, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8446426391601562, + "rewards/margins": 6.688714027404785, + "rewards/rejected": -5.844071388244629, + "step": 4221 + }, + { + "epoch": 0.66, + "learning_rate": 1.1050748482805125e-05, + "logits/chosen": -2.3412911891937256, + "logits/rejected": -3.036499261856079, + "logps/chosen": -390.0335693359375, + "logps/rejected": -496.76275634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45010989904403687, + "rewards/margins": 9.79906940460205, + "rewards/rejected": -10.24917984008789, + "step": 4222 + }, + { + "epoch": 0.66, + "learning_rate": 1.1050015042273976e-05, + "logits/chosen": -3.0764567852020264, + "logits/rejected": -2.789139986038208, + "logps/chosen": -165.56045532226562, + "logps/rejected": -279.9818115234375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0442672967910767, + "rewards/margins": 5.310527801513672, + "rewards/rejected": -6.354795455932617, + "step": 4223 + }, + { + "epoch": 0.66, + "learning_rate": 1.1049281601742828e-05, + "logits/chosen": -1.8979297876358032, + "logits/rejected": -3.0070815086364746, + "logps/chosen": -497.6363525390625, + "logps/rejected": -603.4356079101562, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.134516716003418, + "rewards/margins": 7.83877420425415, + "rewards/rejected": -9.973291397094727, + "step": 4224 + }, + { + "epoch": 0.66, + "learning_rate": 1.104854816121168e-05, + "logits/chosen": -3.1208608150482178, + "logits/rejected": -1.9272962808609009, + "logps/chosen": -367.22314453125, + "logps/rejected": -328.7843322753906, + "loss": 2.0876, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.935960292816162, + "rewards/margins": 0.5997262001037598, + "rewards/rejected": -3.535686492919922, + "step": 4225 + }, + { + "epoch": 0.66, + "learning_rate": 1.1047814720680532e-05, + "logits/chosen": -3.035233736038208, + "logits/rejected": -2.9616050720214844, + "logps/chosen": -164.566162109375, + "logps/rejected": -228.87982177734375, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5840446949005127, + "rewards/margins": 4.794046401977539, + "rewards/rejected": -5.378091335296631, + "step": 4226 + }, + { + "epoch": 0.66, + "learning_rate": 1.1047081280149386e-05, + "logits/chosen": -2.532320737838745, + "logits/rejected": -3.1182472705841064, + "logps/chosen": -402.898193359375, + "logps/rejected": -536.3118286132812, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7338972091674805, + "rewards/margins": 8.04289436340332, + "rewards/rejected": -8.7767915725708, + "step": 4227 + }, + { + "epoch": 0.66, + "learning_rate": 1.1046347839618238e-05, + "logits/chosen": -2.702244281768799, + "logits/rejected": -3.1822824478149414, + "logps/chosen": -87.53376770019531, + "logps/rejected": -226.0324249267578, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8465934991836548, + "rewards/margins": 5.325037002563477, + "rewards/rejected": -7.171630859375, + "step": 4228 + }, + { + "epoch": 0.66, + "learning_rate": 1.104561439908709e-05, + "logits/chosen": -3.088667154312134, + "logits/rejected": -2.0886428356170654, + "logps/chosen": -245.40936279296875, + "logps/rejected": -252.97195434570312, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.262089729309082, + "rewards/margins": 4.313736438751221, + "rewards/rejected": -6.575826168060303, + "step": 4229 + }, + { + "epoch": 0.66, + "learning_rate": 1.1044880958555941e-05, + "logits/chosen": -3.2181496620178223, + "logits/rejected": -2.9397974014282227, + "logps/chosen": -283.2481689453125, + "logps/rejected": -63.91246795654297, + "loss": 3.0779, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.277871131896973, + "rewards/margins": -2.9475622177124023, + "rewards/rejected": -2.330308437347412, + "step": 4230 + }, + { + "epoch": 0.66, + "learning_rate": 1.1044147518024793e-05, + "logits/chosen": -1.4551315307617188, + "logits/rejected": -2.7275822162628174, + "logps/chosen": -91.72158813476562, + "logps/rejected": -261.5244140625, + "loss": 0.1023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.682612895965576, + "rewards/margins": 3.5323333740234375, + "rewards/rejected": -6.2149457931518555, + "step": 4231 + }, + { + "epoch": 0.66, + "learning_rate": 1.1043414077493645e-05, + "logits/chosen": -2.6231062412261963, + "logits/rejected": -2.918945074081421, + "logps/chosen": -129.46214294433594, + "logps/rejected": -193.61944580078125, + "loss": 0.1574, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3793115615844727, + "rewards/margins": 4.603939533233643, + "rewards/rejected": -6.983251094818115, + "step": 4232 + }, + { + "epoch": 0.66, + "learning_rate": 1.1042680636962499e-05, + "logits/chosen": -1.333919644355774, + "logits/rejected": -2.810746192932129, + "logps/chosen": -94.38615417480469, + "logps/rejected": -307.16864013671875, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026658248156309128, + "rewards/margins": 6.078124046325684, + "rewards/rejected": -6.1047821044921875, + "step": 4233 + }, + { + "epoch": 0.66, + "learning_rate": 1.104194719643135e-05, + "logits/chosen": -3.0152781009674072, + "logits/rejected": -2.093472957611084, + "logps/chosen": -283.1324157714844, + "logps/rejected": -245.8073272705078, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.372109979391098, + "rewards/margins": 7.302896499633789, + "rewards/rejected": -6.930786609649658, + "step": 4234 + }, + { + "epoch": 0.66, + "learning_rate": 1.1041213755900202e-05, + "logits/chosen": -3.1249654293060303, + "logits/rejected": -3.086459159851074, + "logps/chosen": -266.1192932128906, + "logps/rejected": -284.8359375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5107994079589844, + "rewards/margins": 7.113633155822754, + "rewards/rejected": -8.624432563781738, + "step": 4235 + }, + { + "epoch": 0.66, + "learning_rate": 1.1040480315369056e-05, + "logits/chosen": -2.862034320831299, + "logits/rejected": -3.068626642227173, + "logps/chosen": -199.12782287597656, + "logps/rejected": -327.2584533691406, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5671730041503906, + "rewards/margins": 5.402901649475098, + "rewards/rejected": -6.970074653625488, + "step": 4236 + }, + { + "epoch": 0.66, + "learning_rate": 1.1039746874837908e-05, + "logits/chosen": -3.150148630142212, + "logits/rejected": -2.435821294784546, + "logps/chosen": -192.11376953125, + "logps/rejected": -106.76861572265625, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.204201579093933, + "rewards/margins": 3.9519383907318115, + "rewards/rejected": -5.156139850616455, + "step": 4237 + }, + { + "epoch": 0.66, + "learning_rate": 1.103901343430676e-05, + "logits/chosen": -1.3252869844436646, + "logits/rejected": -2.6186678409576416, + "logps/chosen": -159.95758056640625, + "logps/rejected": -543.8028564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.240106225013733, + "rewards/margins": 11.76669692993164, + "rewards/rejected": -13.006802558898926, + "step": 4238 + }, + { + "epoch": 0.66, + "learning_rate": 1.1038279993775612e-05, + "logits/chosen": -2.957080364227295, + "logits/rejected": -3.127183198928833, + "logps/chosen": -100.82657623291016, + "logps/rejected": -260.7392578125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4463344812393188, + "rewards/margins": 8.846672058105469, + "rewards/rejected": -10.293006896972656, + "step": 4239 + }, + { + "epoch": 0.66, + "learning_rate": 1.1037546553244463e-05, + "logits/chosen": -2.997150421142578, + "logits/rejected": -2.619368076324463, + "logps/chosen": -99.02787780761719, + "logps/rejected": -197.31600952148438, + "loss": 0.0851, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2098777294158936, + "rewards/margins": 3.156714677810669, + "rewards/rejected": -5.3665924072265625, + "step": 4240 + }, + { + "epoch": 0.66, + "learning_rate": 1.1036813112713315e-05, + "logits/chosen": -2.5845184326171875, + "logits/rejected": -3.281731605529785, + "logps/chosen": -51.04802703857422, + "logps/rejected": -163.23919677734375, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7138985395431519, + "rewards/margins": 4.815188884735107, + "rewards/rejected": -5.529087543487549, + "step": 4241 + }, + { + "epoch": 0.66, + "learning_rate": 1.1036079672182167e-05, + "logits/chosen": -1.1805795431137085, + "logits/rejected": -2.643742322921753, + "logps/chosen": -189.65553283691406, + "logps/rejected": -540.4928588867188, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.641046941280365, + "rewards/margins": 5.903809547424316, + "rewards/rejected": -6.544856071472168, + "step": 4242 + }, + { + "epoch": 0.66, + "learning_rate": 1.1035346231651019e-05, + "logits/chosen": -1.929336667060852, + "logits/rejected": -3.0631637573242188, + "logps/chosen": -120.36276245117188, + "logps/rejected": -268.7259216308594, + "loss": 3.9204, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.124753952026367, + "rewards/margins": -1.4699201583862305, + "rewards/rejected": -3.6548337936401367, + "step": 4243 + }, + { + "epoch": 0.66, + "learning_rate": 1.1034612791119871e-05, + "logits/chosen": -2.882336378097534, + "logits/rejected": -3.093458414077759, + "logps/chosen": -279.9079284667969, + "logps/rejected": -361.55078125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2847626209259033, + "rewards/margins": 6.435791969299316, + "rewards/rejected": -7.720554351806641, + "step": 4244 + }, + { + "epoch": 0.66, + "learning_rate": 1.1033879350588725e-05, + "logits/chosen": -2.6999621391296387, + "logits/rejected": -2.8028218746185303, + "logps/chosen": -165.1058807373047, + "logps/rejected": -188.24118041992188, + "loss": 0.8378, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0289618968963623, + "rewards/margins": 2.124495029449463, + "rewards/rejected": -5.153457164764404, + "step": 4245 + }, + { + "epoch": 0.66, + "learning_rate": 1.1033145910057576e-05, + "logits/chosen": -1.1898139715194702, + "logits/rejected": -3.179807662963867, + "logps/chosen": -106.79314422607422, + "logps/rejected": -624.5617065429688, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.807589530944824, + "rewards/margins": 5.712271690368652, + "rewards/rejected": -8.519861221313477, + "step": 4246 + }, + { + "epoch": 0.66, + "learning_rate": 1.1032412469526428e-05, + "logits/chosen": -2.6633896827697754, + "logits/rejected": -3.210848808288574, + "logps/chosen": -115.7557144165039, + "logps/rejected": -204.03782653808594, + "loss": 2.9567, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.451314449310303, + "rewards/margins": 0.13816332817077637, + "rewards/rejected": -4.5894775390625, + "step": 4247 + }, + { + "epoch": 0.66, + "learning_rate": 1.103167902899528e-05, + "logits/chosen": -2.8489906787872314, + "logits/rejected": -2.832409620285034, + "logps/chosen": -105.10716247558594, + "logps/rejected": -289.72503662109375, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.185084104537964, + "rewards/margins": 5.584949016571045, + "rewards/rejected": -7.77003288269043, + "step": 4248 + }, + { + "epoch": 0.66, + "learning_rate": 1.1030945588464132e-05, + "logits/chosen": -3.1760802268981934, + "logits/rejected": -3.0650641918182373, + "logps/chosen": -191.8540496826172, + "logps/rejected": -152.052490234375, + "loss": 2.0656, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6411209106445312, + "rewards/margins": 0.528425931930542, + "rewards/rejected": -4.169546604156494, + "step": 4249 + }, + { + "epoch": 0.66, + "learning_rate": 1.1030212147932984e-05, + "logits/chosen": -2.6868839263916016, + "logits/rejected": -3.3326849937438965, + "logps/chosen": -77.0684814453125, + "logps/rejected": -252.38743591308594, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.315845251083374, + "rewards/margins": 5.179192066192627, + "rewards/rejected": -7.495037078857422, + "step": 4250 + }, + { + "epoch": 0.66, + "learning_rate": 1.1029478707401836e-05, + "logits/chosen": -2.866506814956665, + "logits/rejected": -3.1771490573883057, + "logps/chosen": -109.99076843261719, + "logps/rejected": -255.78628540039062, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.755648136138916, + "rewards/margins": 4.494772434234619, + "rewards/rejected": -6.250420570373535, + "step": 4251 + }, + { + "epoch": 0.66, + "learning_rate": 1.1028745266870688e-05, + "logits/chosen": -2.8461146354675293, + "logits/rejected": -3.102804183959961, + "logps/chosen": -385.66668701171875, + "logps/rejected": -579.5885009765625, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3286199569702148, + "rewards/margins": 5.18812370300293, + "rewards/rejected": -6.5167436599731445, + "step": 4252 + }, + { + "epoch": 0.66, + "learning_rate": 1.102801182633954e-05, + "logits/chosen": -2.410795211791992, + "logits/rejected": -2.8235435485839844, + "logps/chosen": -256.54107666015625, + "logps/rejected": -218.5579071044922, + "loss": 2.0373, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.0161213874816895, + "rewards/margins": 2.953484058380127, + "rewards/rejected": -6.969605445861816, + "step": 4253 + }, + { + "epoch": 0.66, + "learning_rate": 1.1027278385808393e-05, + "logits/chosen": -1.8011534214019775, + "logits/rejected": -2.8881711959838867, + "logps/chosen": -250.06130981445312, + "logps/rejected": -410.8465881347656, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5642136335372925, + "rewards/margins": 5.9180588722229, + "rewards/rejected": -7.482272148132324, + "step": 4254 + }, + { + "epoch": 0.66, + "learning_rate": 1.1026544945277245e-05, + "logits/chosen": -2.9222378730773926, + "logits/rejected": -2.772320508956909, + "logps/chosen": -163.89053344726562, + "logps/rejected": -301.8590087890625, + "loss": 2.1178, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.259999752044678, + "rewards/margins": 2.5997214317321777, + "rewards/rejected": -6.859721660614014, + "step": 4255 + }, + { + "epoch": 0.66, + "learning_rate": 1.1025811504746097e-05, + "logits/chosen": -3.036466598510742, + "logits/rejected": -2.4206552505493164, + "logps/chosen": -460.81329345703125, + "logps/rejected": -453.8385925292969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11540487408638, + "rewards/margins": 9.495965957641602, + "rewards/rejected": -9.611371040344238, + "step": 4256 + }, + { + "epoch": 0.66, + "learning_rate": 1.1025078064214949e-05, + "logits/chosen": -2.509601593017578, + "logits/rejected": -3.0498287677764893, + "logps/chosen": -427.8448181152344, + "logps/rejected": -719.1121215820312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3203877210617065, + "rewards/margins": 8.887361526489258, + "rewards/rejected": -10.207748413085938, + "step": 4257 + }, + { + "epoch": 0.66, + "learning_rate": 1.10243446236838e-05, + "logits/chosen": -1.8742021322250366, + "logits/rejected": -2.8742661476135254, + "logps/chosen": -186.55955505371094, + "logps/rejected": -313.3579406738281, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.620186448097229, + "rewards/margins": 6.1515350341796875, + "rewards/rejected": -6.771721363067627, + "step": 4258 + }, + { + "epoch": 0.66, + "learning_rate": 1.1023611183152653e-05, + "logits/chosen": -3.0466246604919434, + "logits/rejected": -2.9940853118896484, + "logps/chosen": -171.12757873535156, + "logps/rejected": -255.20619201660156, + "loss": 2.7956, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.186836242675781, + "rewards/margins": -1.5799548625946045, + "rewards/rejected": -4.606881141662598, + "step": 4259 + }, + { + "epoch": 0.66, + "learning_rate": 1.1022877742621504e-05, + "logits/chosen": -3.1972615718841553, + "logits/rejected": -2.7943050861358643, + "logps/chosen": -565.8250732421875, + "logps/rejected": -354.4587707519531, + "loss": 1.8211, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5991761684417725, + "rewards/margins": -0.014162898063659668, + "rewards/rejected": -3.5850133895874023, + "step": 4260 + }, + { + "epoch": 0.66, + "learning_rate": 1.1022144302090356e-05, + "logits/chosen": -1.5607562065124512, + "logits/rejected": -3.020153522491455, + "logps/chosen": -149.59996032714844, + "logps/rejected": -322.3540344238281, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6677595376968384, + "rewards/margins": 4.302764892578125, + "rewards/rejected": -5.970524787902832, + "step": 4261 + }, + { + "epoch": 0.66, + "learning_rate": 1.1021410861559208e-05, + "logits/chosen": -2.597210168838501, + "logits/rejected": -3.1022472381591797, + "logps/chosen": -279.35467529296875, + "logps/rejected": -332.051513671875, + "loss": 2.9128, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.604093551635742, + "rewards/margins": -1.087367296218872, + "rewards/rejected": -3.516726016998291, + "step": 4262 + }, + { + "epoch": 0.66, + "learning_rate": 1.1020677421028062e-05, + "logits/chosen": -2.9842402935028076, + "logits/rejected": -2.4378066062927246, + "logps/chosen": -469.53564453125, + "logps/rejected": -428.5281982421875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.974012017250061, + "rewards/margins": 7.701502799987793, + "rewards/rejected": -8.675515174865723, + "step": 4263 + }, + { + "epoch": 0.66, + "learning_rate": 1.1019943980496914e-05, + "logits/chosen": -3.0524353981018066, + "logits/rejected": -2.497011661529541, + "logps/chosen": -351.1307678222656, + "logps/rejected": -528.7947387695312, + "loss": 2.1831, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.180301666259766, + "rewards/margins": 3.5869882106781006, + "rewards/rejected": -9.767290115356445, + "step": 4264 + }, + { + "epoch": 0.66, + "learning_rate": 1.1019210539965765e-05, + "logits/chosen": -2.8084051609039307, + "logits/rejected": -2.924891233444214, + "logps/chosen": -45.84531784057617, + "logps/rejected": -136.72267150878906, + "loss": 0.0391, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7563941478729248, + "rewards/margins": 3.3360066413879395, + "rewards/rejected": -5.092400550842285, + "step": 4265 + }, + { + "epoch": 0.66, + "learning_rate": 1.1018477099434617e-05, + "logits/chosen": -2.823523998260498, + "logits/rejected": -1.4912346601486206, + "logps/chosen": -180.16282653808594, + "logps/rejected": -200.20970153808594, + "loss": 0.4233, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.441664218902588, + "rewards/margins": 4.973692417144775, + "rewards/rejected": -8.415356636047363, + "step": 4266 + }, + { + "epoch": 0.66, + "learning_rate": 1.1017743658903471e-05, + "logits/chosen": -2.249586820602417, + "logits/rejected": -3.079519748687744, + "logps/chosen": -258.9798278808594, + "logps/rejected": -217.15298461914062, + "loss": 3.6479, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.4334716796875, + "rewards/margins": -3.542848587036133, + "rewards/rejected": -1.8906230926513672, + "step": 4267 + }, + { + "epoch": 0.66, + "learning_rate": 1.1017010218372323e-05, + "logits/chosen": -3.073275566101074, + "logits/rejected": -3.120386838912964, + "logps/chosen": -46.17230987548828, + "logps/rejected": -152.23489379882812, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6549005508422852, + "rewards/margins": 4.252155303955078, + "rewards/rejected": -4.907055854797363, + "step": 4268 + }, + { + "epoch": 0.66, + "learning_rate": 1.1016276777841175e-05, + "logits/chosen": -3.0008294582366943, + "logits/rejected": -3.0906667709350586, + "logps/chosen": -94.79730224609375, + "logps/rejected": -162.3297119140625, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5656251907348633, + "rewards/margins": 5.278578758239746, + "rewards/rejected": -5.844203948974609, + "step": 4269 + }, + { + "epoch": 0.66, + "learning_rate": 1.1015543337310027e-05, + "logits/chosen": -3.012481212615967, + "logits/rejected": -2.31353497505188, + "logps/chosen": -417.7882995605469, + "logps/rejected": -430.6934814453125, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.618560791015625, + "rewards/margins": 6.544267177581787, + "rewards/rejected": -7.16282844543457, + "step": 4270 + }, + { + "epoch": 0.66, + "learning_rate": 1.101480989677888e-05, + "logits/chosen": -2.490602970123291, + "logits/rejected": -2.5521745681762695, + "logps/chosen": -272.68994140625, + "logps/rejected": -174.87648010253906, + "loss": 4.1325, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.402605056762695, + "rewards/margins": -0.3383951187133789, + "rewards/rejected": -6.064209938049316, + "step": 4271 + }, + { + "epoch": 0.66, + "learning_rate": 1.1014076456247732e-05, + "logits/chosen": -1.8559437990188599, + "logits/rejected": -3.1215250492095947, + "logps/chosen": -153.49647521972656, + "logps/rejected": -545.3807983398438, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.107856273651123, + "rewards/margins": 5.943178176879883, + "rewards/rejected": -8.051034927368164, + "step": 4272 + }, + { + "epoch": 0.66, + "learning_rate": 1.1013343015716584e-05, + "logits/chosen": -2.846095561981201, + "logits/rejected": -3.017791271209717, + "logps/chosen": -481.8235168457031, + "logps/rejected": -450.6390380859375, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0780441761016846, + "rewards/margins": 6.338350772857666, + "rewards/rejected": -7.4163947105407715, + "step": 4273 + }, + { + "epoch": 0.66, + "learning_rate": 1.1012609575185436e-05, + "logits/chosen": -2.791693687438965, + "logits/rejected": -2.9582912921905518, + "logps/chosen": -177.85464477539062, + "logps/rejected": -202.79920959472656, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.806708574295044, + "rewards/margins": 5.540966987609863, + "rewards/rejected": -7.347675323486328, + "step": 4274 + }, + { + "epoch": 0.66, + "learning_rate": 1.1011876134654288e-05, + "logits/chosen": -3.0594515800476074, + "logits/rejected": -1.9750263690948486, + "logps/chosen": -266.8188781738281, + "logps/rejected": -137.58316040039062, + "loss": 3.033, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6257383823394775, + "rewards/margins": -0.16091513633728027, + "rewards/rejected": -3.4648232460021973, + "step": 4275 + }, + { + "epoch": 0.67, + "learning_rate": 1.101114269412314e-05, + "logits/chosen": -3.180680751800537, + "logits/rejected": -2.908804416656494, + "logps/chosen": -177.0581512451172, + "logps/rejected": -129.76832580566406, + "loss": 0.8557, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.213255882263184, + "rewards/margins": 1.351340889930725, + "rewards/rejected": -5.564597129821777, + "step": 4276 + }, + { + "epoch": 0.67, + "learning_rate": 1.1010409253591991e-05, + "logits/chosen": -2.5699594020843506, + "logits/rejected": -3.16582989692688, + "logps/chosen": -840.1292114257812, + "logps/rejected": -725.549072265625, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.162841796875, + "rewards/margins": 5.905200481414795, + "rewards/rejected": -6.068042278289795, + "step": 4277 + }, + { + "epoch": 0.67, + "learning_rate": 1.1009675813060843e-05, + "logits/chosen": -3.230559825897217, + "logits/rejected": -3.2225968837738037, + "logps/chosen": -330.73828125, + "logps/rejected": -404.8979797363281, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3074069023132324, + "rewards/margins": 5.31278133392334, + "rewards/rejected": -7.6201887130737305, + "step": 4278 + }, + { + "epoch": 0.67, + "learning_rate": 1.1008942372529695e-05, + "logits/chosen": -3.2235512733459473, + "logits/rejected": -2.551675796508789, + "logps/chosen": -363.8871154785156, + "logps/rejected": -451.0527038574219, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9733844995498657, + "rewards/margins": 5.76584529876709, + "rewards/rejected": -6.739230155944824, + "step": 4279 + }, + { + "epoch": 0.67, + "learning_rate": 1.1008208931998549e-05, + "logits/chosen": -2.7733051776885986, + "logits/rejected": -3.0571234226226807, + "logps/chosen": -340.64013671875, + "logps/rejected": -332.0133361816406, + "loss": 7.4102, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.046167373657227, + "rewards/margins": -7.409528732299805, + "rewards/rejected": -0.6366382837295532, + "step": 4280 + }, + { + "epoch": 0.67, + "learning_rate": 1.10074754914674e-05, + "logits/chosen": -2.0545027256011963, + "logits/rejected": -3.1814138889312744, + "logps/chosen": -304.31561279296875, + "logps/rejected": -292.88616943359375, + "loss": 2.4212, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.147854328155518, + "rewards/margins": -0.34082603454589844, + "rewards/rejected": -4.807028293609619, + "step": 4281 + }, + { + "epoch": 0.67, + "learning_rate": 1.1006742050936252e-05, + "logits/chosen": -2.7167623043060303, + "logits/rejected": -2.89884614944458, + "logps/chosen": -90.95829772949219, + "logps/rejected": -342.2889709472656, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.644587993621826, + "rewards/margins": 5.781288146972656, + "rewards/rejected": -8.425875663757324, + "step": 4282 + }, + { + "epoch": 0.67, + "learning_rate": 1.1006008610405104e-05, + "logits/chosen": -1.8248891830444336, + "logits/rejected": -1.9990850687026978, + "logps/chosen": -440.393798828125, + "logps/rejected": -380.8663330078125, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.602116584777832, + "rewards/margins": 5.457226753234863, + "rewards/rejected": -8.059343338012695, + "step": 4283 + }, + { + "epoch": 0.67, + "learning_rate": 1.1005275169873956e-05, + "logits/chosen": -2.345411777496338, + "logits/rejected": -2.2639107704162598, + "logps/chosen": -264.50030517578125, + "logps/rejected": -310.4530334472656, + "loss": 5.6551, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.281994819641113, + "rewards/margins": -5.651324272155762, + "rewards/rejected": -0.6306701898574829, + "step": 4284 + }, + { + "epoch": 0.67, + "learning_rate": 1.1004541729342808e-05, + "logits/chosen": -2.9201414585113525, + "logits/rejected": -3.0932092666625977, + "logps/chosen": -316.06011962890625, + "logps/rejected": -385.82208251953125, + "loss": 0.0346, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7277140617370605, + "rewards/margins": 3.6902718544006348, + "rewards/rejected": -5.417985916137695, + "step": 4285 + }, + { + "epoch": 0.67, + "learning_rate": 1.100380828881166e-05, + "logits/chosen": -3.136718273162842, + "logits/rejected": -3.024991750717163, + "logps/chosen": -557.4676513671875, + "logps/rejected": -298.51495361328125, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1637786626815796, + "rewards/margins": 5.895495414733887, + "rewards/rejected": -7.059274673461914, + "step": 4286 + }, + { + "epoch": 0.67, + "learning_rate": 1.1003074848280512e-05, + "logits/chosen": -3.1114935874938965, + "logits/rejected": -2.6233789920806885, + "logps/chosen": -346.50396728515625, + "logps/rejected": -267.1178894042969, + "loss": 2.8786, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4778740406036377, + "rewards/margins": 0.9174013137817383, + "rewards/rejected": -4.395275115966797, + "step": 4287 + }, + { + "epoch": 0.67, + "learning_rate": 1.1002341407749364e-05, + "logits/chosen": -1.9429458379745483, + "logits/rejected": -2.5925395488739014, + "logps/chosen": -128.06475830078125, + "logps/rejected": -273.5440368652344, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.966484546661377, + "rewards/margins": 4.23615837097168, + "rewards/rejected": -6.202642917633057, + "step": 4288 + }, + { + "epoch": 0.67, + "learning_rate": 1.1001607967218217e-05, + "logits/chosen": -3.098853826522827, + "logits/rejected": -2.947209358215332, + "logps/chosen": -901.7127685546875, + "logps/rejected": -768.298583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.654322862625122, + "rewards/margins": 10.578557014465332, + "rewards/rejected": -8.924234390258789, + "step": 4289 + }, + { + "epoch": 0.67, + "learning_rate": 1.100087452668707e-05, + "logits/chosen": -2.9391391277313232, + "logits/rejected": -3.0856337547302246, + "logps/chosen": -515.5391845703125, + "logps/rejected": -515.5474243164062, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46043699979782104, + "rewards/margins": 5.611598014831543, + "rewards/rejected": -6.072035312652588, + "step": 4290 + }, + { + "epoch": 0.67, + "learning_rate": 1.1000141086155921e-05, + "logits/chosen": -2.7678661346435547, + "logits/rejected": -3.061831474304199, + "logps/chosen": -149.58023071289062, + "logps/rejected": -71.26412200927734, + "loss": 4.4105, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.225243091583252, + "rewards/margins": -4.397242546081543, + "rewards/rejected": -1.828000545501709, + "step": 4291 + }, + { + "epoch": 0.67, + "learning_rate": 1.0999407645624773e-05, + "logits/chosen": -2.1920371055603027, + "logits/rejected": -2.9288418292999268, + "logps/chosen": -91.65538024902344, + "logps/rejected": -205.91290283203125, + "loss": 0.0691, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0138075351715088, + "rewards/margins": 5.826802730560303, + "rewards/rejected": -6.840610504150391, + "step": 4292 + }, + { + "epoch": 0.67, + "learning_rate": 1.0998674205093625e-05, + "logits/chosen": -2.8367533683776855, + "logits/rejected": -3.089747905731201, + "logps/chosen": -81.26628112792969, + "logps/rejected": -118.35324096679688, + "loss": 0.037, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2403292655944824, + "rewards/margins": 3.353949785232544, + "rewards/rejected": -4.5942792892456055, + "step": 4293 + }, + { + "epoch": 0.67, + "learning_rate": 1.0997940764562477e-05, + "logits/chosen": -2.030078172683716, + "logits/rejected": -2.7390449047088623, + "logps/chosen": -157.82510375976562, + "logps/rejected": -508.4573059082031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4922633171081543, + "rewards/margins": 9.416518211364746, + "rewards/rejected": -10.908781051635742, + "step": 4294 + }, + { + "epoch": 0.67, + "learning_rate": 1.0997207324031329e-05, + "logits/chosen": -2.859276056289673, + "logits/rejected": -3.366926431655884, + "logps/chosen": -151.5869140625, + "logps/rejected": -360.6654052734375, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7763023376464844, + "rewards/margins": 5.286714553833008, + "rewards/rejected": -7.063016891479492, + "step": 4295 + }, + { + "epoch": 0.67, + "learning_rate": 1.099647388350018e-05, + "logits/chosen": -2.518537759780884, + "logits/rejected": -3.0432937145233154, + "logps/chosen": -313.70159912109375, + "logps/rejected": -278.6307373046875, + "loss": 2.6124, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.799783229827881, + "rewards/margins": 0.30336952209472656, + "rewards/rejected": -4.103153228759766, + "step": 4296 + }, + { + "epoch": 0.67, + "learning_rate": 1.0995740442969032e-05, + "logits/chosen": -2.672762632369995, + "logits/rejected": -2.8589835166931152, + "logps/chosen": -86.44245910644531, + "logps/rejected": -245.01332092285156, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.196699619293213, + "rewards/margins": 5.322072982788086, + "rewards/rejected": -7.518772125244141, + "step": 4297 + }, + { + "epoch": 0.67, + "learning_rate": 1.0995007002437886e-05, + "logits/chosen": -2.8606622219085693, + "logits/rejected": -2.5857579708099365, + "logps/chosen": -296.25640869140625, + "logps/rejected": -265.33258056640625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1907295286655426, + "rewards/margins": 7.2615766525268555, + "rewards/rejected": -7.0708465576171875, + "step": 4298 + }, + { + "epoch": 0.67, + "learning_rate": 1.0994273561906738e-05, + "logits/chosen": -3.0915393829345703, + "logits/rejected": -2.8485045433044434, + "logps/chosen": -580.6961669921875, + "logps/rejected": -640.6276245117188, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4266296625137329, + "rewards/margins": 7.904245376586914, + "rewards/rejected": -8.330875396728516, + "step": 4299 + }, + { + "epoch": 0.67, + "learning_rate": 1.099354012137559e-05, + "logits/chosen": -3.0165488719940186, + "logits/rejected": -3.09175705909729, + "logps/chosen": -614.3569946289062, + "logps/rejected": -623.6027221679688, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5072246789932251, + "rewards/margins": 7.09221076965332, + "rewards/rejected": -7.599435806274414, + "step": 4300 + }, + { + "epoch": 0.67, + "learning_rate": 1.0992806680844443e-05, + "logits/chosen": -2.4088804721832275, + "logits/rejected": -2.960538387298584, + "logps/chosen": -159.72349548339844, + "logps/rejected": -207.06346130371094, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.510087013244629, + "rewards/margins": 5.057589054107666, + "rewards/rejected": -6.567675590515137, + "step": 4301 + }, + { + "epoch": 0.67, + "learning_rate": 1.0992073240313295e-05, + "logits/chosen": -2.5546700954437256, + "logits/rejected": -3.2517800331115723, + "logps/chosen": -63.11671447753906, + "logps/rejected": -174.101318359375, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.513878583908081, + "rewards/margins": 4.152544021606445, + "rewards/rejected": -5.6664228439331055, + "step": 4302 + }, + { + "epoch": 0.67, + "learning_rate": 1.0991339799782147e-05, + "logits/chosen": -3.0450191497802734, + "logits/rejected": -3.272905111312866, + "logps/chosen": -54.169002532958984, + "logps/rejected": -108.95622253417969, + "loss": 0.1471, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7915278673171997, + "rewards/margins": 2.6273112297058105, + "rewards/rejected": -4.418839454650879, + "step": 4303 + }, + { + "epoch": 0.67, + "learning_rate": 1.0990606359250999e-05, + "logits/chosen": -2.998878240585327, + "logits/rejected": -3.0551211833953857, + "logps/chosen": -177.35398864746094, + "logps/rejected": -274.485595703125, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9932725429534912, + "rewards/margins": 4.486852169036865, + "rewards/rejected": -6.4801249504089355, + "step": 4304 + }, + { + "epoch": 0.67, + "learning_rate": 1.098987291871985e-05, + "logits/chosen": -2.9061150550842285, + "logits/rejected": -1.5799099206924438, + "logps/chosen": -203.3038787841797, + "logps/rejected": -200.68460083007812, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5104804039001465, + "rewards/margins": 5.825705528259277, + "rewards/rejected": -7.336186408996582, + "step": 4305 + }, + { + "epoch": 0.67, + "learning_rate": 1.0989139478188703e-05, + "logits/chosen": -2.5318613052368164, + "logits/rejected": -2.699582815170288, + "logps/chosen": -257.524169921875, + "logps/rejected": -286.298828125, + "loss": 0.0154, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6862244606018066, + "rewards/margins": 4.3954973220825195, + "rewards/rejected": -6.081721305847168, + "step": 4306 + }, + { + "epoch": 0.67, + "learning_rate": 1.0988406037657556e-05, + "logits/chosen": -2.1978163719177246, + "logits/rejected": -2.557435989379883, + "logps/chosen": -319.0837707519531, + "logps/rejected": -562.5181884765625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9513170719146729, + "rewards/margins": 7.885246276855469, + "rewards/rejected": -8.836563110351562, + "step": 4307 + }, + { + "epoch": 0.67, + "learning_rate": 1.0987672597126408e-05, + "logits/chosen": -2.6879141330718994, + "logits/rejected": -3.0210273265838623, + "logps/chosen": -246.99853515625, + "logps/rejected": -263.0240173339844, + "loss": 2.1152, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4659926891326904, + "rewards/margins": 2.4643502235412598, + "rewards/rejected": -5.930343151092529, + "step": 4308 + }, + { + "epoch": 0.67, + "learning_rate": 1.098693915659526e-05, + "logits/chosen": -3.0449306964874268, + "logits/rejected": -3.122426748275757, + "logps/chosen": -86.00566101074219, + "logps/rejected": -153.895751953125, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.904127299785614, + "rewards/margins": 5.7247467041015625, + "rewards/rejected": -6.628873825073242, + "step": 4309 + }, + { + "epoch": 0.67, + "learning_rate": 1.0986205716064112e-05, + "logits/chosen": -2.053452968597412, + "logits/rejected": -2.832517385482788, + "logps/chosen": -88.08488464355469, + "logps/rejected": -274.52691650390625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5204715728759766, + "rewards/margins": 7.848649024963379, + "rewards/rejected": -9.369119644165039, + "step": 4310 + }, + { + "epoch": 0.67, + "learning_rate": 1.0985472275532964e-05, + "logits/chosen": -3.081233263015747, + "logits/rejected": -2.6889524459838867, + "logps/chosen": -724.5719604492188, + "logps/rejected": -542.8720092773438, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3307174444198608, + "rewards/margins": 4.474542140960693, + "rewards/rejected": -5.805259704589844, + "step": 4311 + }, + { + "epoch": 0.67, + "learning_rate": 1.0984738835001816e-05, + "logits/chosen": -2.511931896209717, + "logits/rejected": -3.167591094970703, + "logps/chosen": -162.5863037109375, + "logps/rejected": -359.8490295410156, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.272834539413452, + "rewards/margins": 4.509989261627197, + "rewards/rejected": -7.7828240394592285, + "step": 4312 + }, + { + "epoch": 0.67, + "learning_rate": 1.0984005394470667e-05, + "logits/chosen": -2.966278553009033, + "logits/rejected": -3.0022382736206055, + "logps/chosen": -116.5103988647461, + "logps/rejected": -201.29080200195312, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.593298614025116, + "rewards/margins": 6.697253227233887, + "rewards/rejected": -7.290552139282227, + "step": 4313 + }, + { + "epoch": 0.67, + "learning_rate": 1.098327195393952e-05, + "logits/chosen": -2.189262628555298, + "logits/rejected": -3.118617296218872, + "logps/chosen": -133.09463500976562, + "logps/rejected": -363.36920166015625, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2921142578125, + "rewards/margins": 4.8706374168396, + "rewards/rejected": -7.162752151489258, + "step": 4314 + }, + { + "epoch": 0.67, + "learning_rate": 1.0982538513408371e-05, + "logits/chosen": -3.063932418823242, + "logits/rejected": -3.092686176300049, + "logps/chosen": -330.7771301269531, + "logps/rejected": -304.43011474609375, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5969348549842834, + "rewards/margins": 6.0217084884643555, + "rewards/rejected": -6.618642807006836, + "step": 4315 + }, + { + "epoch": 0.67, + "learning_rate": 1.0981805072877225e-05, + "logits/chosen": -3.1243979930877686, + "logits/rejected": -2.3933944702148438, + "logps/chosen": -314.5425720214844, + "logps/rejected": -267.9434509277344, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.76064133644104, + "rewards/margins": 6.75803279876709, + "rewards/rejected": -7.518673896789551, + "step": 4316 + }, + { + "epoch": 0.67, + "learning_rate": 1.0981071632346077e-05, + "logits/chosen": -2.9982399940490723, + "logits/rejected": -3.200145959854126, + "logps/chosen": -221.14773559570312, + "logps/rejected": -338.37579345703125, + "loss": 1.4998, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7869583368301392, + "rewards/margins": 2.060451030731201, + "rewards/rejected": -3.847409248352051, + "step": 4317 + }, + { + "epoch": 0.67, + "learning_rate": 1.0980338191814929e-05, + "logits/chosen": -2.9903483390808105, + "logits/rejected": -2.0992467403411865, + "logps/chosen": -666.056884765625, + "logps/rejected": -365.2531433105469, + "loss": 0.3092, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.717226505279541, + "rewards/margins": 2.93485164642334, + "rewards/rejected": -4.652078151702881, + "step": 4318 + }, + { + "epoch": 0.67, + "learning_rate": 1.097960475128378e-05, + "logits/chosen": -2.5289740562438965, + "logits/rejected": -2.9590818881988525, + "logps/chosen": -196.09893798828125, + "logps/rejected": -328.72308349609375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1318390369415283, + "rewards/margins": 7.183085918426514, + "rewards/rejected": -10.314925193786621, + "step": 4319 + }, + { + "epoch": 0.67, + "learning_rate": 1.0978871310752632e-05, + "logits/chosen": -1.9309526681900024, + "logits/rejected": -2.988445997238159, + "logps/chosen": -85.01858520507812, + "logps/rejected": -254.48443603515625, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.452129602432251, + "rewards/margins": 4.073469161987305, + "rewards/rejected": -6.525598526000977, + "step": 4320 + }, + { + "epoch": 0.67, + "learning_rate": 1.0978137870221484e-05, + "logits/chosen": -2.9933950901031494, + "logits/rejected": -2.4735660552978516, + "logps/chosen": -361.0389099121094, + "logps/rejected": -297.7141418457031, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.376870721578598, + "rewards/margins": 6.897355079650879, + "rewards/rejected": -6.52048397064209, + "step": 4321 + }, + { + "epoch": 0.67, + "learning_rate": 1.0977404429690336e-05, + "logits/chosen": -3.1604177951812744, + "logits/rejected": -3.272878885269165, + "logps/chosen": -384.64141845703125, + "logps/rejected": -440.9232177734375, + "loss": 0.277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7308449149131775, + "rewards/margins": 4.891277313232422, + "rewards/rejected": -5.622122287750244, + "step": 4322 + }, + { + "epoch": 0.67, + "learning_rate": 1.0976670989159188e-05, + "logits/chosen": -3.139604330062866, + "logits/rejected": -1.1769872903823853, + "logps/chosen": -424.6833190917969, + "logps/rejected": -412.30023193359375, + "loss": 2.2804, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.20011043548584, + "rewards/margins": 0.3423607349395752, + "rewards/rejected": -4.542471408843994, + "step": 4323 + }, + { + "epoch": 0.67, + "learning_rate": 1.097593754862804e-05, + "logits/chosen": -3.0661518573760986, + "logits/rejected": -2.8165080547332764, + "logps/chosen": -128.1815643310547, + "logps/rejected": -208.99154663085938, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3755406141281128, + "rewards/margins": 5.743884563446045, + "rewards/rejected": -7.119424819946289, + "step": 4324 + }, + { + "epoch": 0.67, + "learning_rate": 1.0975204108096893e-05, + "logits/chosen": -1.5971558094024658, + "logits/rejected": -2.9325902462005615, + "logps/chosen": -95.80068969726562, + "logps/rejected": -363.69281005859375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1435142755508423, + "rewards/margins": 6.79763126373291, + "rewards/rejected": -7.941145420074463, + "step": 4325 + }, + { + "epoch": 0.67, + "learning_rate": 1.0974470667565745e-05, + "logits/chosen": -2.0369107723236084, + "logits/rejected": -2.6952106952667236, + "logps/chosen": -183.55552673339844, + "logps/rejected": -151.6887969970703, + "loss": 0.3232, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.873652696609497, + "rewards/margins": 3.984682559967041, + "rewards/rejected": -6.858335494995117, + "step": 4326 + }, + { + "epoch": 0.67, + "learning_rate": 1.0973737227034597e-05, + "logits/chosen": -3.0074639320373535, + "logits/rejected": -3.172598361968994, + "logps/chosen": -374.0172424316406, + "logps/rejected": -282.570068359375, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0866241529583931, + "rewards/margins": 5.133581638336182, + "rewards/rejected": -5.046957492828369, + "step": 4327 + }, + { + "epoch": 0.67, + "learning_rate": 1.0973003786503449e-05, + "logits/chosen": -1.7017730474472046, + "logits/rejected": -2.9553089141845703, + "logps/chosen": -65.80575561523438, + "logps/rejected": -388.0947265625, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.90068519115448, + "rewards/margins": 6.969514846801758, + "rewards/rejected": -8.870200157165527, + "step": 4328 + }, + { + "epoch": 0.67, + "learning_rate": 1.0972270345972301e-05, + "logits/chosen": -2.7500181198120117, + "logits/rejected": -3.1182196140289307, + "logps/chosen": -249.2543487548828, + "logps/rejected": -374.92730712890625, + "loss": 0.031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2870041131973267, + "rewards/margins": 3.723750114440918, + "rewards/rejected": -5.010754108428955, + "step": 4329 + }, + { + "epoch": 0.67, + "learning_rate": 1.0971536905441153e-05, + "logits/chosen": -2.8559463024139404, + "logits/rejected": -3.122901678085327, + "logps/chosen": -243.53602600097656, + "logps/rejected": -97.96261596679688, + "loss": 4.1559, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.137365341186523, + "rewards/margins": -1.6375718116760254, + "rewards/rejected": -4.499793529510498, + "step": 4330 + }, + { + "epoch": 0.67, + "learning_rate": 1.0970803464910005e-05, + "logits/chosen": -3.0167596340179443, + "logits/rejected": -3.123169422149658, + "logps/chosen": -48.2783088684082, + "logps/rejected": -141.57928466796875, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.343503713607788, + "rewards/margins": 3.9025533199310303, + "rewards/rejected": -5.246057033538818, + "step": 4331 + }, + { + "epoch": 0.67, + "learning_rate": 1.0970070024378857e-05, + "logits/chosen": -3.0141446590423584, + "logits/rejected": -3.06343150138855, + "logps/chosen": -63.762184143066406, + "logps/rejected": -183.4561004638672, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2247906923294067, + "rewards/margins": 5.585749626159668, + "rewards/rejected": -6.810540199279785, + "step": 4332 + }, + { + "epoch": 0.67, + "learning_rate": 1.096933658384771e-05, + "logits/chosen": -3.079824447631836, + "logits/rejected": -2.012758493423462, + "logps/chosen": -310.45758056640625, + "logps/rejected": -460.34503173828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.385553002357483, + "rewards/margins": 8.979949951171875, + "rewards/rejected": -10.365503311157227, + "step": 4333 + }, + { + "epoch": 0.67, + "learning_rate": 1.0968603143316562e-05, + "logits/chosen": -3.0743508338928223, + "logits/rejected": -2.0499300956726074, + "logps/chosen": -618.972900390625, + "logps/rejected": -245.53387451171875, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2926185131072998, + "rewards/margins": 5.315045356750488, + "rewards/rejected": -6.607664108276367, + "step": 4334 + }, + { + "epoch": 0.67, + "learning_rate": 1.0967869702785416e-05, + "logits/chosen": -2.8409228324890137, + "logits/rejected": -2.812922477722168, + "logps/chosen": -117.65760040283203, + "logps/rejected": -272.3839416503906, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2672125101089478, + "rewards/margins": 6.39912223815918, + "rewards/rejected": -7.666334629058838, + "step": 4335 + }, + { + "epoch": 0.67, + "learning_rate": 1.0967136262254267e-05, + "logits/chosen": -2.8235998153686523, + "logits/rejected": -2.6303634643554688, + "logps/chosen": -110.8669204711914, + "logps/rejected": -228.1766357421875, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1024703979492188, + "rewards/margins": 5.7585320472717285, + "rewards/rejected": -6.861002445220947, + "step": 4336 + }, + { + "epoch": 0.67, + "learning_rate": 1.096640282172312e-05, + "logits/chosen": -3.050844669342041, + "logits/rejected": -3.07303524017334, + "logps/chosen": -363.211181640625, + "logps/rejected": -331.0845642089844, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28809890151023865, + "rewards/margins": 6.042641639709473, + "rewards/rejected": -6.330740451812744, + "step": 4337 + }, + { + "epoch": 0.67, + "learning_rate": 1.0965669381191971e-05, + "logits/chosen": -1.9942326545715332, + "logits/rejected": -2.4881768226623535, + "logps/chosen": -132.0608673095703, + "logps/rejected": -236.19456481933594, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.047002911567688, + "rewards/margins": 8.309293746948242, + "rewards/rejected": -9.356295585632324, + "step": 4338 + }, + { + "epoch": 0.67, + "learning_rate": 1.0964935940660823e-05, + "logits/chosen": -1.6852830648422241, + "logits/rejected": -2.893387794494629, + "logps/chosen": -78.96397399902344, + "logps/rejected": -203.04129028320312, + "loss": 0.0422, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.838998317718506, + "rewards/margins": 4.133807182312012, + "rewards/rejected": -6.972805500030518, + "step": 4339 + }, + { + "epoch": 0.67, + "learning_rate": 1.0964202500129675e-05, + "logits/chosen": -2.4414796829223633, + "logits/rejected": -2.959916353225708, + "logps/chosen": -318.4706115722656, + "logps/rejected": -512.0410766601562, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.741642713546753, + "rewards/margins": 8.24400520324707, + "rewards/rejected": -9.985647201538086, + "step": 4340 + }, + { + "epoch": 0.68, + "learning_rate": 1.0963469059598527e-05, + "logits/chosen": -1.9286044836044312, + "logits/rejected": -3.03663969039917, + "logps/chosen": -31.599464416503906, + "logps/rejected": -328.26959228515625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0064549446105957, + "rewards/margins": 6.554078102111816, + "rewards/rejected": -8.56053352355957, + "step": 4341 + }, + { + "epoch": 0.68, + "learning_rate": 1.0962735619067379e-05, + "logits/chosen": -2.230220317840576, + "logits/rejected": -3.2352020740509033, + "logps/chosen": -650.936279296875, + "logps/rejected": -729.3446044921875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7217742800712585, + "rewards/margins": 7.467756748199463, + "rewards/rejected": -8.189531326293945, + "step": 4342 + }, + { + "epoch": 0.68, + "learning_rate": 1.0962002178536232e-05, + "logits/chosen": -2.2914648056030273, + "logits/rejected": -3.202535390853882, + "logps/chosen": -362.26251220703125, + "logps/rejected": -412.0472106933594, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5864365100860596, + "rewards/margins": 7.348572731018066, + "rewards/rejected": -7.935009002685547, + "step": 4343 + }, + { + "epoch": 0.68, + "learning_rate": 1.0961268738005084e-05, + "logits/chosen": -3.0133163928985596, + "logits/rejected": -3.0690596103668213, + "logps/chosen": -89.99940490722656, + "logps/rejected": -203.53717041015625, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4712624549865723, + "rewards/margins": 5.383391380310059, + "rewards/rejected": -6.854653358459473, + "step": 4344 + }, + { + "epoch": 0.68, + "learning_rate": 1.0960535297473936e-05, + "logits/chosen": -3.103177547454834, + "logits/rejected": -2.797072172164917, + "logps/chosen": -521.9359741210938, + "logps/rejected": -697.1746826171875, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.231679916381836, + "rewards/margins": 9.807133674621582, + "rewards/rejected": -8.57545280456543, + "step": 4345 + }, + { + "epoch": 0.68, + "learning_rate": 1.0959801856942788e-05, + "logits/chosen": -2.9596478939056396, + "logits/rejected": -2.9855992794036865, + "logps/chosen": -92.79991149902344, + "logps/rejected": -297.5789794921875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7591300010681152, + "rewards/margins": 7.348018169403076, + "rewards/rejected": -9.107148170471191, + "step": 4346 + }, + { + "epoch": 0.68, + "learning_rate": 1.095906841641164e-05, + "logits/chosen": -2.971862554550171, + "logits/rejected": -2.6361005306243896, + "logps/chosen": -52.1204833984375, + "logps/rejected": -269.0071105957031, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0102343559265137, + "rewards/margins": 7.421097755432129, + "rewards/rejected": -8.431331634521484, + "step": 4347 + }, + { + "epoch": 0.68, + "learning_rate": 1.0958334975880492e-05, + "logits/chosen": -3.2113635540008545, + "logits/rejected": -2.894260883331299, + "logps/chosen": -188.12022399902344, + "logps/rejected": -260.6025085449219, + "loss": 1.3076, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7666995525360107, + "rewards/margins": 2.2788267135620117, + "rewards/rejected": -5.045526504516602, + "step": 4348 + }, + { + "epoch": 0.68, + "learning_rate": 1.0957601535349344e-05, + "logits/chosen": -2.8731698989868164, + "logits/rejected": -3.15189790725708, + "logps/chosen": -189.2863311767578, + "logps/rejected": -412.6522216796875, + "loss": 0.0553, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0428900718688965, + "rewards/margins": 3.307384729385376, + "rewards/rejected": -5.350275039672852, + "step": 4349 + }, + { + "epoch": 0.68, + "learning_rate": 1.0956868094818195e-05, + "logits/chosen": -2.7062392234802246, + "logits/rejected": -3.0171382427215576, + "logps/chosen": -188.91619873046875, + "logps/rejected": -224.09835815429688, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1314895153045654, + "rewards/margins": 4.237310886383057, + "rewards/rejected": -6.368800163269043, + "step": 4350 + }, + { + "epoch": 0.68, + "learning_rate": 1.0956134654287047e-05, + "logits/chosen": -2.5672974586486816, + "logits/rejected": -3.0805907249450684, + "logps/chosen": -131.72984313964844, + "logps/rejected": -288.4953308105469, + "loss": 0.0235, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7135177850723267, + "rewards/margins": 4.09730339050293, + "rewards/rejected": -5.810821056365967, + "step": 4351 + }, + { + "epoch": 0.68, + "learning_rate": 1.0955401213755901e-05, + "logits/chosen": -1.2620673179626465, + "logits/rejected": -2.7143702507019043, + "logps/chosen": -129.97836303710938, + "logps/rejected": -329.06414794921875, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1737101078033447, + "rewards/margins": 6.345573902130127, + "rewards/rejected": -9.51928424835205, + "step": 4352 + }, + { + "epoch": 0.68, + "learning_rate": 1.0954667773224753e-05, + "logits/chosen": -2.9161462783813477, + "logits/rejected": -3.281367063522339, + "logps/chosen": -121.47592163085938, + "logps/rejected": -395.679931640625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0853583812713623, + "rewards/margins": 6.018955707550049, + "rewards/rejected": -8.104313850402832, + "step": 4353 + }, + { + "epoch": 0.68, + "learning_rate": 1.0953934332693605e-05, + "logits/chosen": -3.130746603012085, + "logits/rejected": -2.1604301929473877, + "logps/chosen": -227.24993896484375, + "logps/rejected": -232.52859497070312, + "loss": 1.8652, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.540304660797119, + "rewards/margins": 0.5693517923355103, + "rewards/rejected": -5.10965633392334, + "step": 4354 + }, + { + "epoch": 0.68, + "learning_rate": 1.0953200892162457e-05, + "logits/chosen": -2.6431772708892822, + "logits/rejected": -3.056569814682007, + "logps/chosen": -166.9820556640625, + "logps/rejected": -296.5294189453125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6008002758026123, + "rewards/margins": 7.392536163330078, + "rewards/rejected": -8.993335723876953, + "step": 4355 + }, + { + "epoch": 0.68, + "learning_rate": 1.0952467451631308e-05, + "logits/chosen": -2.4074466228485107, + "logits/rejected": -3.051835536956787, + "logps/chosen": -199.03126525878906, + "logps/rejected": -317.08056640625, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1294167041778564, + "rewards/margins": 5.6652727127075195, + "rewards/rejected": -7.794689178466797, + "step": 4356 + }, + { + "epoch": 0.68, + "learning_rate": 1.095173401110016e-05, + "logits/chosen": -1.0144267082214355, + "logits/rejected": -2.282761573791504, + "logps/chosen": -106.41092681884766, + "logps/rejected": -279.06768798828125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9416077136993408, + "rewards/margins": 6.099100112915039, + "rewards/rejected": -8.0407075881958, + "step": 4357 + }, + { + "epoch": 0.68, + "learning_rate": 1.0951000570569012e-05, + "logits/chosen": -2.5744590759277344, + "logits/rejected": -3.0251758098602295, + "logps/chosen": -421.4097900390625, + "logps/rejected": -572.0479736328125, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0436203479766846, + "rewards/margins": 6.344949722290039, + "rewards/rejected": -8.388569831848145, + "step": 4358 + }, + { + "epoch": 0.68, + "learning_rate": 1.0950267130037864e-05, + "logits/chosen": -2.795281171798706, + "logits/rejected": -3.161165714263916, + "logps/chosen": -180.5839080810547, + "logps/rejected": -256.72216796875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6798648834228516, + "rewards/margins": 6.934809684753418, + "rewards/rejected": -8.61467456817627, + "step": 4359 + }, + { + "epoch": 0.68, + "learning_rate": 1.0949533689506716e-05, + "logits/chosen": -2.4817872047424316, + "logits/rejected": -3.1480910778045654, + "logps/chosen": -103.76667022705078, + "logps/rejected": -256.7856750488281, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3487548828125, + "rewards/margins": 6.212393283843994, + "rewards/rejected": -8.561147689819336, + "step": 4360 + }, + { + "epoch": 0.68, + "learning_rate": 1.094880024897557e-05, + "logits/chosen": -2.971778631210327, + "logits/rejected": -2.349637508392334, + "logps/chosen": -213.42333984375, + "logps/rejected": -327.49029541015625, + "loss": 2.8363, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.475562572479248, + "rewards/margins": 1.1872317790985107, + "rewards/rejected": -5.662794589996338, + "step": 4361 + }, + { + "epoch": 0.68, + "learning_rate": 1.0948066808444421e-05, + "logits/chosen": -2.795454740524292, + "logits/rejected": -2.7045350074768066, + "logps/chosen": -113.20805358886719, + "logps/rejected": -241.2831573486328, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2449860572814941, + "rewards/margins": 6.375776290893555, + "rewards/rejected": -7.620762825012207, + "step": 4362 + }, + { + "epoch": 0.68, + "learning_rate": 1.0947333367913273e-05, + "logits/chosen": -3.0303895473480225, + "logits/rejected": -2.002875328063965, + "logps/chosen": -422.53839111328125, + "logps/rejected": -185.61109924316406, + "loss": 1.5552, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0913665294647217, + "rewards/margins": 2.349830150604248, + "rewards/rejected": -5.441196918487549, + "step": 4363 + }, + { + "epoch": 0.68, + "learning_rate": 1.0946599927382125e-05, + "logits/chosen": -3.0560901165008545, + "logits/rejected": -2.376605272293091, + "logps/chosen": -261.406494140625, + "logps/rejected": -334.2195129394531, + "loss": 4.9228, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.866010665893555, + "rewards/margins": -4.892115592956543, + "rewards/rejected": -1.9738953113555908, + "step": 4364 + }, + { + "epoch": 0.68, + "learning_rate": 1.0945866486850977e-05, + "logits/chosen": -2.2225265502929688, + "logits/rejected": -3.0948851108551025, + "logps/chosen": -154.258056640625, + "logps/rejected": -267.0367431640625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0336384773254395, + "rewards/margins": 7.9834885597229, + "rewards/rejected": -9.01712703704834, + "step": 4365 + }, + { + "epoch": 0.68, + "learning_rate": 1.0945133046319829e-05, + "logits/chosen": -2.2634732723236084, + "logits/rejected": -2.90328311920166, + "logps/chosen": -251.43344116210938, + "logps/rejected": -564.6109619140625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7727882862091064, + "rewards/margins": 7.4350385665893555, + "rewards/rejected": -11.207826614379883, + "step": 4366 + }, + { + "epoch": 0.68, + "learning_rate": 1.0944399605788682e-05, + "logits/chosen": -2.1782727241516113, + "logits/rejected": -3.0630743503570557, + "logps/chosen": -507.8906555175781, + "logps/rejected": -616.6702880859375, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.667108178138733, + "rewards/margins": 5.874722480773926, + "rewards/rejected": -7.541831016540527, + "step": 4367 + }, + { + "epoch": 0.68, + "learning_rate": 1.0943666165257534e-05, + "logits/chosen": -3.183274984359741, + "logits/rejected": -2.8348259925842285, + "logps/chosen": -513.818359375, + "logps/rejected": -420.4371032714844, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28139954805374146, + "rewards/margins": 8.447014808654785, + "rewards/rejected": -8.728414535522461, + "step": 4368 + }, + { + "epoch": 0.68, + "learning_rate": 1.0942932724726388e-05, + "logits/chosen": -2.0152170658111572, + "logits/rejected": -3.131166934967041, + "logps/chosen": -645.35888671875, + "logps/rejected": -647.2869262695312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5525482296943665, + "rewards/margins": 10.09322738647461, + "rewards/rejected": -10.645774841308594, + "step": 4369 + }, + { + "epoch": 0.68, + "learning_rate": 1.094219928419524e-05, + "logits/chosen": -2.9971976280212402, + "logits/rejected": -3.001816511154175, + "logps/chosen": -214.440673828125, + "logps/rejected": -401.8455505371094, + "loss": 1.534, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8877875804901123, + "rewards/margins": 2.4565606117248535, + "rewards/rejected": -6.344348430633545, + "step": 4370 + }, + { + "epoch": 0.68, + "learning_rate": 1.0941465843664092e-05, + "logits/chosen": -1.6081441640853882, + "logits/rejected": -3.0548205375671387, + "logps/chosen": -115.45855712890625, + "logps/rejected": -338.539306640625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.530681848526001, + "rewards/margins": 8.246949195861816, + "rewards/rejected": -9.777630805969238, + "step": 4371 + }, + { + "epoch": 0.68, + "learning_rate": 1.0940732403132944e-05, + "logits/chosen": -2.864686965942383, + "logits/rejected": -2.3105850219726562, + "logps/chosen": -175.5671844482422, + "logps/rejected": -166.483642578125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5599865913391113, + "rewards/margins": 5.372091293334961, + "rewards/rejected": -7.9320783615112305, + "step": 4372 + }, + { + "epoch": 0.68, + "learning_rate": 1.0939998962601795e-05, + "logits/chosen": -2.186600923538208, + "logits/rejected": -3.1192283630371094, + "logps/chosen": -108.78449249267578, + "logps/rejected": -210.77490234375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.680436134338379, + "rewards/margins": 6.635791778564453, + "rewards/rejected": -8.316227912902832, + "step": 4373 + }, + { + "epoch": 0.68, + "learning_rate": 1.0939265522070647e-05, + "logits/chosen": -2.8213789463043213, + "logits/rejected": -3.1425936222076416, + "logps/chosen": -710.6034545898438, + "logps/rejected": -545.2291259765625, + "loss": 2.5057, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.934731960296631, + "rewards/margins": 1.9300041198730469, + "rewards/rejected": -5.864736557006836, + "step": 4374 + }, + { + "epoch": 0.68, + "learning_rate": 1.09385320815395e-05, + "logits/chosen": -2.950566053390503, + "logits/rejected": -2.7548820972442627, + "logps/chosen": -433.9021301269531, + "logps/rejected": -407.6235046386719, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.473994731903076, + "rewards/margins": 5.0267133712768555, + "rewards/rejected": -7.50070858001709, + "step": 4375 + }, + { + "epoch": 0.68, + "learning_rate": 1.0937798641008351e-05, + "logits/chosen": -2.573347330093384, + "logits/rejected": -2.8328323364257812, + "logps/chosen": -197.79049682617188, + "logps/rejected": -295.1805419921875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.066256046295166, + "rewards/margins": 7.6997880935668945, + "rewards/rejected": -8.766044616699219, + "step": 4376 + }, + { + "epoch": 0.68, + "learning_rate": 1.0937065200477203e-05, + "logits/chosen": -2.589336633682251, + "logits/rejected": -3.0938968658447266, + "logps/chosen": -194.30418395996094, + "logps/rejected": -141.83511352539062, + "loss": 3.2111, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.15035343170166, + "rewards/margins": 0.24686551094055176, + "rewards/rejected": -4.397218704223633, + "step": 4377 + }, + { + "epoch": 0.68, + "learning_rate": 1.0936331759946057e-05, + "logits/chosen": -3.0160839557647705, + "logits/rejected": -1.667555570602417, + "logps/chosen": -471.84588623046875, + "logps/rejected": -329.17919921875, + "loss": 3.3341, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.819649696350098, + "rewards/margins": 0.756854772567749, + "rewards/rejected": -6.576504707336426, + "step": 4378 + }, + { + "epoch": 0.68, + "learning_rate": 1.0935598319414908e-05, + "logits/chosen": -2.1032981872558594, + "logits/rejected": -2.957125425338745, + "logps/chosen": -196.9756622314453, + "logps/rejected": -321.3818664550781, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5826744437217712, + "rewards/margins": 6.4183149337768555, + "rewards/rejected": -7.00098991394043, + "step": 4379 + }, + { + "epoch": 0.68, + "learning_rate": 1.093486487888376e-05, + "logits/chosen": -3.051788330078125, + "logits/rejected": -3.0734574794769287, + "logps/chosen": -274.27020263671875, + "logps/rejected": -289.0816650390625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6690120697021484, + "rewards/margins": 6.821533679962158, + "rewards/rejected": -8.490545272827148, + "step": 4380 + }, + { + "epoch": 0.68, + "learning_rate": 1.0934131438352612e-05, + "logits/chosen": -2.8199820518493652, + "logits/rejected": -3.092179536819458, + "logps/chosen": -73.66944122314453, + "logps/rejected": -343.67095947265625, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.732985258102417, + "rewards/margins": 5.061077117919922, + "rewards/rejected": -7.794062614440918, + "step": 4381 + }, + { + "epoch": 0.68, + "learning_rate": 1.0933397997821464e-05, + "logits/chosen": -2.579101085662842, + "logits/rejected": -3.102128028869629, + "logps/chosen": -204.55972290039062, + "logps/rejected": -177.15219116210938, + "loss": 2.7537, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.720539093017578, + "rewards/margins": 1.3173298835754395, + "rewards/rejected": -5.037868976593018, + "step": 4382 + }, + { + "epoch": 0.68, + "learning_rate": 1.0932664557290316e-05, + "logits/chosen": -2.23636794090271, + "logits/rejected": -2.9169836044311523, + "logps/chosen": -138.87338256835938, + "logps/rejected": -373.5148620605469, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6506496667861938, + "rewards/margins": 5.625103950500488, + "rewards/rejected": -7.275753974914551, + "step": 4383 + }, + { + "epoch": 0.68, + "learning_rate": 1.0931931116759168e-05, + "logits/chosen": -3.1280157566070557, + "logits/rejected": -2.994553565979004, + "logps/chosen": -187.87228393554688, + "logps/rejected": -343.0606689453125, + "loss": 1.2724, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.9685394763946533, + "rewards/margins": 0.4477806091308594, + "rewards/rejected": -2.4163200855255127, + "step": 4384 + }, + { + "epoch": 0.68, + "learning_rate": 1.093119767622802e-05, + "logits/chosen": -2.0979080200195312, + "logits/rejected": -3.113037586212158, + "logps/chosen": -114.47525024414062, + "logps/rejected": -281.94281005859375, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8738467693328857, + "rewards/margins": 6.1712646484375, + "rewards/rejected": -8.045111656188965, + "step": 4385 + }, + { + "epoch": 0.68, + "learning_rate": 1.0930464235696872e-05, + "logits/chosen": -2.9959115982055664, + "logits/rejected": -2.265833854675293, + "logps/chosen": -198.55007934570312, + "logps/rejected": -315.4366760253906, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5174639225006104, + "rewards/margins": 8.57171630859375, + "rewards/rejected": -10.089179992675781, + "step": 4386 + }, + { + "epoch": 0.68, + "learning_rate": 1.0929730795165725e-05, + "logits/chosen": -3.1413111686706543, + "logits/rejected": -3.236806869506836, + "logps/chosen": -464.74700927734375, + "logps/rejected": -467.02392578125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8823654651641846, + "rewards/margins": 7.931819915771484, + "rewards/rejected": -10.814186096191406, + "step": 4387 + }, + { + "epoch": 0.68, + "learning_rate": 1.0928997354634577e-05, + "logits/chosen": -1.9967352151870728, + "logits/rejected": -2.8333301544189453, + "logps/chosen": -195.04827880859375, + "logps/rejected": -437.61199951171875, + "loss": 0.0715, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4762566089630127, + "rewards/margins": 4.8915276527404785, + "rewards/rejected": -6.36778450012207, + "step": 4388 + }, + { + "epoch": 0.68, + "learning_rate": 1.0928263914103429e-05, + "logits/chosen": -2.1285927295684814, + "logits/rejected": -2.9874284267425537, + "logps/chosen": -128.64686584472656, + "logps/rejected": -149.38648986816406, + "loss": 1.9758, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.502845048904419, + "rewards/margins": 1.278749704360962, + "rewards/rejected": -4.781594753265381, + "step": 4389 + }, + { + "epoch": 0.68, + "learning_rate": 1.092753047357228e-05, + "logits/chosen": -1.7543227672576904, + "logits/rejected": -2.711171865463257, + "logps/chosen": -43.5780029296875, + "logps/rejected": -216.20968627929688, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.706967830657959, + "rewards/margins": 8.045276641845703, + "rewards/rejected": -9.752243995666504, + "step": 4390 + }, + { + "epoch": 0.68, + "learning_rate": 1.0926797033041133e-05, + "logits/chosen": -0.8873894810676575, + "logits/rejected": -2.24104380607605, + "logps/chosen": -155.77149963378906, + "logps/rejected": -327.2522888183594, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.453756332397461, + "rewards/margins": 7.695175647735596, + "rewards/rejected": -10.148932456970215, + "step": 4391 + }, + { + "epoch": 0.68, + "learning_rate": 1.0926063592509985e-05, + "logits/chosen": -2.8419597148895264, + "logits/rejected": -3.3376624584198, + "logps/chosen": -67.59420776367188, + "logps/rejected": -216.38796997070312, + "loss": 0.0404, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.956548810005188, + "rewards/margins": 4.017165184020996, + "rewards/rejected": -5.9737138748168945, + "step": 4392 + }, + { + "epoch": 0.68, + "learning_rate": 1.0925330151978836e-05, + "logits/chosen": -2.016984462738037, + "logits/rejected": -3.0921075344085693, + "logps/chosen": -643.2818603515625, + "logps/rejected": -760.8171997070312, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.482840061187744, + "rewards/margins": 6.099496841430664, + "rewards/rejected": -8.58233642578125, + "step": 4393 + }, + { + "epoch": 0.68, + "learning_rate": 1.0924596711447688e-05, + "logits/chosen": -2.239668369293213, + "logits/rejected": -2.7294766902923584, + "logps/chosen": -36.806846618652344, + "logps/rejected": -99.44808959960938, + "loss": 0.0399, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6318751573562622, + "rewards/margins": 3.2001287937164307, + "rewards/rejected": -4.832004070281982, + "step": 4394 + }, + { + "epoch": 0.68, + "learning_rate": 1.092386327091654e-05, + "logits/chosen": -1.324285626411438, + "logits/rejected": -2.9986279010772705, + "logps/chosen": -66.85115051269531, + "logps/rejected": -292.622802734375, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6729717254638672, + "rewards/margins": 6.074110984802246, + "rewards/rejected": -6.747082710266113, + "step": 4395 + }, + { + "epoch": 0.68, + "learning_rate": 1.0923129830385394e-05, + "logits/chosen": -2.290703535079956, + "logits/rejected": -2.714414596557617, + "logps/chosen": -127.61726379394531, + "logps/rejected": -292.82000732421875, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.44171142578125, + "rewards/margins": 6.91477108001709, + "rewards/rejected": -9.356483459472656, + "step": 4396 + }, + { + "epoch": 0.68, + "learning_rate": 1.0922396389854246e-05, + "logits/chosen": -3.0169482231140137, + "logits/rejected": -2.9336767196655273, + "logps/chosen": -113.95689392089844, + "logps/rejected": -253.93504333496094, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.428055763244629, + "rewards/margins": 4.418760299682617, + "rewards/rejected": -7.846816062927246, + "step": 4397 + }, + { + "epoch": 0.68, + "learning_rate": 1.0921662949323097e-05, + "logits/chosen": -2.785053014755249, + "logits/rejected": -2.9547553062438965, + "logps/chosen": -583.3687744140625, + "logps/rejected": -527.7993774414062, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8319938778877258, + "rewards/margins": 7.7535858154296875, + "rewards/rejected": -8.585578918457031, + "step": 4398 + }, + { + "epoch": 0.68, + "learning_rate": 1.092092950879195e-05, + "logits/chosen": -2.9161882400512695, + "logits/rejected": -2.96557879447937, + "logps/chosen": -117.96598052978516, + "logps/rejected": -175.56158447265625, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7935795783996582, + "rewards/margins": 4.82778787612915, + "rewards/rejected": -6.621367454528809, + "step": 4399 + }, + { + "epoch": 0.68, + "learning_rate": 1.0920196068260801e-05, + "logits/chosen": -1.609679102897644, + "logits/rejected": -3.038761615753174, + "logps/chosen": -93.28800964355469, + "logps/rejected": -313.0651550292969, + "loss": 0.2239, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5306100845336914, + "rewards/margins": 3.4061970710754395, + "rewards/rejected": -6.936807155609131, + "step": 4400 + }, + { + "epoch": 0.68, + "learning_rate": 1.0919462627729655e-05, + "logits/chosen": -0.8621997237205505, + "logits/rejected": -2.6026418209075928, + "logps/chosen": -182.93548583984375, + "logps/rejected": -429.7447509765625, + "loss": 4.6459, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.083201885223389, + "rewards/margins": 0.24022960662841797, + "rewards/rejected": -6.323431491851807, + "step": 4401 + }, + { + "epoch": 0.68, + "learning_rate": 1.0918729187198507e-05, + "logits/chosen": -2.951490640640259, + "logits/rejected": -3.074744701385498, + "logps/chosen": -74.90699768066406, + "logps/rejected": -218.93109130859375, + "loss": 0.0538, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.677077293395996, + "rewards/margins": 2.9758129119873047, + "rewards/rejected": -4.652890205383301, + "step": 4402 + }, + { + "epoch": 0.68, + "learning_rate": 1.0917995746667359e-05, + "logits/chosen": -2.895421028137207, + "logits/rejected": -1.3045190572738647, + "logps/chosen": -263.77642822265625, + "logps/rejected": -241.0459747314453, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2708733081817627, + "rewards/margins": 7.087527275085449, + "rewards/rejected": -8.35840129852295, + "step": 4403 + }, + { + "epoch": 0.68, + "learning_rate": 1.091726230613621e-05, + "logits/chosen": -2.9358129501342773, + "logits/rejected": -3.0522515773773193, + "logps/chosen": -42.77389907836914, + "logps/rejected": -177.18190002441406, + "loss": 0.0434, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1228922605514526, + "rewards/margins": 3.4612913131713867, + "rewards/rejected": -4.584183692932129, + "step": 4404 + }, + { + "epoch": 0.69, + "learning_rate": 1.0916528865605064e-05, + "logits/chosen": -2.5443196296691895, + "logits/rejected": -2.6888067722320557, + "logps/chosen": -207.6726531982422, + "logps/rejected": -370.0026550292969, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4921847581863403, + "rewards/margins": 6.843875885009766, + "rewards/rejected": -8.336061477661133, + "step": 4405 + }, + { + "epoch": 0.69, + "learning_rate": 1.0915795425073916e-05, + "logits/chosen": -1.673651099205017, + "logits/rejected": -3.035748243331909, + "logps/chosen": -151.5210723876953, + "logps/rejected": -480.44439697265625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5888984203338623, + "rewards/margins": 7.108863830566406, + "rewards/rejected": -9.697761535644531, + "step": 4406 + }, + { + "epoch": 0.69, + "learning_rate": 1.0915061984542768e-05, + "logits/chosen": -2.4735844135284424, + "logits/rejected": -2.998370409011841, + "logps/chosen": -186.7266845703125, + "logps/rejected": -314.001220703125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.31378173828125, + "rewards/margins": 7.248767852783203, + "rewards/rejected": -8.562549591064453, + "step": 4407 + }, + { + "epoch": 0.69, + "learning_rate": 1.091432854401162e-05, + "logits/chosen": -2.4510858058929443, + "logits/rejected": -3.1237897872924805, + "logps/chosen": -504.26123046875, + "logps/rejected": -611.0406494140625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0754932388663292, + "rewards/margins": 8.505091667175293, + "rewards/rejected": -8.580585479736328, + "step": 4408 + }, + { + "epoch": 0.69, + "learning_rate": 1.0913595103480472e-05, + "logits/chosen": -2.0383079051971436, + "logits/rejected": -2.8456368446350098, + "logps/chosen": -249.7689971923828, + "logps/rejected": -416.5014343261719, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.764984130859375, + "rewards/margins": 8.382467269897461, + "rewards/rejected": -11.147451400756836, + "step": 4409 + }, + { + "epoch": 0.69, + "learning_rate": 1.0912861662949323e-05, + "logits/chosen": -2.7565672397613525, + "logits/rejected": -2.71280574798584, + "logps/chosen": -218.14187622070312, + "logps/rejected": -332.84722900390625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2232751846313477, + "rewards/margins": 6.144491195678711, + "rewards/rejected": -8.367766380310059, + "step": 4410 + }, + { + "epoch": 0.69, + "learning_rate": 1.0912128222418175e-05, + "logits/chosen": -2.916313886642456, + "logits/rejected": -3.0700650215148926, + "logps/chosen": -185.71719360351562, + "logps/rejected": -306.5055236816406, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1626923084259033, + "rewards/margins": 5.570416450500488, + "rewards/rejected": -7.7331085205078125, + "step": 4411 + }, + { + "epoch": 0.69, + "learning_rate": 1.0911394781887027e-05, + "logits/chosen": -1.9841212034225464, + "logits/rejected": -2.7558999061584473, + "logps/chosen": -33.99812316894531, + "logps/rejected": -163.36915588378906, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5381579399108887, + "rewards/margins": 5.205950736999512, + "rewards/rejected": -6.7441086769104, + "step": 4412 + }, + { + "epoch": 0.69, + "learning_rate": 1.0910661341355879e-05, + "logits/chosen": -3.0318572521209717, + "logits/rejected": -2.4480769634246826, + "logps/chosen": -644.3351440429688, + "logps/rejected": -434.2391662597656, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2551639080047607, + "rewards/margins": 6.192140579223633, + "rewards/rejected": -7.447304725646973, + "step": 4413 + }, + { + "epoch": 0.69, + "learning_rate": 1.0909927900824733e-05, + "logits/chosen": -3.2007970809936523, + "logits/rejected": -2.439643144607544, + "logps/chosen": -411.4849548339844, + "logps/rejected": -257.3417663574219, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6009063720703125, + "rewards/margins": 7.602381706237793, + "rewards/rejected": -8.203288078308105, + "step": 4414 + }, + { + "epoch": 0.69, + "learning_rate": 1.0909194460293584e-05, + "logits/chosen": -2.6945862770080566, + "logits/rejected": -3.0292227268218994, + "logps/chosen": -366.220703125, + "logps/rejected": -361.8489074707031, + "loss": 4.2255, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.473995685577393, + "rewards/margins": -1.2321255207061768, + "rewards/rejected": -4.241870403289795, + "step": 4415 + }, + { + "epoch": 0.69, + "learning_rate": 1.0908461019762436e-05, + "logits/chosen": -3.1902332305908203, + "logits/rejected": -2.9374825954437256, + "logps/chosen": -129.86801147460938, + "logps/rejected": -144.3626251220703, + "loss": 3.5937, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.016598701477051, + "rewards/margins": -1.060044765472412, + "rewards/rejected": -2.9565539360046387, + "step": 4416 + }, + { + "epoch": 0.69, + "learning_rate": 1.0907727579231288e-05, + "logits/chosen": -3.106659173965454, + "logits/rejected": -3.0620181560516357, + "logps/chosen": -137.6710968017578, + "logps/rejected": -105.277587890625, + "loss": 1.3098, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.487813711166382, + "rewards/margins": 3.016530752182007, + "rewards/rejected": -5.5043439865112305, + "step": 4417 + }, + { + "epoch": 0.69, + "learning_rate": 1.090699413870014e-05, + "logits/chosen": -2.7119345664978027, + "logits/rejected": -2.3603830337524414, + "logps/chosen": -134.61636352539062, + "logps/rejected": -203.873291015625, + "loss": 3.5275, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.267434597015381, + "rewards/margins": 0.5108356475830078, + "rewards/rejected": -5.778270244598389, + "step": 4418 + }, + { + "epoch": 0.69, + "learning_rate": 1.0906260698168992e-05, + "logits/chosen": -2.977307081222534, + "logits/rejected": -1.8720594644546509, + "logps/chosen": -307.9471435546875, + "logps/rejected": -113.57148742675781, + "loss": 1.7526, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2796854972839355, + "rewards/margins": 2.5361855030059814, + "rewards/rejected": -4.815871238708496, + "step": 4419 + }, + { + "epoch": 0.69, + "learning_rate": 1.0905527257637844e-05, + "logits/chosen": -3.0630478858947754, + "logits/rejected": -2.8147172927856445, + "logps/chosen": -269.9021301269531, + "logps/rejected": -389.188232421875, + "loss": 2.8943, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.8859405517578125, + "rewards/margins": -0.6269454956054688, + "rewards/rejected": -4.258995056152344, + "step": 4420 + }, + { + "epoch": 0.69, + "learning_rate": 1.0904793817106696e-05, + "logits/chosen": -2.103860378265381, + "logits/rejected": -3.072795867919922, + "logps/chosen": -188.006591796875, + "logps/rejected": -296.48016357421875, + "loss": 0.0915, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.359868288040161, + "rewards/margins": 3.387420177459717, + "rewards/rejected": -5.747288703918457, + "step": 4421 + }, + { + "epoch": 0.69, + "learning_rate": 1.0904060376575548e-05, + "logits/chosen": -1.614486813545227, + "logits/rejected": -2.9704689979553223, + "logps/chosen": -50.52216339111328, + "logps/rejected": -262.62091064453125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5288867950439453, + "rewards/margins": 7.296910285949707, + "rewards/rejected": -7.825797080993652, + "step": 4422 + }, + { + "epoch": 0.69, + "learning_rate": 1.0903326936044401e-05, + "logits/chosen": -2.6386988162994385, + "logits/rejected": -3.0909903049468994, + "logps/chosen": -28.140804290771484, + "logps/rejected": -133.96871948242188, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7671422958374023, + "rewards/margins": 5.507908821105957, + "rewards/rejected": -6.275051116943359, + "step": 4423 + }, + { + "epoch": 0.69, + "learning_rate": 1.0902593495513253e-05, + "logits/chosen": -3.299527406692505, + "logits/rejected": -2.927433490753174, + "logps/chosen": -135.50457763671875, + "logps/rejected": -89.8773422241211, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6753456592559814, + "rewards/margins": 4.182337760925293, + "rewards/rejected": -5.8576836585998535, + "step": 4424 + }, + { + "epoch": 0.69, + "learning_rate": 1.0901860054982105e-05, + "logits/chosen": -2.855210542678833, + "logits/rejected": -3.182734727859497, + "logps/chosen": -174.8131561279297, + "logps/rejected": -316.91363525390625, + "loss": 0.0326, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1746163368225098, + "rewards/margins": 4.3963541984558105, + "rewards/rejected": -6.57097053527832, + "step": 4425 + }, + { + "epoch": 0.69, + "learning_rate": 1.0901126614450957e-05, + "logits/chosen": -1.6582375764846802, + "logits/rejected": -2.9189226627349854, + "logps/chosen": -141.533935546875, + "logps/rejected": -318.34283447265625, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6690239906311035, + "rewards/margins": 5.30594539642334, + "rewards/rejected": -7.974969863891602, + "step": 4426 + }, + { + "epoch": 0.69, + "learning_rate": 1.0900393173919809e-05, + "logits/chosen": -2.968385934829712, + "logits/rejected": -3.215221881866455, + "logps/chosen": -184.24728393554688, + "logps/rejected": -115.07331848144531, + "loss": 3.4224, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.602425575256348, + "rewards/margins": -1.7660561800003052, + "rewards/rejected": -2.836369514465332, + "step": 4427 + }, + { + "epoch": 0.69, + "learning_rate": 1.089965973338866e-05, + "logits/chosen": -3.02376389503479, + "logits/rejected": -2.7859487533569336, + "logps/chosen": -204.39529418945312, + "logps/rejected": -173.2650146484375, + "loss": 1.1061, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.719540596008301, + "rewards/margins": 2.7563679218292236, + "rewards/rejected": -6.475908279418945, + "step": 4428 + }, + { + "epoch": 0.69, + "learning_rate": 1.0898926292857512e-05, + "logits/chosen": -1.716531753540039, + "logits/rejected": -2.96242618560791, + "logps/chosen": -31.027549743652344, + "logps/rejected": -253.5600128173828, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9390314221382141, + "rewards/margins": 6.354457855224609, + "rewards/rejected": -7.293489456176758, + "step": 4429 + }, + { + "epoch": 0.69, + "learning_rate": 1.0898192852326364e-05, + "logits/chosen": -2.934685468673706, + "logits/rejected": -3.00046968460083, + "logps/chosen": -700.4669189453125, + "logps/rejected": -355.6236572265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2806960940361023, + "rewards/margins": 8.434572219848633, + "rewards/rejected": -8.153875350952148, + "step": 4430 + }, + { + "epoch": 0.69, + "learning_rate": 1.0897459411795216e-05, + "logits/chosen": -1.501654863357544, + "logits/rejected": -2.706575870513916, + "logps/chosen": -88.04914855957031, + "logps/rejected": -346.87677001953125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8393425345420837, + "rewards/margins": 6.789931774139404, + "rewards/rejected": -7.629274368286133, + "step": 4431 + }, + { + "epoch": 0.69, + "learning_rate": 1.089672597126407e-05, + "logits/chosen": -1.7576698064804077, + "logits/rejected": -2.9048476219177246, + "logps/chosen": -120.3219223022461, + "logps/rejected": -347.08502197265625, + "loss": 2.7473, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.346863269805908, + "rewards/margins": 0.1286914348602295, + "rewards/rejected": -4.475554943084717, + "step": 4432 + }, + { + "epoch": 0.69, + "learning_rate": 1.0895992530732922e-05, + "logits/chosen": -2.014591932296753, + "logits/rejected": -3.2180140018463135, + "logps/chosen": -471.14788818359375, + "logps/rejected": -662.3536376953125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38325807452201843, + "rewards/margins": 7.672689437866211, + "rewards/rejected": -7.289431571960449, + "step": 4433 + }, + { + "epoch": 0.69, + "learning_rate": 1.0895259090201774e-05, + "logits/chosen": -2.380744218826294, + "logits/rejected": -3.0677366256713867, + "logps/chosen": -284.8528137207031, + "logps/rejected": -402.30908203125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9893333911895752, + "rewards/margins": 8.13699722290039, + "rewards/rejected": -9.126330375671387, + "step": 4434 + }, + { + "epoch": 0.69, + "learning_rate": 1.0894525649670627e-05, + "logits/chosen": -1.8311558961868286, + "logits/rejected": -2.9256088733673096, + "logps/chosen": -164.88040161132812, + "logps/rejected": -279.4329833984375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1138272285461426, + "rewards/margins": 5.567502021789551, + "rewards/rejected": -7.681328773498535, + "step": 4435 + }, + { + "epoch": 0.69, + "learning_rate": 1.0893792209139479e-05, + "logits/chosen": -3.0521724224090576, + "logits/rejected": -1.9716825485229492, + "logps/chosen": -573.80078125, + "logps/rejected": -389.50469970703125, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3880929946899414, + "rewards/margins": 5.42636775970459, + "rewards/rejected": -5.814460754394531, + "step": 4436 + }, + { + "epoch": 0.69, + "learning_rate": 1.0893058768608331e-05, + "logits/chosen": -2.8102774620056152, + "logits/rejected": -1.6412360668182373, + "logps/chosen": -105.1068115234375, + "logps/rejected": -301.86102294921875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3418163061141968, + "rewards/margins": 7.457080841064453, + "rewards/rejected": -8.798896789550781, + "step": 4437 + }, + { + "epoch": 0.69, + "learning_rate": 1.0892325328077183e-05, + "logits/chosen": -3.1370749473571777, + "logits/rejected": -2.0821034908294678, + "logps/chosen": -656.25732421875, + "logps/rejected": -436.3824768066406, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1995147466659546, + "rewards/margins": 8.482624053955078, + "rewards/rejected": -9.682138442993164, + "step": 4438 + }, + { + "epoch": 0.69, + "learning_rate": 1.0891591887546035e-05, + "logits/chosen": -3.2211763858795166, + "logits/rejected": -2.9821159839630127, + "logps/chosen": -424.04608154296875, + "logps/rejected": -455.0892639160156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.44404298067092896, + "rewards/margins": 8.806561470031738, + "rewards/rejected": -9.250604629516602, + "step": 4439 + }, + { + "epoch": 0.69, + "learning_rate": 1.0890858447014887e-05, + "logits/chosen": -2.270761489868164, + "logits/rejected": -3.0324013233184814, + "logps/chosen": -664.86279296875, + "logps/rejected": -750.5479736328125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1001853942871094, + "rewards/margins": 7.376112937927246, + "rewards/rejected": -8.476299285888672, + "step": 4440 + }, + { + "epoch": 0.69, + "learning_rate": 1.089012500648374e-05, + "logits/chosen": -1.3580548763275146, + "logits/rejected": -2.6817266941070557, + "logps/chosen": -123.40724182128906, + "logps/rejected": -265.24945068359375, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8105945587158203, + "rewards/margins": 5.71263313293457, + "rewards/rejected": -8.52322769165039, + "step": 4441 + }, + { + "epoch": 0.69, + "learning_rate": 1.0889391565952592e-05, + "logits/chosen": -2.0967016220092773, + "logits/rejected": -3.0418245792388916, + "logps/chosen": -323.1697082519531, + "logps/rejected": -443.6446228027344, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.934650182723999, + "rewards/margins": 8.025409698486328, + "rewards/rejected": -8.960060119628906, + "step": 4442 + }, + { + "epoch": 0.69, + "learning_rate": 1.0888658125421444e-05, + "logits/chosen": -2.4975829124450684, + "logits/rejected": -3.1009440422058105, + "logps/chosen": -132.63577270507812, + "logps/rejected": -155.19659423828125, + "loss": 3.5334, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.187958240509033, + "rewards/margins": -3.107053756713867, + "rewards/rejected": -2.080904483795166, + "step": 4443 + }, + { + "epoch": 0.69, + "learning_rate": 1.0887924684890296e-05, + "logits/chosen": -2.5484652519226074, + "logits/rejected": -3.140355110168457, + "logps/chosen": -242.1085205078125, + "logps/rejected": -384.4830322265625, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8352231979370117, + "rewards/margins": 6.6604533195495605, + "rewards/rejected": -7.495676040649414, + "step": 4444 + }, + { + "epoch": 0.69, + "learning_rate": 1.0887191244359148e-05, + "logits/chosen": -2.3037564754486084, + "logits/rejected": -2.968414306640625, + "logps/chosen": -195.7794189453125, + "logps/rejected": -428.6034240722656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8293720483779907, + "rewards/margins": 7.681615829467773, + "rewards/rejected": -8.510988235473633, + "step": 4445 + }, + { + "epoch": 0.69, + "learning_rate": 1.0886457803828e-05, + "logits/chosen": -1.8653888702392578, + "logits/rejected": -3.0779638290405273, + "logps/chosen": -221.63027954101562, + "logps/rejected": -471.9419860839844, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.161830186843872, + "rewards/margins": 8.109657287597656, + "rewards/rejected": -9.27148723602295, + "step": 4446 + }, + { + "epoch": 0.69, + "learning_rate": 1.0885724363296851e-05, + "logits/chosen": -1.2288190126419067, + "logits/rejected": -2.813459873199463, + "logps/chosen": -135.23214721679688, + "logps/rejected": -434.1913146972656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2597440481185913, + "rewards/margins": 9.374788284301758, + "rewards/rejected": -10.63453197479248, + "step": 4447 + }, + { + "epoch": 0.69, + "learning_rate": 1.0884990922765703e-05, + "logits/chosen": -1.4172645807266235, + "logits/rejected": -2.8995437622070312, + "logps/chosen": -204.59912109375, + "logps/rejected": -383.43060302734375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2323956489562988, + "rewards/margins": 7.367751598358154, + "rewards/rejected": -8.600147247314453, + "step": 4448 + }, + { + "epoch": 0.69, + "learning_rate": 1.0884257482234555e-05, + "logits/chosen": -1.8929574489593506, + "logits/rejected": -2.6225497722625732, + "logps/chosen": -151.73948669433594, + "logps/rejected": -411.9685974121094, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6948328018188477, + "rewards/margins": 6.911869049072266, + "rewards/rejected": -8.606701850891113, + "step": 4449 + }, + { + "epoch": 0.69, + "learning_rate": 1.0883524041703409e-05, + "logits/chosen": -2.9940524101257324, + "logits/rejected": -3.0824906826019287, + "logps/chosen": -68.71205139160156, + "logps/rejected": -214.95114135742188, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3474273681640625, + "rewards/margins": 4.862704753875732, + "rewards/rejected": -6.210132122039795, + "step": 4450 + }, + { + "epoch": 0.69, + "learning_rate": 1.088279060117226e-05, + "logits/chosen": -1.727031946182251, + "logits/rejected": -3.076526641845703, + "logps/chosen": -220.87835693359375, + "logps/rejected": -329.3017272949219, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7384010553359985, + "rewards/margins": 7.917187690734863, + "rewards/rejected": -8.655588150024414, + "step": 4451 + }, + { + "epoch": 0.69, + "learning_rate": 1.0882057160641112e-05, + "logits/chosen": -1.7084307670593262, + "logits/rejected": -2.990933418273926, + "logps/chosen": -111.20692443847656, + "logps/rejected": -337.2468566894531, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1707134246826172, + "rewards/margins": 6.968137264251709, + "rewards/rejected": -8.138851165771484, + "step": 4452 + }, + { + "epoch": 0.69, + "learning_rate": 1.0881323720109964e-05, + "logits/chosen": -2.7600462436676025, + "logits/rejected": -2.5694808959960938, + "logps/chosen": -145.25991821289062, + "logps/rejected": -185.3924560546875, + "loss": 3.0667, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.145412921905518, + "rewards/margins": -0.10259890556335449, + "rewards/rejected": -4.042813777923584, + "step": 4453 + }, + { + "epoch": 0.69, + "learning_rate": 1.0880590279578816e-05, + "logits/chosen": -2.2676494121551514, + "logits/rejected": -2.675384998321533, + "logps/chosen": -127.81409454345703, + "logps/rejected": -324.0501708984375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.826975703239441, + "rewards/margins": 7.180638313293457, + "rewards/rejected": -9.007614135742188, + "step": 4454 + }, + { + "epoch": 0.69, + "learning_rate": 1.0879856839047668e-05, + "logits/chosen": -2.9503419399261475, + "logits/rejected": -2.0235209465026855, + "logps/chosen": -493.9052734375, + "logps/rejected": -329.92950439453125, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1856101751327515, + "rewards/margins": 5.5004191398620605, + "rewards/rejected": -6.686029434204102, + "step": 4455 + }, + { + "epoch": 0.69, + "learning_rate": 1.087912339851652e-05, + "logits/chosen": -1.3025866746902466, + "logits/rejected": -2.9281153678894043, + "logps/chosen": -79.40117645263672, + "logps/rejected": -410.46044921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7482681274414062, + "rewards/margins": 10.69007682800293, + "rewards/rejected": -12.438344955444336, + "step": 4456 + }, + { + "epoch": 0.69, + "learning_rate": 1.0878389957985372e-05, + "logits/chosen": -2.028806686401367, + "logits/rejected": -3.0358002185821533, + "logps/chosen": -107.3359146118164, + "logps/rejected": -394.9964904785156, + "loss": 0.9378, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.604818344116211, + "rewards/margins": 2.969529628753662, + "rewards/rejected": -6.574347496032715, + "step": 4457 + }, + { + "epoch": 0.69, + "learning_rate": 1.0877656517454225e-05, + "logits/chosen": -2.8520264625549316, + "logits/rejected": -2.633209705352783, + "logps/chosen": -254.061767578125, + "logps/rejected": -174.15455627441406, + "loss": 4.6428, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.223917484283447, + "rewards/margins": -1.9603822231292725, + "rewards/rejected": -4.263535022735596, + "step": 4458 + }, + { + "epoch": 0.69, + "learning_rate": 1.0876923076923077e-05, + "logits/chosen": -1.9958522319793701, + "logits/rejected": -2.908323287963867, + "logps/chosen": -61.657081604003906, + "logps/rejected": -192.21556091308594, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6033660769462585, + "rewards/margins": 5.600554466247559, + "rewards/rejected": -6.203920364379883, + "step": 4459 + }, + { + "epoch": 0.69, + "learning_rate": 1.087618963639193e-05, + "logits/chosen": -3.104597568511963, + "logits/rejected": -3.029676914215088, + "logps/chosen": -199.9998779296875, + "logps/rejected": -223.3214111328125, + "loss": 2.273, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4214425086975098, + "rewards/margins": 1.6654298305511475, + "rewards/rejected": -5.086872100830078, + "step": 4460 + }, + { + "epoch": 0.69, + "learning_rate": 1.0875456195860781e-05, + "logits/chosen": -2.7438693046569824, + "logits/rejected": -3.0326621532440186, + "logps/chosen": -272.36328125, + "logps/rejected": -647.0880737304688, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3471481800079346, + "rewards/margins": 8.767674446105957, + "rewards/rejected": -10.114822387695312, + "step": 4461 + }, + { + "epoch": 0.69, + "learning_rate": 1.0874722755329633e-05, + "logits/chosen": -2.353773832321167, + "logits/rejected": -3.106889486312866, + "logps/chosen": -135.0745086669922, + "logps/rejected": -176.0881805419922, + "loss": 1.301, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.0005245208740234, + "rewards/margins": 2.279984474182129, + "rewards/rejected": -4.280508995056152, + "step": 4462 + }, + { + "epoch": 0.69, + "learning_rate": 1.0873989314798485e-05, + "logits/chosen": -3.084357738494873, + "logits/rejected": -2.1698150634765625, + "logps/chosen": -249.38748168945312, + "logps/rejected": -183.63523864746094, + "loss": 4.4428, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.924780368804932, + "rewards/margins": -1.967592477798462, + "rewards/rejected": -2.9571878910064697, + "step": 4463 + }, + { + "epoch": 0.69, + "learning_rate": 1.0873255874267337e-05, + "logits/chosen": -2.8890421390533447, + "logits/rejected": -3.00294828414917, + "logps/chosen": -267.7771301269531, + "logps/rejected": -420.26702880859375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3627815246582031, + "rewards/margins": 7.556987762451172, + "rewards/rejected": -7.919769287109375, + "step": 4464 + }, + { + "epoch": 0.69, + "learning_rate": 1.0872522433736189e-05, + "logits/chosen": -2.996610164642334, + "logits/rejected": -3.0320534706115723, + "logps/chosen": -141.39393615722656, + "logps/rejected": -96.21481323242188, + "loss": 2.7792, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3289575576782227, + "rewards/margins": -0.29125404357910156, + "rewards/rejected": -3.037703514099121, + "step": 4465 + }, + { + "epoch": 0.69, + "learning_rate": 1.087178899320504e-05, + "logits/chosen": -2.1060452461242676, + "logits/rejected": -2.82999587059021, + "logps/chosen": -194.75030517578125, + "logps/rejected": -448.771728515625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2088475227355957, + "rewards/margins": 7.723223686218262, + "rewards/rejected": -8.932071685791016, + "step": 4466 + }, + { + "epoch": 0.69, + "learning_rate": 1.0871055552673894e-05, + "logits/chosen": -1.6863120794296265, + "logits/rejected": -2.806560516357422, + "logps/chosen": -212.9138641357422, + "logps/rejected": -558.6370849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24435731768608093, + "rewards/margins": 10.070283889770508, + "rewards/rejected": -10.314640998840332, + "step": 4467 + }, + { + "epoch": 0.69, + "learning_rate": 1.0870322112142746e-05, + "logits/chosen": -2.5838115215301514, + "logits/rejected": -2.8983678817749023, + "logps/chosen": -104.22667694091797, + "logps/rejected": -206.927490234375, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28492283821105957, + "rewards/margins": 5.762007236480713, + "rewards/rejected": -6.046930313110352, + "step": 4468 + }, + { + "epoch": 0.7, + "learning_rate": 1.08695886716116e-05, + "logits/chosen": -3.170433282852173, + "logits/rejected": -3.217494010925293, + "logps/chosen": -25.095211029052734, + "logps/rejected": -124.65449523925781, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28274446725845337, + "rewards/margins": 5.12628173828125, + "rewards/rejected": -5.409026145935059, + "step": 4469 + }, + { + "epoch": 0.7, + "learning_rate": 1.0868855231080451e-05, + "logits/chosen": -2.951409339904785, + "logits/rejected": -2.6537985801696777, + "logps/chosen": -388.55169677734375, + "logps/rejected": -472.5206604003906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9597798585891724, + "rewards/margins": 8.932575225830078, + "rewards/rejected": -10.892354965209961, + "step": 4470 + }, + { + "epoch": 0.7, + "learning_rate": 1.0868121790549303e-05, + "logits/chosen": -2.55120587348938, + "logits/rejected": -3.037243604660034, + "logps/chosen": -677.6768798828125, + "logps/rejected": -668.2608032226562, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1073317527770996, + "rewards/margins": 7.075376987457275, + "rewards/rejected": -9.182708740234375, + "step": 4471 + }, + { + "epoch": 0.7, + "learning_rate": 1.0867388350018155e-05, + "logits/chosen": -3.0804171562194824, + "logits/rejected": -2.225700616836548, + "logps/chosen": -479.72894287109375, + "logps/rejected": -327.1949462890625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3525375425815582, + "rewards/margins": 8.043646812438965, + "rewards/rejected": -8.396184921264648, + "step": 4472 + }, + { + "epoch": 0.7, + "learning_rate": 1.0866654909487007e-05, + "logits/chosen": -1.9713255167007446, + "logits/rejected": -2.9460866451263428, + "logps/chosen": -116.50889587402344, + "logps/rejected": -286.3383483886719, + "loss": 0.0542, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5121424198150635, + "rewards/margins": 5.733349800109863, + "rewards/rejected": -7.245491981506348, + "step": 4473 + }, + { + "epoch": 0.7, + "learning_rate": 1.0865921468955859e-05, + "logits/chosen": -2.697580099105835, + "logits/rejected": -3.1158740520477295, + "logps/chosen": -38.95499801635742, + "logps/rejected": -218.029052734375, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9462476968765259, + "rewards/margins": 5.920440673828125, + "rewards/rejected": -6.8666887283325195, + "step": 4474 + }, + { + "epoch": 0.7, + "learning_rate": 1.086518802842471e-05, + "logits/chosen": -3.0647928714752197, + "logits/rejected": -2.3315083980560303, + "logps/chosen": -325.66070556640625, + "logps/rejected": -194.5928192138672, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9858169555664062, + "rewards/margins": 7.221551895141602, + "rewards/rejected": -8.207368850708008, + "step": 4475 + }, + { + "epoch": 0.7, + "learning_rate": 1.0864454587893564e-05, + "logits/chosen": -2.97910213470459, + "logits/rejected": -3.024240016937256, + "logps/chosen": -181.58641052246094, + "logps/rejected": -181.16064453125, + "loss": 1.9462, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6601860523223877, + "rewards/margins": 0.5108059644699097, + "rewards/rejected": -4.170991897583008, + "step": 4476 + }, + { + "epoch": 0.7, + "learning_rate": 1.0863721147362416e-05, + "logits/chosen": -2.9781901836395264, + "logits/rejected": -2.177279233932495, + "logps/chosen": -387.53961181640625, + "logps/rejected": -218.72018432617188, + "loss": 1.0533, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.303730010986328, + "rewards/margins": 0.4721606969833374, + "rewards/rejected": -3.775890827178955, + "step": 4477 + }, + { + "epoch": 0.7, + "learning_rate": 1.0862987706831268e-05, + "logits/chosen": -2.6767265796661377, + "logits/rejected": -3.0102248191833496, + "logps/chosen": -235.87350463867188, + "logps/rejected": -194.70591735839844, + "loss": 2.5441, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.435086727142334, + "rewards/margins": 0.7238976955413818, + "rewards/rejected": -5.158984661102295, + "step": 4478 + }, + { + "epoch": 0.7, + "learning_rate": 1.086225426630012e-05, + "logits/chosen": -2.543400287628174, + "logits/rejected": -3.063565492630005, + "logps/chosen": -634.4417114257812, + "logps/rejected": -537.0845947265625, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0148682594299316, + "rewards/margins": 5.933095455169678, + "rewards/rejected": -6.947963714599609, + "step": 4479 + }, + { + "epoch": 0.7, + "learning_rate": 1.0861520825768972e-05, + "logits/chosen": -2.8723554611206055, + "logits/rejected": -3.25984787940979, + "logps/chosen": -602.4725341796875, + "logps/rejected": -1026.29296875, + "loss": 4.5803, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.335603713989258, + "rewards/margins": -1.7936477661132812, + "rewards/rejected": -2.5419554710388184, + "step": 4480 + }, + { + "epoch": 0.7, + "learning_rate": 1.0860787385237824e-05, + "logits/chosen": -2.8187410831451416, + "logits/rejected": -3.1896255016326904, + "logps/chosen": -72.2226791381836, + "logps/rejected": -196.13839721679688, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3369730710983276, + "rewards/margins": 5.477045059204102, + "rewards/rejected": -6.814018249511719, + "step": 4481 + }, + { + "epoch": 0.7, + "learning_rate": 1.0860053944706676e-05, + "logits/chosen": -2.473560094833374, + "logits/rejected": -2.836810827255249, + "logps/chosen": -380.6754455566406, + "logps/rejected": -618.4788818359375, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.100121259689331, + "rewards/margins": 8.020841598510742, + "rewards/rejected": -9.120963096618652, + "step": 4482 + }, + { + "epoch": 0.7, + "learning_rate": 1.0859320504175527e-05, + "logits/chosen": -2.524221897125244, + "logits/rejected": -3.0693135261535645, + "logps/chosen": -163.93218994140625, + "logps/rejected": -286.97479248046875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5296112298965454, + "rewards/margins": 6.232049942016602, + "rewards/rejected": -6.761660575866699, + "step": 4483 + }, + { + "epoch": 0.7, + "learning_rate": 1.085858706364438e-05, + "logits/chosen": -2.7616119384765625, + "logits/rejected": -3.146886110305786, + "logps/chosen": -242.60791015625, + "logps/rejected": -251.67666625976562, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1794731616973877, + "rewards/margins": 4.884493827819824, + "rewards/rejected": -6.063967227935791, + "step": 4484 + }, + { + "epoch": 0.7, + "learning_rate": 1.0857853623113233e-05, + "logits/chosen": -3.0051212310791016, + "logits/rejected": -3.1797659397125244, + "logps/chosen": -143.33970642089844, + "logps/rejected": -260.596435546875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0634164810180664, + "rewards/margins": 7.332939147949219, + "rewards/rejected": -8.396355628967285, + "step": 4485 + }, + { + "epoch": 0.7, + "learning_rate": 1.0857120182582085e-05, + "logits/chosen": -2.5100865364074707, + "logits/rejected": -2.884836435317993, + "logps/chosen": -205.13526916503906, + "logps/rejected": -232.82339477539062, + "loss": 3.539, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.988258123397827, + "rewards/margins": 0.9535391330718994, + "rewards/rejected": -4.941797256469727, + "step": 4486 + }, + { + "epoch": 0.7, + "learning_rate": 1.0856386742050937e-05, + "logits/chosen": -1.8568847179412842, + "logits/rejected": -2.251776933670044, + "logps/chosen": -116.1281967163086, + "logps/rejected": -159.8915252685547, + "loss": 1.2071, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.17398738861084, + "rewards/margins": 2.530430555343628, + "rewards/rejected": -4.704417705535889, + "step": 4487 + }, + { + "epoch": 0.7, + "learning_rate": 1.0855653301519789e-05, + "logits/chosen": -2.9995813369750977, + "logits/rejected": -3.0644543170928955, + "logps/chosen": -454.9309997558594, + "logps/rejected": -478.67327880859375, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.614326000213623, + "rewards/margins": 5.3367815017700195, + "rewards/rejected": -7.951107978820801, + "step": 4488 + }, + { + "epoch": 0.7, + "learning_rate": 1.085491986098864e-05, + "logits/chosen": -2.4423727989196777, + "logits/rejected": -2.963684320449829, + "logps/chosen": -205.58248901367188, + "logps/rejected": -428.90875244140625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2467788457870483, + "rewards/margins": 6.066572666168213, + "rewards/rejected": -7.313351631164551, + "step": 4489 + }, + { + "epoch": 0.7, + "learning_rate": 1.0854186420457492e-05, + "logits/chosen": -2.947033405303955, + "logits/rejected": -2.0982017517089844, + "logps/chosen": -798.8335571289062, + "logps/rejected": -584.0291137695312, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0371017456054688, + "rewards/margins": 8.549415588378906, + "rewards/rejected": -9.586517333984375, + "step": 4490 + }, + { + "epoch": 0.7, + "learning_rate": 1.0853452979926344e-05, + "logits/chosen": -0.67523592710495, + "logits/rejected": -2.920530319213867, + "logps/chosen": -143.69590759277344, + "logps/rejected": -494.2649841308594, + "loss": 2.6261, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9550576210021973, + "rewards/margins": 0.24712133407592773, + "rewards/rejected": -3.202178955078125, + "step": 4491 + }, + { + "epoch": 0.7, + "learning_rate": 1.0852719539395196e-05, + "logits/chosen": -3.19096040725708, + "logits/rejected": -2.0243232250213623, + "logps/chosen": -357.4567565917969, + "logps/rejected": -305.42816162109375, + "loss": 2.5172, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6228294372558594, + "rewards/margins": 1.1961874961853027, + "rewards/rejected": -4.81901741027832, + "step": 4492 + }, + { + "epoch": 0.7, + "learning_rate": 1.0851986098864048e-05, + "logits/chosen": -2.000173330307007, + "logits/rejected": -3.0277206897735596, + "logps/chosen": -95.40579223632812, + "logps/rejected": -388.7962646484375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8802768588066101, + "rewards/margins": 7.19371223449707, + "rewards/rejected": -8.073988914489746, + "step": 4493 + }, + { + "epoch": 0.7, + "learning_rate": 1.0851252658332902e-05, + "logits/chosen": -2.931940793991089, + "logits/rejected": -3.0301597118377686, + "logps/chosen": -176.47166442871094, + "logps/rejected": -137.73951721191406, + "loss": 3.1628, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.110006809234619, + "rewards/margins": -2.5043258666992188, + "rewards/rejected": -2.6056809425354004, + "step": 4494 + }, + { + "epoch": 0.7, + "learning_rate": 1.0850519217801753e-05, + "logits/chosen": -3.110590934753418, + "logits/rejected": -1.9173043966293335, + "logps/chosen": -273.55780029296875, + "logps/rejected": -213.5263671875, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17719420790672302, + "rewards/margins": 4.497951984405518, + "rewards/rejected": -4.675146102905273, + "step": 4495 + }, + { + "epoch": 0.7, + "learning_rate": 1.0849785777270605e-05, + "logits/chosen": -0.8807496428489685, + "logits/rejected": -1.6314525604248047, + "logps/chosen": -152.3670654296875, + "logps/rejected": -217.8012237548828, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3312351703643799, + "rewards/margins": 6.057198524475098, + "rewards/rejected": -7.388433933258057, + "step": 4496 + }, + { + "epoch": 0.7, + "learning_rate": 1.0849052336739457e-05, + "logits/chosen": -3.019620180130005, + "logits/rejected": -3.035687208175659, + "logps/chosen": -100.76620483398438, + "logps/rejected": -153.9076690673828, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002124011516571045, + "rewards/margins": 6.245062828063965, + "rewards/rejected": -6.242938995361328, + "step": 4497 + }, + { + "epoch": 0.7, + "learning_rate": 1.0848318896208309e-05, + "logits/chosen": -2.386716604232788, + "logits/rejected": -2.2268872261047363, + "logps/chosen": -569.953369140625, + "logps/rejected": -388.8133544921875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2272109985351562, + "rewards/margins": 6.991827964782715, + "rewards/rejected": -8.219038963317871, + "step": 4498 + }, + { + "epoch": 0.7, + "learning_rate": 1.0847585455677161e-05, + "logits/chosen": -3.1662511825561523, + "logits/rejected": -2.7791860103607178, + "logps/chosen": -198.05520629882812, + "logps/rejected": -43.22382354736328, + "loss": 1.5072, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0490357875823975, + "rewards/margins": -0.08616971969604492, + "rewards/rejected": -2.9628660678863525, + "step": 4499 + }, + { + "epoch": 0.7, + "learning_rate": 1.0846852015146013e-05, + "logits/chosen": -2.9971537590026855, + "logits/rejected": -3.216846227645874, + "logps/chosen": -86.17298889160156, + "logps/rejected": -130.26596069335938, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.325451135635376, + "rewards/margins": 3.9938392639160156, + "rewards/rejected": -5.3192901611328125, + "step": 4500 + }, + { + "epoch": 0.7, + "learning_rate": 1.0846118574614866e-05, + "logits/chosen": -2.650179624557495, + "logits/rejected": -3.096132516860962, + "logps/chosen": -93.58065795898438, + "logps/rejected": -245.22088623046875, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7331230640411377, + "rewards/margins": 4.46072244644165, + "rewards/rejected": -6.193845748901367, + "step": 4501 + }, + { + "epoch": 0.7, + "learning_rate": 1.0845385134083718e-05, + "logits/chosen": -2.520869255065918, + "logits/rejected": -3.210574150085449, + "logps/chosen": -106.64501190185547, + "logps/rejected": -299.4921875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7683655023574829, + "rewards/margins": 6.93606424331665, + "rewards/rejected": -7.704429626464844, + "step": 4502 + }, + { + "epoch": 0.7, + "learning_rate": 1.0844651693552572e-05, + "logits/chosen": -3.0407803058624268, + "logits/rejected": -2.484757900238037, + "logps/chosen": -162.71585083007812, + "logps/rejected": -159.41192626953125, + "loss": 2.103, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5948567390441895, + "rewards/margins": 1.813711166381836, + "rewards/rejected": -5.408567905426025, + "step": 4503 + }, + { + "epoch": 0.7, + "learning_rate": 1.0843918253021424e-05, + "logits/chosen": -1.679658055305481, + "logits/rejected": -3.00343918800354, + "logps/chosen": -94.47769165039062, + "logps/rejected": -238.15467834472656, + "loss": 0.0274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6377069354057312, + "rewards/margins": 4.9102091789245605, + "rewards/rejected": -5.547916412353516, + "step": 4504 + }, + { + "epoch": 0.7, + "learning_rate": 1.0843184812490276e-05, + "logits/chosen": -2.9442977905273438, + "logits/rejected": -1.6979745626449585, + "logps/chosen": -215.06405639648438, + "logps/rejected": -243.365234375, + "loss": 2.1355, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5386781692504883, + "rewards/margins": 1.1705706119537354, + "rewards/rejected": -4.7092485427856445, + "step": 4505 + }, + { + "epoch": 0.7, + "learning_rate": 1.0842451371959127e-05, + "logits/chosen": -2.3847458362579346, + "logits/rejected": -3.163773536682129, + "logps/chosen": -141.2043914794922, + "logps/rejected": -334.027587890625, + "loss": 0.0847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9487498998641968, + "rewards/margins": 4.612053394317627, + "rewards/rejected": -5.560803413391113, + "step": 4506 + }, + { + "epoch": 0.7, + "learning_rate": 1.084171793142798e-05, + "logits/chosen": -3.13718581199646, + "logits/rejected": -2.3272297382354736, + "logps/chosen": -889.1881103515625, + "logps/rejected": -374.2351989746094, + "loss": 2.4906, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2475037574768066, + "rewards/margins": 1.155717134475708, + "rewards/rejected": -4.403221130371094, + "step": 4507 + }, + { + "epoch": 0.7, + "learning_rate": 1.0840984490896831e-05, + "logits/chosen": -2.9902138710021973, + "logits/rejected": -2.343449831008911, + "logps/chosen": -443.87005615234375, + "logps/rejected": -462.58697509765625, + "loss": 2.7982, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.945539951324463, + "rewards/margins": 0.6253242492675781, + "rewards/rejected": -5.570864200592041, + "step": 4508 + }, + { + "epoch": 0.7, + "learning_rate": 1.0840251050365683e-05, + "logits/chosen": -1.6530839204788208, + "logits/rejected": -2.69211483001709, + "logps/chosen": -166.8930206298828, + "logps/rejected": -487.0223083496094, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.91571843624115, + "rewards/margins": 7.3667449951171875, + "rewards/rejected": -9.282463073730469, + "step": 4509 + }, + { + "epoch": 0.7, + "learning_rate": 1.0839517609834535e-05, + "logits/chosen": -2.4136064052581787, + "logits/rejected": -2.789391279220581, + "logps/chosen": -314.50738525390625, + "logps/rejected": -490.1624450683594, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8871047496795654, + "rewards/margins": 5.785456657409668, + "rewards/rejected": -7.6725616455078125, + "step": 4510 + }, + { + "epoch": 0.7, + "learning_rate": 1.0838784169303387e-05, + "logits/chosen": -2.765336275100708, + "logits/rejected": -1.9049988985061646, + "logps/chosen": -208.7879638671875, + "logps/rejected": -236.73150634765625, + "loss": 3.1366, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.143347263336182, + "rewards/margins": 1.2809464931488037, + "rewards/rejected": -5.424293518066406, + "step": 4511 + }, + { + "epoch": 0.7, + "learning_rate": 1.083805072877224e-05, + "logits/chosen": -3.0649795532226562, + "logits/rejected": -2.9207494258880615, + "logps/chosen": -124.29934692382812, + "logps/rejected": -82.81053924560547, + "loss": 1.2379, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.342594623565674, + "rewards/margins": 0.4625285863876343, + "rewards/rejected": -2.8051233291625977, + "step": 4512 + }, + { + "epoch": 0.7, + "learning_rate": 1.0837317288241092e-05, + "logits/chosen": -2.5982470512390137, + "logits/rejected": -2.8686251640319824, + "logps/chosen": -48.0576286315918, + "logps/rejected": -223.8800048828125, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2596056461334229, + "rewards/margins": 4.573419570922852, + "rewards/rejected": -5.833024978637695, + "step": 4513 + }, + { + "epoch": 0.7, + "learning_rate": 1.0836583847709944e-05, + "logits/chosen": -2.221278190612793, + "logits/rejected": -3.0327351093292236, + "logps/chosen": -85.51795959472656, + "logps/rejected": -359.22821044921875, + "loss": 0.0407, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2520102262496948, + "rewards/margins": 5.324410438537598, + "rewards/rejected": -6.576420783996582, + "step": 4514 + }, + { + "epoch": 0.7, + "learning_rate": 1.0835850407178796e-05, + "logits/chosen": -1.5562666654586792, + "logits/rejected": -2.61753511428833, + "logps/chosen": -170.5765380859375, + "logps/rejected": -309.78350830078125, + "loss": 0.0357, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0541614294052124, + "rewards/margins": 6.031382083892822, + "rewards/rejected": -7.085543632507324, + "step": 4515 + }, + { + "epoch": 0.7, + "learning_rate": 1.0835116966647648e-05, + "logits/chosen": -2.9803173542022705, + "logits/rejected": -2.0297882556915283, + "logps/chosen": -335.57464599609375, + "logps/rejected": -391.7906799316406, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42545586824417114, + "rewards/margins": 6.399393081665039, + "rewards/rejected": -6.8248491287231445, + "step": 4516 + }, + { + "epoch": 0.7, + "learning_rate": 1.08343835261165e-05, + "logits/chosen": -1.9328378438949585, + "logits/rejected": -3.199110984802246, + "logps/chosen": -89.99649047851562, + "logps/rejected": -376.7225341796875, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7107877731323242, + "rewards/margins": 3.7871155738830566, + "rewards/rejected": -5.497903347015381, + "step": 4517 + }, + { + "epoch": 0.7, + "learning_rate": 1.0833650085585352e-05, + "logits/chosen": -3.1325862407684326, + "logits/rejected": -2.799649477005005, + "logps/chosen": -467.03857421875, + "logps/rejected": -243.77914428710938, + "loss": 3.2821, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.533180236816406, + "rewards/margins": -1.8628278970718384, + "rewards/rejected": -2.6703522205352783, + "step": 4518 + }, + { + "epoch": 0.7, + "learning_rate": 1.0832916645054204e-05, + "logits/chosen": -1.763241171836853, + "logits/rejected": -2.6365203857421875, + "logps/chosen": -66.6293716430664, + "logps/rejected": -178.83587646484375, + "loss": 1.083, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6612462997436523, + "rewards/margins": 2.9561681747436523, + "rewards/rejected": -5.617414474487305, + "step": 4519 + }, + { + "epoch": 0.7, + "learning_rate": 1.0832183204523055e-05, + "logits/chosen": -0.9771867394447327, + "logits/rejected": -2.923363447189331, + "logps/chosen": -71.2948989868164, + "logps/rejected": -366.345458984375, + "loss": 0.0734, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9069037437438965, + "rewards/margins": 4.052803993225098, + "rewards/rejected": -5.959707736968994, + "step": 4520 + }, + { + "epoch": 0.7, + "learning_rate": 1.0831449763991909e-05, + "logits/chosen": -3.2263989448547363, + "logits/rejected": -3.426102876663208, + "logps/chosen": -60.38520812988281, + "logps/rejected": -193.31491088867188, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8551087379455566, + "rewards/margins": 4.582975387573242, + "rewards/rejected": -6.438083648681641, + "step": 4521 + }, + { + "epoch": 0.7, + "learning_rate": 1.0830716323460761e-05, + "logits/chosen": -2.8965907096862793, + "logits/rejected": -1.8695831298828125, + "logps/chosen": -185.031494140625, + "logps/rejected": -53.023345947265625, + "loss": 3.6627, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.747884750366211, + "rewards/margins": -3.6224265098571777, + "rewards/rejected": -1.1254582405090332, + "step": 4522 + }, + { + "epoch": 0.7, + "learning_rate": 1.0829982882929613e-05, + "logits/chosen": -2.537522315979004, + "logits/rejected": -3.2351691722869873, + "logps/chosen": -236.7667999267578, + "logps/rejected": -418.35174560546875, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3029694557189941, + "rewards/margins": 5.088198661804199, + "rewards/rejected": -6.391168594360352, + "step": 4523 + }, + { + "epoch": 0.7, + "learning_rate": 1.0829249442398465e-05, + "logits/chosen": -2.721312999725342, + "logits/rejected": -2.7969977855682373, + "logps/chosen": -96.47559356689453, + "logps/rejected": -190.6568603515625, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.843539834022522, + "rewards/margins": 6.754485130310059, + "rewards/rejected": -7.598024368286133, + "step": 4524 + }, + { + "epoch": 0.7, + "learning_rate": 1.0828516001867317e-05, + "logits/chosen": -2.527289867401123, + "logits/rejected": -1.9840357303619385, + "logps/chosen": -536.1376953125, + "logps/rejected": -477.77978515625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8412662744522095, + "rewards/margins": 7.259825229644775, + "rewards/rejected": -9.101091384887695, + "step": 4525 + }, + { + "epoch": 0.7, + "learning_rate": 1.0827782561336168e-05, + "logits/chosen": -0.6763880252838135, + "logits/rejected": -2.262378215789795, + "logps/chosen": -140.76528930664062, + "logps/rejected": -357.9799499511719, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.413778305053711, + "rewards/margins": 4.3082275390625, + "rewards/rejected": -5.722005844116211, + "step": 4526 + }, + { + "epoch": 0.7, + "learning_rate": 1.082704912080502e-05, + "logits/chosen": -2.7640206813812256, + "logits/rejected": -3.186509370803833, + "logps/chosen": -113.20077514648438, + "logps/rejected": -152.4438934326172, + "loss": 1.2365, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3615729808807373, + "rewards/margins": 1.5816702842712402, + "rewards/rejected": -3.9432432651519775, + "step": 4527 + }, + { + "epoch": 0.7, + "learning_rate": 1.0826315680273872e-05, + "logits/chosen": -3.014622211456299, + "logits/rejected": -3.2039616107940674, + "logps/chosen": -116.36898040771484, + "logps/rejected": -144.01571655273438, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17112618684768677, + "rewards/margins": 3.781844139099121, + "rewards/rejected": -3.952970504760742, + "step": 4528 + }, + { + "epoch": 0.7, + "learning_rate": 1.0825582239742724e-05, + "logits/chosen": -2.2244038581848145, + "logits/rejected": -2.8643672466278076, + "logps/chosen": -43.17195129394531, + "logps/rejected": -197.74972534179688, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.838158369064331, + "rewards/margins": 4.507585525512695, + "rewards/rejected": -5.345743656158447, + "step": 4529 + }, + { + "epoch": 0.7, + "learning_rate": 1.0824848799211578e-05, + "logits/chosen": -2.803755760192871, + "logits/rejected": -2.8369333744049072, + "logps/chosen": -393.36541748046875, + "logps/rejected": -469.82061767578125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2142914533615112, + "rewards/margins": 6.2374372482299805, + "rewards/rejected": -7.451728820800781, + "step": 4530 + }, + { + "epoch": 0.7, + "learning_rate": 1.082411535868043e-05, + "logits/chosen": -3.0794100761413574, + "logits/rejected": -3.1644468307495117, + "logps/chosen": -391.0341796875, + "logps/rejected": -380.7018127441406, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6400696039199829, + "rewards/margins": 4.744370937347412, + "rewards/rejected": -5.384440898895264, + "step": 4531 + }, + { + "epoch": 0.7, + "learning_rate": 1.0823381918149281e-05, + "logits/chosen": -2.5280888080596924, + "logits/rejected": -1.7206408977508545, + "logps/chosen": -404.58441162109375, + "logps/rejected": -263.3507080078125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5373340845108032, + "rewards/margins": 7.428832054138184, + "rewards/rejected": -7.966166019439697, + "step": 4532 + }, + { + "epoch": 0.7, + "learning_rate": 1.0822648477618133e-05, + "logits/chosen": -2.939887523651123, + "logits/rejected": -2.685781955718994, + "logps/chosen": -398.4387512207031, + "logps/rejected": -530.49609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9596114754676819, + "rewards/margins": 9.252120018005371, + "rewards/rejected": -10.21173095703125, + "step": 4533 + }, + { + "epoch": 0.71, + "learning_rate": 1.0821915037086985e-05, + "logits/chosen": -1.4513481855392456, + "logits/rejected": -3.1529483795166016, + "logps/chosen": -192.8538360595703, + "logps/rejected": -681.2752075195312, + "loss": 0.0425, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7676079273223877, + "rewards/margins": 3.2878363132476807, + "rewards/rejected": -4.055444240570068, + "step": 4534 + }, + { + "epoch": 0.71, + "learning_rate": 1.0821181596555839e-05, + "logits/chosen": -2.0454161167144775, + "logits/rejected": -3.0138134956359863, + "logps/chosen": -137.31346130371094, + "logps/rejected": -326.69921875, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.316218614578247, + "rewards/margins": 5.849432945251465, + "rewards/rejected": -7.165651321411133, + "step": 4535 + }, + { + "epoch": 0.71, + "learning_rate": 1.082044815602469e-05, + "logits/chosen": -3.05521559715271, + "logits/rejected": -2.043537139892578, + "logps/chosen": -494.71868896484375, + "logps/rejected": -370.5351257324219, + "loss": 2.1489, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9710745811462402, + "rewards/margins": 2.509965658187866, + "rewards/rejected": -5.4810404777526855, + "step": 4536 + }, + { + "epoch": 0.71, + "learning_rate": 1.0819714715493542e-05, + "logits/chosen": -3.0250163078308105, + "logits/rejected": -2.2170865535736084, + "logps/chosen": -246.25741577148438, + "logps/rejected": -158.74716186523438, + "loss": 0.7455, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.0220818519592285, + "rewards/margins": 2.2223846912384033, + "rewards/rejected": -4.244466304779053, + "step": 4537 + }, + { + "epoch": 0.71, + "learning_rate": 1.0818981274962394e-05, + "logits/chosen": -2.565809726715088, + "logits/rejected": -2.7913293838500977, + "logps/chosen": -338.40716552734375, + "logps/rejected": -371.6146545410156, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0279510021209717, + "rewards/margins": 6.541722297668457, + "rewards/rejected": -7.569673538208008, + "step": 4538 + }, + { + "epoch": 0.71, + "learning_rate": 1.0818247834431248e-05, + "logits/chosen": -3.2042627334594727, + "logits/rejected": -2.4594600200653076, + "logps/chosen": -291.6434631347656, + "logps/rejected": -74.59791564941406, + "loss": 2.7541, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.252599716186523, + "rewards/margins": -2.2975351810455322, + "rewards/rejected": -1.9550644159317017, + "step": 4539 + }, + { + "epoch": 0.71, + "learning_rate": 1.08175143939001e-05, + "logits/chosen": -2.137671947479248, + "logits/rejected": -2.869123935699463, + "logps/chosen": -161.8237762451172, + "logps/rejected": -234.1651611328125, + "loss": 1.6984, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.494374990463257, + "rewards/margins": 3.325603485107422, + "rewards/rejected": -5.8199782371521, + "step": 4540 + }, + { + "epoch": 0.71, + "learning_rate": 1.0816780953368952e-05, + "logits/chosen": -2.630784034729004, + "logits/rejected": -3.160715103149414, + "logps/chosen": -349.415283203125, + "logps/rejected": -391.5805969238281, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41441500186920166, + "rewards/margins": 5.5253448486328125, + "rewards/rejected": -5.939759731292725, + "step": 4541 + }, + { + "epoch": 0.71, + "learning_rate": 1.0816047512837804e-05, + "logits/chosen": -3.0337977409362793, + "logits/rejected": -2.9152820110321045, + "logps/chosen": -99.3961181640625, + "logps/rejected": -347.65338134765625, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.46470308303833, + "rewards/margins": 6.437224388122559, + "rewards/rejected": -7.901927471160889, + "step": 4542 + }, + { + "epoch": 0.71, + "learning_rate": 1.0815314072306655e-05, + "logits/chosen": -1.8822822570800781, + "logits/rejected": -2.4589054584503174, + "logps/chosen": -115.24783325195312, + "logps/rejected": -155.8400115966797, + "loss": 0.0798, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.51104736328125, + "rewards/margins": 2.516746997833252, + "rewards/rejected": -4.027794361114502, + "step": 4543 + }, + { + "epoch": 0.71, + "learning_rate": 1.0814580631775507e-05, + "logits/chosen": -3.0191829204559326, + "logits/rejected": -2.9324684143066406, + "logps/chosen": -228.29440307617188, + "logps/rejected": -262.82220458984375, + "loss": 2.7643, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.048308372497559, + "rewards/margins": 1.147855281829834, + "rewards/rejected": -5.196163654327393, + "step": 4544 + }, + { + "epoch": 0.71, + "learning_rate": 1.081384719124436e-05, + "logits/chosen": -3.0316359996795654, + "logits/rejected": -2.2829272747039795, + "logps/chosen": -373.9532775878906, + "logps/rejected": -272.03558349609375, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8227131366729736, + "rewards/margins": 5.717890739440918, + "rewards/rejected": -6.5406036376953125, + "step": 4545 + }, + { + "epoch": 0.71, + "learning_rate": 1.0813113750713211e-05, + "logits/chosen": -3.2073075771331787, + "logits/rejected": -3.0097484588623047, + "logps/chosen": -224.33255004882812, + "logps/rejected": -252.44122314453125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7115761041641235, + "rewards/margins": 6.622562408447266, + "rewards/rejected": -7.334138870239258, + "step": 4546 + }, + { + "epoch": 0.71, + "learning_rate": 1.0812380310182063e-05, + "logits/chosen": -3.1351327896118164, + "logits/rejected": -2.114030599594116, + "logps/chosen": -255.6616973876953, + "logps/rejected": -226.37158203125, + "loss": 2.2434, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.554184675216675, + "rewards/margins": 1.3763458728790283, + "rewards/rejected": -4.930530548095703, + "step": 4547 + }, + { + "epoch": 0.71, + "learning_rate": 1.0811646869650916e-05, + "logits/chosen": -1.7159861326217651, + "logits/rejected": -3.072051763534546, + "logps/chosen": -176.36273193359375, + "logps/rejected": -279.9356689453125, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8674545288085938, + "rewards/margins": 5.855463027954102, + "rewards/rejected": -6.722917556762695, + "step": 4548 + }, + { + "epoch": 0.71, + "learning_rate": 1.0810913429119768e-05, + "logits/chosen": -3.026984930038452, + "logits/rejected": -2.130805492401123, + "logps/chosen": -390.8812255859375, + "logps/rejected": -304.46246337890625, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3764888048171997, + "rewards/margins": 5.160584449768066, + "rewards/rejected": -6.537073612213135, + "step": 4549 + }, + { + "epoch": 0.71, + "learning_rate": 1.081017998858862e-05, + "logits/chosen": -3.023446798324585, + "logits/rejected": -3.1756742000579834, + "logps/chosen": -147.84353637695312, + "logps/rejected": -199.04164123535156, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1307220458984375, + "rewards/margins": 5.747689247131348, + "rewards/rejected": -6.878411293029785, + "step": 4550 + }, + { + "epoch": 0.71, + "learning_rate": 1.0809446548057472e-05, + "logits/chosen": -2.578315258026123, + "logits/rejected": -2.9222090244293213, + "logps/chosen": -124.89581298828125, + "logps/rejected": -234.1807098388672, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3906590938568115, + "rewards/margins": 5.5459794998168945, + "rewards/rejected": -6.936638355255127, + "step": 4551 + }, + { + "epoch": 0.71, + "learning_rate": 1.0808713107526324e-05, + "logits/chosen": -2.4171817302703857, + "logits/rejected": -3.1549370288848877, + "logps/chosen": -73.09001159667969, + "logps/rejected": -253.9889678955078, + "loss": 0.0896, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.052032470703125, + "rewards/margins": 2.9040956497192383, + "rewards/rejected": -4.956128120422363, + "step": 4552 + }, + { + "epoch": 0.71, + "learning_rate": 1.0807979666995176e-05, + "logits/chosen": -2.7647528648376465, + "logits/rejected": -2.9660608768463135, + "logps/chosen": -156.77725219726562, + "logps/rejected": -163.38482666015625, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3259025514125824, + "rewards/margins": 4.458789825439453, + "rewards/rejected": -4.784692764282227, + "step": 4553 + }, + { + "epoch": 0.71, + "learning_rate": 1.0807246226464028e-05, + "logits/chosen": -3.099661350250244, + "logits/rejected": -2.1168534755706787, + "logps/chosen": -212.67694091796875, + "logps/rejected": -76.6826171875, + "loss": 2.4879, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.707820892333984, + "rewards/margins": -0.35198020935058594, + "rewards/rejected": -4.355840682983398, + "step": 4554 + }, + { + "epoch": 0.71, + "learning_rate": 1.080651278593288e-05, + "logits/chosen": -1.9285602569580078, + "logits/rejected": -3.0010929107666016, + "logps/chosen": -133.9912872314453, + "logps/rejected": -425.7577819824219, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7098031044006348, + "rewards/margins": 6.447803497314453, + "rewards/rejected": -8.157607078552246, + "step": 4555 + }, + { + "epoch": 0.71, + "learning_rate": 1.0805779345401733e-05, + "logits/chosen": -1.8170757293701172, + "logits/rejected": -3.1983118057250977, + "logps/chosen": -184.0821990966797, + "logps/rejected": -485.793212890625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0103721618652344, + "rewards/margins": 6.882472991943359, + "rewards/rejected": -8.892845153808594, + "step": 4556 + }, + { + "epoch": 0.71, + "learning_rate": 1.0805045904870585e-05, + "logits/chosen": -3.217294216156006, + "logits/rejected": -3.1766393184661865, + "logps/chosen": -201.27017211914062, + "logps/rejected": -290.8718566894531, + "loss": 0.0381, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2604040205478668, + "rewards/margins": 4.095937728881836, + "rewards/rejected": -4.35634183883667, + "step": 4557 + }, + { + "epoch": 0.71, + "learning_rate": 1.0804312464339437e-05, + "logits/chosen": -3.2023937702178955, + "logits/rejected": -2.8138697147369385, + "logps/chosen": -444.7665710449219, + "logps/rejected": -223.70091247558594, + "loss": 3.0609, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.876648187637329, + "rewards/margins": 0.872664213180542, + "rewards/rejected": -4.749312400817871, + "step": 4558 + }, + { + "epoch": 0.71, + "learning_rate": 1.0803579023808289e-05, + "logits/chosen": -2.3151469230651855, + "logits/rejected": -3.0222463607788086, + "logps/chosen": -185.05453491210938, + "logps/rejected": -348.3217468261719, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2019851207733154, + "rewards/margins": 5.438885688781738, + "rewards/rejected": -6.640871047973633, + "step": 4559 + }, + { + "epoch": 0.71, + "learning_rate": 1.080284558327714e-05, + "logits/chosen": -1.8992178440093994, + "logits/rejected": -3.194859027862549, + "logps/chosen": -61.571075439453125, + "logps/rejected": -412.0998229980469, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9449106454849243, + "rewards/margins": 4.726772308349609, + "rewards/rejected": -5.671682834625244, + "step": 4560 + }, + { + "epoch": 0.71, + "learning_rate": 1.0802112142745993e-05, + "logits/chosen": -2.948420524597168, + "logits/rejected": -1.6725149154663086, + "logps/chosen": -398.3199462890625, + "logps/rejected": -228.7550048828125, + "loss": 4.1098, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.624855995178223, + "rewards/margins": -4.062353134155273, + "rewards/rejected": -0.5625028610229492, + "step": 4561 + }, + { + "epoch": 0.71, + "learning_rate": 1.0801378702214844e-05, + "logits/chosen": -2.3412609100341797, + "logits/rejected": -3.0614326000213623, + "logps/chosen": -180.7323760986328, + "logps/rejected": -228.46090698242188, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9993770122528076, + "rewards/margins": 5.352536678314209, + "rewards/rejected": -7.3519134521484375, + "step": 4562 + }, + { + "epoch": 0.71, + "learning_rate": 1.0800645261683696e-05, + "logits/chosen": -3.061476707458496, + "logits/rejected": -2.1844139099121094, + "logps/chosen": -333.04583740234375, + "logps/rejected": -190.52859497070312, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.88609778881073, + "rewards/margins": 4.789895534515381, + "rewards/rejected": -5.6759934425354, + "step": 4563 + }, + { + "epoch": 0.71, + "learning_rate": 1.0799911821152548e-05, + "logits/chosen": -1.6176801919937134, + "logits/rejected": -2.9940476417541504, + "logps/chosen": -79.04521179199219, + "logps/rejected": -359.0668640136719, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.069583535194397, + "rewards/margins": 6.90995979309082, + "rewards/rejected": -7.979543685913086, + "step": 4564 + }, + { + "epoch": 0.71, + "learning_rate": 1.0799178380621402e-05, + "logits/chosen": -2.4394216537475586, + "logits/rejected": -2.869004964828491, + "logps/chosen": -41.855899810791016, + "logps/rejected": -139.96792602539062, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4439191818237305, + "rewards/margins": 5.181221008300781, + "rewards/rejected": -7.625140190124512, + "step": 4565 + }, + { + "epoch": 0.71, + "learning_rate": 1.0798444940090254e-05, + "logits/chosen": -2.3438565731048584, + "logits/rejected": -3.1654958724975586, + "logps/chosen": -91.21917724609375, + "logps/rejected": -327.5507507324219, + "loss": 0.1773, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4346609115600586, + "rewards/margins": 2.022538185119629, + "rewards/rejected": -4.4571990966796875, + "step": 4566 + }, + { + "epoch": 0.71, + "learning_rate": 1.0797711499559106e-05, + "logits/chosen": -2.5161828994750977, + "logits/rejected": -3.101966381072998, + "logps/chosen": -263.37652587890625, + "logps/rejected": -363.276611328125, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37616121768951416, + "rewards/margins": 4.085233688354492, + "rewards/rejected": -4.461394786834717, + "step": 4567 + }, + { + "epoch": 0.71, + "learning_rate": 1.0796978059027957e-05, + "logits/chosen": -3.142778158187866, + "logits/rejected": -3.0794527530670166, + "logps/chosen": -341.72161865234375, + "logps/rejected": -373.8126220703125, + "loss": 0.0368, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0261549949645996, + "rewards/margins": 4.781980991363525, + "rewards/rejected": -6.808135986328125, + "step": 4568 + }, + { + "epoch": 0.71, + "learning_rate": 1.079624461849681e-05, + "logits/chosen": -2.0646262168884277, + "logits/rejected": -3.0151944160461426, + "logps/chosen": -184.62582397460938, + "logps/rejected": -341.2043762207031, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.322628378868103, + "rewards/margins": 7.186680793762207, + "rewards/rejected": -8.509308815002441, + "step": 4569 + }, + { + "epoch": 0.71, + "learning_rate": 1.0795511177965663e-05, + "logits/chosen": -1.132128357887268, + "logits/rejected": -2.8679709434509277, + "logps/chosen": -84.57112121582031, + "logps/rejected": -328.1988525390625, + "loss": 0.0816, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.062692165374756, + "rewards/margins": 3.3473522663116455, + "rewards/rejected": -6.4100446701049805, + "step": 4570 + }, + { + "epoch": 0.71, + "learning_rate": 1.0794777737434515e-05, + "logits/chosen": -2.6166841983795166, + "logits/rejected": -2.943436622619629, + "logps/chosen": -332.0502624511719, + "logps/rejected": -316.33258056640625, + "loss": 1.8904, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.9323943853378296, + "rewards/margins": 1.5671495199203491, + "rewards/rejected": -3.4995439052581787, + "step": 4571 + }, + { + "epoch": 0.71, + "learning_rate": 1.0794044296903367e-05, + "logits/chosen": -2.432061195373535, + "logits/rejected": -3.0585219860076904, + "logps/chosen": -203.35745239257812, + "logps/rejected": -174.94229125976562, + "loss": 1.7041, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8431105613708496, + "rewards/margins": 0.7134020328521729, + "rewards/rejected": -4.556512832641602, + "step": 4572 + }, + { + "epoch": 0.71, + "learning_rate": 1.0793310856372219e-05, + "logits/chosen": -2.4643759727478027, + "logits/rejected": -2.569856882095337, + "logps/chosen": -71.53445434570312, + "logps/rejected": -51.830726623535156, + "loss": 1.4354, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.980437755584717, + "rewards/margins": 0.058100342750549316, + "rewards/rejected": -3.0385379791259766, + "step": 4573 + }, + { + "epoch": 0.71, + "learning_rate": 1.0792577415841072e-05, + "logits/chosen": -2.806473970413208, + "logits/rejected": -3.145106554031372, + "logps/chosen": -77.36549377441406, + "logps/rejected": -71.20360565185547, + "loss": 2.1183, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.192220687866211, + "rewards/margins": 0.05830669403076172, + "rewards/rejected": -4.250527381896973, + "step": 4574 + }, + { + "epoch": 0.71, + "learning_rate": 1.0791843975309924e-05, + "logits/chosen": -2.57743763923645, + "logits/rejected": -3.1236748695373535, + "logps/chosen": -209.9427032470703, + "logps/rejected": -358.5415344238281, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1714191436767578, + "rewards/margins": 6.118880271911621, + "rewards/rejected": -7.290299415588379, + "step": 4575 + }, + { + "epoch": 0.71, + "learning_rate": 1.0791110534778776e-05, + "logits/chosen": -2.5201144218444824, + "logits/rejected": -3.0202243328094482, + "logps/chosen": -385.89019775390625, + "logps/rejected": -228.1439208984375, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0080995559692383, + "rewards/margins": 5.020758628845215, + "rewards/rejected": -6.028858184814453, + "step": 4576 + }, + { + "epoch": 0.71, + "learning_rate": 1.0790377094247628e-05, + "logits/chosen": -1.8019382953643799, + "logits/rejected": -2.9599461555480957, + "logps/chosen": -81.61981201171875, + "logps/rejected": -222.06336975097656, + "loss": 0.1206, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.663278579711914, + "rewards/margins": 3.9905354976654053, + "rewards/rejected": -7.653814315795898, + "step": 4577 + }, + { + "epoch": 0.71, + "learning_rate": 1.078964365371648e-05, + "logits/chosen": -3.097576379776001, + "logits/rejected": -2.4372212886810303, + "logps/chosen": -110.79469299316406, + "logps/rejected": -82.71492004394531, + "loss": 2.57, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.605045318603516, + "rewards/margins": 0.07026362419128418, + "rewards/rejected": -4.675308704376221, + "step": 4578 + }, + { + "epoch": 0.71, + "learning_rate": 1.0788910213185331e-05, + "logits/chosen": -2.8195722103118896, + "logits/rejected": -3.284200429916382, + "logps/chosen": -61.03334045410156, + "logps/rejected": -230.6408233642578, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1648247241973877, + "rewards/margins": 4.0293121337890625, + "rewards/rejected": -6.194136619567871, + "step": 4579 + }, + { + "epoch": 0.71, + "learning_rate": 1.0788176772654183e-05, + "logits/chosen": -2.2555222511291504, + "logits/rejected": -3.196190595626831, + "logps/chosen": -237.9110107421875, + "logps/rejected": -442.489501953125, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.630192279815674, + "rewards/margins": 6.721831798553467, + "rewards/rejected": -9.35202407836914, + "step": 4580 + }, + { + "epoch": 0.71, + "learning_rate": 1.0787443332123035e-05, + "logits/chosen": -2.5278632640838623, + "logits/rejected": -3.130923271179199, + "logps/chosen": -197.97479248046875, + "logps/rejected": -289.8255615234375, + "loss": 0.0245, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3005266189575195, + "rewards/margins": 4.441532135009766, + "rewards/rejected": -6.742058753967285, + "step": 4581 + }, + { + "epoch": 0.71, + "learning_rate": 1.0786709891591887e-05, + "logits/chosen": -2.474876642227173, + "logits/rejected": -3.150785207748413, + "logps/chosen": -71.87332153320312, + "logps/rejected": -152.02691650390625, + "loss": 0.0827, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4844486713409424, + "rewards/margins": 2.5018820762634277, + "rewards/rejected": -3.986330986022949, + "step": 4582 + }, + { + "epoch": 0.71, + "learning_rate": 1.078597645106074e-05, + "logits/chosen": -2.497462034225464, + "logits/rejected": -3.0795159339904785, + "logps/chosen": -117.7671890258789, + "logps/rejected": -506.5894775390625, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3152496814727783, + "rewards/margins": 5.553611755371094, + "rewards/rejected": -6.868861198425293, + "step": 4583 + }, + { + "epoch": 0.71, + "learning_rate": 1.0785243010529593e-05, + "logits/chosen": -3.18658185005188, + "logits/rejected": -3.269388198852539, + "logps/chosen": -367.4564514160156, + "logps/rejected": -227.12109375, + "loss": 2.3087, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.208296298980713, + "rewards/margins": 1.1530265808105469, + "rewards/rejected": -5.36132287979126, + "step": 4584 + }, + { + "epoch": 0.71, + "learning_rate": 1.0784509569998444e-05, + "logits/chosen": -3.0579004287719727, + "logits/rejected": -2.83797025680542, + "logps/chosen": -462.4902038574219, + "logps/rejected": -244.93478393554688, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5771759748458862, + "rewards/margins": 5.878959655761719, + "rewards/rejected": -7.4561357498168945, + "step": 4585 + }, + { + "epoch": 0.71, + "learning_rate": 1.0783776129467296e-05, + "logits/chosen": -2.7676854133605957, + "logits/rejected": -3.1117727756500244, + "logps/chosen": -187.77798461914062, + "logps/rejected": -158.1844024658203, + "loss": 1.3979, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9177160263061523, + "rewards/margins": 0.2980372905731201, + "rewards/rejected": -4.215753555297852, + "step": 4586 + }, + { + "epoch": 0.71, + "learning_rate": 1.0783042688936148e-05, + "logits/chosen": -2.6118929386138916, + "logits/rejected": -3.0501363277435303, + "logps/chosen": -621.63525390625, + "logps/rejected": -537.7884521484375, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0442794561386108, + "rewards/margins": 6.459807872772217, + "rewards/rejected": -7.504087448120117, + "step": 4587 + }, + { + "epoch": 0.71, + "learning_rate": 1.0782309248405e-05, + "logits/chosen": -0.7657938599586487, + "logits/rejected": -2.329472064971924, + "logps/chosen": -265.1634826660156, + "logps/rejected": -607.483642578125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3299150466918945, + "rewards/margins": 6.715044021606445, + "rewards/rejected": -9.044958114624023, + "step": 4588 + }, + { + "epoch": 0.71, + "learning_rate": 1.0781575807873852e-05, + "logits/chosen": -1.1529548168182373, + "logits/rejected": -2.8167941570281982, + "logps/chosen": -67.6018295288086, + "logps/rejected": -211.320068359375, + "loss": 0.0429, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5212070941925049, + "rewards/margins": 3.8682055473327637, + "rewards/rejected": -5.389412879943848, + "step": 4589 + }, + { + "epoch": 0.71, + "learning_rate": 1.0780842367342704e-05, + "logits/chosen": -2.984971523284912, + "logits/rejected": -2.8549892902374268, + "logps/chosen": -129.45053100585938, + "logps/rejected": -202.8983154296875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1261651515960693, + "rewards/margins": 5.281144142150879, + "rewards/rejected": -6.407309055328369, + "step": 4590 + }, + { + "epoch": 0.71, + "learning_rate": 1.0780108926811556e-05, + "logits/chosen": -3.10121488571167, + "logits/rejected": -2.9687089920043945, + "logps/chosen": -230.66134643554688, + "logps/rejected": -359.59954833984375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8670850992202759, + "rewards/margins": 6.613437652587891, + "rewards/rejected": -8.480523109436035, + "step": 4591 + }, + { + "epoch": 0.71, + "learning_rate": 1.077937548628041e-05, + "logits/chosen": -1.965528130531311, + "logits/rejected": -3.0675227642059326, + "logps/chosen": -185.43148803710938, + "logps/rejected": -508.668701171875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2289056777954102, + "rewards/margins": 6.607463836669922, + "rewards/rejected": -7.836369514465332, + "step": 4592 + }, + { + "epoch": 0.71, + "learning_rate": 1.0778642045749261e-05, + "logits/chosen": -3.144829750061035, + "logits/rejected": -2.4896461963653564, + "logps/chosen": -262.06903076171875, + "logps/rejected": -260.39862060546875, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.677507758140564, + "rewards/margins": 4.9031524658203125, + "rewards/rejected": -6.580660343170166, + "step": 4593 + }, + { + "epoch": 0.71, + "learning_rate": 1.0777908605218113e-05, + "logits/chosen": -2.80655837059021, + "logits/rejected": -3.1217920780181885, + "logps/chosen": -45.20730209350586, + "logps/rejected": -209.65194702148438, + "loss": 0.1354, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2255024909973145, + "rewards/margins": 3.6196799278259277, + "rewards/rejected": -5.845182418823242, + "step": 4594 + }, + { + "epoch": 0.71, + "learning_rate": 1.0777175164686965e-05, + "logits/chosen": -1.7396485805511475, + "logits/rejected": -2.372357130050659, + "logps/chosen": -91.08740997314453, + "logps/rejected": -223.40794372558594, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2618720531463623, + "rewards/margins": 5.820419788360596, + "rewards/rejected": -7.082291603088379, + "step": 4595 + }, + { + "epoch": 0.71, + "learning_rate": 1.0776441724155817e-05, + "logits/chosen": -3.0522115230560303, + "logits/rejected": -2.4265387058258057, + "logps/chosen": -488.3666687011719, + "logps/rejected": -242.01217651367188, + "loss": 3.349, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7913436889648438, + "rewards/margins": 1.3563923835754395, + "rewards/rejected": -5.147736549377441, + "step": 4596 + }, + { + "epoch": 0.71, + "learning_rate": 1.0775708283624669e-05, + "logits/chosen": -2.7436206340789795, + "logits/rejected": -3.127727508544922, + "logps/chosen": -226.344482421875, + "logps/rejected": -382.87884521484375, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9003465175628662, + "rewards/margins": 5.811612129211426, + "rewards/rejected": -7.711958408355713, + "step": 4597 + }, + { + "epoch": 0.72, + "learning_rate": 1.077497484309352e-05, + "logits/chosen": -1.423879861831665, + "logits/rejected": -2.7319772243499756, + "logps/chosen": -80.5943374633789, + "logps/rejected": -201.28237915039062, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7440290451049805, + "rewards/margins": 5.846051216125488, + "rewards/rejected": -7.590080261230469, + "step": 4598 + }, + { + "epoch": 0.72, + "learning_rate": 1.0774241402562372e-05, + "logits/chosen": -3.1814401149749756, + "logits/rejected": -2.4605629444122314, + "logps/chosen": -329.1015319824219, + "logps/rejected": -176.67660522460938, + "loss": 3.6048, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.564828872680664, + "rewards/margins": -0.8297097682952881, + "rewards/rejected": -3.735118865966797, + "step": 4599 + }, + { + "epoch": 0.72, + "learning_rate": 1.0773507962031224e-05, + "logits/chosen": -2.1929681301116943, + "logits/rejected": -3.2200424671173096, + "logps/chosen": -182.98324584960938, + "logps/rejected": -408.41253662109375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2800929546356201, + "rewards/margins": 7.399443626403809, + "rewards/rejected": -8.679536819458008, + "step": 4600 + }, + { + "epoch": 0.72, + "learning_rate": 1.0772774521500078e-05, + "logits/chosen": -3.141679048538208, + "logits/rejected": -3.1593165397644043, + "logps/chosen": -128.73324584960938, + "logps/rejected": -287.51678466796875, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.619084119796753, + "rewards/margins": 4.45538854598999, + "rewards/rejected": -6.074472904205322, + "step": 4601 + }, + { + "epoch": 0.72, + "learning_rate": 1.077204108096893e-05, + "logits/chosen": -2.685917615890503, + "logits/rejected": -3.0311169624328613, + "logps/chosen": -280.38958740234375, + "logps/rejected": -414.3109130859375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.488316059112549, + "rewards/margins": 5.7661662101745605, + "rewards/rejected": -8.25448226928711, + "step": 4602 + }, + { + "epoch": 0.72, + "learning_rate": 1.0771307640437782e-05, + "logits/chosen": -2.362508773803711, + "logits/rejected": -3.043985605239868, + "logps/chosen": -250.64820861816406, + "logps/rejected": -530.34228515625, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.513558864593506, + "rewards/margins": 5.134451389312744, + "rewards/rejected": -7.64801025390625, + "step": 4603 + }, + { + "epoch": 0.72, + "learning_rate": 1.0770574199906635e-05, + "logits/chosen": -2.9934628009796143, + "logits/rejected": -3.0228755474090576, + "logps/chosen": -107.13774108886719, + "logps/rejected": -167.05140686035156, + "loss": 0.4059, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3573460578918457, + "rewards/margins": 2.080594062805176, + "rewards/rejected": -4.4379401206970215, + "step": 4604 + }, + { + "epoch": 0.72, + "learning_rate": 1.0769840759375487e-05, + "logits/chosen": -2.5765137672424316, + "logits/rejected": -3.066444158554077, + "logps/chosen": -164.8197479248047, + "logps/rejected": -351.0535888671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8180588483810425, + "rewards/margins": 8.878453254699707, + "rewards/rejected": -10.696512222290039, + "step": 4605 + }, + { + "epoch": 0.72, + "learning_rate": 1.0769107318844339e-05, + "logits/chosen": -2.7783870697021484, + "logits/rejected": -3.102792263031006, + "logps/chosen": -501.2738037109375, + "logps/rejected": -448.2248229980469, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5747084617614746, + "rewards/margins": 5.217555999755859, + "rewards/rejected": -7.792263984680176, + "step": 4606 + }, + { + "epoch": 0.72, + "learning_rate": 1.0768373878313191e-05, + "logits/chosen": -2.1039435863494873, + "logits/rejected": -3.248764991760254, + "logps/chosen": -482.138916015625, + "logps/rejected": -627.5030517578125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8546417951583862, + "rewards/margins": 7.971388816833496, + "rewards/rejected": -8.826030731201172, + "step": 4607 + }, + { + "epoch": 0.72, + "learning_rate": 1.0767640437782043e-05, + "logits/chosen": -2.247002601623535, + "logits/rejected": -2.6420726776123047, + "logps/chosen": -350.7360534667969, + "logps/rejected": -463.7110900878906, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0846619606018066, + "rewards/margins": 5.491419792175293, + "rewards/rejected": -7.5760817527771, + "step": 4608 + }, + { + "epoch": 0.72, + "learning_rate": 1.0766906997250895e-05, + "logits/chosen": -2.1507604122161865, + "logits/rejected": -3.0547964572906494, + "logps/chosen": -97.4549789428711, + "logps/rejected": -396.5281982421875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9721820950508118, + "rewards/margins": 8.318721771240234, + "rewards/rejected": -9.290903091430664, + "step": 4609 + }, + { + "epoch": 0.72, + "learning_rate": 1.0766173556719748e-05, + "logits/chosen": -3.2224671840667725, + "logits/rejected": -2.7577099800109863, + "logps/chosen": -115.47554016113281, + "logps/rejected": -130.55239868164062, + "loss": 2.3233, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.216537952423096, + "rewards/margins": 0.022569894790649414, + "rewards/rejected": -4.239108085632324, + "step": 4610 + }, + { + "epoch": 0.72, + "learning_rate": 1.07654401161886e-05, + "logits/chosen": -3.2481489181518555, + "logits/rejected": -2.7417025566101074, + "logps/chosen": -312.2977294921875, + "logps/rejected": -67.57542419433594, + "loss": 5.7042, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.072484493255615, + "rewards/margins": -5.70074462890625, + "rewards/rejected": -1.3717397451400757, + "step": 4611 + }, + { + "epoch": 0.72, + "learning_rate": 1.0764706675657452e-05, + "logits/chosen": -2.9427623748779297, + "logits/rejected": -3.047233819961548, + "logps/chosen": -217.49911499023438, + "logps/rejected": -236.3123779296875, + "loss": 1.9811, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2175729274749756, + "rewards/margins": 0.4522120952606201, + "rewards/rejected": -3.6697850227355957, + "step": 4612 + }, + { + "epoch": 0.72, + "learning_rate": 1.0763973235126304e-05, + "logits/chosen": -3.0024290084838867, + "logits/rejected": -2.329552412033081, + "logps/chosen": -134.04098510742188, + "logps/rejected": -109.83047485351562, + "loss": 3.5708, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.0121235847473145, + "rewards/margins": -1.1338295936584473, + "rewards/rejected": -3.878293752670288, + "step": 4613 + }, + { + "epoch": 0.72, + "learning_rate": 1.0763239794595156e-05, + "logits/chosen": -2.923582077026367, + "logits/rejected": -1.8129457235336304, + "logps/chosen": -335.83837890625, + "logps/rejected": -245.87936401367188, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5928360223770142, + "rewards/margins": 7.594489097595215, + "rewards/rejected": -8.187325477600098, + "step": 4614 + }, + { + "epoch": 0.72, + "learning_rate": 1.0762506354064008e-05, + "logits/chosen": -3.2375149726867676, + "logits/rejected": -2.9663240909576416, + "logps/chosen": -115.54147338867188, + "logps/rejected": -255.8526611328125, + "loss": 0.6249, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7237167358398438, + "rewards/margins": 3.0006792545318604, + "rewards/rejected": -4.724395751953125, + "step": 4615 + }, + { + "epoch": 0.72, + "learning_rate": 1.076177291353286e-05, + "logits/chosen": -1.9169635772705078, + "logits/rejected": -2.96242618560791, + "logps/chosen": -109.58306121826172, + "logps/rejected": -277.9064025878906, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5243687629699707, + "rewards/margins": 6.4644880294799805, + "rewards/rejected": -7.988856792449951, + "step": 4616 + }, + { + "epoch": 0.72, + "learning_rate": 1.0761039473001711e-05, + "logits/chosen": -2.9443130493164062, + "logits/rejected": -3.057168960571289, + "logps/chosen": -149.53707885742188, + "logps/rejected": -251.76580810546875, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9120339155197144, + "rewards/margins": 5.089727878570557, + "rewards/rejected": -7.001761436462402, + "step": 4617 + }, + { + "epoch": 0.72, + "learning_rate": 1.0760306032470563e-05, + "logits/chosen": -1.1990063190460205, + "logits/rejected": -2.5664541721343994, + "logps/chosen": -61.457218170166016, + "logps/rejected": -228.99362182617188, + "loss": 0.1981, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.472874164581299, + "rewards/margins": 4.016895294189453, + "rewards/rejected": -6.489768981933594, + "step": 4618 + }, + { + "epoch": 0.72, + "learning_rate": 1.0759572591939417e-05, + "logits/chosen": -3.013857841491699, + "logits/rejected": -2.7788355350494385, + "logps/chosen": -587.8690185546875, + "logps/rejected": -483.2384033203125, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4549270868301392, + "rewards/margins": 5.976982116699219, + "rewards/rejected": -7.431909561157227, + "step": 4619 + }, + { + "epoch": 0.72, + "learning_rate": 1.0758839151408269e-05, + "logits/chosen": -2.640622615814209, + "logits/rejected": -3.004815101623535, + "logps/chosen": -67.71527099609375, + "logps/rejected": -160.75001525878906, + "loss": 0.0261, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6564024686813354, + "rewards/margins": 3.674628257751465, + "rewards/rejected": -5.33103084564209, + "step": 4620 + }, + { + "epoch": 0.72, + "learning_rate": 1.075810571087712e-05, + "logits/chosen": -3.007695436477661, + "logits/rejected": -3.3160033226013184, + "logps/chosen": -115.94795227050781, + "logps/rejected": -200.7069549560547, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6622352600097656, + "rewards/margins": 4.275356292724609, + "rewards/rejected": -5.937591552734375, + "step": 4621 + }, + { + "epoch": 0.72, + "learning_rate": 1.0757372270345972e-05, + "logits/chosen": -2.559755325317383, + "logits/rejected": -3.029153823852539, + "logps/chosen": -214.49313354492188, + "logps/rejected": -288.9266662597656, + "loss": 0.0261, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7772936820983887, + "rewards/margins": 4.656853199005127, + "rewards/rejected": -6.434146881103516, + "step": 4622 + }, + { + "epoch": 0.72, + "learning_rate": 1.0756638829814824e-05, + "logits/chosen": -3.1436550617218018, + "logits/rejected": -2.4531075954437256, + "logps/chosen": -324.205322265625, + "logps/rejected": -416.08673095703125, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6875877380371094, + "rewards/margins": 6.010773658752441, + "rewards/rejected": -8.69836139678955, + "step": 4623 + }, + { + "epoch": 0.72, + "learning_rate": 1.0755905389283676e-05, + "logits/chosen": -1.114876627922058, + "logits/rejected": -2.9453301429748535, + "logps/chosen": -102.88438415527344, + "logps/rejected": -319.8477478027344, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5488927364349365, + "rewards/margins": 4.688938140869141, + "rewards/rejected": -6.23783016204834, + "step": 4624 + }, + { + "epoch": 0.72, + "learning_rate": 1.0755171948752528e-05, + "logits/chosen": -2.8736002445220947, + "logits/rejected": -3.115898609161377, + "logps/chosen": -70.03858184814453, + "logps/rejected": -439.36602783203125, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4289817810058594, + "rewards/margins": 6.267185688018799, + "rewards/rejected": -8.6961669921875, + "step": 4625 + }, + { + "epoch": 0.72, + "learning_rate": 1.075443850822138e-05, + "logits/chosen": -1.652636170387268, + "logits/rejected": -2.805149555206299, + "logps/chosen": -155.39590454101562, + "logps/rejected": -428.3352355957031, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.011010766029358, + "rewards/margins": 7.597232341766357, + "rewards/rejected": -8.608242988586426, + "step": 4626 + }, + { + "epoch": 0.72, + "learning_rate": 1.0753705067690232e-05, + "logits/chosen": -1.4406778812408447, + "logits/rejected": -2.5633270740509033, + "logps/chosen": -230.2498779296875, + "logps/rejected": -443.13897705078125, + "loss": 0.0444, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4074745178222656, + "rewards/margins": 5.94370174407959, + "rewards/rejected": -7.3511762619018555, + "step": 4627 + }, + { + "epoch": 0.72, + "learning_rate": 1.0752971627159085e-05, + "logits/chosen": -1.5669933557510376, + "logits/rejected": -3.07957124710083, + "logps/chosen": -89.84027099609375, + "logps/rejected": -222.68341064453125, + "loss": 1.649, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.310041904449463, + "rewards/margins": 1.1868563890457153, + "rewards/rejected": -4.496898174285889, + "step": 4628 + }, + { + "epoch": 0.72, + "learning_rate": 1.0752238186627937e-05, + "logits/chosen": -2.550999641418457, + "logits/rejected": -3.0005297660827637, + "logps/chosen": -56.93144226074219, + "logps/rejected": -334.859130859375, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1354520320892334, + "rewards/margins": 6.954555034637451, + "rewards/rejected": -8.090006828308105, + "step": 4629 + }, + { + "epoch": 0.72, + "learning_rate": 1.0751504746096789e-05, + "logits/chosen": -1.762416958808899, + "logits/rejected": -2.7235260009765625, + "logps/chosen": -194.8078155517578, + "logps/rejected": -373.78326416015625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0210025310516357, + "rewards/margins": 6.483302116394043, + "rewards/rejected": -8.504304885864258, + "step": 4630 + }, + { + "epoch": 0.72, + "learning_rate": 1.0750771305565641e-05, + "logits/chosen": -2.6601409912109375, + "logits/rejected": -2.954871892929077, + "logps/chosen": -385.3743591308594, + "logps/rejected": -352.150634765625, + "loss": 0.0464, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.555302381515503, + "rewards/margins": 3.0777862071990967, + "rewards/rejected": -4.6330885887146, + "step": 4631 + }, + { + "epoch": 0.72, + "learning_rate": 1.0750037865034493e-05, + "logits/chosen": -2.3552231788635254, + "logits/rejected": -3.069432497024536, + "logps/chosen": -110.5544204711914, + "logps/rejected": -336.293701171875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.717960000038147, + "rewards/margins": 6.850620746612549, + "rewards/rejected": -8.568580627441406, + "step": 4632 + }, + { + "epoch": 0.72, + "learning_rate": 1.0749304424503345e-05, + "logits/chosen": -2.587536573410034, + "logits/rejected": -3.088454246520996, + "logps/chosen": -137.87225341796875, + "logps/rejected": -490.46466064453125, + "loss": 0.0371, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9320151805877686, + "rewards/margins": 3.9798240661621094, + "rewards/rejected": -6.911839485168457, + "step": 4633 + }, + { + "epoch": 0.72, + "learning_rate": 1.0748570983972197e-05, + "logits/chosen": -3.0061001777648926, + "logits/rejected": -3.0974676609039307, + "logps/chosen": -79.32219696044922, + "logps/rejected": -161.47195434570312, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7698571681976318, + "rewards/margins": 4.693337440490723, + "rewards/rejected": -6.463194370269775, + "step": 4634 + }, + { + "epoch": 0.72, + "learning_rate": 1.0747837543441049e-05, + "logits/chosen": -1.398307204246521, + "logits/rejected": -2.756107807159424, + "logps/chosen": -92.37910461425781, + "logps/rejected": -193.44326782226562, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7049041986465454, + "rewards/margins": 4.438544750213623, + "rewards/rejected": -6.143448829650879, + "step": 4635 + }, + { + "epoch": 0.72, + "learning_rate": 1.0747104102909902e-05, + "logits/chosen": -2.080359697341919, + "logits/rejected": -3.0672576427459717, + "logps/chosen": -143.95057678222656, + "logps/rejected": -457.4337463378906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9738128781318665, + "rewards/margins": 9.46165943145752, + "rewards/rejected": -10.43547248840332, + "step": 4636 + }, + { + "epoch": 0.72, + "learning_rate": 1.0746370662378754e-05, + "logits/chosen": -2.724113941192627, + "logits/rejected": -3.1332168579101562, + "logps/chosen": -180.25738525390625, + "logps/rejected": -363.18194580078125, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3376402854919434, + "rewards/margins": 5.996655464172363, + "rewards/rejected": -8.334295272827148, + "step": 4637 + }, + { + "epoch": 0.72, + "learning_rate": 1.0745637221847608e-05, + "logits/chosen": -1.5479463338851929, + "logits/rejected": -2.6769375801086426, + "logps/chosen": -180.48944091796875, + "logps/rejected": -353.7137756347656, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.788299560546875, + "rewards/margins": 7.669395446777344, + "rewards/rejected": -9.457695007324219, + "step": 4638 + }, + { + "epoch": 0.72, + "learning_rate": 1.074490378131646e-05, + "logits/chosen": -3.042501211166382, + "logits/rejected": -2.934910535812378, + "logps/chosen": -426.0516052246094, + "logps/rejected": -461.1571350097656, + "loss": 0.4443, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1189510822296143, + "rewards/margins": 4.636765480041504, + "rewards/rejected": -6.755716323852539, + "step": 4639 + }, + { + "epoch": 0.72, + "learning_rate": 1.0744170340785311e-05, + "logits/chosen": -2.49569034576416, + "logits/rejected": -3.0291764736175537, + "logps/chosen": -111.14578247070312, + "logps/rejected": -285.818359375, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1270023584365845, + "rewards/margins": 5.130825042724609, + "rewards/rejected": -6.2578277587890625, + "step": 4640 + }, + { + "epoch": 0.72, + "learning_rate": 1.0743436900254163e-05, + "logits/chosen": -2.086442470550537, + "logits/rejected": -2.9636027812957764, + "logps/chosen": -216.7488250732422, + "logps/rejected": -301.3324279785156, + "loss": 3.0654, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.282689571380615, + "rewards/margins": -0.5929288864135742, + "rewards/rejected": -3.689760684967041, + "step": 4641 + }, + { + "epoch": 0.72, + "learning_rate": 1.0742703459723015e-05, + "logits/chosen": -1.7119208574295044, + "logits/rejected": -2.7509915828704834, + "logps/chosen": -152.52316284179688, + "logps/rejected": -309.4337158203125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.48893359303474426, + "rewards/margins": 7.181667327880859, + "rewards/rejected": -7.670600891113281, + "step": 4642 + }, + { + "epoch": 0.72, + "learning_rate": 1.0741970019191867e-05, + "logits/chosen": -2.0993824005126953, + "logits/rejected": -2.571232557296753, + "logps/chosen": -115.85319519042969, + "logps/rejected": -248.806884765625, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.942836046218872, + "rewards/margins": 6.589910507202148, + "rewards/rejected": -8.532747268676758, + "step": 4643 + }, + { + "epoch": 0.72, + "learning_rate": 1.0741236578660719e-05, + "logits/chosen": -2.8236472606658936, + "logits/rejected": -3.0152344703674316, + "logps/chosen": -471.0721435546875, + "logps/rejected": -610.73779296875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5895248651504517, + "rewards/margins": 7.6426591873168945, + "rewards/rejected": -9.232184410095215, + "step": 4644 + }, + { + "epoch": 0.72, + "learning_rate": 1.0740503138129572e-05, + "logits/chosen": -1.7563042640686035, + "logits/rejected": -2.9089488983154297, + "logps/chosen": -50.61311340332031, + "logps/rejected": -180.72567749023438, + "loss": 0.0547, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.475874662399292, + "rewards/margins": 2.8831331729888916, + "rewards/rejected": -5.359007835388184, + "step": 4645 + }, + { + "epoch": 0.72, + "learning_rate": 1.0739769697598424e-05, + "logits/chosen": -2.7698612213134766, + "logits/rejected": -3.0496318340301514, + "logps/chosen": -705.2979736328125, + "logps/rejected": -599.6494750976562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.47906339168548584, + "rewards/margins": 9.423750877380371, + "rewards/rejected": -9.902813911437988, + "step": 4646 + }, + { + "epoch": 0.72, + "learning_rate": 1.0739036257067276e-05, + "logits/chosen": -2.1947178840637207, + "logits/rejected": -2.7804160118103027, + "logps/chosen": -142.8876953125, + "logps/rejected": -297.1842041015625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1503028869628906, + "rewards/margins": 6.919780254364014, + "rewards/rejected": -9.070082664489746, + "step": 4647 + }, + { + "epoch": 0.72, + "learning_rate": 1.0738302816536128e-05, + "logits/chosen": -1.1236610412597656, + "logits/rejected": -1.767691731452942, + "logps/chosen": -309.2943420410156, + "logps/rejected": -378.27642822265625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.185723066329956, + "rewards/margins": 6.841226577758789, + "rewards/rejected": -8.026949882507324, + "step": 4648 + }, + { + "epoch": 0.72, + "learning_rate": 1.073756937600498e-05, + "logits/chosen": -3.2579588890075684, + "logits/rejected": -2.9220101833343506, + "logps/chosen": -67.65740966796875, + "logps/rejected": -110.58810424804688, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2248272895812988, + "rewards/margins": 3.595531702041626, + "rewards/rejected": -4.820359230041504, + "step": 4649 + }, + { + "epoch": 0.72, + "learning_rate": 1.0736835935473832e-05, + "logits/chosen": -1.1796432733535767, + "logits/rejected": -2.614107847213745, + "logps/chosen": -52.5960807800293, + "logps/rejected": -269.287841796875, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8070949912071228, + "rewards/margins": 7.069015026092529, + "rewards/rejected": -7.876110076904297, + "step": 4650 + }, + { + "epoch": 0.72, + "learning_rate": 1.0736102494942684e-05, + "logits/chosen": -1.9178930521011353, + "logits/rejected": -2.7129766941070557, + "logps/chosen": -139.48365783691406, + "logps/rejected": -423.70794677734375, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9782841205596924, + "rewards/margins": 6.240039348602295, + "rewards/rejected": -7.218323707580566, + "step": 4651 + }, + { + "epoch": 0.72, + "learning_rate": 1.0735369054411536e-05, + "logits/chosen": -2.9878158569335938, + "logits/rejected": -2.806361198425293, + "logps/chosen": -124.79798126220703, + "logps/rejected": -187.44422912597656, + "loss": 0.3042, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1354336738586426, + "rewards/margins": 3.0233631134033203, + "rewards/rejected": -5.158796787261963, + "step": 4652 + }, + { + "epoch": 0.72, + "learning_rate": 1.0734635613880387e-05, + "logits/chosen": -1.410818099975586, + "logits/rejected": -2.8614187240600586, + "logps/chosen": -83.99917602539062, + "logps/rejected": -331.822021484375, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5549943447113037, + "rewards/margins": 6.123052597045898, + "rewards/rejected": -7.678047180175781, + "step": 4653 + }, + { + "epoch": 0.72, + "learning_rate": 1.0733902173349241e-05, + "logits/chosen": -2.503870964050293, + "logits/rejected": -1.7449311017990112, + "logps/chosen": -227.60484313964844, + "logps/rejected": -182.90792846679688, + "loss": 5.3791, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.687828063964844, + "rewards/margins": -5.369325637817383, + "rewards/rejected": -1.3185020685195923, + "step": 4654 + }, + { + "epoch": 0.72, + "learning_rate": 1.0733168732818093e-05, + "logits/chosen": -2.8971469402313232, + "logits/rejected": -2.402808427810669, + "logps/chosen": -533.6341552734375, + "logps/rejected": -552.3652954101562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08488771319389343, + "rewards/margins": 11.259071350097656, + "rewards/rejected": -11.17418384552002, + "step": 4655 + }, + { + "epoch": 0.72, + "learning_rate": 1.0732435292286945e-05, + "logits/chosen": -2.255049228668213, + "logits/rejected": -3.037170886993408, + "logps/chosen": -114.81964111328125, + "logps/rejected": -236.44918823242188, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9404782056808472, + "rewards/margins": 5.928919792175293, + "rewards/rejected": -6.86939811706543, + "step": 4656 + }, + { + "epoch": 0.72, + "learning_rate": 1.0731701851755797e-05, + "logits/chosen": -1.4671969413757324, + "logits/rejected": -2.4185571670532227, + "logps/chosen": -130.05905151367188, + "logps/rejected": -344.61083984375, + "loss": 2.5908, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.369790077209473, + "rewards/margins": 0.6108016967773438, + "rewards/rejected": -4.980591773986816, + "step": 4657 + }, + { + "epoch": 0.72, + "learning_rate": 1.0730968411224649e-05, + "logits/chosen": -2.948204517364502, + "logits/rejected": -3.1239943504333496, + "logps/chosen": -82.52884674072266, + "logps/rejected": -167.08056640625, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4848896265029907, + "rewards/margins": 5.289383411407471, + "rewards/rejected": -6.774272918701172, + "step": 4658 + }, + { + "epoch": 0.72, + "learning_rate": 1.07302349706935e-05, + "logits/chosen": -2.872206211090088, + "logits/rejected": -1.407800555229187, + "logps/chosen": -329.3212890625, + "logps/rejected": -301.2780456542969, + "loss": 2.9915, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.193972587585449, + "rewards/margins": 1.3106410503387451, + "rewards/rejected": -6.504613876342773, + "step": 4659 + }, + { + "epoch": 0.72, + "learning_rate": 1.0729501530162352e-05, + "logits/chosen": -2.3927836418151855, + "logits/rejected": -2.9673445224761963, + "logps/chosen": -185.0436248779297, + "logps/rejected": -157.65145874023438, + "loss": 2.7333, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.131872177124023, + "rewards/margins": 0.15326857566833496, + "rewards/rejected": -4.2851409912109375, + "step": 4660 + }, + { + "epoch": 0.72, + "learning_rate": 1.0728768089631204e-05, + "logits/chosen": -0.9176146388053894, + "logits/rejected": -2.5638725757598877, + "logps/chosen": -177.69166564941406, + "logps/rejected": -595.2066040039062, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3396737575531006, + "rewards/margins": 8.182744979858398, + "rewards/rejected": -9.522418022155762, + "step": 4661 + }, + { + "epoch": 0.73, + "learning_rate": 1.0728034649100056e-05, + "logits/chosen": -3.0754446983337402, + "logits/rejected": -2.044862747192383, + "logps/chosen": -364.84674072265625, + "logps/rejected": -286.37359619140625, + "loss": 2.0511, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8851654529571533, + "rewards/margins": 1.5271196365356445, + "rewards/rejected": -5.412284851074219, + "step": 4662 + }, + { + "epoch": 0.73, + "learning_rate": 1.072730120856891e-05, + "logits/chosen": -2.116809129714966, + "logits/rejected": -2.751552104949951, + "logps/chosen": -72.35816955566406, + "logps/rejected": -304.3588562011719, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8394383788108826, + "rewards/margins": 8.51318073272705, + "rewards/rejected": -9.352619171142578, + "step": 4663 + }, + { + "epoch": 0.73, + "learning_rate": 1.0726567768037761e-05, + "logits/chosen": -3.0043036937713623, + "logits/rejected": -2.800882339477539, + "logps/chosen": -304.728515625, + "logps/rejected": -269.9520568847656, + "loss": 3.6087, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.4406867027282715, + "rewards/margins": -0.8148939609527588, + "rewards/rejected": -3.6257927417755127, + "step": 4664 + }, + { + "epoch": 0.73, + "learning_rate": 1.0725834327506613e-05, + "logits/chosen": -3.0690393447875977, + "logits/rejected": -3.114325761795044, + "logps/chosen": -115.78959655761719, + "logps/rejected": -175.4419403076172, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6460391879081726, + "rewards/margins": 4.5619916915893555, + "rewards/rejected": -5.208030700683594, + "step": 4665 + }, + { + "epoch": 0.73, + "learning_rate": 1.0725100886975465e-05, + "logits/chosen": -2.0281474590301514, + "logits/rejected": -2.7338180541992188, + "logps/chosen": -104.70185852050781, + "logps/rejected": -222.24362182617188, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2037352323532104, + "rewards/margins": 5.337189197540283, + "rewards/rejected": -6.540924072265625, + "step": 4666 + }, + { + "epoch": 0.73, + "learning_rate": 1.0724367446444317e-05, + "logits/chosen": -3.033893585205078, + "logits/rejected": -3.1261935234069824, + "logps/chosen": -560.8596801757812, + "logps/rejected": -572.9464111328125, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07854005694389343, + "rewards/margins": 6.666171073913574, + "rewards/rejected": -6.744711399078369, + "step": 4667 + }, + { + "epoch": 0.73, + "learning_rate": 1.0723634005913169e-05, + "logits/chosen": -2.5845792293548584, + "logits/rejected": -2.9734256267547607, + "logps/chosen": -674.42333984375, + "logps/rejected": -485.3565979003906, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.513336181640625, + "rewards/margins": 6.183925151824951, + "rewards/rejected": -6.697261333465576, + "step": 4668 + }, + { + "epoch": 0.73, + "learning_rate": 1.0722900565382021e-05, + "logits/chosen": -2.0430266857147217, + "logits/rejected": -3.1199874877929688, + "logps/chosen": -118.36932373046875, + "logps/rejected": -256.67291259765625, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5545259118080139, + "rewards/margins": 5.111469268798828, + "rewards/rejected": -5.665995121002197, + "step": 4669 + }, + { + "epoch": 0.73, + "learning_rate": 1.0722167124850874e-05, + "logits/chosen": -1.6191177368164062, + "logits/rejected": -3.0311999320983887, + "logps/chosen": -176.35824584960938, + "logps/rejected": -230.59359741210938, + "loss": 3.7267, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.644184589385986, + "rewards/margins": -0.6615657806396484, + "rewards/rejected": -4.982618808746338, + "step": 4670 + }, + { + "epoch": 0.73, + "learning_rate": 1.0721433684319726e-05, + "logits/chosen": -1.279760479927063, + "logits/rejected": -2.844904661178589, + "logps/chosen": -88.4146728515625, + "logps/rejected": -417.6806640625, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0777816772460938, + "rewards/margins": 5.703554153442383, + "rewards/rejected": -6.781335830688477, + "step": 4671 + }, + { + "epoch": 0.73, + "learning_rate": 1.072070024378858e-05, + "logits/chosen": -2.157346487045288, + "logits/rejected": -3.0558836460113525, + "logps/chosen": -88.70243072509766, + "logps/rejected": -259.4236145019531, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0634682178497314, + "rewards/margins": 5.951972484588623, + "rewards/rejected": -9.015440940856934, + "step": 4672 + }, + { + "epoch": 0.73, + "learning_rate": 1.0719966803257432e-05, + "logits/chosen": -3.0746994018554688, + "logits/rejected": -2.2807390689849854, + "logps/chosen": -357.2003173828125, + "logps/rejected": -375.33441162109375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.337633490562439, + "rewards/margins": 5.703949928283691, + "rewards/rejected": -7.041583061218262, + "step": 4673 + }, + { + "epoch": 0.73, + "learning_rate": 1.0719233362726284e-05, + "logits/chosen": -2.9670987129211426, + "logits/rejected": -3.057650566101074, + "logps/chosen": -111.96116638183594, + "logps/rejected": -116.91204071044922, + "loss": 2.3834, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5703647136688232, + "rewards/margins": -0.6277108192443848, + "rewards/rejected": -2.9426538944244385, + "step": 4674 + }, + { + "epoch": 0.73, + "learning_rate": 1.0718499922195136e-05, + "logits/chosen": -2.5503265857696533, + "logits/rejected": -3.019378423690796, + "logps/chosen": -580.01513671875, + "logps/rejected": -590.7828369140625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3450569212436676, + "rewards/margins": 6.779078006744385, + "rewards/rejected": -6.43402099609375, + "step": 4675 + }, + { + "epoch": 0.73, + "learning_rate": 1.0717766481663987e-05, + "logits/chosen": -2.217087984085083, + "logits/rejected": -2.6804468631744385, + "logps/chosen": -84.13008117675781, + "logps/rejected": -400.1746826171875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3968380689620972, + "rewards/margins": 6.931015491485596, + "rewards/rejected": -8.32785415649414, + "step": 4676 + }, + { + "epoch": 0.73, + "learning_rate": 1.071703304113284e-05, + "logits/chosen": -3.084141969680786, + "logits/rejected": -3.2011559009552, + "logps/chosen": -113.45237731933594, + "logps/rejected": -235.57310485839844, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.694443702697754, + "rewards/margins": 5.523889541625977, + "rewards/rejected": -7.2183332443237305, + "step": 4677 + }, + { + "epoch": 0.73, + "learning_rate": 1.0716299600601691e-05, + "logits/chosen": -2.873877763748169, + "logits/rejected": -2.9811408519744873, + "logps/chosen": -25.199031829833984, + "logps/rejected": -331.2587585449219, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4518444538116455, + "rewards/margins": 6.864468574523926, + "rewards/rejected": -8.316312789916992, + "step": 4678 + }, + { + "epoch": 0.73, + "learning_rate": 1.0715566160070543e-05, + "logits/chosen": -2.5195119380950928, + "logits/rejected": -2.983680486679077, + "logps/chosen": -145.843505859375, + "logps/rejected": -304.62030029296875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.157474160194397, + "rewards/margins": 6.001959800720215, + "rewards/rejected": -7.159433841705322, + "step": 4679 + }, + { + "epoch": 0.73, + "learning_rate": 1.0714832719539395e-05, + "logits/chosen": -2.66965389251709, + "logits/rejected": -3.1935534477233887, + "logps/chosen": -353.717041015625, + "logps/rejected": -375.6661682128906, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7757991552352905, + "rewards/margins": 5.589689254760742, + "rewards/rejected": -6.365488529205322, + "step": 4680 + }, + { + "epoch": 0.73, + "learning_rate": 1.0714099279008248e-05, + "logits/chosen": -2.8427743911743164, + "logits/rejected": -2.315347194671631, + "logps/chosen": -257.2629089355469, + "logps/rejected": -354.01080322265625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1938698291778564, + "rewards/margins": 8.162912368774414, + "rewards/rejected": -9.356781959533691, + "step": 4681 + }, + { + "epoch": 0.73, + "learning_rate": 1.07133658384771e-05, + "logits/chosen": -3.0159242153167725, + "logits/rejected": -3.0449109077453613, + "logps/chosen": -138.61245727539062, + "logps/rejected": -218.4991912841797, + "loss": 1.8241, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.600945472717285, + "rewards/margins": -0.24668371677398682, + "rewards/rejected": -4.35426139831543, + "step": 4682 + }, + { + "epoch": 0.73, + "learning_rate": 1.0712632397945952e-05, + "logits/chosen": -2.808248519897461, + "logits/rejected": -3.1349759101867676, + "logps/chosen": -106.14688873291016, + "logps/rejected": -128.12193298339844, + "loss": 2.1562, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.196575880050659, + "rewards/margins": 1.3565480709075928, + "rewards/rejected": -4.553123950958252, + "step": 4683 + }, + { + "epoch": 0.73, + "learning_rate": 1.0711898957414804e-05, + "logits/chosen": -1.8584257364273071, + "logits/rejected": -3.1402883529663086, + "logps/chosen": -249.25079345703125, + "logps/rejected": -435.6050109863281, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8598175048828125, + "rewards/margins": 4.8120317459106445, + "rewards/rejected": -7.671849250793457, + "step": 4684 + }, + { + "epoch": 0.73, + "learning_rate": 1.0711165516883656e-05, + "logits/chosen": -2.86188006401062, + "logits/rejected": -3.2149240970611572, + "logps/chosen": -615.9884643554688, + "logps/rejected": -908.8742065429688, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2302193641662598, + "rewards/margins": 6.430415153503418, + "rewards/rejected": -7.6606340408325195, + "step": 4685 + }, + { + "epoch": 0.73, + "learning_rate": 1.0710432076352508e-05, + "logits/chosen": -2.179243564605713, + "logits/rejected": -2.018389940261841, + "logps/chosen": -300.284912109375, + "logps/rejected": -224.15260314941406, + "loss": 3.1547, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.74723219871521, + "rewards/margins": -1.6220484972000122, + "rewards/rejected": -2.125183582305908, + "step": 4686 + }, + { + "epoch": 0.73, + "learning_rate": 1.070969863582136e-05, + "logits/chosen": -3.0808019638061523, + "logits/rejected": -2.929931879043579, + "logps/chosen": -225.6475830078125, + "logps/rejected": -441.69500732421875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6539497375488281, + "rewards/margins": 7.815458297729492, + "rewards/rejected": -9.46940803527832, + "step": 4687 + }, + { + "epoch": 0.73, + "learning_rate": 1.0708965195290212e-05, + "logits/chosen": -3.0674636363983154, + "logits/rejected": -1.9797114133834839, + "logps/chosen": -486.3335266113281, + "logps/rejected": -383.43743896484375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33472442626953125, + "rewards/margins": 6.875757217407227, + "rewards/rejected": -7.210481643676758, + "step": 4688 + }, + { + "epoch": 0.73, + "learning_rate": 1.0708231754759064e-05, + "logits/chosen": -1.8093076944351196, + "logits/rejected": -3.1143648624420166, + "logps/chosen": -154.91798400878906, + "logps/rejected": -272.80218505859375, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5509757995605469, + "rewards/margins": 5.121737480163574, + "rewards/rejected": -6.672713279724121, + "step": 4689 + }, + { + "epoch": 0.73, + "learning_rate": 1.0707498314227917e-05, + "logits/chosen": -2.9634737968444824, + "logits/rejected": -3.242849349975586, + "logps/chosen": -168.18685913085938, + "logps/rejected": -333.6810302734375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24617500603199005, + "rewards/margins": 6.103706359863281, + "rewards/rejected": -6.349881172180176, + "step": 4690 + }, + { + "epoch": 0.73, + "learning_rate": 1.0706764873696769e-05, + "logits/chosen": -3.09438419342041, + "logits/rejected": -2.9592723846435547, + "logps/chosen": -91.64373016357422, + "logps/rejected": -156.53533935546875, + "loss": 2.2864, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3211655616760254, + "rewards/margins": 0.516747236251831, + "rewards/rejected": -3.8379127979278564, + "step": 4691 + }, + { + "epoch": 0.73, + "learning_rate": 1.070603143316562e-05, + "logits/chosen": -3.1029880046844482, + "logits/rejected": -3.3539462089538574, + "logps/chosen": -555.6947021484375, + "logps/rejected": -453.2119140625, + "loss": 0.0865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7979305982589722, + "rewards/margins": 4.8453898429870605, + "rewards/rejected": -5.643320560455322, + "step": 4692 + }, + { + "epoch": 0.73, + "learning_rate": 1.0705297992634473e-05, + "logits/chosen": -2.978111505508423, + "logits/rejected": -2.5903255939483643, + "logps/chosen": -107.5869140625, + "logps/rejected": -198.38710021972656, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3675304651260376, + "rewards/margins": 5.602134704589844, + "rewards/rejected": -6.96966552734375, + "step": 4693 + }, + { + "epoch": 0.73, + "learning_rate": 1.0704564552103325e-05, + "logits/chosen": -2.38016414642334, + "logits/rejected": -3.284101963043213, + "logps/chosen": -65.56218719482422, + "logps/rejected": -222.5489044189453, + "loss": 1.4522, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.469062566757202, + "rewards/margins": 1.2592992782592773, + "rewards/rejected": -3.7283618450164795, + "step": 4694 + }, + { + "epoch": 0.73, + "learning_rate": 1.0703831111572176e-05, + "logits/chosen": -2.888988733291626, + "logits/rejected": -3.146329402923584, + "logps/chosen": -253.54071044921875, + "logps/rejected": -406.64752197265625, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4127910137176514, + "rewards/margins": 4.933797836303711, + "rewards/rejected": -7.346589088439941, + "step": 4695 + }, + { + "epoch": 0.73, + "learning_rate": 1.0703097671041028e-05, + "logits/chosen": -2.262613296508789, + "logits/rejected": -2.810175895690918, + "logps/chosen": -98.89781951904297, + "logps/rejected": -298.2119140625, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8442253470420837, + "rewards/margins": 7.049001693725586, + "rewards/rejected": -7.893226623535156, + "step": 4696 + }, + { + "epoch": 0.73, + "learning_rate": 1.070236423050988e-05, + "logits/chosen": -3.0177085399627686, + "logits/rejected": -1.9184693098068237, + "logps/chosen": -408.94757080078125, + "logps/rejected": -122.24107360839844, + "loss": 3.5159, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6046905517578125, + "rewards/margins": -0.4704291820526123, + "rewards/rejected": -3.1342613697052, + "step": 4697 + }, + { + "epoch": 0.73, + "learning_rate": 1.0701630789978732e-05, + "logits/chosen": -3.0519487857818604, + "logits/rejected": -2.7252681255340576, + "logps/chosen": -581.5045776367188, + "logps/rejected": -385.67108154296875, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4902751445770264, + "rewards/margins": 4.661998748779297, + "rewards/rejected": -6.152273654937744, + "step": 4698 + }, + { + "epoch": 0.73, + "learning_rate": 1.0700897349447586e-05, + "logits/chosen": -3.065220594406128, + "logits/rejected": -2.2502448558807373, + "logps/chosen": -443.1034851074219, + "logps/rejected": -338.053955078125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5860263109207153, + "rewards/margins": 5.947556972503662, + "rewards/rejected": -6.533583164215088, + "step": 4699 + }, + { + "epoch": 0.73, + "learning_rate": 1.0700163908916438e-05, + "logits/chosen": -2.7188427448272705, + "logits/rejected": -2.742940902709961, + "logps/chosen": -114.7719955444336, + "logps/rejected": -270.6043701171875, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8902913331985474, + "rewards/margins": 4.629558086395264, + "rewards/rejected": -5.5198493003845215, + "step": 4700 + }, + { + "epoch": 0.73, + "learning_rate": 1.069943046838529e-05, + "logits/chosen": -3.226855516433716, + "logits/rejected": -2.6340973377227783, + "logps/chosen": -425.21978759765625, + "logps/rejected": -294.32391357421875, + "loss": 2.2367, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3057732582092285, + "rewards/margins": 3.4816958904266357, + "rewards/rejected": -5.787468910217285, + "step": 4701 + }, + { + "epoch": 0.73, + "learning_rate": 1.0698697027854141e-05, + "logits/chosen": -3.011875629425049, + "logits/rejected": -2.3952372074127197, + "logps/chosen": -237.48306274414062, + "logps/rejected": -468.7357482910156, + "loss": 1.426, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.450814962387085, + "rewards/margins": 4.687100410461426, + "rewards/rejected": -8.137914657592773, + "step": 4702 + }, + { + "epoch": 0.73, + "learning_rate": 1.0697963587322993e-05, + "logits/chosen": -2.953448534011841, + "logits/rejected": -2.7664637565612793, + "logps/chosen": -290.38116455078125, + "logps/rejected": -480.6460266113281, + "loss": 2.4153, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6933584213256836, + "rewards/margins": -0.3259570598602295, + "rewards/rejected": -3.367401361465454, + "step": 4703 + }, + { + "epoch": 0.73, + "learning_rate": 1.0697230146791847e-05, + "logits/chosen": -3.0051987171173096, + "logits/rejected": -2.968390703201294, + "logps/chosen": -201.45404052734375, + "logps/rejected": -114.90715789794922, + "loss": 3.1633, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.974725723266602, + "rewards/margins": -2.0140984058380127, + "rewards/rejected": -2.960627317428589, + "step": 4704 + }, + { + "epoch": 0.73, + "learning_rate": 1.0696496706260699e-05, + "logits/chosen": -1.8734461069107056, + "logits/rejected": -2.662175416946411, + "logps/chosen": -211.6195831298828, + "logps/rejected": -334.70306396484375, + "loss": 2.3211, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.561279773712158, + "rewards/margins": 1.3091518878936768, + "rewards/rejected": -5.870431423187256, + "step": 4705 + }, + { + "epoch": 0.73, + "learning_rate": 1.069576326572955e-05, + "logits/chosen": -2.7895212173461914, + "logits/rejected": -2.5351359844207764, + "logps/chosen": -458.5151672363281, + "logps/rejected": -476.898681640625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3211243152618408, + "rewards/margins": 7.950467109680176, + "rewards/rejected": -9.271591186523438, + "step": 4706 + }, + { + "epoch": 0.73, + "learning_rate": 1.0695029825198402e-05, + "logits/chosen": -2.5058889389038086, + "logits/rejected": -3.146373748779297, + "logps/chosen": -117.72377014160156, + "logps/rejected": -184.24635314941406, + "loss": 0.0659, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0352071523666382, + "rewards/margins": 3.3776841163635254, + "rewards/rejected": -4.412891387939453, + "step": 4707 + }, + { + "epoch": 0.73, + "learning_rate": 1.0694296384667256e-05, + "logits/chosen": -2.2565932273864746, + "logits/rejected": -3.111290216445923, + "logps/chosen": -111.05915069580078, + "logps/rejected": -329.42987060546875, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7292411923408508, + "rewards/margins": 6.584237098693848, + "rewards/rejected": -7.313477993011475, + "step": 4708 + }, + { + "epoch": 0.73, + "learning_rate": 1.0693562944136108e-05, + "logits/chosen": -2.241391181945801, + "logits/rejected": -2.777320384979248, + "logps/chosen": -139.11236572265625, + "logps/rejected": -275.32733154296875, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.735032081604004, + "rewards/margins": 6.144175052642822, + "rewards/rejected": -7.879207134246826, + "step": 4709 + }, + { + "epoch": 0.73, + "learning_rate": 1.069282950360496e-05, + "logits/chosen": -3.1791863441467285, + "logits/rejected": -2.3431737422943115, + "logps/chosen": -515.497802734375, + "logps/rejected": -150.64950561523438, + "loss": 2.1811, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.849408149719238, + "rewards/margins": 1.0022459030151367, + "rewards/rejected": -5.851654052734375, + "step": 4710 + }, + { + "epoch": 0.73, + "learning_rate": 1.0692096063073812e-05, + "logits/chosen": -2.9508140087127686, + "logits/rejected": -3.3345096111297607, + "logps/chosen": -161.95712280273438, + "logps/rejected": -200.3125762939453, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6581588387489319, + "rewards/margins": 4.688762664794922, + "rewards/rejected": -5.346921920776367, + "step": 4711 + }, + { + "epoch": 0.73, + "learning_rate": 1.0691362622542663e-05, + "logits/chosen": -1.687455415725708, + "logits/rejected": -3.0675694942474365, + "logps/chosen": -184.40008544921875, + "logps/rejected": -475.47540283203125, + "loss": 0.0583, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9153664112091064, + "rewards/margins": 3.567615509033203, + "rewards/rejected": -5.482982158660889, + "step": 4712 + }, + { + "epoch": 0.73, + "learning_rate": 1.0690629182011515e-05, + "logits/chosen": -2.527763605117798, + "logits/rejected": -3.1017587184906006, + "logps/chosen": -339.42950439453125, + "logps/rejected": -478.00421142578125, + "loss": 0.0703, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.415867567062378, + "rewards/margins": 4.75587797164917, + "rewards/rejected": -7.171745300292969, + "step": 4713 + }, + { + "epoch": 0.73, + "learning_rate": 1.0689895741480367e-05, + "logits/chosen": -1.5735208988189697, + "logits/rejected": -2.8537776470184326, + "logps/chosen": -42.52843475341797, + "logps/rejected": -333.4156494140625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5638525485992432, + "rewards/margins": 7.1541361808776855, + "rewards/rejected": -8.717988967895508, + "step": 4714 + }, + { + "epoch": 0.73, + "learning_rate": 1.0689162300949219e-05, + "logits/chosen": -2.8773906230926514, + "logits/rejected": -3.0688679218292236, + "logps/chosen": -19.652511596679688, + "logps/rejected": -142.76434326171875, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.134613037109375, + "rewards/margins": 4.537777900695801, + "rewards/rejected": -5.672390937805176, + "step": 4715 + }, + { + "epoch": 0.73, + "learning_rate": 1.0688428860418071e-05, + "logits/chosen": -2.860952138900757, + "logits/rejected": -2.6912567615509033, + "logps/chosen": -112.66588592529297, + "logps/rejected": -266.296630859375, + "loss": 0.1279, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5068199634552, + "rewards/margins": 4.563287258148193, + "rewards/rejected": -7.070107460021973, + "step": 4716 + }, + { + "epoch": 0.73, + "learning_rate": 1.0687695419886925e-05, + "logits/chosen": -1.8007290363311768, + "logits/rejected": -2.8573360443115234, + "logps/chosen": -67.30895233154297, + "logps/rejected": -257.7496337890625, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8188120126724243, + "rewards/margins": 6.070356369018555, + "rewards/rejected": -7.889167785644531, + "step": 4717 + }, + { + "epoch": 0.73, + "learning_rate": 1.0686961979355776e-05, + "logits/chosen": -1.1184659004211426, + "logits/rejected": -2.6991078853607178, + "logps/chosen": -117.08793640136719, + "logps/rejected": -437.1734924316406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6313519477844238, + "rewards/margins": 8.675413131713867, + "rewards/rejected": -10.306764602661133, + "step": 4718 + }, + { + "epoch": 0.73, + "learning_rate": 1.0686228538824628e-05, + "logits/chosen": -1.9020506143569946, + "logits/rejected": -3.108553171157837, + "logps/chosen": -78.27948760986328, + "logps/rejected": -254.4764404296875, + "loss": 0.0712, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5977872610092163, + "rewards/margins": 2.841484785079956, + "rewards/rejected": -4.439271926879883, + "step": 4719 + }, + { + "epoch": 0.73, + "learning_rate": 1.068549509829348e-05, + "logits/chosen": -2.5345518589019775, + "logits/rejected": -2.838467836380005, + "logps/chosen": -364.900634765625, + "logps/rejected": -501.97967529296875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8450103998184204, + "rewards/margins": 7.200654983520508, + "rewards/rejected": -9.04566478729248, + "step": 4720 + }, + { + "epoch": 0.73, + "learning_rate": 1.0684761657762332e-05, + "logits/chosen": -2.6015396118164062, + "logits/rejected": -3.1945433616638184, + "logps/chosen": -247.70851135253906, + "logps/rejected": -610.0966796875, + "loss": 2.8077, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.247271537780762, + "rewards/margins": 1.415715217590332, + "rewards/rejected": -5.662986755371094, + "step": 4721 + }, + { + "epoch": 0.73, + "learning_rate": 1.0684028217231184e-05, + "logits/chosen": -3.0047285556793213, + "logits/rejected": -3.195272922515869, + "logps/chosen": -420.1402893066406, + "logps/rejected": -361.9839782714844, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3173489570617676, + "rewards/margins": 5.541562557220459, + "rewards/rejected": -5.858911514282227, + "step": 4722 + }, + { + "epoch": 0.73, + "learning_rate": 1.0683294776700036e-05, + "logits/chosen": -1.9211184978485107, + "logits/rejected": -3.141653299331665, + "logps/chosen": -251.22967529296875, + "logps/rejected": -432.6549072265625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9831222295761108, + "rewards/margins": 6.231787204742432, + "rewards/rejected": -7.214909553527832, + "step": 4723 + }, + { + "epoch": 0.73, + "learning_rate": 1.0682561336168888e-05, + "logits/chosen": -2.970491409301758, + "logits/rejected": -2.799833297729492, + "logps/chosen": -102.31462097167969, + "logps/rejected": -217.98611450195312, + "loss": 1.9255, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3142764568328857, + "rewards/margins": 1.8726692199707031, + "rewards/rejected": -5.18694543838501, + "step": 4724 + }, + { + "epoch": 0.73, + "learning_rate": 1.068182789563774e-05, + "logits/chosen": -3.114712953567505, + "logits/rejected": -2.4798390865325928, + "logps/chosen": -429.1070251464844, + "logps/rejected": -367.2066650390625, + "loss": 1.6837, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.055567741394043, + "rewards/margins": 1.4432494640350342, + "rewards/rejected": -5.498817443847656, + "step": 4725 + }, + { + "epoch": 0.73, + "learning_rate": 1.0681094455106593e-05, + "logits/chosen": -1.589318037033081, + "logits/rejected": -3.157402515411377, + "logps/chosen": -119.09563446044922, + "logps/rejected": -362.80169677734375, + "loss": 0.0888, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1096187829971313, + "rewards/margins": 3.4679388999938965, + "rewards/rejected": -4.577557563781738, + "step": 4726 + }, + { + "epoch": 0.74, + "learning_rate": 1.0680361014575445e-05, + "logits/chosen": -3.01460337638855, + "logits/rejected": -3.1263322830200195, + "logps/chosen": -441.7393493652344, + "logps/rejected": -646.3780517578125, + "loss": 2.3355, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5059642791748047, + "rewards/margins": -0.6798304319381714, + "rewards/rejected": -2.8261337280273438, + "step": 4727 + }, + { + "epoch": 0.74, + "learning_rate": 1.0679627574044297e-05, + "logits/chosen": -2.2895238399505615, + "logits/rejected": -2.6839890480041504, + "logps/chosen": -200.72109985351562, + "logps/rejected": -427.01263427734375, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.064509630203247, + "rewards/margins": 6.316665172576904, + "rewards/rejected": -8.38117504119873, + "step": 4728 + }, + { + "epoch": 0.74, + "learning_rate": 1.0678894133513149e-05, + "logits/chosen": -1.895676851272583, + "logits/rejected": -3.0969302654266357, + "logps/chosen": -110.40319061279297, + "logps/rejected": -312.5130310058594, + "loss": 0.0292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.94857257604599, + "rewards/margins": 3.9892196655273438, + "rewards/rejected": -4.9377923011779785, + "step": 4729 + }, + { + "epoch": 0.74, + "learning_rate": 1.0678160692982e-05, + "logits/chosen": -3.111631393432617, + "logits/rejected": -3.1664466857910156, + "logps/chosen": -77.13134765625, + "logps/rejected": -242.7958984375, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1673943996429443, + "rewards/margins": 4.125504970550537, + "rewards/rejected": -5.292899131774902, + "step": 4730 + }, + { + "epoch": 0.74, + "learning_rate": 1.0677427252450853e-05, + "logits/chosen": -1.74929940700531, + "logits/rejected": -3.1344995498657227, + "logps/chosen": -161.7582244873047, + "logps/rejected": -539.1981811523438, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.480825424194336, + "rewards/margins": 5.305176734924316, + "rewards/rejected": -6.786002159118652, + "step": 4731 + }, + { + "epoch": 0.74, + "learning_rate": 1.0676693811919704e-05, + "logits/chosen": -2.8617546558380127, + "logits/rejected": -3.0715341567993164, + "logps/chosen": -259.5667724609375, + "logps/rejected": -204.25738525390625, + "loss": 0.0964, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7956544160842896, + "rewards/margins": 2.4750022888183594, + "rewards/rejected": -4.270656585693359, + "step": 4732 + }, + { + "epoch": 0.74, + "learning_rate": 1.0675960371388556e-05, + "logits/chosen": -3.120668411254883, + "logits/rejected": -2.6548075675964355, + "logps/chosen": -521.4456787109375, + "logps/rejected": -422.84344482421875, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0619926452636719, + "rewards/margins": 6.01827335357666, + "rewards/rejected": -7.080265998840332, + "step": 4733 + }, + { + "epoch": 0.74, + "learning_rate": 1.0675226930857408e-05, + "logits/chosen": -2.2605485916137695, + "logits/rejected": -3.0072996616363525, + "logps/chosen": -151.4178924560547, + "logps/rejected": -343.252685546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9735753536224365, + "rewards/margins": 7.931121826171875, + "rewards/rejected": -9.90469741821289, + "step": 4734 + }, + { + "epoch": 0.74, + "learning_rate": 1.0674493490326262e-05, + "logits/chosen": -2.998263120651245, + "logits/rejected": -3.115729331970215, + "logps/chosen": -50.93326187133789, + "logps/rejected": -269.282470703125, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.042738914489746, + "rewards/margins": 6.310555458068848, + "rewards/rejected": -7.353294372558594, + "step": 4735 + }, + { + "epoch": 0.74, + "learning_rate": 1.0673760049795114e-05, + "logits/chosen": -2.918705940246582, + "logits/rejected": -3.21018123626709, + "logps/chosen": -176.71791076660156, + "logps/rejected": -192.86593627929688, + "loss": 3.8498, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.0781989097595215, + "rewards/margins": -1.0099759101867676, + "rewards/rejected": -3.068222761154175, + "step": 4736 + }, + { + "epoch": 0.74, + "learning_rate": 1.0673026609263966e-05, + "logits/chosen": -2.8553271293640137, + "logits/rejected": -1.1190637350082397, + "logps/chosen": -402.3740234375, + "logps/rejected": -246.23779296875, + "loss": 4.7003, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.732425689697266, + "rewards/margins": -0.4533367156982422, + "rewards/rejected": -5.279088973999023, + "step": 4737 + }, + { + "epoch": 0.74, + "learning_rate": 1.0672293168732819e-05, + "logits/chosen": -2.8471431732177734, + "logits/rejected": -3.0451459884643555, + "logps/chosen": -152.12950134277344, + "logps/rejected": -240.76266479492188, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3630971908569336, + "rewards/margins": 5.221041679382324, + "rewards/rejected": -6.584138870239258, + "step": 4738 + }, + { + "epoch": 0.74, + "learning_rate": 1.0671559728201671e-05, + "logits/chosen": -2.984513759613037, + "logits/rejected": -2.9181933403015137, + "logps/chosen": -157.88739013671875, + "logps/rejected": -59.901798248291016, + "loss": 5.772, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.195099830627441, + "rewards/margins": -5.768875598907471, + "rewards/rejected": -1.4262244701385498, + "step": 4739 + }, + { + "epoch": 0.74, + "learning_rate": 1.0670826287670523e-05, + "logits/chosen": -2.996128797531128, + "logits/rejected": -2.5407299995422363, + "logps/chosen": -439.13592529296875, + "logps/rejected": -328.8403015136719, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8556724786758423, + "rewards/margins": 4.4393534660339355, + "rewards/rejected": -6.295025825500488, + "step": 4740 + }, + { + "epoch": 0.74, + "learning_rate": 1.0670092847139375e-05, + "logits/chosen": -2.9182240962982178, + "logits/rejected": -2.500153064727783, + "logps/chosen": -316.24462890625, + "logps/rejected": -254.60621643066406, + "loss": 2.9777, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.263448238372803, + "rewards/margins": 1.5377323627471924, + "rewards/rejected": -5.801180362701416, + "step": 4741 + }, + { + "epoch": 0.74, + "learning_rate": 1.0669359406608227e-05, + "logits/chosen": -1.045961856842041, + "logits/rejected": -2.4746906757354736, + "logps/chosen": -305.0028076171875, + "logps/rejected": -698.1510620117188, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.998578667640686, + "rewards/margins": 8.454787254333496, + "rewards/rejected": -9.45336627960205, + "step": 4742 + }, + { + "epoch": 0.74, + "learning_rate": 1.066862596607708e-05, + "logits/chosen": -2.9833009243011475, + "logits/rejected": -3.071519136428833, + "logps/chosen": -524.7225341796875, + "logps/rejected": -205.73394775390625, + "loss": 4.4337, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.079416751861572, + "rewards/margins": -1.690840244293213, + "rewards/rejected": -3.3885769844055176, + "step": 4743 + }, + { + "epoch": 0.74, + "learning_rate": 1.0667892525545932e-05, + "logits/chosen": -3.1089773178100586, + "logits/rejected": -2.8134138584136963, + "logps/chosen": -516.3341064453125, + "logps/rejected": -391.34259033203125, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1959457397460938, + "rewards/margins": 4.927258491516113, + "rewards/rejected": -7.123204231262207, + "step": 4744 + }, + { + "epoch": 0.74, + "learning_rate": 1.0667159085014784e-05, + "logits/chosen": -2.9480397701263428, + "logits/rejected": -2.119032621383667, + "logps/chosen": -366.9137878417969, + "logps/rejected": -353.45965576171875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8993202447891235, + "rewards/margins": 5.985990524291992, + "rewards/rejected": -6.885310649871826, + "step": 4745 + }, + { + "epoch": 0.74, + "learning_rate": 1.0666425644483636e-05, + "logits/chosen": -1.4462406635284424, + "logits/rejected": -3.0722599029541016, + "logps/chosen": -41.94560623168945, + "logps/rejected": -309.37738037109375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7461088299751282, + "rewards/margins": 7.931520462036133, + "rewards/rejected": -8.677629470825195, + "step": 4746 + }, + { + "epoch": 0.74, + "learning_rate": 1.0665692203952488e-05, + "logits/chosen": -2.9670181274414062, + "logits/rejected": -2.6415364742279053, + "logps/chosen": -297.1954345703125, + "logps/rejected": -336.0726318359375, + "loss": 2.0385, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.340485572814941, + "rewards/margins": 0.49121689796447754, + "rewards/rejected": -4.831702709197998, + "step": 4747 + }, + { + "epoch": 0.74, + "learning_rate": 1.066495876342134e-05, + "logits/chosen": -2.076317548751831, + "logits/rejected": -2.9963252544403076, + "logps/chosen": -148.16488647460938, + "logps/rejected": -358.16326904296875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7678544521331787, + "rewards/margins": 6.450593948364258, + "rewards/rejected": -8.218448638916016, + "step": 4748 + }, + { + "epoch": 0.74, + "learning_rate": 1.0664225322890191e-05, + "logits/chosen": -2.9130895137786865, + "logits/rejected": -3.079352617263794, + "logps/chosen": -214.8673858642578, + "logps/rejected": -180.15286254882812, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.344722032546997, + "rewards/margins": 4.795557975769043, + "rewards/rejected": -6.140280246734619, + "step": 4749 + }, + { + "epoch": 0.74, + "learning_rate": 1.0663491882359043e-05, + "logits/chosen": -2.661471366882324, + "logits/rejected": -3.1379313468933105, + "logps/chosen": -295.82281494140625, + "logps/rejected": -418.36956787109375, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6156074404716492, + "rewards/margins": 6.269486427307129, + "rewards/rejected": -6.885093688964844, + "step": 4750 + }, + { + "epoch": 0.74, + "learning_rate": 1.0662758441827895e-05, + "logits/chosen": -2.102508783340454, + "logits/rejected": -3.2631757259368896, + "logps/chosen": -368.2113952636719, + "logps/rejected": -598.830078125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5558746457099915, + "rewards/margins": 6.796730041503906, + "rewards/rejected": -7.352604866027832, + "step": 4751 + }, + { + "epoch": 0.74, + "learning_rate": 1.0662025001296749e-05, + "logits/chosen": -2.6006531715393066, + "logits/rejected": -3.0082857608795166, + "logps/chosen": -250.02499389648438, + "logps/rejected": -172.07598876953125, + "loss": 1.0428, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.484452724456787, + "rewards/margins": 1.4593839645385742, + "rewards/rejected": -3.9438366889953613, + "step": 4752 + }, + { + "epoch": 0.74, + "learning_rate": 1.06612915607656e-05, + "logits/chosen": -3.1348717212677, + "logits/rejected": -2.263455390930176, + "logps/chosen": -197.0139617919922, + "logps/rejected": -101.68711853027344, + "loss": 0.7539, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.818434238433838, + "rewards/margins": 1.926506519317627, + "rewards/rejected": -4.744940757751465, + "step": 4753 + }, + { + "epoch": 0.74, + "learning_rate": 1.0660558120234453e-05, + "logits/chosen": -2.2280232906341553, + "logits/rejected": -3.0692381858825684, + "logps/chosen": -455.3572998046875, + "logps/rejected": -603.12353515625, + "loss": 2.1175, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6337692737579346, + "rewards/margins": 0.8801589012145996, + "rewards/rejected": -3.513928174972534, + "step": 4754 + }, + { + "epoch": 0.74, + "learning_rate": 1.0659824679703304e-05, + "logits/chosen": -2.3901190757751465, + "logits/rejected": -2.8520450592041016, + "logps/chosen": -127.13465881347656, + "logps/rejected": -169.9873046875, + "loss": 0.7272, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5170717239379883, + "rewards/margins": 2.8298487663269043, + "rewards/rejected": -5.346920013427734, + "step": 4755 + }, + { + "epoch": 0.74, + "learning_rate": 1.0659091239172156e-05, + "logits/chosen": -3.0878586769104004, + "logits/rejected": -2.9433233737945557, + "logps/chosen": -114.53948211669922, + "logps/rejected": -153.45297241210938, + "loss": 0.8913, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1442275047302246, + "rewards/margins": 0.3087853193283081, + "rewards/rejected": -2.4530129432678223, + "step": 4756 + }, + { + "epoch": 0.74, + "learning_rate": 1.0658357798641008e-05, + "logits/chosen": -2.391853094100952, + "logits/rejected": -3.116102457046509, + "logps/chosen": -264.23919677734375, + "logps/rejected": -411.8493957519531, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.011614203453064, + "rewards/margins": 6.01860237121582, + "rewards/rejected": -7.030216217041016, + "step": 4757 + }, + { + "epoch": 0.74, + "learning_rate": 1.065762435810986e-05, + "logits/chosen": -2.1517117023468018, + "logits/rejected": -3.029737710952759, + "logps/chosen": -99.4315185546875, + "logps/rejected": -400.5384521484375, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5838081240653992, + "rewards/margins": 5.498479843139648, + "rewards/rejected": -6.082287788391113, + "step": 4758 + }, + { + "epoch": 0.74, + "learning_rate": 1.0656890917578712e-05, + "logits/chosen": -3.0877976417541504, + "logits/rejected": -1.9269917011260986, + "logps/chosen": -487.4012451171875, + "logps/rejected": -329.4201354980469, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15196609497070312, + "rewards/margins": 6.104091644287109, + "rewards/rejected": -6.2560577392578125, + "step": 4759 + }, + { + "epoch": 0.74, + "learning_rate": 1.0656157477047564e-05, + "logits/chosen": -2.519404411315918, + "logits/rejected": -2.835853338241577, + "logps/chosen": -156.99375915527344, + "logps/rejected": -320.76934814453125, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3954071998596191, + "rewards/margins": 5.912588119506836, + "rewards/rejected": -7.307994842529297, + "step": 4760 + }, + { + "epoch": 0.74, + "learning_rate": 1.0655424036516417e-05, + "logits/chosen": -2.8052778244018555, + "logits/rejected": -2.518770217895508, + "logps/chosen": -268.0860900878906, + "logps/rejected": -309.7628479003906, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4105961322784424, + "rewards/margins": 4.492081165313721, + "rewards/rejected": -5.902677536010742, + "step": 4761 + }, + { + "epoch": 0.74, + "learning_rate": 1.065469059598527e-05, + "logits/chosen": -2.174274444580078, + "logits/rejected": -3.0222580432891846, + "logps/chosen": -170.4780731201172, + "logps/rejected": -268.2513427734375, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3103985786437988, + "rewards/margins": 3.9627482891082764, + "rewards/rejected": -5.273146629333496, + "step": 4762 + }, + { + "epoch": 0.74, + "learning_rate": 1.0653957155454121e-05, + "logits/chosen": -1.9972796440124512, + "logits/rejected": -2.881479263305664, + "logps/chosen": -227.55531311035156, + "logps/rejected": -235.06661987304688, + "loss": 2.2262, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0527095794677734, + "rewards/margins": 0.02057194709777832, + "rewards/rejected": -3.073281764984131, + "step": 4763 + }, + { + "epoch": 0.74, + "learning_rate": 1.0653223714922973e-05, + "logits/chosen": -3.0807313919067383, + "logits/rejected": -2.645688056945801, + "logps/chosen": -295.15185546875, + "logps/rejected": -165.99923706054688, + "loss": 3.3104, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.093993186950684, + "rewards/margins": -1.7302817106246948, + "rewards/rejected": -2.3637115955352783, + "step": 4764 + }, + { + "epoch": 0.74, + "learning_rate": 1.0652490274391825e-05, + "logits/chosen": -3.175710916519165, + "logits/rejected": -3.138054847717285, + "logps/chosen": -584.7537841796875, + "logps/rejected": -519.658935546875, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14390385150909424, + "rewards/margins": 4.325401306152344, + "rewards/rejected": -4.469305515289307, + "step": 4765 + }, + { + "epoch": 0.74, + "learning_rate": 1.0651756833860677e-05, + "logits/chosen": -2.9329891204833984, + "logits/rejected": -2.2690751552581787, + "logps/chosen": -405.3852233886719, + "logps/rejected": -299.2022705078125, + "loss": 0.1907, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4264235496520996, + "rewards/margins": 4.407157897949219, + "rewards/rejected": -6.833581924438477, + "step": 4766 + }, + { + "epoch": 0.74, + "learning_rate": 1.0651023393329529e-05, + "logits/chosen": -3.074979543685913, + "logits/rejected": -3.067516803741455, + "logps/chosen": -134.9281005859375, + "logps/rejected": -172.00506591796875, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1961623430252075, + "rewards/margins": 4.523736953735352, + "rewards/rejected": -5.7198991775512695, + "step": 4767 + }, + { + "epoch": 0.74, + "learning_rate": 1.065028995279838e-05, + "logits/chosen": -2.168782949447632, + "logits/rejected": -2.929154872894287, + "logps/chosen": -144.23138427734375, + "logps/rejected": -361.9798278808594, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4081847667694092, + "rewards/margins": 5.912497043609619, + "rewards/rejected": -7.320681571960449, + "step": 4768 + }, + { + "epoch": 0.74, + "learning_rate": 1.0649556512267232e-05, + "logits/chosen": -3.0406627655029297, + "logits/rejected": -3.127861738204956, + "logps/chosen": -31.125839233398438, + "logps/rejected": -112.23634338378906, + "loss": 0.0448, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.820594072341919, + "rewards/margins": 3.247519016265869, + "rewards/rejected": -5.068113327026367, + "step": 4769 + }, + { + "epoch": 0.74, + "learning_rate": 1.0648823071736086e-05, + "logits/chosen": -2.931096315383911, + "logits/rejected": -2.4387011528015137, + "logps/chosen": -223.07345581054688, + "logps/rejected": -153.99066162109375, + "loss": 1.5048, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5342774391174316, + "rewards/margins": 0.4928615093231201, + "rewards/rejected": -3.0271389484405518, + "step": 4770 + }, + { + "epoch": 0.74, + "learning_rate": 1.0648089631204938e-05, + "logits/chosen": -2.707632064819336, + "logits/rejected": -1.8591372966766357, + "logps/chosen": -215.33644104003906, + "logps/rejected": -170.75584411621094, + "loss": 4.7303, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.35876989364624, + "rewards/margins": -0.9538102149963379, + "rewards/rejected": -5.404959678649902, + "step": 4771 + }, + { + "epoch": 0.74, + "learning_rate": 1.0647356190673791e-05, + "logits/chosen": -3.022289514541626, + "logits/rejected": -2.99896502494812, + "logps/chosen": -266.75396728515625, + "logps/rejected": -339.5546875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.92619788646698, + "rewards/margins": 6.489314079284668, + "rewards/rejected": -7.4155120849609375, + "step": 4772 + }, + { + "epoch": 0.74, + "learning_rate": 1.0646622750142643e-05, + "logits/chosen": -3.1531150341033936, + "logits/rejected": -2.482736587524414, + "logps/chosen": -187.17694091796875, + "logps/rejected": -179.83377075195312, + "loss": 1.9438, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0808982849121094, + "rewards/margins": 1.4278063774108887, + "rewards/rejected": -4.508704662322998, + "step": 4773 + }, + { + "epoch": 0.74, + "learning_rate": 1.0645889309611495e-05, + "logits/chosen": -2.9311435222625732, + "logits/rejected": -2.735037326812744, + "logps/chosen": -162.99588012695312, + "logps/rejected": -231.21603393554688, + "loss": 2.0784, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6406638622283936, + "rewards/margins": 0.3029770851135254, + "rewards/rejected": -3.943640947341919, + "step": 4774 + }, + { + "epoch": 0.74, + "learning_rate": 1.0645155869080347e-05, + "logits/chosen": -2.4954280853271484, + "logits/rejected": -2.8598062992095947, + "logps/chosen": -122.05953979492188, + "logps/rejected": -250.68650817871094, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.649977445602417, + "rewards/margins": 4.61588716506958, + "rewards/rejected": -6.265864372253418, + "step": 4775 + }, + { + "epoch": 0.74, + "learning_rate": 1.0644422428549199e-05, + "logits/chosen": -3.17862606048584, + "logits/rejected": -3.262693166732788, + "logps/chosen": -133.3329620361328, + "logps/rejected": -192.74998474121094, + "loss": 0.0634, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0684590339660645, + "rewards/margins": 3.640151262283325, + "rewards/rejected": -5.7086100578308105, + "step": 4776 + }, + { + "epoch": 0.74, + "learning_rate": 1.064368898801805e-05, + "logits/chosen": -1.2569799423217773, + "logits/rejected": -3.0351102352142334, + "logps/chosen": -90.11209106445312, + "logps/rejected": -350.79168701171875, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1136012077331543, + "rewards/margins": 3.7981390953063965, + "rewards/rejected": -5.911740303039551, + "step": 4777 + }, + { + "epoch": 0.74, + "learning_rate": 1.0642955547486903e-05, + "logits/chosen": -2.097381591796875, + "logits/rejected": -2.9686379432678223, + "logps/chosen": -40.48704528808594, + "logps/rejected": -222.36654663085938, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4684317111968994, + "rewards/margins": 4.844227313995361, + "rewards/rejected": -6.31265926361084, + "step": 4778 + }, + { + "epoch": 0.74, + "learning_rate": 1.0642222106955756e-05, + "logits/chosen": -3.2260823249816895, + "logits/rejected": -1.8582220077514648, + "logps/chosen": -551.5972900390625, + "logps/rejected": -387.36871337890625, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.968906044960022, + "rewards/margins": 4.450085639953613, + "rewards/rejected": -6.418992042541504, + "step": 4779 + }, + { + "epoch": 0.74, + "learning_rate": 1.0641488666424608e-05, + "logits/chosen": -2.9572207927703857, + "logits/rejected": -1.3829624652862549, + "logps/chosen": -253.0446014404297, + "logps/rejected": -64.59989929199219, + "loss": 2.9966, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9972169399261475, + "rewards/margins": -1.5578346252441406, + "rewards/rejected": -2.439382314682007, + "step": 4780 + }, + { + "epoch": 0.74, + "learning_rate": 1.064075522589346e-05, + "logits/chosen": -2.968083143234253, + "logits/rejected": -2.8185362815856934, + "logps/chosen": -127.146728515625, + "logps/rejected": -173.07843017578125, + "loss": 0.042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.434701919555664, + "rewards/margins": 3.2497425079345703, + "rewards/rejected": -4.684444427490234, + "step": 4781 + }, + { + "epoch": 0.74, + "learning_rate": 1.0640021785362312e-05, + "logits/chosen": -2.9383599758148193, + "logits/rejected": -2.8944289684295654, + "logps/chosen": -169.72125244140625, + "logps/rejected": -156.51138305664062, + "loss": 1.952, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7306466102600098, + "rewards/margins": 0.8882569074630737, + "rewards/rejected": -3.618903398513794, + "step": 4782 + }, + { + "epoch": 0.74, + "learning_rate": 1.0639288344831164e-05, + "logits/chosen": -3.0874531269073486, + "logits/rejected": -3.1875085830688477, + "logps/chosen": -32.306884765625, + "logps/rejected": -182.75479125976562, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7125234603881836, + "rewards/margins": 4.455642223358154, + "rewards/rejected": -6.168166160583496, + "step": 4783 + }, + { + "epoch": 0.74, + "learning_rate": 1.0638554904300016e-05, + "logits/chosen": -2.389955759048462, + "logits/rejected": -3.0969254970550537, + "logps/chosen": -140.97134399414062, + "logps/rejected": -250.3099365234375, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8868560791015625, + "rewards/margins": 5.0945634841918945, + "rewards/rejected": -6.981419563293457, + "step": 4784 + }, + { + "epoch": 0.74, + "learning_rate": 1.0637821463768868e-05, + "logits/chosen": -2.1813924312591553, + "logits/rejected": -3.1125080585479736, + "logps/chosen": -152.587158203125, + "logps/rejected": -330.49627685546875, + "loss": 0.0622, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.544244408607483, + "rewards/margins": 4.057665824890137, + "rewards/rejected": -5.601910591125488, + "step": 4785 + }, + { + "epoch": 0.74, + "learning_rate": 1.063708802323772e-05, + "logits/chosen": -1.2575901746749878, + "logits/rejected": -3.034095525741577, + "logps/chosen": -85.3631362915039, + "logps/rejected": -399.64971923828125, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.021260142326355, + "rewards/margins": 5.717096328735352, + "rewards/rejected": -6.738356113433838, + "step": 4786 + }, + { + "epoch": 0.74, + "learning_rate": 1.0636354582706571e-05, + "logits/chosen": -1.3630409240722656, + "logits/rejected": -2.157064914703369, + "logps/chosen": -161.35739135742188, + "logps/rejected": -118.4094009399414, + "loss": 2.5277, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.67146635055542, + "rewards/margins": -0.14755463600158691, + "rewards/rejected": -3.523911714553833, + "step": 4787 + }, + { + "epoch": 0.74, + "learning_rate": 1.0635621142175425e-05, + "logits/chosen": -1.4930042028427124, + "logits/rejected": -3.193300724029541, + "logps/chosen": -213.07044982910156, + "logps/rejected": -458.73785400390625, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9024131894111633, + "rewards/margins": 5.874752044677734, + "rewards/rejected": -6.777165412902832, + "step": 4788 + }, + { + "epoch": 0.74, + "learning_rate": 1.0634887701644277e-05, + "logits/chosen": -2.5696682929992676, + "logits/rejected": -2.887094020843506, + "logps/chosen": -442.93707275390625, + "logps/rejected": -347.009033203125, + "loss": 4.5901, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.119160175323486, + "rewards/margins": -2.189605474472046, + "rewards/rejected": -2.9295547008514404, + "step": 4789 + }, + { + "epoch": 0.74, + "learning_rate": 1.0634154261113129e-05, + "logits/chosen": -2.830355405807495, + "logits/rejected": -3.0716872215270996, + "logps/chosen": -39.56166076660156, + "logps/rejected": -142.3330841064453, + "loss": 0.0972, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5290477275848389, + "rewards/margins": 2.5486910343170166, + "rewards/rejected": -4.0777387619018555, + "step": 4790 + }, + { + "epoch": 0.75, + "learning_rate": 1.063342082058198e-05, + "logits/chosen": -2.896907091140747, + "logits/rejected": -3.018018960952759, + "logps/chosen": -477.28753662109375, + "logps/rejected": -578.1689453125, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.61589515209198, + "rewards/margins": 4.7022600173950195, + "rewards/rejected": -6.318155288696289, + "step": 4791 + }, + { + "epoch": 0.75, + "learning_rate": 1.0632687380050832e-05, + "logits/chosen": -2.6261959075927734, + "logits/rejected": -3.191136360168457, + "logps/chosen": -95.50765991210938, + "logps/rejected": -219.87478637695312, + "loss": 0.0301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9439060688018799, + "rewards/margins": 4.259189605712891, + "rewards/rejected": -5.203095436096191, + "step": 4792 + }, + { + "epoch": 0.75, + "learning_rate": 1.0631953939519684e-05, + "logits/chosen": -3.123161554336548, + "logits/rejected": -2.4522619247436523, + "logps/chosen": -482.3094177246094, + "logps/rejected": -500.1983337402344, + "loss": 1.6609, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.259716033935547, + "rewards/margins": 0.0763312578201294, + "rewards/rejected": -3.336047410964966, + "step": 4793 + }, + { + "epoch": 0.75, + "learning_rate": 1.0631220498988536e-05, + "logits/chosen": -3.0192108154296875, + "logits/rejected": -2.7819736003875732, + "logps/chosen": -138.35958862304688, + "logps/rejected": -303.9766845703125, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6050063371658325, + "rewards/margins": 5.897171974182129, + "rewards/rejected": -7.502178192138672, + "step": 4794 + }, + { + "epoch": 0.75, + "learning_rate": 1.0630487058457388e-05, + "logits/chosen": -2.4070498943328857, + "logits/rejected": -2.6353235244750977, + "logps/chosen": -1038.4608154296875, + "logps/rejected": -722.713623046875, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0569045543670654, + "rewards/margins": 5.405427932739258, + "rewards/rejected": -6.462332248687744, + "step": 4795 + }, + { + "epoch": 0.75, + "learning_rate": 1.062975361792624e-05, + "logits/chosen": -2.9774222373962402, + "logits/rejected": -3.0245141983032227, + "logps/chosen": -109.67179107666016, + "logps/rejected": -291.9039001464844, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8495339155197144, + "rewards/margins": 6.3196120262146, + "rewards/rejected": -8.169145584106445, + "step": 4796 + }, + { + "epoch": 0.75, + "learning_rate": 1.0629020177395093e-05, + "logits/chosen": -3.088489055633545, + "logits/rejected": -2.5379550457000732, + "logps/chosen": -698.8679809570312, + "logps/rejected": -599.7840576171875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.412448525428772, + "rewards/margins": 7.490522861480713, + "rewards/rejected": -7.902971267700195, + "step": 4797 + }, + { + "epoch": 0.75, + "learning_rate": 1.0628286736863945e-05, + "logits/chosen": -2.8135061264038086, + "logits/rejected": -3.1305134296417236, + "logps/chosen": -77.13129425048828, + "logps/rejected": -311.14306640625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7740397453308105, + "rewards/margins": 6.552605152130127, + "rewards/rejected": -8.326644897460938, + "step": 4798 + }, + { + "epoch": 0.75, + "learning_rate": 1.0627553296332797e-05, + "logits/chosen": -3.304581880569458, + "logits/rejected": -2.953436851501465, + "logps/chosen": -526.3972778320312, + "logps/rejected": -395.48077392578125, + "loss": 0.0206, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5738352537155151, + "rewards/margins": 3.8705687522888184, + "rewards/rejected": -5.444403648376465, + "step": 4799 + }, + { + "epoch": 0.75, + "learning_rate": 1.0626819855801649e-05, + "logits/chosen": -3.04365611076355, + "logits/rejected": -2.1976048946380615, + "logps/chosen": -485.82763671875, + "logps/rejected": -449.5888671875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4936401844024658, + "rewards/margins": 6.634305000305176, + "rewards/rejected": -8.127944946289062, + "step": 4800 + }, + { + "epoch": 0.75, + "learning_rate": 1.0626086415270501e-05, + "logits/chosen": -2.803819179534912, + "logits/rejected": -3.328083038330078, + "logps/chosen": -128.21017456054688, + "logps/rejected": -365.9847106933594, + "loss": 0.073, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.215703010559082, + "rewards/margins": 2.7141666412353516, + "rewards/rejected": -3.9298696517944336, + "step": 4801 + }, + { + "epoch": 0.75, + "learning_rate": 1.0625352974739353e-05, + "logits/chosen": -3.187431812286377, + "logits/rejected": -2.9294912815093994, + "logps/chosen": -288.9388427734375, + "logps/rejected": -594.4706420898438, + "loss": 3.1595, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.158195734024048, + "rewards/margins": -2.4196810722351074, + "rewards/rejected": -0.7385146617889404, + "step": 4802 + }, + { + "epoch": 0.75, + "learning_rate": 1.0624619534208205e-05, + "logits/chosen": -1.8289556503295898, + "logits/rejected": -2.7593696117401123, + "logps/chosen": -222.23770141601562, + "logps/rejected": -258.57562255859375, + "loss": 0.8608, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.731238603591919, + "rewards/margins": 4.144253253936768, + "rewards/rejected": -5.875491619110107, + "step": 4803 + }, + { + "epoch": 0.75, + "learning_rate": 1.0623886093677058e-05, + "logits/chosen": -3.156900405883789, + "logits/rejected": -2.7439043521881104, + "logps/chosen": -155.682373046875, + "logps/rejected": -160.81004333496094, + "loss": 1.3739, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7349472045898438, + "rewards/margins": 1.6333173513412476, + "rewards/rejected": -4.368264675140381, + "step": 4804 + }, + { + "epoch": 0.75, + "learning_rate": 1.062315265314591e-05, + "logits/chosen": -2.4035840034484863, + "logits/rejected": -2.9408881664276123, + "logps/chosen": -144.94168090820312, + "logps/rejected": -274.339599609375, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.645368218421936, + "rewards/margins": 5.397909164428711, + "rewards/rejected": -6.043277263641357, + "step": 4805 + }, + { + "epoch": 0.75, + "learning_rate": 1.0622419212614764e-05, + "logits/chosen": -2.979585647583008, + "logits/rejected": -2.073826551437378, + "logps/chosen": -154.07286071777344, + "logps/rejected": -88.23688507080078, + "loss": 2.6097, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7555365562438965, + "rewards/margins": -1.346300721168518, + "rewards/rejected": -2.409235954284668, + "step": 4806 + }, + { + "epoch": 0.75, + "learning_rate": 1.0621685772083616e-05, + "logits/chosen": -3.0542526245117188, + "logits/rejected": -1.6045770645141602, + "logps/chosen": -778.2194213867188, + "logps/rejected": -315.6528015136719, + "loss": 2.0204, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7891101837158203, + "rewards/margins": 0.4154622554779053, + "rewards/rejected": -3.2045726776123047, + "step": 4807 + }, + { + "epoch": 0.75, + "learning_rate": 1.0620952331552468e-05, + "logits/chosen": -2.9544811248779297, + "logits/rejected": -2.0126726627349854, + "logps/chosen": -216.83303833007812, + "logps/rejected": -136.95144653320312, + "loss": 2.2929, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.777083396911621, + "rewards/margins": -0.7630274295806885, + "rewards/rejected": -3.0140559673309326, + "step": 4808 + }, + { + "epoch": 0.75, + "learning_rate": 1.062021889102132e-05, + "logits/chosen": -2.969644546508789, + "logits/rejected": -3.0916218757629395, + "logps/chosen": -232.7478485107422, + "logps/rejected": -307.1231384277344, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1783994436264038, + "rewards/margins": 6.297786712646484, + "rewards/rejected": -7.4761857986450195, + "step": 4809 + }, + { + "epoch": 0.75, + "learning_rate": 1.0619485450490171e-05, + "logits/chosen": -1.4602795839309692, + "logits/rejected": -3.1053688526153564, + "logps/chosen": -148.44393920898438, + "logps/rejected": -315.34271240234375, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1295890808105469, + "rewards/margins": 4.860401630401611, + "rewards/rejected": -5.989991188049316, + "step": 4810 + }, + { + "epoch": 0.75, + "learning_rate": 1.0618752009959023e-05, + "logits/chosen": -2.2057573795318604, + "logits/rejected": -3.117372989654541, + "logps/chosen": -194.31008911132812, + "logps/rejected": -241.2425537109375, + "loss": 0.2466, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5519204139709473, + "rewards/margins": 1.275108814239502, + "rewards/rejected": -2.827029228210449, + "step": 4811 + }, + { + "epoch": 0.75, + "learning_rate": 1.0618018569427875e-05, + "logits/chosen": -2.499856948852539, + "logits/rejected": -3.093935966491699, + "logps/chosen": -126.09607696533203, + "logps/rejected": -449.8681640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4566792249679565, + "rewards/margins": 8.338342666625977, + "rewards/rejected": -9.795022964477539, + "step": 4812 + }, + { + "epoch": 0.75, + "learning_rate": 1.0617285128896727e-05, + "logits/chosen": -3.231513738632202, + "logits/rejected": -2.878213405609131, + "logps/chosen": -188.49497985839844, + "logps/rejected": -258.1112060546875, + "loss": 2.8107, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.126039505004883, + "rewards/margins": -0.16533923149108887, + "rewards/rejected": -3.960700035095215, + "step": 4813 + }, + { + "epoch": 0.75, + "learning_rate": 1.0616551688365579e-05, + "logits/chosen": -2.2027370929718018, + "logits/rejected": -3.1065165996551514, + "logps/chosen": -237.231689453125, + "logps/rejected": -621.0792846679688, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7150455713272095, + "rewards/margins": 5.092008590698242, + "rewards/rejected": -6.807054042816162, + "step": 4814 + }, + { + "epoch": 0.75, + "learning_rate": 1.0615818247834432e-05, + "logits/chosen": -2.479828119277954, + "logits/rejected": -3.188056230545044, + "logps/chosen": -189.51895141601562, + "logps/rejected": -320.69482421875, + "loss": 0.0407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13699015974998474, + "rewards/margins": 4.3634233474731445, + "rewards/rejected": -4.500413417816162, + "step": 4815 + }, + { + "epoch": 0.75, + "learning_rate": 1.0615084807303284e-05, + "logits/chosen": -3.0736162662506104, + "logits/rejected": -2.8149218559265137, + "logps/chosen": -388.04351806640625, + "logps/rejected": -392.423095703125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8099960684776306, + "rewards/margins": 7.378788948059082, + "rewards/rejected": -8.1887845993042, + "step": 4816 + }, + { + "epoch": 0.75, + "learning_rate": 1.0614351366772136e-05, + "logits/chosen": -3.119126319885254, + "logits/rejected": -2.401167631149292, + "logps/chosen": -166.925537109375, + "logps/rejected": -178.27682495117188, + "loss": 0.0569, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1847801208496094, + "rewards/margins": 3.009915351867676, + "rewards/rejected": -5.194695472717285, + "step": 4817 + }, + { + "epoch": 0.75, + "learning_rate": 1.0613617926240988e-05, + "logits/chosen": -3.0400078296661377, + "logits/rejected": -2.0772528648376465, + "logps/chosen": -1077.1575927734375, + "logps/rejected": -786.0340576171875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7112083435058594, + "rewards/margins": 5.7547526359558105, + "rewards/rejected": -6.46596097946167, + "step": 4818 + }, + { + "epoch": 0.75, + "learning_rate": 1.061288448570984e-05, + "logits/chosen": -3.1534531116485596, + "logits/rejected": -2.4806911945343018, + "logps/chosen": -837.0424194335938, + "logps/rejected": -603.7677001953125, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3292884826660156, + "rewards/margins": 5.596561431884766, + "rewards/rejected": -6.925849914550781, + "step": 4819 + }, + { + "epoch": 0.75, + "learning_rate": 1.0612151045178692e-05, + "logits/chosen": -3.1330251693725586, + "logits/rejected": -2.4863967895507812, + "logps/chosen": -108.58881378173828, + "logps/rejected": -168.174560546875, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1353317499160767, + "rewards/margins": 4.425783157348633, + "rewards/rejected": -5.56111478805542, + "step": 4820 + }, + { + "epoch": 0.75, + "learning_rate": 1.0611417604647544e-05, + "logits/chosen": -3.0561580657958984, + "logits/rejected": -2.8496294021606445, + "logps/chosen": -354.1341247558594, + "logps/rejected": -217.4127655029297, + "loss": 1.5665, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8193786144256592, + "rewards/margins": 0.5097233057022095, + "rewards/rejected": -2.329102039337158, + "step": 4821 + }, + { + "epoch": 0.75, + "learning_rate": 1.0610684164116396e-05, + "logits/chosen": -3.047010660171509, + "logits/rejected": -3.201882839202881, + "logps/chosen": -85.81575775146484, + "logps/rejected": -252.72848510742188, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0367847681045532, + "rewards/margins": 4.63524055480957, + "rewards/rejected": -5.672025680541992, + "step": 4822 + }, + { + "epoch": 0.75, + "learning_rate": 1.0609950723585247e-05, + "logits/chosen": -3.051337242126465, + "logits/rejected": -3.0670673847198486, + "logps/chosen": -201.12222290039062, + "logps/rejected": -176.87899780273438, + "loss": 0.0258, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4302958548069, + "rewards/margins": 3.816110372543335, + "rewards/rejected": -4.246406555175781, + "step": 4823 + }, + { + "epoch": 0.75, + "learning_rate": 1.0609217283054101e-05, + "logits/chosen": -3.084394931793213, + "logits/rejected": -2.0447893142700195, + "logps/chosen": -220.210205078125, + "logps/rejected": -158.40875244140625, + "loss": 2.2776, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.90673565864563, + "rewards/margins": -0.663596510887146, + "rewards/rejected": -2.2431390285491943, + "step": 4824 + }, + { + "epoch": 0.75, + "learning_rate": 1.0608483842522953e-05, + "logits/chosen": -3.140223264694214, + "logits/rejected": -2.4859321117401123, + "logps/chosen": -421.4804382324219, + "logps/rejected": -318.08843994140625, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5674415826797485, + "rewards/margins": 4.242291450500488, + "rewards/rejected": -5.809732913970947, + "step": 4825 + }, + { + "epoch": 0.75, + "learning_rate": 1.0607750401991805e-05, + "logits/chosen": -2.96757435798645, + "logits/rejected": -3.1783931255340576, + "logps/chosen": -351.3120422363281, + "logps/rejected": -193.54058837890625, + "loss": 1.9479, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.384491443634033, + "rewards/margins": -0.153733491897583, + "rewards/rejected": -4.230758190155029, + "step": 4826 + }, + { + "epoch": 0.75, + "learning_rate": 1.0607016961460657e-05, + "logits/chosen": -1.3290058374404907, + "logits/rejected": -2.8074538707733154, + "logps/chosen": -103.29206848144531, + "logps/rejected": -310.5518798828125, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6152828335762024, + "rewards/margins": 5.90253210067749, + "rewards/rejected": -6.517814636230469, + "step": 4827 + }, + { + "epoch": 0.75, + "learning_rate": 1.0606283520929508e-05, + "logits/chosen": -2.9942820072174072, + "logits/rejected": -1.9726765155792236, + "logps/chosen": -214.3607177734375, + "logps/rejected": -177.09889221191406, + "loss": 2.5224, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8066601753234863, + "rewards/margins": -0.10729169845581055, + "rewards/rejected": -3.6993680000305176, + "step": 4828 + }, + { + "epoch": 0.75, + "learning_rate": 1.060555008039836e-05, + "logits/chosen": -2.8475327491760254, + "logits/rejected": -2.0713448524475098, + "logps/chosen": -293.5058288574219, + "logps/rejected": -162.62655639648438, + "loss": 2.0322, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.347858428955078, + "rewards/margins": 0.3227272033691406, + "rewards/rejected": -3.6705856323242188, + "step": 4829 + }, + { + "epoch": 0.75, + "learning_rate": 1.0604816639867212e-05, + "logits/chosen": -2.765094518661499, + "logits/rejected": -1.922533392906189, + "logps/chosen": -193.14126586914062, + "logps/rejected": -88.32575225830078, + "loss": 2.4157, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.357095718383789, + "rewards/margins": -0.8979101181030273, + "rewards/rejected": -3.4591853618621826, + "step": 4830 + }, + { + "epoch": 0.75, + "learning_rate": 1.0604083199336064e-05, + "logits/chosen": -1.9975767135620117, + "logits/rejected": -2.7871828079223633, + "logps/chosen": -241.08932495117188, + "logps/rejected": -527.1227416992188, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9627082347869873, + "rewards/margins": 7.318252086639404, + "rewards/rejected": -9.280960083007812, + "step": 4831 + }, + { + "epoch": 0.75, + "learning_rate": 1.0603349758804918e-05, + "logits/chosen": -1.891766905784607, + "logits/rejected": -2.8652138710021973, + "logps/chosen": -67.17772674560547, + "logps/rejected": -284.30767822265625, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2817623615264893, + "rewards/margins": 4.049343585968018, + "rewards/rejected": -6.331106185913086, + "step": 4832 + }, + { + "epoch": 0.75, + "learning_rate": 1.060261631827377e-05, + "logits/chosen": -2.5562374591827393, + "logits/rejected": -3.1786134243011475, + "logps/chosen": -393.05621337890625, + "logps/rejected": -575.8414916992188, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3512718677520752, + "rewards/margins": 6.17461633682251, + "rewards/rejected": -7.525888442993164, + "step": 4833 + }, + { + "epoch": 0.75, + "learning_rate": 1.0601882877742621e-05, + "logits/chosen": -1.7071893215179443, + "logits/rejected": -3.163336992263794, + "logps/chosen": -301.031982421875, + "logps/rejected": -673.9027709960938, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5153757333755493, + "rewards/margins": 5.579672336578369, + "rewards/rejected": -6.095047950744629, + "step": 4834 + }, + { + "epoch": 0.75, + "learning_rate": 1.0601149437211473e-05, + "logits/chosen": -2.8902783393859863, + "logits/rejected": -3.101217031478882, + "logps/chosen": -343.3945007324219, + "logps/rejected": -382.19305419921875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14686205983161926, + "rewards/margins": 5.850751876831055, + "rewards/rejected": -5.7038893699646, + "step": 4835 + }, + { + "epoch": 0.75, + "learning_rate": 1.0600415996680325e-05, + "logits/chosen": -1.5968356132507324, + "logits/rejected": -2.7400169372558594, + "logps/chosen": -620.915283203125, + "logps/rejected": -754.7916259765625, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8105591535568237, + "rewards/margins": 4.779736518859863, + "rewards/rejected": -6.590295791625977, + "step": 4836 + }, + { + "epoch": 0.75, + "learning_rate": 1.0599682556149177e-05, + "logits/chosen": -3.0822551250457764, + "logits/rejected": -2.037446975708008, + "logps/chosen": -216.5067138671875, + "logps/rejected": -45.68340301513672, + "loss": 4.3175, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.93418025970459, + "rewards/margins": -4.300937652587891, + "rewards/rejected": -1.633242130279541, + "step": 4837 + }, + { + "epoch": 0.75, + "learning_rate": 1.059894911561803e-05, + "logits/chosen": -3.257416009902954, + "logits/rejected": -1.94296395778656, + "logps/chosen": -632.0884399414062, + "logps/rejected": -388.12646484375, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6509262323379517, + "rewards/margins": 5.170132637023926, + "rewards/rejected": -6.821059226989746, + "step": 4838 + }, + { + "epoch": 0.75, + "learning_rate": 1.0598215675086883e-05, + "logits/chosen": -3.0721757411956787, + "logits/rejected": -2.1756060123443604, + "logps/chosen": -233.49057006835938, + "logps/rejected": -122.70999145507812, + "loss": 2.7645, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.032474517822266, + "rewards/margins": -2.699202537536621, + "rewards/rejected": -2.3332722187042236, + "step": 4839 + }, + { + "epoch": 0.75, + "learning_rate": 1.0597482234555734e-05, + "logits/chosen": -3.297806739807129, + "logits/rejected": -2.972149610519409, + "logps/chosen": -160.03627014160156, + "logps/rejected": -130.2998046875, + "loss": 1.7805, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.660478115081787, + "rewards/margins": -0.07612383365631104, + "rewards/rejected": -3.5843541622161865, + "step": 4840 + }, + { + "epoch": 0.75, + "learning_rate": 1.0596748794024588e-05, + "logits/chosen": -2.5648391246795654, + "logits/rejected": -3.1845061779022217, + "logps/chosen": -102.17489624023438, + "logps/rejected": -282.9261779785156, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1500868797302246, + "rewards/margins": 5.547364234924316, + "rewards/rejected": -7.697451114654541, + "step": 4841 + }, + { + "epoch": 0.75, + "learning_rate": 1.059601535349344e-05, + "logits/chosen": -2.6442766189575195, + "logits/rejected": -3.090590000152588, + "logps/chosen": -114.9166030883789, + "logps/rejected": -337.3731689453125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1443305015563965, + "rewards/margins": 6.144677639007568, + "rewards/rejected": -8.289008140563965, + "step": 4842 + }, + { + "epoch": 0.75, + "learning_rate": 1.0595281912962292e-05, + "logits/chosen": -3.182042360305786, + "logits/rejected": -2.742670774459839, + "logps/chosen": -433.80242919921875, + "logps/rejected": -474.4854431152344, + "loss": 3.4973, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.689371585845947, + "rewards/margins": -3.206547737121582, + "rewards/rejected": -1.4828239679336548, + "step": 4843 + }, + { + "epoch": 0.75, + "learning_rate": 1.0594548472431144e-05, + "logits/chosen": -3.035939931869507, + "logits/rejected": -3.1861560344696045, + "logps/chosen": -267.78826904296875, + "logps/rejected": -399.5283203125, + "loss": 0.0328, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6776094436645508, + "rewards/margins": 3.7805793285369873, + "rewards/rejected": -5.458189010620117, + "step": 4844 + }, + { + "epoch": 0.75, + "learning_rate": 1.0593815031899995e-05, + "logits/chosen": -2.8380472660064697, + "logits/rejected": -2.103421449661255, + "logps/chosen": -305.6429443359375, + "logps/rejected": -309.77459716796875, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8236870169639587, + "rewards/margins": 4.357731819152832, + "rewards/rejected": -5.181418418884277, + "step": 4845 + }, + { + "epoch": 0.75, + "learning_rate": 1.0593081591368847e-05, + "logits/chosen": -3.0736355781555176, + "logits/rejected": -2.898242235183716, + "logps/chosen": -202.956298828125, + "logps/rejected": -322.27899169921875, + "loss": 0.7868, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.9142502546310425, + "rewards/margins": 2.1495940685272217, + "rewards/rejected": -4.063844203948975, + "step": 4846 + }, + { + "epoch": 0.75, + "learning_rate": 1.05923481508377e-05, + "logits/chosen": -2.6309261322021484, + "logits/rejected": -2.8499245643615723, + "logps/chosen": -52.019622802734375, + "logps/rejected": -223.42178344726562, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4317815899848938, + "rewards/margins": 4.8650407791137695, + "rewards/rejected": -5.296822547912598, + "step": 4847 + }, + { + "epoch": 0.75, + "learning_rate": 1.0591614710306551e-05, + "logits/chosen": -2.3460590839385986, + "logits/rejected": -3.0831689834594727, + "logps/chosen": -91.77367401123047, + "logps/rejected": -234.4969482421875, + "loss": 0.0739, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9538886547088623, + "rewards/margins": 2.9653611183166504, + "rewards/rejected": -4.919249534606934, + "step": 4848 + }, + { + "epoch": 0.75, + "learning_rate": 1.0590881269775403e-05, + "logits/chosen": -1.4604215621948242, + "logits/rejected": -2.700549602508545, + "logps/chosen": -97.3499984741211, + "logps/rejected": -444.4768371582031, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.57297945022583, + "rewards/margins": 6.317873001098633, + "rewards/rejected": -7.890852451324463, + "step": 4849 + }, + { + "epoch": 0.75, + "learning_rate": 1.0590147829244257e-05, + "logits/chosen": -2.8790454864501953, + "logits/rejected": -2.8482704162597656, + "logps/chosen": -133.74256896972656, + "logps/rejected": -443.77117919921875, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2789188623428345, + "rewards/margins": 6.140047073364258, + "rewards/rejected": -7.418966293334961, + "step": 4850 + }, + { + "epoch": 0.75, + "learning_rate": 1.0589414388713108e-05, + "logits/chosen": -1.8193005323410034, + "logits/rejected": -3.082977533340454, + "logps/chosen": -133.85540771484375, + "logps/rejected": -409.0089111328125, + "loss": 0.0634, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7866625785827637, + "rewards/margins": 4.998365879058838, + "rewards/rejected": -6.785028457641602, + "step": 4851 + }, + { + "epoch": 0.75, + "learning_rate": 1.058868094818196e-05, + "logits/chosen": -2.4104301929473877, + "logits/rejected": -2.8065245151519775, + "logps/chosen": -70.57645416259766, + "logps/rejected": -220.9658203125, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.83491051197052, + "rewards/margins": 5.292912483215332, + "rewards/rejected": -6.1278228759765625, + "step": 4852 + }, + { + "epoch": 0.75, + "learning_rate": 1.0587947507650812e-05, + "logits/chosen": -2.3587307929992676, + "logits/rejected": -3.0183401107788086, + "logps/chosen": -253.83963012695312, + "logps/rejected": -339.516357421875, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6527435779571533, + "rewards/margins": 5.054122447967529, + "rewards/rejected": -6.7068657875061035, + "step": 4853 + }, + { + "epoch": 0.75, + "learning_rate": 1.0587214067119664e-05, + "logits/chosen": -2.9299778938293457, + "logits/rejected": -3.0858285427093506, + "logps/chosen": -141.00877380371094, + "logps/rejected": -316.9571228027344, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.586862325668335, + "rewards/margins": 5.669084548950195, + "rewards/rejected": -8.25594711303711, + "step": 4854 + }, + { + "epoch": 0.76, + "learning_rate": 1.0586480626588516e-05, + "logits/chosen": -3.1268324851989746, + "logits/rejected": -3.329789400100708, + "logps/chosen": -33.550270080566406, + "logps/rejected": -138.6498565673828, + "loss": 0.0451, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5038363933563232, + "rewards/margins": 4.133420944213867, + "rewards/rejected": -5.6372575759887695, + "step": 4855 + }, + { + "epoch": 0.76, + "learning_rate": 1.0585747186057368e-05, + "logits/chosen": -2.797539472579956, + "logits/rejected": -3.1604437828063965, + "logps/chosen": -35.25297546386719, + "logps/rejected": -105.61857604980469, + "loss": 0.3311, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.566367745399475, + "rewards/margins": 1.9791131019592285, + "rewards/rejected": -3.545480966567993, + "step": 4856 + }, + { + "epoch": 0.76, + "learning_rate": 1.058501374552622e-05, + "logits/chosen": -3.0525944232940674, + "logits/rejected": -3.064696788787842, + "logps/chosen": -78.72895812988281, + "logps/rejected": -180.15982055664062, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8905310034751892, + "rewards/margins": 5.975831508636475, + "rewards/rejected": -6.866362571716309, + "step": 4857 + }, + { + "epoch": 0.76, + "learning_rate": 1.0584280304995072e-05, + "logits/chosen": -2.285273551940918, + "logits/rejected": -2.754459857940674, + "logps/chosen": -135.3582000732422, + "logps/rejected": -272.63690185546875, + "loss": 1.8286, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.693063497543335, + "rewards/margins": 1.16835618019104, + "rewards/rejected": -3.861419677734375, + "step": 4858 + }, + { + "epoch": 0.76, + "learning_rate": 1.0583546864463925e-05, + "logits/chosen": -2.2558486461639404, + "logits/rejected": -3.128082036972046, + "logps/chosen": -356.42529296875, + "logps/rejected": -380.1485595703125, + "loss": 1.3916, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.489575147628784, + "rewards/margins": 0.3937124013900757, + "rewards/rejected": -2.8832874298095703, + "step": 4859 + }, + { + "epoch": 0.76, + "learning_rate": 1.0582813423932777e-05, + "logits/chosen": -3.0827770233154297, + "logits/rejected": -3.011382818222046, + "logps/chosen": -358.95977783203125, + "logps/rejected": -403.65789794921875, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6681467294692993, + "rewards/margins": 5.381488800048828, + "rewards/rejected": -7.049635410308838, + "step": 4860 + }, + { + "epoch": 0.76, + "learning_rate": 1.0582079983401629e-05, + "logits/chosen": -2.14290189743042, + "logits/rejected": -2.7360100746154785, + "logps/chosen": -135.0868377685547, + "logps/rejected": -416.3912658691406, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4878361225128174, + "rewards/margins": 5.650725841522217, + "rewards/rejected": -7.138562202453613, + "step": 4861 + }, + { + "epoch": 0.76, + "learning_rate": 1.058134654287048e-05, + "logits/chosen": -2.7048850059509277, + "logits/rejected": -2.6546928882598877, + "logps/chosen": -475.2694091796875, + "logps/rejected": -484.2544860839844, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9573700428009033, + "rewards/margins": 6.0030107498168945, + "rewards/rejected": -6.960380554199219, + "step": 4862 + }, + { + "epoch": 0.76, + "learning_rate": 1.0580613102339333e-05, + "logits/chosen": -2.938617467880249, + "logits/rejected": -3.0893774032592773, + "logps/chosen": -36.28704071044922, + "logps/rejected": -119.36188507080078, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2635719776153564, + "rewards/margins": 4.658068656921387, + "rewards/rejected": -5.921640872955322, + "step": 4863 + }, + { + "epoch": 0.76, + "learning_rate": 1.0579879661808185e-05, + "logits/chosen": -2.719513416290283, + "logits/rejected": -3.225304126739502, + "logps/chosen": -372.6553955078125, + "logps/rejected": -457.75054931640625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0541373491287231, + "rewards/margins": 6.004855155944824, + "rewards/rejected": -7.058992385864258, + "step": 4864 + }, + { + "epoch": 0.76, + "learning_rate": 1.0579146221277036e-05, + "logits/chosen": -3.2637343406677246, + "logits/rejected": -2.930997848510742, + "logps/chosen": -288.442626953125, + "logps/rejected": -251.82073974609375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6801879405975342, + "rewards/margins": 6.531805038452148, + "rewards/rejected": -8.211993217468262, + "step": 4865 + }, + { + "epoch": 0.76, + "learning_rate": 1.0578412780745888e-05, + "logits/chosen": -2.204695701599121, + "logits/rejected": -2.98275089263916, + "logps/chosen": -43.74778747558594, + "logps/rejected": -121.20128631591797, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29187527298927307, + "rewards/margins": 3.985635757446289, + "rewards/rejected": -4.277511119842529, + "step": 4866 + }, + { + "epoch": 0.76, + "learning_rate": 1.057767934021474e-05, + "logits/chosen": -1.9020917415618896, + "logits/rejected": -2.4483094215393066, + "logps/chosen": -193.48150634765625, + "logps/rejected": -250.33523559570312, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.19549560546875, + "rewards/margins": 6.465383529663086, + "rewards/rejected": -7.660879135131836, + "step": 4867 + }, + { + "epoch": 0.76, + "learning_rate": 1.0576945899683594e-05, + "logits/chosen": -1.4505908489227295, + "logits/rejected": -2.8100850582122803, + "logps/chosen": -71.42098999023438, + "logps/rejected": -356.2843017578125, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.909062385559082, + "rewards/margins": 5.487815856933594, + "rewards/rejected": -7.396878242492676, + "step": 4868 + }, + { + "epoch": 0.76, + "learning_rate": 1.0576212459152446e-05, + "logits/chosen": -2.11203932762146, + "logits/rejected": -2.9444267749786377, + "logps/chosen": -139.67034912109375, + "logps/rejected": -259.128662109375, + "loss": 2.0342, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.437178134918213, + "rewards/margins": 0.7977392673492432, + "rewards/rejected": -4.234917640686035, + "step": 4869 + }, + { + "epoch": 0.76, + "learning_rate": 1.0575479018621298e-05, + "logits/chosen": -2.9778738021850586, + "logits/rejected": -3.1054365634918213, + "logps/chosen": -171.56394958496094, + "logps/rejected": -280.64984130859375, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2990460395812988, + "rewards/margins": 6.058516502380371, + "rewards/rejected": -7.35756254196167, + "step": 4870 + }, + { + "epoch": 0.76, + "learning_rate": 1.057474557809015e-05, + "logits/chosen": -2.411917209625244, + "logits/rejected": -2.6355631351470947, + "logps/chosen": -245.44833374023438, + "logps/rejected": -244.02780151367188, + "loss": 2.1258, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.109534740447998, + "rewards/margins": 1.4010634422302246, + "rewards/rejected": -5.510598182678223, + "step": 4871 + }, + { + "epoch": 0.76, + "learning_rate": 1.0574012137559003e-05, + "logits/chosen": -3.093592882156372, + "logits/rejected": -3.263105869293213, + "logps/chosen": -180.5718994140625, + "logps/rejected": -482.16796875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9247612953186035, + "rewards/margins": 6.888854026794434, + "rewards/rejected": -8.813615798950195, + "step": 4872 + }, + { + "epoch": 0.76, + "learning_rate": 1.0573278697027855e-05, + "logits/chosen": -1.850553274154663, + "logits/rejected": -3.0774178504943848, + "logps/chosen": -234.92825317382812, + "logps/rejected": -554.3089599609375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.619855523109436, + "rewards/margins": 7.691195011138916, + "rewards/rejected": -9.311050415039062, + "step": 4873 + }, + { + "epoch": 0.76, + "learning_rate": 1.0572545256496707e-05, + "logits/chosen": -1.9666211605072021, + "logits/rejected": -3.255067825317383, + "logps/chosen": -232.13113403320312, + "logps/rejected": -416.58599853515625, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9027276039123535, + "rewards/margins": 4.913374900817871, + "rewards/rejected": -6.816102981567383, + "step": 4874 + }, + { + "epoch": 0.76, + "learning_rate": 1.0571811815965559e-05, + "logits/chosen": -2.7189857959747314, + "logits/rejected": -3.2813758850097656, + "logps/chosen": -82.7509536743164, + "logps/rejected": -324.6361999511719, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1895344257354736, + "rewards/margins": 6.135017395019531, + "rewards/rejected": -7.324551582336426, + "step": 4875 + }, + { + "epoch": 0.76, + "learning_rate": 1.057107837543441e-05, + "logits/chosen": -2.899264335632324, + "logits/rejected": -3.2331912517547607, + "logps/chosen": -244.7938232421875, + "logps/rejected": -216.35659790039062, + "loss": 0.095, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.051572322845459, + "rewards/margins": 2.4016993045806885, + "rewards/rejected": -4.453271865844727, + "step": 4876 + }, + { + "epoch": 0.76, + "learning_rate": 1.0570344934903264e-05, + "logits/chosen": -3.2076804637908936, + "logits/rejected": -2.968728542327881, + "logps/chosen": -180.87454223632812, + "logps/rejected": -237.710693359375, + "loss": 1.5626, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8308300971984863, + "rewards/margins": 2.351796865463257, + "rewards/rejected": -5.182626724243164, + "step": 4877 + }, + { + "epoch": 0.76, + "learning_rate": 1.0569611494372116e-05, + "logits/chosen": -2.5290894508361816, + "logits/rejected": -3.071682929992676, + "logps/chosen": -242.84646606445312, + "logps/rejected": -481.7398681640625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.522770345211029, + "rewards/margins": 6.653416633605957, + "rewards/rejected": -7.176187038421631, + "step": 4878 + }, + { + "epoch": 0.76, + "learning_rate": 1.0568878053840968e-05, + "logits/chosen": -1.880548357963562, + "logits/rejected": -2.8024709224700928, + "logps/chosen": -111.96981811523438, + "logps/rejected": -253.46835327148438, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.455593466758728, + "rewards/margins": 4.522990703582764, + "rewards/rejected": -5.978584289550781, + "step": 4879 + }, + { + "epoch": 0.76, + "learning_rate": 1.056814461330982e-05, + "logits/chosen": -2.5776267051696777, + "logits/rejected": -3.2881314754486084, + "logps/chosen": -266.48040771484375, + "logps/rejected": -424.65570068359375, + "loss": 0.1377, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7972618341445923, + "rewards/margins": 3.0376272201538086, + "rewards/rejected": -4.8348894119262695, + "step": 4880 + }, + { + "epoch": 0.76, + "learning_rate": 1.0567411172778672e-05, + "logits/chosen": -2.775749683380127, + "logits/rejected": -3.005509376525879, + "logps/chosen": -198.60536193847656, + "logps/rejected": -445.7533874511719, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.193459391593933, + "rewards/margins": 7.03663444519043, + "rewards/rejected": -8.230093955993652, + "step": 4881 + }, + { + "epoch": 0.76, + "learning_rate": 1.0566677732247523e-05, + "logits/chosen": -2.6085948944091797, + "logits/rejected": -3.147871255874634, + "logps/chosen": -202.90171813964844, + "logps/rejected": -228.70220947265625, + "loss": 0.04, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2009072303771973, + "rewards/margins": 3.5216965675354004, + "rewards/rejected": -5.722603797912598, + "step": 4882 + }, + { + "epoch": 0.76, + "learning_rate": 1.0565944291716375e-05, + "logits/chosen": -3.1712327003479004, + "logits/rejected": -3.001526117324829, + "logps/chosen": -244.47988891601562, + "logps/rejected": -96.55458068847656, + "loss": 1.7279, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5002493858337402, + "rewards/margins": 0.5402907133102417, + "rewards/rejected": -4.0405402183532715, + "step": 4883 + }, + { + "epoch": 0.76, + "learning_rate": 1.0565210851185227e-05, + "logits/chosen": -2.0871379375457764, + "logits/rejected": -2.9730215072631836, + "logps/chosen": -155.02426147460938, + "logps/rejected": -181.7353973388672, + "loss": 1.7235, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.017293930053711, + "rewards/margins": 1.825729250907898, + "rewards/rejected": -4.843023300170898, + "step": 4884 + }, + { + "epoch": 0.76, + "learning_rate": 1.0564477410654079e-05, + "logits/chosen": -2.246974468231201, + "logits/rejected": -3.1075944900512695, + "logps/chosen": -76.92464447021484, + "logps/rejected": -213.52972412109375, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2536354064941406, + "rewards/margins": 5.224667549133301, + "rewards/rejected": -6.478302955627441, + "step": 4885 + }, + { + "epoch": 0.76, + "learning_rate": 1.0563743970122933e-05, + "logits/chosen": -2.174898624420166, + "logits/rejected": -2.7581114768981934, + "logps/chosen": -235.07247924804688, + "logps/rejected": -486.3265380859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4215835332870483, + "rewards/margins": 10.418156623840332, + "rewards/rejected": -11.839739799499512, + "step": 4886 + }, + { + "epoch": 0.76, + "learning_rate": 1.0563010529591785e-05, + "logits/chosen": -2.986015796661377, + "logits/rejected": -2.8904709815979004, + "logps/chosen": -151.3910675048828, + "logps/rejected": -172.01034545898438, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8277416229248047, + "rewards/margins": 4.20152473449707, + "rewards/rejected": -5.029266357421875, + "step": 4887 + }, + { + "epoch": 0.76, + "learning_rate": 1.0562277089060636e-05, + "logits/chosen": -2.3214704990386963, + "logits/rejected": -3.288740634918213, + "logps/chosen": -54.085243225097656, + "logps/rejected": -221.142578125, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5148402452468872, + "rewards/margins": 5.677348613739014, + "rewards/rejected": -7.1921892166137695, + "step": 4888 + }, + { + "epoch": 0.76, + "learning_rate": 1.0561543648529488e-05, + "logits/chosen": -2.804004430770874, + "logits/rejected": -3.00335693359375, + "logps/chosen": -77.31394958496094, + "logps/rejected": -142.27210998535156, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.101538062095642, + "rewards/margins": 3.9988064765930176, + "rewards/rejected": -5.100344657897949, + "step": 4889 + }, + { + "epoch": 0.76, + "learning_rate": 1.056081020799834e-05, + "logits/chosen": -3.054550886154175, + "logits/rejected": -3.142500400543213, + "logps/chosen": -97.9135971069336, + "logps/rejected": -220.2502899169922, + "loss": 1.2897, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3572921752929688, + "rewards/margins": 0.964888334274292, + "rewards/rejected": -3.32218074798584, + "step": 4890 + }, + { + "epoch": 0.76, + "learning_rate": 1.0560076767467192e-05, + "logits/chosen": -2.967750072479248, + "logits/rejected": -2.6005921363830566, + "logps/chosen": -169.67825317382812, + "logps/rejected": -157.31942749023438, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39707183837890625, + "rewards/margins": 5.7238874435424805, + "rewards/rejected": -6.120959281921387, + "step": 4891 + }, + { + "epoch": 0.76, + "learning_rate": 1.0559343326936044e-05, + "logits/chosen": -2.7851064205169678, + "logits/rejected": -3.313389301300049, + "logps/chosen": -479.87542724609375, + "logps/rejected": -636.7274780273438, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0045524537563323975, + "rewards/margins": 5.664815902709961, + "rewards/rejected": -5.6602630615234375, + "step": 4892 + }, + { + "epoch": 0.76, + "learning_rate": 1.0558609886404896e-05, + "logits/chosen": -3.368928909301758, + "logits/rejected": -3.259084463119507, + "logps/chosen": -359.54730224609375, + "logps/rejected": -125.750244140625, + "loss": 0.2902, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8371437788009644, + "rewards/margins": 2.6031441688537598, + "rewards/rejected": -3.4402878284454346, + "step": 4893 + }, + { + "epoch": 0.76, + "learning_rate": 1.0557876445873748e-05, + "logits/chosen": -3.215160846710205, + "logits/rejected": -3.090158462524414, + "logps/chosen": -182.96884155273438, + "logps/rejected": -210.86248779296875, + "loss": 1.4786, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1574273109436035, + "rewards/margins": 1.1042596101760864, + "rewards/rejected": -4.2616868019104, + "step": 4894 + }, + { + "epoch": 0.76, + "learning_rate": 1.0557143005342601e-05, + "logits/chosen": -1.113246202468872, + "logits/rejected": -3.068037986755371, + "logps/chosen": -44.44493103027344, + "logps/rejected": -383.64019775390625, + "loss": 0.2196, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9128596782684326, + "rewards/margins": 3.2434017658233643, + "rewards/rejected": -6.156261444091797, + "step": 4895 + }, + { + "epoch": 0.76, + "learning_rate": 1.0556409564811453e-05, + "logits/chosen": -2.9066905975341797, + "logits/rejected": -2.9418046474456787, + "logps/chosen": -76.35749816894531, + "logps/rejected": -76.24797058105469, + "loss": 1.3642, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4046430587768555, + "rewards/margins": 1.8821598291397095, + "rewards/rejected": -4.286802768707275, + "step": 4896 + }, + { + "epoch": 0.76, + "learning_rate": 1.0555676124280305e-05, + "logits/chosen": -3.2759945392608643, + "logits/rejected": -2.475123167037964, + "logps/chosen": -1262.87646484375, + "logps/rejected": -684.8162841796875, + "loss": 4.304, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7563416957855225, + "rewards/margins": -0.6275973320007324, + "rewards/rejected": -3.12874436378479, + "step": 4897 + }, + { + "epoch": 0.76, + "learning_rate": 1.0554942683749157e-05, + "logits/chosen": -2.4331483840942383, + "logits/rejected": -3.173628568649292, + "logps/chosen": -92.60053253173828, + "logps/rejected": -197.8020782470703, + "loss": 1.7632, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7650365829467773, + "rewards/margins": 0.7043343782424927, + "rewards/rejected": -3.4693710803985596, + "step": 4898 + }, + { + "epoch": 0.76, + "learning_rate": 1.0554209243218009e-05, + "logits/chosen": -2.773848533630371, + "logits/rejected": -3.1865715980529785, + "logps/chosen": -22.142913818359375, + "logps/rejected": -274.01300048828125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.02848219871521, + "rewards/margins": 5.887652397155762, + "rewards/rejected": -6.916134834289551, + "step": 4899 + }, + { + "epoch": 0.76, + "learning_rate": 1.055347580268686e-05, + "logits/chosen": -1.726032018661499, + "logits/rejected": -3.0309927463531494, + "logps/chosen": -204.04454040527344, + "logps/rejected": -216.36407470703125, + "loss": 2.7857, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7467575073242188, + "rewards/margins": -1.034659504890442, + "rewards/rejected": -2.7120981216430664, + "step": 4900 + }, + { + "epoch": 0.76, + "learning_rate": 1.0552742362155713e-05, + "logits/chosen": -3.0178701877593994, + "logits/rejected": -2.5065135955810547, + "logps/chosen": -437.61785888671875, + "logps/rejected": -186.80393981933594, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5384072065353394, + "rewards/margins": 6.08128547668457, + "rewards/rejected": -6.619692802429199, + "step": 4901 + }, + { + "epoch": 0.76, + "learning_rate": 1.0552008921624564e-05, + "logits/chosen": -2.2492599487304688, + "logits/rejected": -3.0497138500213623, + "logps/chosen": -179.5202178955078, + "logps/rejected": -397.81640625, + "loss": 0.0244, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5714561939239502, + "rewards/margins": 5.181403160095215, + "rewards/rejected": -6.752859592437744, + "step": 4902 + }, + { + "epoch": 0.76, + "learning_rate": 1.0551275481093416e-05, + "logits/chosen": -3.146174192428589, + "logits/rejected": -2.701012134552002, + "logps/chosen": -605.827392578125, + "logps/rejected": -468.1688232421875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.010430097579956, + "rewards/margins": 6.839715957641602, + "rewards/rejected": -7.850146293640137, + "step": 4903 + }, + { + "epoch": 0.76, + "learning_rate": 1.055054204056227e-05, + "logits/chosen": -2.926715135574341, + "logits/rejected": -3.0966291427612305, + "logps/chosen": -59.439857482910156, + "logps/rejected": -154.50721740722656, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3449881076812744, + "rewards/margins": 3.7511162757873535, + "rewards/rejected": -5.096104621887207, + "step": 4904 + }, + { + "epoch": 0.76, + "learning_rate": 1.0549808600031122e-05, + "logits/chosen": -3.1669790744781494, + "logits/rejected": -2.6653552055358887, + "logps/chosen": -114.8778076171875, + "logps/rejected": -77.94945526123047, + "loss": 2.3526, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.307615280151367, + "rewards/margins": -0.29079222679138184, + "rewards/rejected": -4.016822814941406, + "step": 4905 + }, + { + "epoch": 0.76, + "learning_rate": 1.0549075159499975e-05, + "logits/chosen": -2.9792041778564453, + "logits/rejected": -3.049840211868286, + "logps/chosen": -134.03237915039062, + "logps/rejected": -121.3118896484375, + "loss": 0.9956, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3304412364959717, + "rewards/margins": 1.112052083015442, + "rewards/rejected": -4.442493438720703, + "step": 4906 + }, + { + "epoch": 0.76, + "learning_rate": 1.0548341718968827e-05, + "logits/chosen": -3.129152297973633, + "logits/rejected": -3.1707961559295654, + "logps/chosen": -177.80372619628906, + "logps/rejected": -124.62596130371094, + "loss": 0.8329, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5313689708709717, + "rewards/margins": 1.027491569519043, + "rewards/rejected": -3.5588607788085938, + "step": 4907 + }, + { + "epoch": 0.76, + "learning_rate": 1.0547608278437679e-05, + "logits/chosen": -3.1614158153533936, + "logits/rejected": -3.0286145210266113, + "logps/chosen": -144.56353759765625, + "logps/rejected": -182.60693359375, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7636597156524658, + "rewards/margins": 4.804261207580566, + "rewards/rejected": -5.567920684814453, + "step": 4908 + }, + { + "epoch": 0.76, + "learning_rate": 1.0546874837906531e-05, + "logits/chosen": -1.9230116605758667, + "logits/rejected": -2.9311370849609375, + "logps/chosen": -234.71873474121094, + "logps/rejected": -354.37860107421875, + "loss": 0.0388, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5976898670196533, + "rewards/margins": 4.687063217163086, + "rewards/rejected": -6.284753322601318, + "step": 4909 + }, + { + "epoch": 0.76, + "learning_rate": 1.0546141397375383e-05, + "logits/chosen": -2.8641581535339355, + "logits/rejected": -2.991072177886963, + "logps/chosen": -316.9645690917969, + "logps/rejected": -396.39105224609375, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9315986633300781, + "rewards/margins": 4.88320255279541, + "rewards/rejected": -6.8148016929626465, + "step": 4910 + }, + { + "epoch": 0.76, + "learning_rate": 1.0545407956844235e-05, + "logits/chosen": -2.480701208114624, + "logits/rejected": -3.069728374481201, + "logps/chosen": -60.30824661254883, + "logps/rejected": -356.83782958984375, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3169552087783813, + "rewards/margins": 5.026982307434082, + "rewards/rejected": -6.343937873840332, + "step": 4911 + }, + { + "epoch": 0.76, + "learning_rate": 1.0544674516313087e-05, + "logits/chosen": -3.1423099040985107, + "logits/rejected": -3.197997808456421, + "logps/chosen": -162.90826416015625, + "logps/rejected": -314.3590393066406, + "loss": 2.796, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8786213397979736, + "rewards/margins": 1.659148931503296, + "rewards/rejected": -5.537769794464111, + "step": 4912 + }, + { + "epoch": 0.76, + "learning_rate": 1.054394107578194e-05, + "logits/chosen": -2.389741897583008, + "logits/rejected": -2.9767823219299316, + "logps/chosen": -129.6280517578125, + "logps/rejected": -388.22833251953125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.278211832046509, + "rewards/margins": 8.154193878173828, + "rewards/rejected": -10.432405471801758, + "step": 4913 + }, + { + "epoch": 0.76, + "learning_rate": 1.0543207635250792e-05, + "logits/chosen": -2.8886594772338867, + "logits/rejected": -1.7551456689834595, + "logps/chosen": -173.51429748535156, + "logps/rejected": -219.5328826904297, + "loss": 1.0139, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1379380226135254, + "rewards/margins": 2.642167806625366, + "rewards/rejected": -4.7801055908203125, + "step": 4914 + }, + { + "epoch": 0.76, + "learning_rate": 1.0542474194719644e-05, + "logits/chosen": -2.857797861099243, + "logits/rejected": -3.180375099182129, + "logps/chosen": -72.18606567382812, + "logps/rejected": -167.36785888671875, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9700949788093567, + "rewards/margins": 3.9664154052734375, + "rewards/rejected": -4.9365105628967285, + "step": 4915 + }, + { + "epoch": 0.76, + "learning_rate": 1.0541740754188496e-05, + "logits/chosen": -2.6131012439727783, + "logits/rejected": -3.106773853302002, + "logps/chosen": -83.94642639160156, + "logps/rejected": -208.05752563476562, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03292196989059448, + "rewards/margins": 6.992851734161377, + "rewards/rejected": -7.025774002075195, + "step": 4916 + }, + { + "epoch": 0.76, + "learning_rate": 1.0541007313657348e-05, + "logits/chosen": -2.9539108276367188, + "logits/rejected": -3.1315174102783203, + "logps/chosen": -271.21844482421875, + "logps/rejected": -165.256591796875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.739112913608551, + "rewards/margins": 6.464029312133789, + "rewards/rejected": -7.2031426429748535, + "step": 4917 + }, + { + "epoch": 0.76, + "learning_rate": 1.05402738731262e-05, + "logits/chosen": -3.1744728088378906, + "logits/rejected": -2.4543445110321045, + "logps/chosen": -133.6099090576172, + "logps/rejected": -236.4388885498047, + "loss": 0.0921, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7094955444335938, + "rewards/margins": 2.3386592864990234, + "rewards/rejected": -5.048154830932617, + "step": 4918 + }, + { + "epoch": 0.77, + "learning_rate": 1.0539540432595051e-05, + "logits/chosen": -2.371232271194458, + "logits/rejected": -3.074617624282837, + "logps/chosen": -152.56961059570312, + "logps/rejected": -360.50848388671875, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8469886779785156, + "rewards/margins": 4.888974666595459, + "rewards/rejected": -5.735963344573975, + "step": 4919 + }, + { + "epoch": 0.77, + "learning_rate": 1.0538806992063903e-05, + "logits/chosen": -2.6195008754730225, + "logits/rejected": -3.1687402725219727, + "logps/chosen": -39.453216552734375, + "logps/rejected": -338.10455322265625, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0609591007232666, + "rewards/margins": 6.377474784851074, + "rewards/rejected": -8.438433647155762, + "step": 4920 + }, + { + "epoch": 0.77, + "learning_rate": 1.0538073551532755e-05, + "logits/chosen": -2.423074722290039, + "logits/rejected": -3.0692782402038574, + "logps/chosen": -56.805965423583984, + "logps/rejected": -229.82830810546875, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3429856300354004, + "rewards/margins": 5.0408616065979, + "rewards/rejected": -7.383847236633301, + "step": 4921 + }, + { + "epoch": 0.77, + "learning_rate": 1.0537340111001609e-05, + "logits/chosen": -1.2923592329025269, + "logits/rejected": -2.807227373123169, + "logps/chosen": -90.63577270507812, + "logps/rejected": -337.88543701171875, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.859020948410034, + "rewards/margins": 4.985659599304199, + "rewards/rejected": -7.8446807861328125, + "step": 4922 + }, + { + "epoch": 0.77, + "learning_rate": 1.053660667047046e-05, + "logits/chosen": -3.0542097091674805, + "logits/rejected": -2.909491539001465, + "logps/chosen": -243.7252197265625, + "logps/rejected": -220.32962036132812, + "loss": 1.5369, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.741593360900879, + "rewards/margins": 1.914795160293579, + "rewards/rejected": -4.656388282775879, + "step": 4923 + }, + { + "epoch": 0.77, + "learning_rate": 1.0535873229939313e-05, + "logits/chosen": -2.748952865600586, + "logits/rejected": -3.148871898651123, + "logps/chosen": -428.4112548828125, + "logps/rejected": -257.2640380859375, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1812820434570312, + "rewards/margins": 3.907290458679199, + "rewards/rejected": -5.0885725021362305, + "step": 4924 + }, + { + "epoch": 0.77, + "learning_rate": 1.0535139789408164e-05, + "logits/chosen": -2.4718918800354004, + "logits/rejected": -3.1010167598724365, + "logps/chosen": -143.18048095703125, + "logps/rejected": -226.27589416503906, + "loss": 2.2394, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9242982864379883, + "rewards/margins": -0.6047654151916504, + "rewards/rejected": -3.319532871246338, + "step": 4925 + }, + { + "epoch": 0.77, + "learning_rate": 1.0534406348877016e-05, + "logits/chosen": -3.174403667449951, + "logits/rejected": -3.0439929962158203, + "logps/chosen": -250.08676147460938, + "logps/rejected": -323.3039245605469, + "loss": 2.0789, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.5774497985839844, + "rewards/margins": -1.9451072216033936, + "rewards/rejected": -1.6323425769805908, + "step": 4926 + }, + { + "epoch": 0.77, + "learning_rate": 1.0533672908345868e-05, + "logits/chosen": -3.0134167671203613, + "logits/rejected": -2.3426918983459473, + "logps/chosen": -257.0804443359375, + "logps/rejected": -314.01507568359375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3942787647247314, + "rewards/margins": 6.139410018920898, + "rewards/rejected": -7.533689022064209, + "step": 4927 + }, + { + "epoch": 0.77, + "learning_rate": 1.053293946781472e-05, + "logits/chosen": -3.2720108032226562, + "logits/rejected": -2.9650912284851074, + "logps/chosen": -295.6854553222656, + "logps/rejected": -231.62881469726562, + "loss": 0.0361, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3554397225379944, + "rewards/margins": 3.3041908740997314, + "rewards/rejected": -3.65963077545166, + "step": 4928 + }, + { + "epoch": 0.77, + "learning_rate": 1.0532206027283572e-05, + "logits/chosen": -3.112252950668335, + "logits/rejected": -3.239969491958618, + "logps/chosen": -175.48678588867188, + "logps/rejected": -263.7238464355469, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21365126967430115, + "rewards/margins": 6.041919231414795, + "rewards/rejected": -6.255570411682129, + "step": 4929 + }, + { + "epoch": 0.77, + "learning_rate": 1.0531472586752425e-05, + "logits/chosen": -1.4712485074996948, + "logits/rejected": -2.895578622817993, + "logps/chosen": -55.952178955078125, + "logps/rejected": -251.93502807617188, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6626827716827393, + "rewards/margins": 5.963181495666504, + "rewards/rejected": -7.625864505767822, + "step": 4930 + }, + { + "epoch": 0.77, + "learning_rate": 1.0530739146221277e-05, + "logits/chosen": -1.3221079111099243, + "logits/rejected": -2.8167362213134766, + "logps/chosen": -205.7174072265625, + "logps/rejected": -543.8568725585938, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4053184986114502, + "rewards/margins": 6.706780433654785, + "rewards/rejected": -8.112098693847656, + "step": 4931 + }, + { + "epoch": 0.77, + "learning_rate": 1.053000570569013e-05, + "logits/chosen": -3.1403791904449463, + "logits/rejected": -2.7510743141174316, + "logps/chosen": -471.998779296875, + "logps/rejected": -409.0614929199219, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02934187650680542, + "rewards/margins": 5.838020324707031, + "rewards/rejected": -5.80867862701416, + "step": 4932 + }, + { + "epoch": 0.77, + "learning_rate": 1.0529272265158981e-05, + "logits/chosen": -3.1475749015808105, + "logits/rejected": -2.4551987648010254, + "logps/chosen": -373.3358154296875, + "logps/rejected": -157.1348419189453, + "loss": 1.1022, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.174790859222412, + "rewards/margins": 2.963160514831543, + "rewards/rejected": -6.137951850891113, + "step": 4933 + }, + { + "epoch": 0.77, + "learning_rate": 1.0528538824627833e-05, + "logits/chosen": -2.3103387355804443, + "logits/rejected": -2.7866458892822266, + "logps/chosen": -145.9913330078125, + "logps/rejected": -331.9454650878906, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8932358026504517, + "rewards/margins": 6.617445468902588, + "rewards/rejected": -8.51068115234375, + "step": 4934 + }, + { + "epoch": 0.77, + "learning_rate": 1.0527805384096685e-05, + "logits/chosen": -1.5774106979370117, + "logits/rejected": -3.1204543113708496, + "logps/chosen": -336.7092590332031, + "logps/rejected": -474.6016540527344, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6575204133987427, + "rewards/margins": 5.137805938720703, + "rewards/rejected": -6.795326232910156, + "step": 4935 + }, + { + "epoch": 0.77, + "learning_rate": 1.0527071943565537e-05, + "logits/chosen": -2.7663655281066895, + "logits/rejected": -3.0326640605926514, + "logps/chosen": -306.6942138671875, + "logps/rejected": -239.247314453125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.047958940267562866, + "rewards/margins": 6.022945404052734, + "rewards/rejected": -6.070904731750488, + "step": 4936 + }, + { + "epoch": 0.77, + "learning_rate": 1.0526338503034389e-05, + "logits/chosen": -3.134483814239502, + "logits/rejected": -2.3224430084228516, + "logps/chosen": -460.9895935058594, + "logps/rejected": -377.7967834472656, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1093108654022217, + "rewards/margins": 5.52327823638916, + "rewards/rejected": -7.632589340209961, + "step": 4937 + }, + { + "epoch": 0.77, + "learning_rate": 1.052560506250324e-05, + "logits/chosen": -3.0727286338806152, + "logits/rejected": -2.0480189323425293, + "logps/chosen": -261.36505126953125, + "logps/rejected": -135.29910278320312, + "loss": 1.8977, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.55741024017334, + "rewards/margins": -0.18755888938903809, + "rewards/rejected": -4.369851589202881, + "step": 4938 + }, + { + "epoch": 0.77, + "learning_rate": 1.0524871621972094e-05, + "logits/chosen": -1.9424281120300293, + "logits/rejected": -3.256072998046875, + "logps/chosen": -60.85615539550781, + "logps/rejected": -257.23406982421875, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6653094291687012, + "rewards/margins": 4.734860897064209, + "rewards/rejected": -6.40017032623291, + "step": 4939 + }, + { + "epoch": 0.77, + "learning_rate": 1.0524138181440946e-05, + "logits/chosen": -1.8164238929748535, + "logits/rejected": -2.584390163421631, + "logps/chosen": -185.71180725097656, + "logps/rejected": -479.23944091796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2324905395507812, + "rewards/margins": 9.171293258666992, + "rewards/rejected": -11.403783798217773, + "step": 4940 + }, + { + "epoch": 0.77, + "learning_rate": 1.05234047409098e-05, + "logits/chosen": -2.80806565284729, + "logits/rejected": -2.8716113567352295, + "logps/chosen": -105.58071899414062, + "logps/rejected": -172.94970703125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7245753407478333, + "rewards/margins": 5.73476505279541, + "rewards/rejected": -6.459341049194336, + "step": 4941 + }, + { + "epoch": 0.77, + "learning_rate": 1.0522671300378651e-05, + "logits/chosen": -1.7232075929641724, + "logits/rejected": -3.059096336364746, + "logps/chosen": -195.0811309814453, + "logps/rejected": -390.2921447753906, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.150099277496338, + "rewards/margins": 8.316383361816406, + "rewards/rejected": -10.466482162475586, + "step": 4942 + }, + { + "epoch": 0.77, + "learning_rate": 1.0521937859847503e-05, + "logits/chosen": -2.426116943359375, + "logits/rejected": -2.777222156524658, + "logps/chosen": -190.7969970703125, + "logps/rejected": -287.9497375488281, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6267853379249573, + "rewards/margins": 8.205044746398926, + "rewards/rejected": -8.831830024719238, + "step": 4943 + }, + { + "epoch": 0.77, + "learning_rate": 1.0521204419316355e-05, + "logits/chosen": -2.6742217540740967, + "logits/rejected": -3.0045080184936523, + "logps/chosen": -267.0101318359375, + "logps/rejected": -133.97047424316406, + "loss": 2.7256, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5664615631103516, + "rewards/margins": -0.5801997184753418, + "rewards/rejected": -2.9862618446350098, + "step": 4944 + }, + { + "epoch": 0.77, + "learning_rate": 1.0520470978785207e-05, + "logits/chosen": -3.136765241622925, + "logits/rejected": -2.5078229904174805, + "logps/chosen": -282.5135498046875, + "logps/rejected": -160.31692504882812, + "loss": 0.9466, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.123663902282715, + "rewards/margins": 1.746281385421753, + "rewards/rejected": -5.869945526123047, + "step": 4945 + }, + { + "epoch": 0.77, + "learning_rate": 1.0519737538254059e-05, + "logits/chosen": -2.9695069789886475, + "logits/rejected": -3.069337844848633, + "logps/chosen": -588.449951171875, + "logps/rejected": -571.9908447265625, + "loss": 0.0539, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7588638067245483, + "rewards/margins": 3.0833382606506348, + "rewards/rejected": -4.842202186584473, + "step": 4946 + }, + { + "epoch": 0.77, + "learning_rate": 1.051900409772291e-05, + "logits/chosen": -2.9855124950408936, + "logits/rejected": -2.0381345748901367, + "logps/chosen": -643.1873779296875, + "logps/rejected": -293.449462890625, + "loss": 1.5745, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4541077613830566, + "rewards/margins": 0.9740984439849854, + "rewards/rejected": -4.428205966949463, + "step": 4947 + }, + { + "epoch": 0.77, + "learning_rate": 1.0518270657191764e-05, + "logits/chosen": -2.437859296798706, + "logits/rejected": -1.7170644998550415, + "logps/chosen": -176.8846893310547, + "logps/rejected": -203.07949829101562, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6913712024688721, + "rewards/margins": 4.433365345001221, + "rewards/rejected": -5.124736785888672, + "step": 4948 + }, + { + "epoch": 0.77, + "learning_rate": 1.0517537216660616e-05, + "logits/chosen": -1.3833104372024536, + "logits/rejected": -3.1511731147766113, + "logps/chosen": -53.43810272216797, + "logps/rejected": -496.3740234375, + "loss": 0.0386, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.635730266571045, + "rewards/margins": 5.935802936553955, + "rewards/rejected": -8.571533203125, + "step": 4949 + }, + { + "epoch": 0.77, + "learning_rate": 1.0516803776129468e-05, + "logits/chosen": -2.30239200592041, + "logits/rejected": -3.0672414302825928, + "logps/chosen": -81.82855224609375, + "logps/rejected": -309.1565246582031, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4947714805603027, + "rewards/margins": 7.364144325256348, + "rewards/rejected": -9.858915328979492, + "step": 4950 + }, + { + "epoch": 0.77, + "learning_rate": 1.051607033559832e-05, + "logits/chosen": -2.66971755027771, + "logits/rejected": -3.1577401161193848, + "logps/chosen": -240.04693603515625, + "logps/rejected": -334.134033203125, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.260877251625061, + "rewards/margins": 3.775165319442749, + "rewards/rejected": -5.0360426902771, + "step": 4951 + }, + { + "epoch": 0.77, + "learning_rate": 1.0515336895067172e-05, + "logits/chosen": -3.1657817363739014, + "logits/rejected": -2.0243682861328125, + "logps/chosen": -436.64825439453125, + "logps/rejected": -438.14312744140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.493495225906372, + "rewards/margins": 9.202460289001465, + "rewards/rejected": -10.695955276489258, + "step": 4952 + }, + { + "epoch": 0.77, + "learning_rate": 1.0514603454536024e-05, + "logits/chosen": -1.3251594305038452, + "logits/rejected": -2.676098346710205, + "logps/chosen": -53.03852462768555, + "logps/rejected": -246.43142700195312, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1997642517089844, + "rewards/margins": 4.989658832550049, + "rewards/rejected": -7.189423561096191, + "step": 4953 + }, + { + "epoch": 0.77, + "learning_rate": 1.0513870014004876e-05, + "logits/chosen": -2.5389599800109863, + "logits/rejected": -3.1592471599578857, + "logps/chosen": -443.82550048828125, + "logps/rejected": -433.3141784667969, + "loss": 0.0572, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8951034545898438, + "rewards/margins": 3.6765761375427246, + "rewards/rejected": -5.571679592132568, + "step": 4954 + }, + { + "epoch": 0.77, + "learning_rate": 1.0513136573473728e-05, + "logits/chosen": -2.4215705394744873, + "logits/rejected": -2.9260661602020264, + "logps/chosen": -224.3536834716797, + "logps/rejected": -485.6773986816406, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1460961103439331, + "rewards/margins": 9.762008666992188, + "rewards/rejected": -9.90810489654541, + "step": 4955 + }, + { + "epoch": 0.77, + "learning_rate": 1.051240313294258e-05, + "logits/chosen": -2.783503293991089, + "logits/rejected": -3.0521011352539062, + "logps/chosen": -217.26712036132812, + "logps/rejected": -184.95477294921875, + "loss": 0.6814, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.784496307373047, + "rewards/margins": 2.1059012413024902, + "rewards/rejected": -4.890397548675537, + "step": 4956 + }, + { + "epoch": 0.77, + "learning_rate": 1.0511669692411433e-05, + "logits/chosen": -2.502305269241333, + "logits/rejected": -3.0488157272338867, + "logps/chosen": -128.85736083984375, + "logps/rejected": -280.65301513671875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1953296661376953, + "rewards/margins": 6.596763610839844, + "rewards/rejected": -7.792093276977539, + "step": 4957 + }, + { + "epoch": 0.77, + "learning_rate": 1.0510936251880285e-05, + "logits/chosen": -2.9581947326660156, + "logits/rejected": -2.175361156463623, + "logps/chosen": -217.46798706054688, + "logps/rejected": -125.80397033691406, + "loss": 2.1073, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7111334800720215, + "rewards/margins": 0.06875467300415039, + "rewards/rejected": -3.7798879146575928, + "step": 4958 + }, + { + "epoch": 0.77, + "learning_rate": 1.0510202811349137e-05, + "logits/chosen": -2.8119640350341797, + "logits/rejected": -3.1880736351013184, + "logps/chosen": -91.55256652832031, + "logps/rejected": -193.75527954101562, + "loss": 0.0572, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.395733594894409, + "rewards/margins": 3.310100555419922, + "rewards/rejected": -5.70583438873291, + "step": 4959 + }, + { + "epoch": 0.77, + "learning_rate": 1.0509469370817989e-05, + "logits/chosen": -3.053454637527466, + "logits/rejected": -2.7284226417541504, + "logps/chosen": -1096.132568359375, + "logps/rejected": -667.03173828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0069977045059204, + "rewards/margins": 7.586130142211914, + "rewards/rejected": -8.593128204345703, + "step": 4960 + }, + { + "epoch": 0.77, + "learning_rate": 1.050873593028684e-05, + "logits/chosen": -3.1700315475463867, + "logits/rejected": -1.990297794342041, + "logps/chosen": -243.02572631835938, + "logps/rejected": -80.12899780273438, + "loss": 2.8048, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.259326934814453, + "rewards/margins": -1.633882761001587, + "rewards/rejected": -2.625444173812866, + "step": 4961 + }, + { + "epoch": 0.77, + "learning_rate": 1.0508002489755692e-05, + "logits/chosen": -2.502133369445801, + "logits/rejected": -3.349135160446167, + "logps/chosen": -126.32695007324219, + "logps/rejected": -292.38421630859375, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3718339204788208, + "rewards/margins": 5.67066764831543, + "rewards/rejected": -7.042501449584961, + "step": 4962 + }, + { + "epoch": 0.77, + "learning_rate": 1.0507269049224544e-05, + "logits/chosen": -2.000789165496826, + "logits/rejected": -3.050342559814453, + "logps/chosen": -67.13677978515625, + "logps/rejected": -304.5722351074219, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0243535041809082, + "rewards/margins": 7.387600898742676, + "rewards/rejected": -8.411953926086426, + "step": 4963 + }, + { + "epoch": 0.77, + "learning_rate": 1.0506535608693396e-05, + "logits/chosen": -2.2127697467803955, + "logits/rejected": -2.865055561065674, + "logps/chosen": -154.6627655029297, + "logps/rejected": -254.60403442382812, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7061344385147095, + "rewards/margins": 5.173050880432129, + "rewards/rejected": -5.879185199737549, + "step": 4964 + }, + { + "epoch": 0.77, + "learning_rate": 1.0505802168162248e-05, + "logits/chosen": -3.055729866027832, + "logits/rejected": -2.4807586669921875, + "logps/chosen": -576.441162109375, + "logps/rejected": -478.1007385253906, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3415481448173523, + "rewards/margins": 6.157833099365234, + "rewards/rejected": -6.499381065368652, + "step": 4965 + }, + { + "epoch": 0.77, + "learning_rate": 1.0505068727631102e-05, + "logits/chosen": -2.8647098541259766, + "logits/rejected": -1.861814260482788, + "logps/chosen": -150.4825439453125, + "logps/rejected": -205.5723876953125, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3328475952148438, + "rewards/margins": 5.112861633300781, + "rewards/rejected": -6.445709228515625, + "step": 4966 + }, + { + "epoch": 0.77, + "learning_rate": 1.0504335287099953e-05, + "logits/chosen": -1.0552375316619873, + "logits/rejected": -2.048489570617676, + "logps/chosen": -215.38836669921875, + "logps/rejected": -334.6232604980469, + "loss": 1.0798, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.491459846496582, + "rewards/margins": 2.4390921592712402, + "rewards/rejected": -4.930552005767822, + "step": 4967 + }, + { + "epoch": 0.77, + "learning_rate": 1.0503601846568805e-05, + "logits/chosen": -3.0112836360931396, + "logits/rejected": -2.3681581020355225, + "logps/chosen": -228.9064483642578, + "logps/rejected": -251.42935180664062, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8638451099395752, + "rewards/margins": 5.704669952392578, + "rewards/rejected": -6.568514823913574, + "step": 4968 + }, + { + "epoch": 0.77, + "learning_rate": 1.0502868406037657e-05, + "logits/chosen": -2.414686918258667, + "logits/rejected": -3.0844507217407227, + "logps/chosen": -385.0451965332031, + "logps/rejected": -374.1238708496094, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10540542006492615, + "rewards/margins": 5.517528533935547, + "rewards/rejected": -5.412123203277588, + "step": 4969 + }, + { + "epoch": 0.77, + "learning_rate": 1.0502134965506509e-05, + "logits/chosen": -2.0977554321289062, + "logits/rejected": -3.035055637359619, + "logps/chosen": -252.94131469726562, + "logps/rejected": -229.5849151611328, + "loss": 1.6772, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3265061378479004, + "rewards/margins": 1.6509385108947754, + "rewards/rejected": -4.977444648742676, + "step": 4970 + }, + { + "epoch": 0.77, + "learning_rate": 1.0501401524975361e-05, + "logits/chosen": -2.7022461891174316, + "logits/rejected": -3.168649673461914, + "logps/chosen": -159.66217041015625, + "logps/rejected": -225.52163696289062, + "loss": 0.1048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.815416932106018, + "rewards/margins": 3.814105272293091, + "rewards/rejected": -5.629522323608398, + "step": 4971 + }, + { + "epoch": 0.77, + "learning_rate": 1.0500668084444213e-05, + "logits/chosen": -2.2964344024658203, + "logits/rejected": -2.8616130352020264, + "logps/chosen": -154.925048828125, + "logps/rejected": -377.6692810058594, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3811393976211548, + "rewards/margins": 10.401185989379883, + "rewards/rejected": -11.78232479095459, + "step": 4972 + }, + { + "epoch": 0.77, + "learning_rate": 1.0499934643913066e-05, + "logits/chosen": -2.0958073139190674, + "logits/rejected": -2.8593051433563232, + "logps/chosen": -73.66950988769531, + "logps/rejected": -184.82550048828125, + "loss": 0.1748, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8889286518096924, + "rewards/margins": 2.9402246475219727, + "rewards/rejected": -4.829153060913086, + "step": 4973 + }, + { + "epoch": 0.77, + "learning_rate": 1.0499201203381918e-05, + "logits/chosen": -2.8010830879211426, + "logits/rejected": -3.3840246200561523, + "logps/chosen": -228.7592010498047, + "logps/rejected": -378.0184020996094, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3968453407287598, + "rewards/margins": 6.985307693481445, + "rewards/rejected": -8.382153511047363, + "step": 4974 + }, + { + "epoch": 0.77, + "learning_rate": 1.0498467762850772e-05, + "logits/chosen": -2.882720470428467, + "logits/rejected": -3.0693838596343994, + "logps/chosen": -169.94400024414062, + "logps/rejected": -249.47708129882812, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18120728433132172, + "rewards/margins": 6.159334182739258, + "rewards/rejected": -6.340540885925293, + "step": 4975 + }, + { + "epoch": 0.77, + "learning_rate": 1.0497734322319624e-05, + "logits/chosen": -3.008928060531616, + "logits/rejected": -1.8649461269378662, + "logps/chosen": -208.5276336669922, + "logps/rejected": -247.69244384765625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1695678234100342, + "rewards/margins": 5.61778450012207, + "rewards/rejected": -6.787352085113525, + "step": 4976 + }, + { + "epoch": 0.77, + "learning_rate": 1.0497000881788476e-05, + "logits/chosen": -1.6784403324127197, + "logits/rejected": -2.9171860218048096, + "logps/chosen": -95.86580657958984, + "logps/rejected": -391.0177307128906, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.47521400451660156, + "rewards/margins": 8.11236572265625, + "rewards/rejected": -8.587579727172852, + "step": 4977 + }, + { + "epoch": 0.77, + "learning_rate": 1.0496267441257327e-05, + "logits/chosen": -2.2831032276153564, + "logits/rejected": -2.674898624420166, + "logps/chosen": -119.74754333496094, + "logps/rejected": -305.416259765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8793872594833374, + "rewards/margins": 9.064533233642578, + "rewards/rejected": -9.943920135498047, + "step": 4978 + }, + { + "epoch": 0.77, + "learning_rate": 1.049553400072618e-05, + "logits/chosen": -2.9339334964752197, + "logits/rejected": -2.8587374687194824, + "logps/chosen": -280.12030029296875, + "logps/rejected": -272.4864501953125, + "loss": 2.5752, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.195289611816406, + "rewards/margins": 0.9303576946258545, + "rewards/rejected": -5.12564754486084, + "step": 4979 + }, + { + "epoch": 0.77, + "learning_rate": 1.0494800560195031e-05, + "logits/chosen": -2.207200527191162, + "logits/rejected": -3.1632614135742188, + "logps/chosen": -159.4690704345703, + "logps/rejected": -371.17462158203125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1699638366699219, + "rewards/margins": 7.034522533416748, + "rewards/rejected": -8.204486846923828, + "step": 4980 + }, + { + "epoch": 0.77, + "learning_rate": 1.0494067119663883e-05, + "logits/chosen": -2.3123865127563477, + "logits/rejected": -3.027010202407837, + "logps/chosen": -61.246150970458984, + "logps/rejected": -202.06800842285156, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7654573917388916, + "rewards/margins": 4.945894241333008, + "rewards/rejected": -6.7113518714904785, + "step": 4981 + }, + { + "epoch": 0.77, + "learning_rate": 1.0493333679132735e-05, + "logits/chosen": -3.0696725845336914, + "logits/rejected": -2.436652898788452, + "logps/chosen": -669.4783325195312, + "logps/rejected": -484.6134338378906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4150147438049316, + "rewards/margins": 8.28282356262207, + "rewards/rejected": -9.697837829589844, + "step": 4982 + }, + { + "epoch": 0.77, + "learning_rate": 1.0492600238601587e-05, + "logits/chosen": -2.340869903564453, + "logits/rejected": -2.9495835304260254, + "logps/chosen": -141.49755859375, + "logps/rejected": -172.02532958984375, + "loss": 2.7698, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.152270793914795, + "rewards/margins": 1.1890249252319336, + "rewards/rejected": -6.3412957191467285, + "step": 4983 + }, + { + "epoch": 0.78, + "learning_rate": 1.049186679807044e-05, + "logits/chosen": -2.8442142009735107, + "logits/rejected": -3.0224812030792236, + "logps/chosen": -206.01492309570312, + "logps/rejected": -184.70074462890625, + "loss": 2.7907, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.867161750793457, + "rewards/margins": 0.15832161903381348, + "rewards/rejected": -4.025483131408691, + "step": 4984 + }, + { + "epoch": 0.78, + "learning_rate": 1.0491133357539292e-05, + "logits/chosen": -2.7500720024108887, + "logits/rejected": -3.139826774597168, + "logps/chosen": -98.68569946289062, + "logps/rejected": -467.68048095703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.373183012008667, + "rewards/margins": 8.598676681518555, + "rewards/rejected": -9.9718599319458, + "step": 4985 + }, + { + "epoch": 0.78, + "learning_rate": 1.0490399917008144e-05, + "logits/chosen": -2.7068052291870117, + "logits/rejected": -3.2479262351989746, + "logps/chosen": -89.11154174804688, + "logps/rejected": -407.94561767578125, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7594166398048401, + "rewards/margins": 5.803829193115234, + "rewards/rejected": -6.56324577331543, + "step": 4986 + }, + { + "epoch": 0.78, + "learning_rate": 1.0489666476476996e-05, + "logits/chosen": -3.10477352142334, + "logits/rejected": -2.7717387676239014, + "logps/chosen": -115.95771789550781, + "logps/rejected": -167.44264221191406, + "loss": 3.7275, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.589018821716309, + "rewards/margins": -0.4695420265197754, + "rewards/rejected": -4.119476795196533, + "step": 4987 + }, + { + "epoch": 0.78, + "learning_rate": 1.0488933035945848e-05, + "logits/chosen": -3.0464560985565186, + "logits/rejected": -3.17868709564209, + "logps/chosen": -143.31723022460938, + "logps/rejected": -298.9954528808594, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.200555443763733, + "rewards/margins": 4.509763240814209, + "rewards/rejected": -5.710318565368652, + "step": 4988 + }, + { + "epoch": 0.78, + "learning_rate": 1.04881995954147e-05, + "logits/chosen": -3.119868755340576, + "logits/rejected": -2.496750831604004, + "logps/chosen": -246.1204833984375, + "logps/rejected": -281.1468505859375, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1606018543243408, + "rewards/margins": 4.962259292602539, + "rewards/rejected": -6.122860908508301, + "step": 4989 + }, + { + "epoch": 0.78, + "learning_rate": 1.0487466154883552e-05, + "logits/chosen": -3.00347900390625, + "logits/rejected": -2.883779764175415, + "logps/chosen": -194.47499084472656, + "logps/rejected": -170.68798828125, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4563263654708862, + "rewards/margins": 6.289867877960205, + "rewards/rejected": -7.746193885803223, + "step": 4990 + }, + { + "epoch": 0.78, + "learning_rate": 1.0486732714352404e-05, + "logits/chosen": -2.925229787826538, + "logits/rejected": -2.641571521759033, + "logps/chosen": -314.1029357910156, + "logps/rejected": -173.1352081298828, + "loss": 3.9041, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.208579063415527, + "rewards/margins": -1.131730556488037, + "rewards/rejected": -4.076848983764648, + "step": 4991 + }, + { + "epoch": 0.78, + "learning_rate": 1.0485999273821255e-05, + "logits/chosen": -2.9977335929870605, + "logits/rejected": -2.94317364692688, + "logps/chosen": -140.4864959716797, + "logps/rejected": -236.09274291992188, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9786137938499451, + "rewards/margins": 5.680144309997559, + "rewards/rejected": -6.658758163452148, + "step": 4992 + }, + { + "epoch": 0.78, + "learning_rate": 1.0485265833290109e-05, + "logits/chosen": -1.7845770120620728, + "logits/rejected": -2.8232920169830322, + "logps/chosen": -109.72660827636719, + "logps/rejected": -229.8490447998047, + "loss": 0.9164, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7229788303375244, + "rewards/margins": 1.1630197763442993, + "rewards/rejected": -3.8859987258911133, + "step": 4993 + }, + { + "epoch": 0.78, + "learning_rate": 1.0484532392758961e-05, + "logits/chosen": -1.4289848804473877, + "logits/rejected": -2.917682409286499, + "logps/chosen": -55.9803581237793, + "logps/rejected": -364.533935546875, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2800111770629883, + "rewards/margins": 5.535902500152588, + "rewards/rejected": -7.815913677215576, + "step": 4994 + }, + { + "epoch": 0.78, + "learning_rate": 1.0483798952227813e-05, + "logits/chosen": -2.211195468902588, + "logits/rejected": -2.905499219894409, + "logps/chosen": -165.69386291503906, + "logps/rejected": -438.249755859375, + "loss": 3.6806, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.297494888305664, + "rewards/margins": -1.6320972442626953, + "rewards/rejected": -2.6653976440429688, + "step": 4995 + }, + { + "epoch": 0.78, + "learning_rate": 1.0483065511696665e-05, + "logits/chosen": -2.9332528114318848, + "logits/rejected": -3.081498146057129, + "logps/chosen": -523.8463745117188, + "logps/rejected": -249.48707580566406, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39803391695022583, + "rewards/margins": 4.788878917694092, + "rewards/rejected": -5.186912536621094, + "step": 4996 + }, + { + "epoch": 0.78, + "learning_rate": 1.0482332071165517e-05, + "logits/chosen": -2.6919336318969727, + "logits/rejected": -3.3204593658447266, + "logps/chosen": -52.368125915527344, + "logps/rejected": -396.5355224609375, + "loss": 0.1983, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.808476448059082, + "rewards/margins": 1.9131819009780884, + "rewards/rejected": -3.721658229827881, + "step": 4997 + }, + { + "epoch": 0.78, + "learning_rate": 1.0481598630634368e-05, + "logits/chosen": -3.139946222305298, + "logits/rejected": -2.7874419689178467, + "logps/chosen": -131.8009796142578, + "logps/rejected": -163.3109130859375, + "loss": 0.0325, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2478395700454712, + "rewards/margins": 3.584153175354004, + "rewards/rejected": -4.831993103027344, + "step": 4998 + }, + { + "epoch": 0.78, + "learning_rate": 1.048086519010322e-05, + "logits/chosen": -3.1454529762268066, + "logits/rejected": -2.5139927864074707, + "logps/chosen": -228.12289428710938, + "logps/rejected": -282.9207763671875, + "loss": 0.2335, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1905243396759033, + "rewards/margins": 3.233839511871338, + "rewards/rejected": -5.424363613128662, + "step": 4999 + }, + { + "epoch": 0.78, + "learning_rate": 1.0480131749572072e-05, + "logits/chosen": -1.5499253273010254, + "logits/rejected": -3.03364634513855, + "logps/chosen": -240.4862060546875, + "logps/rejected": -472.6387023925781, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4687957763671875, + "rewards/margins": 6.389060974121094, + "rewards/rejected": -6.857856750488281, + "step": 5000 + }, + { + "epoch": 0.78, + "learning_rate": 1.0479398309040924e-05, + "logits/chosen": -2.813602924346924, + "logits/rejected": -3.1597273349761963, + "logps/chosen": -328.7406311035156, + "logps/rejected": -227.22096252441406, + "loss": 4.2535, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.252838611602783, + "rewards/margins": -1.864753007888794, + "rewards/rejected": -3.38808536529541, + "step": 5001 + }, + { + "epoch": 0.78, + "learning_rate": 1.0478664868509778e-05, + "logits/chosen": -2.997229814529419, + "logits/rejected": -2.538132429122925, + "logps/chosen": -161.4739990234375, + "logps/rejected": -214.92420959472656, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8516197204589844, + "rewards/margins": 6.593803405761719, + "rewards/rejected": -8.445423126220703, + "step": 5002 + }, + { + "epoch": 0.78, + "learning_rate": 1.047793142797863e-05, + "logits/chosen": -1.7035483121871948, + "logits/rejected": -2.75512957572937, + "logps/chosen": -58.60026550292969, + "logps/rejected": -168.82583618164062, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8728220462799072, + "rewards/margins": 3.723144769668579, + "rewards/rejected": -5.595966815948486, + "step": 5003 + }, + { + "epoch": 0.78, + "learning_rate": 1.0477197987447481e-05, + "logits/chosen": -2.290229320526123, + "logits/rejected": -2.9409797191619873, + "logps/chosen": -329.9541320800781, + "logps/rejected": -311.10833740234375, + "loss": 4.581, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.99847412109375, + "rewards/margins": -1.7277946472167969, + "rewards/rejected": -4.270679473876953, + "step": 5004 + }, + { + "epoch": 0.78, + "learning_rate": 1.0476464546916333e-05, + "logits/chosen": -3.0724382400512695, + "logits/rejected": -1.7451682090759277, + "logps/chosen": -292.2222900390625, + "logps/rejected": -102.24786376953125, + "loss": 0.3723, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4148151874542236, + "rewards/margins": 2.3992760181427, + "rewards/rejected": -3.814091205596924, + "step": 5005 + }, + { + "epoch": 0.78, + "learning_rate": 1.0475731106385185e-05, + "logits/chosen": -1.6374099254608154, + "logits/rejected": -3.175751209259033, + "logps/chosen": -103.19454956054688, + "logps/rejected": -444.2144470214844, + "loss": 3.6309, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.064520835876465, + "rewards/margins": -2.1499247550964355, + "rewards/rejected": -1.9145958423614502, + "step": 5006 + }, + { + "epoch": 0.78, + "learning_rate": 1.0474997665854039e-05, + "logits/chosen": -1.418115258216858, + "logits/rejected": -3.0335376262664795, + "logps/chosen": -131.03578186035156, + "logps/rejected": -489.6921081542969, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9738147258758545, + "rewards/margins": 7.916309833526611, + "rewards/rejected": -9.890125274658203, + "step": 5007 + }, + { + "epoch": 0.78, + "learning_rate": 1.047426422532289e-05, + "logits/chosen": -1.8540068864822388, + "logits/rejected": -2.554253101348877, + "logps/chosen": -113.73175811767578, + "logps/rejected": -416.21728515625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.789790391921997, + "rewards/margins": 7.806456565856934, + "rewards/rejected": -9.596246719360352, + "step": 5008 + }, + { + "epoch": 0.78, + "learning_rate": 1.0473530784791742e-05, + "logits/chosen": -1.9385610818862915, + "logits/rejected": -2.937786817550659, + "logps/chosen": -116.01789855957031, + "logps/rejected": -308.024169921875, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.174306869506836, + "rewards/margins": 5.577862739562988, + "rewards/rejected": -6.752169609069824, + "step": 5009 + }, + { + "epoch": 0.78, + "learning_rate": 1.0472797344260594e-05, + "logits/chosen": -1.7961163520812988, + "logits/rejected": -3.0714621543884277, + "logps/chosen": -150.1363067626953, + "logps/rejected": -505.47918701171875, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5590381622314453, + "rewards/margins": 6.594532012939453, + "rewards/rejected": -7.153570652008057, + "step": 5010 + }, + { + "epoch": 0.78, + "learning_rate": 1.0472063903729448e-05, + "logits/chosen": -1.8844672441482544, + "logits/rejected": -2.8957293033599854, + "logps/chosen": -76.68141174316406, + "logps/rejected": -239.96661376953125, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0229606628417969, + "rewards/margins": 5.556275367736816, + "rewards/rejected": -6.579236030578613, + "step": 5011 + }, + { + "epoch": 0.78, + "learning_rate": 1.04713304631983e-05, + "logits/chosen": -1.6337608098983765, + "logits/rejected": -3.055567741394043, + "logps/chosen": -699.5927734375, + "logps/rejected": -709.03466796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04789122939109802, + "rewards/margins": 8.00908374786377, + "rewards/rejected": -7.961193084716797, + "step": 5012 + }, + { + "epoch": 0.78, + "learning_rate": 1.0470597022667152e-05, + "logits/chosen": -2.0384809970855713, + "logits/rejected": -3.1435673236846924, + "logps/chosen": -124.11668395996094, + "logps/rejected": -314.0865478515625, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24404069781303406, + "rewards/margins": 4.805423736572266, + "rewards/rejected": -5.049464225769043, + "step": 5013 + }, + { + "epoch": 0.78, + "learning_rate": 1.0469863582136004e-05, + "logits/chosen": -3.008359670639038, + "logits/rejected": -3.267427682876587, + "logps/chosen": -118.9950180053711, + "logps/rejected": -332.4102478027344, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0432742834091187, + "rewards/margins": 4.836258411407471, + "rewards/rejected": -5.879532814025879, + "step": 5014 + }, + { + "epoch": 0.78, + "learning_rate": 1.0469130141604855e-05, + "logits/chosen": -3.137711763381958, + "logits/rejected": -3.1573057174682617, + "logps/chosen": -103.07647705078125, + "logps/rejected": -154.94012451171875, + "loss": 0.106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5047019720077515, + "rewards/margins": 2.9555017948150635, + "rewards/rejected": -4.460203647613525, + "step": 5015 + }, + { + "epoch": 0.78, + "learning_rate": 1.0468396701073707e-05, + "logits/chosen": -3.1742703914642334, + "logits/rejected": -2.756131172180176, + "logps/chosen": -492.38958740234375, + "logps/rejected": -359.5440979003906, + "loss": 0.0997, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.050065279006958, + "rewards/margins": 3.6610336303710938, + "rewards/rejected": -4.711099147796631, + "step": 5016 + }, + { + "epoch": 0.78, + "learning_rate": 1.046766326054256e-05, + "logits/chosen": -2.031954765319824, + "logits/rejected": -3.0801217555999756, + "logps/chosen": -59.692440032958984, + "logps/rejected": -252.3002471923828, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.268787145614624, + "rewards/margins": 4.514137268066406, + "rewards/rejected": -5.782924652099609, + "step": 5017 + }, + { + "epoch": 0.78, + "learning_rate": 1.0466929820011411e-05, + "logits/chosen": -3.17172908782959, + "logits/rejected": -2.9967596530914307, + "logps/chosen": -303.77093505859375, + "logps/rejected": -176.73475646972656, + "loss": 5.9325, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.961725234985352, + "rewards/margins": -5.929407119750977, + "rewards/rejected": -0.032318115234375, + "step": 5018 + }, + { + "epoch": 0.78, + "learning_rate": 1.0466196379480265e-05, + "logits/chosen": -1.8700212240219116, + "logits/rejected": -2.722209930419922, + "logps/chosen": -184.9827880859375, + "logps/rejected": -402.4975891113281, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9647594690322876, + "rewards/margins": 8.108280181884766, + "rewards/rejected": -9.073040008544922, + "step": 5019 + }, + { + "epoch": 0.78, + "learning_rate": 1.0465462938949117e-05, + "logits/chosen": -3.0107243061065674, + "logits/rejected": -3.1462550163269043, + "logps/chosen": -213.16041564941406, + "logps/rejected": -128.16600036621094, + "loss": 1.4837, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.693495273590088, + "rewards/margins": 1.7897028923034668, + "rewards/rejected": -4.483198165893555, + "step": 5020 + }, + { + "epoch": 0.78, + "learning_rate": 1.0464729498417968e-05, + "logits/chosen": -2.0861141681671143, + "logits/rejected": -3.015911817550659, + "logps/chosen": -75.7228012084961, + "logps/rejected": -365.7808532714844, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7896696329116821, + "rewards/margins": 5.704783916473389, + "rewards/rejected": -6.494453430175781, + "step": 5021 + }, + { + "epoch": 0.78, + "learning_rate": 1.046399605788682e-05, + "logits/chosen": -2.928382635116577, + "logits/rejected": -3.134610652923584, + "logps/chosen": -25.465328216552734, + "logps/rejected": -118.28958129882812, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8996162414550781, + "rewards/margins": 4.797394752502441, + "rewards/rejected": -5.6970109939575195, + "step": 5022 + }, + { + "epoch": 0.78, + "learning_rate": 1.0463262617355672e-05, + "logits/chosen": -2.581235647201538, + "logits/rejected": -2.4205760955810547, + "logps/chosen": -266.38262939453125, + "logps/rejected": -239.88092041015625, + "loss": 4.1899, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.681837558746338, + "rewards/margins": -1.1127920150756836, + "rewards/rejected": -2.569045305252075, + "step": 5023 + }, + { + "epoch": 0.78, + "learning_rate": 1.0462529176824524e-05, + "logits/chosen": -1.9942924976348877, + "logits/rejected": -3.1470425128936768, + "logps/chosen": -359.8050537109375, + "logps/rejected": -501.45611572265625, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5490890741348267, + "rewards/margins": 4.7006120681762695, + "rewards/rejected": -5.249701023101807, + "step": 5024 + }, + { + "epoch": 0.78, + "learning_rate": 1.0461795736293376e-05, + "logits/chosen": -3.2200679779052734, + "logits/rejected": -3.2273635864257812, + "logps/chosen": -221.45069885253906, + "logps/rejected": -163.71192932128906, + "loss": 2.3923, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.907523155212402, + "rewards/margins": -2.068767547607422, + "rewards/rejected": -2.8387558460235596, + "step": 5025 + }, + { + "epoch": 0.78, + "learning_rate": 1.0461062295762228e-05, + "logits/chosen": -2.6334168910980225, + "logits/rejected": -3.034710645675659, + "logps/chosen": -152.29800415039062, + "logps/rejected": -211.4560546875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4300430417060852, + "rewards/margins": 5.871140956878662, + "rewards/rejected": -6.301184177398682, + "step": 5026 + }, + { + "epoch": 0.78, + "learning_rate": 1.046032885523108e-05, + "logits/chosen": -3.2349894046783447, + "logits/rejected": -2.546098470687866, + "logps/chosen": -195.98800659179688, + "logps/rejected": -20.77246856689453, + "loss": 3.6362, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.676207065582275, + "rewards/margins": -3.608668088912964, + "rewards/rejected": -1.0675389766693115, + "step": 5027 + }, + { + "epoch": 0.78, + "learning_rate": 1.0459595414699933e-05, + "logits/chosen": -3.167977809906006, + "logits/rejected": -3.2626962661743164, + "logps/chosen": -166.89764404296875, + "logps/rejected": -299.5204772949219, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3518310785293579, + "rewards/margins": 5.986347198486328, + "rewards/rejected": -6.338178634643555, + "step": 5028 + }, + { + "epoch": 0.78, + "learning_rate": 1.0458861974168785e-05, + "logits/chosen": -2.5765857696533203, + "logits/rejected": -3.1407248973846436, + "logps/chosen": -122.52963256835938, + "logps/rejected": -199.83465576171875, + "loss": 1.4604, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.3674113750457764, + "rewards/margins": -1.127516746520996, + "rewards/rejected": -1.2398945093154907, + "step": 5029 + }, + { + "epoch": 0.78, + "learning_rate": 1.0458128533637637e-05, + "logits/chosen": -1.555246353149414, + "logits/rejected": -2.6330769062042236, + "logps/chosen": -216.55905151367188, + "logps/rejected": -285.51678466796875, + "loss": 1.7906, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6560065746307373, + "rewards/margins": 1.9889678955078125, + "rewards/rejected": -4.644974708557129, + "step": 5030 + }, + { + "epoch": 0.78, + "learning_rate": 1.0457395093106489e-05, + "logits/chosen": -3.0633018016815186, + "logits/rejected": -2.9828131198883057, + "logps/chosen": -109.06670379638672, + "logps/rejected": -194.40704345703125, + "loss": 0.2418, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8314560651779175, + "rewards/margins": 2.409465789794922, + "rewards/rejected": -4.240921974182129, + "step": 5031 + }, + { + "epoch": 0.78, + "learning_rate": 1.045666165257534e-05, + "logits/chosen": -2.1368026733398438, + "logits/rejected": -3.2210958003997803, + "logps/chosen": -209.03651428222656, + "logps/rejected": -230.8424072265625, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04150819778442383, + "rewards/margins": 5.366331100463867, + "rewards/rejected": -5.407839298248291, + "step": 5032 + }, + { + "epoch": 0.78, + "learning_rate": 1.0455928212044193e-05, + "logits/chosen": -3.0979933738708496, + "logits/rejected": -3.1271426677703857, + "logps/chosen": -217.312744140625, + "logps/rejected": -237.45632934570312, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8773699998855591, + "rewards/margins": 5.581398010253906, + "rewards/rejected": -6.458768367767334, + "step": 5033 + }, + { + "epoch": 0.78, + "learning_rate": 1.0455194771513045e-05, + "logits/chosen": -1.5912063121795654, + "logits/rejected": -2.7022838592529297, + "logps/chosen": -88.83869934082031, + "logps/rejected": -289.281494140625, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6105034351348877, + "rewards/margins": 5.091400146484375, + "rewards/rejected": -5.701903343200684, + "step": 5034 + }, + { + "epoch": 0.78, + "learning_rate": 1.0454461330981896e-05, + "logits/chosen": -3.162858724594116, + "logits/rejected": -3.0452022552490234, + "logps/chosen": -347.6036071777344, + "logps/rejected": -117.80601501464844, + "loss": 2.6612, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.003337860107422, + "rewards/margins": -0.8885406255722046, + "rewards/rejected": -2.1147971153259277, + "step": 5035 + }, + { + "epoch": 0.78, + "learning_rate": 1.0453727890450748e-05, + "logits/chosen": -2.141096353530884, + "logits/rejected": -2.922565221786499, + "logps/chosen": -115.64616394042969, + "logps/rejected": -283.9620361328125, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2076374292373657, + "rewards/margins": 6.506092548370361, + "rewards/rejected": -7.7137298583984375, + "step": 5036 + }, + { + "epoch": 0.78, + "learning_rate": 1.0452994449919602e-05, + "logits/chosen": -2.7505979537963867, + "logits/rejected": -3.2615020275115967, + "logps/chosen": -669.6968383789062, + "logps/rejected": -645.4296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8752899169921875, + "rewards/margins": 8.378069877624512, + "rewards/rejected": -7.502779960632324, + "step": 5037 + }, + { + "epoch": 0.78, + "learning_rate": 1.0452261009388454e-05, + "logits/chosen": -3.003887176513672, + "logits/rejected": -3.2181150913238525, + "logps/chosen": -139.54417419433594, + "logps/rejected": -417.540283203125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027656175196170807, + "rewards/margins": 7.001981735229492, + "rewards/rejected": -6.974325656890869, + "step": 5038 + }, + { + "epoch": 0.78, + "learning_rate": 1.0451527568857306e-05, + "logits/chosen": -1.5181301832199097, + "logits/rejected": -3.0579946041107178, + "logps/chosen": -136.57244873046875, + "logps/rejected": -374.4523010253906, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32476308941841125, + "rewards/margins": 5.597875595092773, + "rewards/rejected": -5.922638893127441, + "step": 5039 + }, + { + "epoch": 0.78, + "learning_rate": 1.0450794128326157e-05, + "logits/chosen": -2.293313503265381, + "logits/rejected": -2.7847771644592285, + "logps/chosen": -173.27883911132812, + "logps/rejected": -273.1787109375, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38821297883987427, + "rewards/margins": 5.180858612060547, + "rewards/rejected": -4.792645454406738, + "step": 5040 + }, + { + "epoch": 0.78, + "learning_rate": 1.0450060687795011e-05, + "logits/chosen": -2.969731330871582, + "logits/rejected": -3.0078892707824707, + "logps/chosen": -38.57221221923828, + "logps/rejected": -149.3797607421875, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9249826669692993, + "rewards/margins": 4.6573333740234375, + "rewards/rejected": -5.582315921783447, + "step": 5041 + }, + { + "epoch": 0.78, + "learning_rate": 1.0449327247263863e-05, + "logits/chosen": -3.062293529510498, + "logits/rejected": -3.1043038368225098, + "logps/chosen": -622.6053466796875, + "logps/rejected": -410.643310546875, + "loss": 2.3129, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3045761585235596, + "rewards/margins": -1.0604579448699951, + "rewards/rejected": -2.2441182136535645, + "step": 5042 + }, + { + "epoch": 0.78, + "learning_rate": 1.0448593806732715e-05, + "logits/chosen": -0.4476809799671173, + "logits/rejected": -3.3217921257019043, + "logps/chosen": -87.71641540527344, + "logps/rejected": -416.24713134765625, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9004585146903992, + "rewards/margins": 5.546428203582764, + "rewards/rejected": -6.4468865394592285, + "step": 5043 + }, + { + "epoch": 0.78, + "learning_rate": 1.0447860366201567e-05, + "logits/chosen": -2.07511043548584, + "logits/rejected": -2.858201026916504, + "logps/chosen": -153.97128295898438, + "logps/rejected": -467.0357666015625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6920841336250305, + "rewards/margins": 6.259041786193848, + "rewards/rejected": -6.9511260986328125, + "step": 5044 + }, + { + "epoch": 0.78, + "learning_rate": 1.0447126925670419e-05, + "logits/chosen": -3.034663438796997, + "logits/rejected": -2.1230368614196777, + "logps/chosen": -411.6939697265625, + "logps/rejected": -334.9591979980469, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20995259284973145, + "rewards/margins": 4.534074783325195, + "rewards/rejected": -4.744027614593506, + "step": 5045 + }, + { + "epoch": 0.78, + "learning_rate": 1.0446393485139272e-05, + "logits/chosen": -2.3379080295562744, + "logits/rejected": -2.4934098720550537, + "logps/chosen": -43.24237060546875, + "logps/rejected": -129.8247528076172, + "loss": 0.0443, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0212836265563965, + "rewards/margins": 3.175492286682129, + "rewards/rejected": -4.196775913238525, + "step": 5046 + }, + { + "epoch": 0.78, + "learning_rate": 1.0445660044608124e-05, + "logits/chosen": -2.9807662963867188, + "logits/rejected": -3.1572885513305664, + "logps/chosen": -141.8211669921875, + "logps/rejected": -82.306396484375, + "loss": 0.7907, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7295575141906738, + "rewards/margins": 1.1616883277893066, + "rewards/rejected": -2.8912458419799805, + "step": 5047 + }, + { + "epoch": 0.79, + "learning_rate": 1.0444926604076976e-05, + "logits/chosen": -3.0102949142456055, + "logits/rejected": -3.197540283203125, + "logps/chosen": -90.23265838623047, + "logps/rejected": -109.8277587890625, + "loss": 2.5387, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.510732650756836, + "rewards/margins": 0.15383648872375488, + "rewards/rejected": -3.6645689010620117, + "step": 5048 + }, + { + "epoch": 0.79, + "learning_rate": 1.0444193163545828e-05, + "logits/chosen": -2.1861157417297363, + "logits/rejected": -3.03664493560791, + "logps/chosen": -102.43341064453125, + "logps/rejected": -332.98480224609375, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14214381575584412, + "rewards/margins": 5.273334503173828, + "rewards/rejected": -5.415478706359863, + "step": 5049 + }, + { + "epoch": 0.79, + "learning_rate": 1.044345972301468e-05, + "logits/chosen": -2.206563711166382, + "logits/rejected": -3.1243419647216797, + "logps/chosen": -31.972881317138672, + "logps/rejected": -168.74960327148438, + "loss": 0.0499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46944910287857056, + "rewards/margins": 4.011048793792725, + "rewards/rejected": -4.48049783706665, + "step": 5050 + }, + { + "epoch": 0.79, + "learning_rate": 1.0442726282483532e-05, + "logits/chosen": -3.208601474761963, + "logits/rejected": -2.4238059520721436, + "logps/chosen": -305.939208984375, + "logps/rejected": -310.06866455078125, + "loss": 5.7412, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.917425632476807, + "rewards/margins": -5.7378058433532715, + "rewards/rejected": -0.17961978912353516, + "step": 5051 + }, + { + "epoch": 0.79, + "learning_rate": 1.0441992841952383e-05, + "logits/chosen": -3.068896770477295, + "logits/rejected": -3.180222988128662, + "logps/chosen": -66.17330932617188, + "logps/rejected": -229.0382843017578, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7437871098518372, + "rewards/margins": 5.984970569610596, + "rewards/rejected": -6.728757858276367, + "step": 5052 + }, + { + "epoch": 0.79, + "learning_rate": 1.0441259401421235e-05, + "logits/chosen": -2.213716506958008, + "logits/rejected": -2.881089687347412, + "logps/chosen": -189.97254943847656, + "logps/rejected": -345.24371337890625, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4564892053604126, + "rewards/margins": 5.422680854797363, + "rewards/rejected": -6.8791704177856445, + "step": 5053 + }, + { + "epoch": 0.79, + "learning_rate": 1.0440525960890087e-05, + "logits/chosen": -3.1215977668762207, + "logits/rejected": -3.1887705326080322, + "logps/chosen": -546.8436889648438, + "logps/rejected": -473.4203796386719, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3374381065368652, + "rewards/margins": 6.1283721923828125, + "rewards/rejected": -7.4658098220825195, + "step": 5054 + }, + { + "epoch": 0.79, + "learning_rate": 1.043979252035894e-05, + "logits/chosen": -2.9472241401672363, + "logits/rejected": -1.198431372642517, + "logps/chosen": -309.73504638671875, + "logps/rejected": -135.45069885253906, + "loss": 0.66, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6844123601913452, + "rewards/margins": 1.5614516735076904, + "rewards/rejected": -3.245863914489746, + "step": 5055 + }, + { + "epoch": 0.79, + "learning_rate": 1.0439059079827793e-05, + "logits/chosen": -2.1557657718658447, + "logits/rejected": -2.8328592777252197, + "logps/chosen": -136.8875274658203, + "logps/rejected": -304.1212158203125, + "loss": 0.0739, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6170296669006348, + "rewards/margins": 4.894299030303955, + "rewards/rejected": -6.51132869720459, + "step": 5056 + }, + { + "epoch": 0.79, + "learning_rate": 1.0438325639296645e-05, + "logits/chosen": -2.0874459743499756, + "logits/rejected": -3.049379348754883, + "logps/chosen": -135.31289672851562, + "logps/rejected": -277.621826171875, + "loss": 0.0325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6325660943984985, + "rewards/margins": 3.6930994987487793, + "rewards/rejected": -4.325665473937988, + "step": 5057 + }, + { + "epoch": 0.79, + "learning_rate": 1.0437592198765496e-05, + "logits/chosen": -2.2900805473327637, + "logits/rejected": -3.0474050045013428, + "logps/chosen": -104.38371276855469, + "logps/rejected": -160.21588134765625, + "loss": 1.911, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.553760051727295, + "rewards/margins": 1.365265965461731, + "rewards/rejected": -3.9190261363983154, + "step": 5058 + }, + { + "epoch": 0.79, + "learning_rate": 1.0436858758234348e-05, + "logits/chosen": -2.9647982120513916, + "logits/rejected": -3.1726889610290527, + "logps/chosen": -46.296104431152344, + "logps/rejected": -183.49417114257812, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16971845924854279, + "rewards/margins": 6.082907676696777, + "rewards/rejected": -6.252626419067383, + "step": 5059 + }, + { + "epoch": 0.79, + "learning_rate": 1.04361253177032e-05, + "logits/chosen": -2.8201870918273926, + "logits/rejected": -3.0580244064331055, + "logps/chosen": -195.7229766845703, + "logps/rejected": -205.1121368408203, + "loss": 3.2258, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.174540042877197, + "rewards/margins": 0.12958526611328125, + "rewards/rejected": -4.3041253089904785, + "step": 5060 + }, + { + "epoch": 0.79, + "learning_rate": 1.0435391877172052e-05, + "logits/chosen": -3.0149378776550293, + "logits/rejected": -3.125286817550659, + "logps/chosen": -118.00625610351562, + "logps/rejected": -213.72946166992188, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9790886044502258, + "rewards/margins": 4.960122108459473, + "rewards/rejected": -5.939210414886475, + "step": 5061 + }, + { + "epoch": 0.79, + "learning_rate": 1.0434658436640904e-05, + "logits/chosen": -0.3060206472873688, + "logits/rejected": -3.0915584564208984, + "logps/chosen": -61.575706481933594, + "logps/rejected": -232.74514770507812, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8090044260025024, + "rewards/margins": 7.000610828399658, + "rewards/rejected": -7.809615135192871, + "step": 5062 + }, + { + "epoch": 0.79, + "learning_rate": 1.0433924996109756e-05, + "logits/chosen": -2.4804067611694336, + "logits/rejected": -3.2014315128326416, + "logps/chosen": -70.13529205322266, + "logps/rejected": -201.20632934570312, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4881906509399414, + "rewards/margins": 5.150685787200928, + "rewards/rejected": -5.638876438140869, + "step": 5063 + }, + { + "epoch": 0.79, + "learning_rate": 1.043319155557861e-05, + "logits/chosen": -0.4877113997936249, + "logits/rejected": -1.468027949333191, + "logps/chosen": -119.7596206665039, + "logps/rejected": -306.6351623535156, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4840134382247925, + "rewards/margins": 5.283989906311035, + "rewards/rejected": -6.768003463745117, + "step": 5064 + }, + { + "epoch": 0.79, + "learning_rate": 1.0432458115047461e-05, + "logits/chosen": -3.1032958030700684, + "logits/rejected": -2.927973985671997, + "logps/chosen": -236.06103515625, + "logps/rejected": -235.05386352539062, + "loss": 1.2216, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.9415168762207031, + "rewards/margins": 1.616830587387085, + "rewards/rejected": -3.558347225189209, + "step": 5065 + }, + { + "epoch": 0.79, + "learning_rate": 1.0431724674516313e-05, + "logits/chosen": -2.642834186553955, + "logits/rejected": -3.0532779693603516, + "logps/chosen": -62.013370513916016, + "logps/rejected": -275.35845947265625, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0108990669250488, + "rewards/margins": 6.593719482421875, + "rewards/rejected": -7.604618072509766, + "step": 5066 + }, + { + "epoch": 0.79, + "learning_rate": 1.0430991233985165e-05, + "logits/chosen": -2.5120575428009033, + "logits/rejected": -2.9639246463775635, + "logps/chosen": -371.29986572265625, + "logps/rejected": -372.30029296875, + "loss": 2.9829, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6720917224884033, + "rewards/margins": -1.6759917736053467, + "rewards/rejected": -1.9960999488830566, + "step": 5067 + }, + { + "epoch": 0.79, + "learning_rate": 1.0430257793454017e-05, + "logits/chosen": -1.1650114059448242, + "logits/rejected": -2.8526225090026855, + "logps/chosen": -237.1691131591797, + "logps/rejected": -378.14617919921875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.662234902381897, + "rewards/margins": 5.998485565185547, + "rewards/rejected": -6.660719871520996, + "step": 5068 + }, + { + "epoch": 0.79, + "learning_rate": 1.0429524352922869e-05, + "logits/chosen": -3.2139315605163574, + "logits/rejected": -2.0331308841705322, + "logps/chosen": -256.7768859863281, + "logps/rejected": -115.52910614013672, + "loss": 2.5747, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7529709339141846, + "rewards/margins": -1.636770248413086, + "rewards/rejected": -2.1162006855010986, + "step": 5069 + }, + { + "epoch": 0.79, + "learning_rate": 1.042879091239172e-05, + "logits/chosen": -2.990694284439087, + "logits/rejected": -3.071678876876831, + "logps/chosen": -165.03494262695312, + "logps/rejected": -301.1222229003906, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7045425176620483, + "rewards/margins": 5.5944623947143555, + "rewards/rejected": -6.299004554748535, + "step": 5070 + }, + { + "epoch": 0.79, + "learning_rate": 1.0428057471860572e-05, + "logits/chosen": -3.0068576335906982, + "logits/rejected": -3.155029535293579, + "logps/chosen": -163.48448181152344, + "logps/rejected": -167.80299377441406, + "loss": 1.9981, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2902252674102783, + "rewards/margins": 0.18639099597930908, + "rewards/rejected": -2.476616144180298, + "step": 5071 + }, + { + "epoch": 0.79, + "learning_rate": 1.0427324031329424e-05, + "logits/chosen": -1.5856999158859253, + "logits/rejected": -3.0494680404663086, + "logps/chosen": -66.12727355957031, + "logps/rejected": -230.17254638671875, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17300234735012054, + "rewards/margins": 5.511946678161621, + "rewards/rejected": -5.684948921203613, + "step": 5072 + }, + { + "epoch": 0.79, + "learning_rate": 1.0426590590798278e-05, + "logits/chosen": -2.3810040950775146, + "logits/rejected": -3.1345431804656982, + "logps/chosen": -73.7432632446289, + "logps/rejected": -241.60952758789062, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0758743286132812, + "rewards/margins": 4.335885524749756, + "rewards/rejected": -5.411760330200195, + "step": 5073 + }, + { + "epoch": 0.79, + "learning_rate": 1.042585715026713e-05, + "logits/chosen": -2.6366262435913086, + "logits/rejected": -2.917510986328125, + "logps/chosen": -143.51365661621094, + "logps/rejected": -214.74386596679688, + "loss": 0.2217, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.282282829284668, + "rewards/margins": 3.1104538440704346, + "rewards/rejected": -4.392736434936523, + "step": 5074 + }, + { + "epoch": 0.79, + "learning_rate": 1.0425123709735983e-05, + "logits/chosen": -3.2896957397460938, + "logits/rejected": -3.258080005645752, + "logps/chosen": -31.81532859802246, + "logps/rejected": -148.61447143554688, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04764261096715927, + "rewards/margins": 5.155200004577637, + "rewards/rejected": -5.202842712402344, + "step": 5075 + }, + { + "epoch": 0.79, + "learning_rate": 1.0424390269204835e-05, + "logits/chosen": -2.587158679962158, + "logits/rejected": -3.1470823287963867, + "logps/chosen": -41.93893051147461, + "logps/rejected": -138.35418701171875, + "loss": 0.045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0214818716049194, + "rewards/margins": 3.9979755878448486, + "rewards/rejected": -5.019457817077637, + "step": 5076 + }, + { + "epoch": 0.79, + "learning_rate": 1.0423656828673687e-05, + "logits/chosen": -2.915048599243164, + "logits/rejected": -2.8542633056640625, + "logps/chosen": -122.05342102050781, + "logps/rejected": -305.09918212890625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6499603390693665, + "rewards/margins": 5.563239097595215, + "rewards/rejected": -6.213199615478516, + "step": 5077 + }, + { + "epoch": 0.79, + "learning_rate": 1.0422923388142539e-05, + "logits/chosen": -2.8967158794403076, + "logits/rejected": -3.2047359943389893, + "logps/chosen": -136.77590942382812, + "logps/rejected": -108.62490844726562, + "loss": 2.6568, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.1615729331970215, + "rewards/margins": -0.2568676471710205, + "rewards/rejected": -3.904705286026001, + "step": 5078 + }, + { + "epoch": 0.79, + "learning_rate": 1.0422189947611391e-05, + "logits/chosen": -2.281277656555176, + "logits/rejected": -3.241375207901001, + "logps/chosen": -328.0521240234375, + "logps/rejected": -572.4185180664062, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.772057056427002, + "rewards/margins": 7.113643646240234, + "rewards/rejected": -7.8857011795043945, + "step": 5079 + }, + { + "epoch": 0.79, + "learning_rate": 1.0421456507080243e-05, + "logits/chosen": -2.1327526569366455, + "logits/rejected": -2.9290452003479004, + "logps/chosen": -125.13681030273438, + "logps/rejected": -151.5294189453125, + "loss": 0.5902, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8781533241271973, + "rewards/margins": 1.788219690322876, + "rewards/rejected": -4.666373252868652, + "step": 5080 + }, + { + "epoch": 0.79, + "learning_rate": 1.0420723066549095e-05, + "logits/chosen": -2.773733139038086, + "logits/rejected": -3.253302812576294, + "logps/chosen": -352.4499206542969, + "logps/rejected": -355.2729797363281, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36809998750686646, + "rewards/margins": 4.307431221008301, + "rewards/rejected": -4.675531387329102, + "step": 5081 + }, + { + "epoch": 0.79, + "learning_rate": 1.0419989626017948e-05, + "logits/chosen": -3.1099443435668945, + "logits/rejected": -2.038114547729492, + "logps/chosen": -845.0305786132812, + "logps/rejected": -482.1744689941406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.913906455039978, + "rewards/margins": 8.579675674438477, + "rewards/rejected": -7.665768623352051, + "step": 5082 + }, + { + "epoch": 0.79, + "learning_rate": 1.04192561854868e-05, + "logits/chosen": -1.326270580291748, + "logits/rejected": -3.0866072177886963, + "logps/chosen": -127.02972412109375, + "logps/rejected": -508.5887451171875, + "loss": 0.0426, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.805321455001831, + "rewards/margins": 3.176231861114502, + "rewards/rejected": -4.981553077697754, + "step": 5083 + }, + { + "epoch": 0.79, + "learning_rate": 1.0418522744955652e-05, + "logits/chosen": -2.9533722400665283, + "logits/rejected": -3.250206708908081, + "logps/chosen": -229.91136169433594, + "logps/rejected": -186.90467834472656, + "loss": 4.5266, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.485448837280273, + "rewards/margins": -3.0986530780792236, + "rewards/rejected": -2.386795997619629, + "step": 5084 + }, + { + "epoch": 0.79, + "learning_rate": 1.0417789304424504e-05, + "logits/chosen": -1.8209277391433716, + "logits/rejected": -2.988516092300415, + "logps/chosen": -103.42646789550781, + "logps/rejected": -228.00979614257812, + "loss": 0.055, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0046958923339844, + "rewards/margins": 2.971095085144043, + "rewards/rejected": -4.975790977478027, + "step": 5085 + }, + { + "epoch": 0.79, + "learning_rate": 1.0417055863893356e-05, + "logits/chosen": -3.0736823081970215, + "logits/rejected": -2.7439589500427246, + "logps/chosen": -103.6505126953125, + "logps/rejected": -198.23324584960938, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7995383739471436, + "rewards/margins": 6.36378288269043, + "rewards/rejected": -7.163321018218994, + "step": 5086 + }, + { + "epoch": 0.79, + "learning_rate": 1.0416322423362208e-05, + "logits/chosen": -3.2397894859313965, + "logits/rejected": -2.6734323501586914, + "logps/chosen": -142.80654907226562, + "logps/rejected": -163.5477752685547, + "loss": 1.6682, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.098273754119873, + "rewards/margins": 1.037442922592163, + "rewards/rejected": -4.135716438293457, + "step": 5087 + }, + { + "epoch": 0.79, + "learning_rate": 1.041558898283106e-05, + "logits/chosen": -1.873340129852295, + "logits/rejected": -2.973714590072632, + "logps/chosen": -21.31703758239746, + "logps/rejected": -205.5337677001953, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3483827114105225, + "rewards/margins": 4.426054000854492, + "rewards/rejected": -5.774436950683594, + "step": 5088 + }, + { + "epoch": 0.79, + "learning_rate": 1.0414855542299911e-05, + "logits/chosen": -2.5369017124176025, + "logits/rejected": -3.3727543354034424, + "logps/chosen": -31.46249008178711, + "logps/rejected": -133.09063720703125, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6463701725006104, + "rewards/margins": 4.379474639892578, + "rewards/rejected": -6.025844573974609, + "step": 5089 + }, + { + "epoch": 0.79, + "learning_rate": 1.0414122101768763e-05, + "logits/chosen": -1.184973955154419, + "logits/rejected": -2.849194049835205, + "logps/chosen": -120.18931579589844, + "logps/rejected": -346.33709716796875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.173492431640625, + "rewards/margins": 5.57575798034668, + "rewards/rejected": -6.749250411987305, + "step": 5090 + }, + { + "epoch": 0.79, + "learning_rate": 1.0413388661237617e-05, + "logits/chosen": -2.814615249633789, + "logits/rejected": -2.9799163341522217, + "logps/chosen": -41.9751091003418, + "logps/rejected": -166.92771911621094, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.253623366355896, + "rewards/margins": 4.2716827392578125, + "rewards/rejected": -5.525306701660156, + "step": 5091 + }, + { + "epoch": 0.79, + "learning_rate": 1.0412655220706469e-05, + "logits/chosen": -1.979264497756958, + "logits/rejected": -3.1354875564575195, + "logps/chosen": -125.10870361328125, + "logps/rejected": -340.54205322265625, + "loss": 0.04, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8699913024902344, + "rewards/margins": 3.2569327354431152, + "rewards/rejected": -5.126924514770508, + "step": 5092 + }, + { + "epoch": 0.79, + "learning_rate": 1.041192178017532e-05, + "logits/chosen": -2.5203096866607666, + "logits/rejected": -3.035334825515747, + "logps/chosen": -101.02056884765625, + "logps/rejected": -274.79937744140625, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2711143493652344, + "rewards/margins": 4.959200859069824, + "rewards/rejected": -6.230315208435059, + "step": 5093 + }, + { + "epoch": 0.79, + "learning_rate": 1.0411188339644172e-05, + "logits/chosen": -2.0873501300811768, + "logits/rejected": -2.7603724002838135, + "logps/chosen": -474.20440673828125, + "logps/rejected": -709.8858642578125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.817333996295929, + "rewards/margins": 6.902188301086426, + "rewards/rejected": -7.719521999359131, + "step": 5094 + }, + { + "epoch": 0.79, + "learning_rate": 1.0410454899113024e-05, + "logits/chosen": -2.5001189708709717, + "logits/rejected": -2.9486751556396484, + "logps/chosen": -174.09384155273438, + "logps/rejected": -357.7247314453125, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.285413384437561, + "rewards/margins": 4.579238414764404, + "rewards/rejected": -5.864651679992676, + "step": 5095 + }, + { + "epoch": 0.79, + "learning_rate": 1.0409721458581876e-05, + "logits/chosen": -2.86942458152771, + "logits/rejected": -3.0324790477752686, + "logps/chosen": -160.38827514648438, + "logps/rejected": -216.73187255859375, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0356087684631348, + "rewards/margins": 5.430149078369141, + "rewards/rejected": -6.465757846832275, + "step": 5096 + }, + { + "epoch": 0.79, + "learning_rate": 1.0408988018050728e-05, + "logits/chosen": -2.7099177837371826, + "logits/rejected": -1.6503992080688477, + "logps/chosen": -196.1062774658203, + "logps/rejected": -262.6697082519531, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7044700384140015, + "rewards/margins": 6.496267318725586, + "rewards/rejected": -7.200736999511719, + "step": 5097 + }, + { + "epoch": 0.79, + "learning_rate": 1.040825457751958e-05, + "logits/chosen": -1.7248615026474, + "logits/rejected": -2.453108787536621, + "logps/chosen": -225.75953674316406, + "logps/rejected": -339.31231689453125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2684689462184906, + "rewards/margins": 8.074989318847656, + "rewards/rejected": -7.806520462036133, + "step": 5098 + }, + { + "epoch": 0.79, + "learning_rate": 1.0407521136988432e-05, + "logits/chosen": -2.4089083671569824, + "logits/rejected": -2.9803318977355957, + "logps/chosen": -141.70948791503906, + "logps/rejected": -343.06719970703125, + "loss": 0.0903, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8131675720214844, + "rewards/margins": 4.48332405090332, + "rewards/rejected": -6.296491622924805, + "step": 5099 + }, + { + "epoch": 0.79, + "learning_rate": 1.0406787696457285e-05, + "logits/chosen": -2.9627580642700195, + "logits/rejected": -3.092421293258667, + "logps/chosen": -108.46914672851562, + "logps/rejected": -218.7672576904297, + "loss": 0.0182, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8465202450752258, + "rewards/margins": 4.154406547546387, + "rewards/rejected": -5.000926971435547, + "step": 5100 + }, + { + "epoch": 0.79, + "learning_rate": 1.0406054255926137e-05, + "logits/chosen": -2.9767556190490723, + "logits/rejected": -2.234654664993286, + "logps/chosen": -326.2651062011719, + "logps/rejected": -291.4145202636719, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.526471734046936, + "rewards/margins": 6.026082992553711, + "rewards/rejected": -7.552554130554199, + "step": 5101 + }, + { + "epoch": 0.79, + "learning_rate": 1.040532081539499e-05, + "logits/chosen": -2.5048580169677734, + "logits/rejected": -3.093336343765259, + "logps/chosen": -38.163909912109375, + "logps/rejected": -221.27439880371094, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35643070936203003, + "rewards/margins": 5.5157952308654785, + "rewards/rejected": -5.872225761413574, + "step": 5102 + }, + { + "epoch": 0.79, + "learning_rate": 1.0404587374863841e-05, + "logits/chosen": -1.944987416267395, + "logits/rejected": -3.2103934288024902, + "logps/chosen": -194.4559326171875, + "logps/rejected": -693.7730712890625, + "loss": 3.897, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.515135765075684, + "rewards/margins": 0.4795646667480469, + "rewards/rejected": -4.9947004318237305, + "step": 5103 + }, + { + "epoch": 0.79, + "learning_rate": 1.0403853934332693e-05, + "logits/chosen": -3.214991807937622, + "logits/rejected": -2.8254339694976807, + "logps/chosen": -203.4939727783203, + "logps/rejected": -136.160888671875, + "loss": 1.3146, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.605886936187744, + "rewards/margins": 0.7883589267730713, + "rewards/rejected": -3.3942458629608154, + "step": 5104 + }, + { + "epoch": 0.79, + "learning_rate": 1.0403120493801545e-05, + "logits/chosen": -2.2409651279449463, + "logits/rejected": -2.8840136528015137, + "logps/chosen": -51.536170959472656, + "logps/rejected": -512.8599853515625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33723488450050354, + "rewards/margins": 6.780431747436523, + "rewards/rejected": -7.117666721343994, + "step": 5105 + }, + { + "epoch": 0.79, + "learning_rate": 1.0402387053270397e-05, + "logits/chosen": -2.4149420261383057, + "logits/rejected": -2.8540992736816406, + "logps/chosen": -110.79389953613281, + "logps/rejected": -297.3692932128906, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4962509870529175, + "rewards/margins": 6.358896732330322, + "rewards/rejected": -7.855147361755371, + "step": 5106 + }, + { + "epoch": 0.79, + "learning_rate": 1.040165361273925e-05, + "logits/chosen": -3.0718727111816406, + "logits/rejected": -3.000561475753784, + "logps/chosen": -167.35574340820312, + "logps/rejected": -106.4605484008789, + "loss": 1.6275, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.865084648132324, + "rewards/margins": 1.3629755973815918, + "rewards/rejected": -4.228060245513916, + "step": 5107 + }, + { + "epoch": 0.79, + "learning_rate": 1.0400920172208102e-05, + "logits/chosen": -2.962038993835449, + "logits/rejected": -3.193391799926758, + "logps/chosen": -236.51419067382812, + "logps/rejected": -333.16949462890625, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.947893500328064, + "rewards/margins": 4.643563270568848, + "rewards/rejected": -5.591457366943359, + "step": 5108 + }, + { + "epoch": 0.79, + "learning_rate": 1.0400186731676956e-05, + "logits/chosen": -2.4187843799591064, + "logits/rejected": -3.178117513656616, + "logps/chosen": -72.73594665527344, + "logps/rejected": -229.2037811279297, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8904995322227478, + "rewards/margins": 4.950278282165527, + "rewards/rejected": -5.840777397155762, + "step": 5109 + }, + { + "epoch": 0.79, + "learning_rate": 1.0399453291145808e-05, + "logits/chosen": -3.0428550243377686, + "logits/rejected": -3.0263800621032715, + "logps/chosen": -114.3795394897461, + "logps/rejected": -242.6060791015625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2410308122634888, + "rewards/margins": 6.911931991577148, + "rewards/rejected": -8.152962684631348, + "step": 5110 + }, + { + "epoch": 0.79, + "learning_rate": 1.039871985061466e-05, + "logits/chosen": -1.768038034439087, + "logits/rejected": -3.0555214881896973, + "logps/chosen": -195.6186981201172, + "logps/rejected": -452.09661865234375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18478699028491974, + "rewards/margins": 6.5684967041015625, + "rewards/rejected": -6.753283977508545, + "step": 5111 + }, + { + "epoch": 0.8, + "learning_rate": 1.0397986410083511e-05, + "logits/chosen": -2.368135452270508, + "logits/rejected": -2.5424718856811523, + "logps/chosen": -89.05372619628906, + "logps/rejected": -404.1796875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6783946752548218, + "rewards/margins": 6.56601619720459, + "rewards/rejected": -7.244410514831543, + "step": 5112 + }, + { + "epoch": 0.8, + "learning_rate": 1.0397252969552363e-05, + "logits/chosen": -2.908909320831299, + "logits/rejected": -3.1596176624298096, + "logps/chosen": -117.0095443725586, + "logps/rejected": -283.9823913574219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4892864227294922, + "rewards/margins": 8.625375747680664, + "rewards/rejected": -9.114662170410156, + "step": 5113 + }, + { + "epoch": 0.8, + "learning_rate": 1.0396519529021215e-05, + "logits/chosen": -1.881504774093628, + "logits/rejected": -3.10774827003479, + "logps/chosen": -177.89031982421875, + "logps/rejected": -341.81988525390625, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.678591251373291, + "rewards/margins": 7.207176208496094, + "rewards/rejected": -8.885766983032227, + "step": 5114 + }, + { + "epoch": 0.8, + "learning_rate": 1.0395786088490067e-05, + "logits/chosen": -2.139207124710083, + "logits/rejected": -2.3771634101867676, + "logps/chosen": -282.95599365234375, + "logps/rejected": -391.2607116699219, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.045314073562622, + "rewards/margins": 6.832941055297852, + "rewards/rejected": -7.878255367279053, + "step": 5115 + }, + { + "epoch": 0.8, + "learning_rate": 1.0395052647958919e-05, + "logits/chosen": -1.277736783027649, + "logits/rejected": -3.069998025894165, + "logps/chosen": -155.2854766845703, + "logps/rejected": -585.9473876953125, + "loss": 2.5178, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.342974662780762, + "rewards/margins": -0.05655479431152344, + "rewards/rejected": -4.286419868469238, + "step": 5116 + }, + { + "epoch": 0.8, + "learning_rate": 1.0394319207427772e-05, + "logits/chosen": -2.937777280807495, + "logits/rejected": -3.0587685108184814, + "logps/chosen": -69.7834243774414, + "logps/rejected": -177.00296020507812, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5884168744087219, + "rewards/margins": 5.956281661987305, + "rewards/rejected": -6.544698715209961, + "step": 5117 + }, + { + "epoch": 0.8, + "learning_rate": 1.0393585766896624e-05, + "logits/chosen": -2.6103599071502686, + "logits/rejected": -2.9906208515167236, + "logps/chosen": -152.6085662841797, + "logps/rejected": -325.5771484375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4910118281841278, + "rewards/margins": 7.337714195251465, + "rewards/rejected": -6.846702575683594, + "step": 5118 + }, + { + "epoch": 0.8, + "learning_rate": 1.0392852326365476e-05, + "logits/chosen": -3.024355173110962, + "logits/rejected": -2.633373260498047, + "logps/chosen": -547.230712890625, + "logps/rejected": -551.2011108398438, + "loss": 3.2347, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1265952587127686, + "rewards/margins": -0.4291541576385498, + "rewards/rejected": -2.6974411010742188, + "step": 5119 + }, + { + "epoch": 0.8, + "learning_rate": 1.0392118885834328e-05, + "logits/chosen": -1.5326777696609497, + "logits/rejected": -3.0081489086151123, + "logps/chosen": -130.87594604492188, + "logps/rejected": -331.82830810546875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7399810552597046, + "rewards/margins": 6.364643096923828, + "rewards/rejected": -7.104624271392822, + "step": 5120 + }, + { + "epoch": 0.8, + "learning_rate": 1.039138544530318e-05, + "logits/chosen": -2.8032195568084717, + "logits/rejected": -3.019627094268799, + "logps/chosen": -397.54449462890625, + "logps/rejected": -554.20703125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.184665322303772, + "rewards/margins": 6.764840126037598, + "rewards/rejected": -7.949505805969238, + "step": 5121 + }, + { + "epoch": 0.8, + "learning_rate": 1.0390652004772032e-05, + "logits/chosen": -3.0659983158111572, + "logits/rejected": -2.1384620666503906, + "logps/chosen": -309.5517272949219, + "logps/rejected": -57.553401947021484, + "loss": 5.8669, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.911355018615723, + "rewards/margins": -5.8633270263671875, + "rewards/rejected": -0.04802751541137695, + "step": 5122 + }, + { + "epoch": 0.8, + "learning_rate": 1.0389918564240884e-05, + "logits/chosen": -3.1409428119659424, + "logits/rejected": -3.3110198974609375, + "logps/chosen": -314.0309753417969, + "logps/rejected": -564.79833984375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2165195345878601, + "rewards/margins": 7.045688152313232, + "rewards/rejected": -6.829168796539307, + "step": 5123 + }, + { + "epoch": 0.8, + "learning_rate": 1.0389185123709736e-05, + "logits/chosen": -1.7459287643432617, + "logits/rejected": -2.8610270023345947, + "logps/chosen": -111.93792724609375, + "logps/rejected": -268.7234802246094, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8899646997451782, + "rewards/margins": 6.356191635131836, + "rewards/rejected": -7.246156692504883, + "step": 5124 + }, + { + "epoch": 0.8, + "learning_rate": 1.0388451683178587e-05, + "logits/chosen": -2.9258430004119873, + "logits/rejected": -1.7352290153503418, + "logps/chosen": -250.67410278320312, + "logps/rejected": -115.37263488769531, + "loss": 2.9061, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9214606285095215, + "rewards/margins": -1.2537007331848145, + "rewards/rejected": -2.667759895324707, + "step": 5125 + }, + { + "epoch": 0.8, + "learning_rate": 1.0387718242647441e-05, + "logits/chosen": -2.598860740661621, + "logits/rejected": -3.1137657165527344, + "logps/chosen": -96.43465423583984, + "logps/rejected": -272.38177490234375, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4490509033203125, + "rewards/margins": 4.222119331359863, + "rewards/rejected": -5.671170234680176, + "step": 5126 + }, + { + "epoch": 0.8, + "learning_rate": 1.0386984802116293e-05, + "logits/chosen": -3.049196243286133, + "logits/rejected": -1.9982290267944336, + "logps/chosen": -555.821044921875, + "logps/rejected": -534.307373046875, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7200570106506348, + "rewards/margins": 8.62466049194336, + "rewards/rejected": -9.344717979431152, + "step": 5127 + }, + { + "epoch": 0.8, + "learning_rate": 1.0386251361585145e-05, + "logits/chosen": -2.7502036094665527, + "logits/rejected": -3.0189709663391113, + "logps/chosen": -276.50677490234375, + "logps/rejected": -344.9150085449219, + "loss": 0.2822, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5492403507232666, + "rewards/margins": 2.62870454788208, + "rewards/rejected": -4.177945137023926, + "step": 5128 + }, + { + "epoch": 0.8, + "learning_rate": 1.0385517921053997e-05, + "logits/chosen": -1.9499608278274536, + "logits/rejected": -2.920814037322998, + "logps/chosen": -84.55998229980469, + "logps/rejected": -250.6336669921875, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.539533257484436, + "rewards/margins": 5.094336032867432, + "rewards/rejected": -5.633869171142578, + "step": 5129 + }, + { + "epoch": 0.8, + "learning_rate": 1.0384784480522849e-05, + "logits/chosen": -3.129741668701172, + "logits/rejected": -2.7380988597869873, + "logps/chosen": -193.95196533203125, + "logps/rejected": -110.93724060058594, + "loss": 2.3562, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8046000003814697, + "rewards/margins": -0.22995543479919434, + "rewards/rejected": -2.5746445655822754, + "step": 5130 + }, + { + "epoch": 0.8, + "learning_rate": 1.03840510399917e-05, + "logits/chosen": -3.2068862915039062, + "logits/rejected": -3.030132293701172, + "logps/chosen": -400.1135559082031, + "logps/rejected": -373.7962341308594, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9153400659561157, + "rewards/margins": 5.173821926116943, + "rewards/rejected": -6.0891618728637695, + "step": 5131 + }, + { + "epoch": 0.8, + "learning_rate": 1.0383317599460552e-05, + "logits/chosen": -1.8772015571594238, + "logits/rejected": -2.867401361465454, + "logps/chosen": -223.9330291748047, + "logps/rejected": -240.49435424804688, + "loss": 0.3492, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.747364044189453, + "rewards/margins": 3.6284422874450684, + "rewards/rejected": -6.3758063316345215, + "step": 5132 + }, + { + "epoch": 0.8, + "learning_rate": 1.0382584158929404e-05, + "logits/chosen": -2.759932041168213, + "logits/rejected": -2.9829134941101074, + "logps/chosen": -321.1026611328125, + "logps/rejected": -251.50906372070312, + "loss": 0.0832, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5222108364105225, + "rewards/margins": 2.7548904418945312, + "rewards/rejected": -4.277101516723633, + "step": 5133 + }, + { + "epoch": 0.8, + "learning_rate": 1.0381850718398256e-05, + "logits/chosen": -2.0120620727539062, + "logits/rejected": -2.8906710147857666, + "logps/chosen": -180.093994140625, + "logps/rejected": -476.24737548828125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4656005799770355, + "rewards/margins": 9.557888984680176, + "rewards/rejected": -10.023488998413086, + "step": 5134 + }, + { + "epoch": 0.8, + "learning_rate": 1.038111727786711e-05, + "logits/chosen": -1.7477610111236572, + "logits/rejected": -3.0177083015441895, + "logps/chosen": -125.76043701171875, + "logps/rejected": -137.1276397705078, + "loss": 1.5795, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8288421630859375, + "rewards/margins": 1.406936764717102, + "rewards/rejected": -3.235779047012329, + "step": 5135 + }, + { + "epoch": 0.8, + "learning_rate": 1.0380383837335962e-05, + "logits/chosen": -2.081956624984741, + "logits/rejected": -2.499391794204712, + "logps/chosen": -1051.048828125, + "logps/rejected": -1115.1790771484375, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6410423517227173, + "rewards/margins": 7.508152008056641, + "rewards/rejected": -8.149194717407227, + "step": 5136 + }, + { + "epoch": 0.8, + "learning_rate": 1.0379650396804813e-05, + "logits/chosen": -3.133157968521118, + "logits/rejected": -2.8714098930358887, + "logps/chosen": -248.18734741210938, + "logps/rejected": -165.53872680664062, + "loss": 0.0324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40394288301467896, + "rewards/margins": 4.808963775634766, + "rewards/rejected": -5.212906837463379, + "step": 5137 + }, + { + "epoch": 0.8, + "learning_rate": 1.0378916956273665e-05, + "logits/chosen": -2.926657199859619, + "logits/rejected": -3.144054651260376, + "logps/chosen": -751.1617431640625, + "logps/rejected": -698.218505859375, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.130364179611206, + "rewards/margins": 5.994363784790039, + "rewards/rejected": -7.124727725982666, + "step": 5138 + }, + { + "epoch": 0.8, + "learning_rate": 1.0378183515742517e-05, + "logits/chosen": -2.715393543243408, + "logits/rejected": -2.6749002933502197, + "logps/chosen": -122.66514587402344, + "logps/rejected": -237.97103881835938, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0490963459014893, + "rewards/margins": 5.502264976501465, + "rewards/rejected": -7.551361083984375, + "step": 5139 + }, + { + "epoch": 0.8, + "learning_rate": 1.0377450075211369e-05, + "logits/chosen": -1.206445574760437, + "logits/rejected": -3.0290896892547607, + "logps/chosen": -111.58500671386719, + "logps/rejected": -375.65972900390625, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.161531925201416, + "rewards/margins": 5.936175346374512, + "rewards/rejected": -7.0977067947387695, + "step": 5140 + }, + { + "epoch": 0.8, + "learning_rate": 1.0376716634680223e-05, + "logits/chosen": -2.19118332862854, + "logits/rejected": -3.1443488597869873, + "logps/chosen": -132.73672485351562, + "logps/rejected": -390.7807922363281, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1580730378627777, + "rewards/margins": 6.8609819412231445, + "rewards/rejected": -7.019055366516113, + "step": 5141 + }, + { + "epoch": 0.8, + "learning_rate": 1.0375983194149074e-05, + "logits/chosen": -2.936838150024414, + "logits/rejected": -2.4006733894348145, + "logps/chosen": -130.55706787109375, + "logps/rejected": -127.43605041503906, + "loss": 0.0586, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.106663703918457, + "rewards/margins": 3.4649529457092285, + "rewards/rejected": -5.5716166496276855, + "step": 5142 + }, + { + "epoch": 0.8, + "learning_rate": 1.0375249753617926e-05, + "logits/chosen": -2.9568467140197754, + "logits/rejected": -3.237668037414551, + "logps/chosen": -45.05622863769531, + "logps/rejected": -187.31182861328125, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20056457817554474, + "rewards/margins": 5.627104759216309, + "rewards/rejected": -5.827669143676758, + "step": 5143 + }, + { + "epoch": 0.8, + "learning_rate": 1.037451631308678e-05, + "logits/chosen": -2.8701577186584473, + "logits/rejected": -2.9612278938293457, + "logps/chosen": -57.50038146972656, + "logps/rejected": -158.00054931640625, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.600388526916504, + "rewards/margins": 5.733617782592773, + "rewards/rejected": -7.334006309509277, + "step": 5144 + }, + { + "epoch": 0.8, + "learning_rate": 1.0373782872555632e-05, + "logits/chosen": -3.1375064849853516, + "logits/rejected": -2.53678822517395, + "logps/chosen": -484.34417724609375, + "logps/rejected": -369.5229797363281, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9819801449775696, + "rewards/margins": 4.7292351722717285, + "rewards/rejected": -5.711215019226074, + "step": 5145 + }, + { + "epoch": 0.8, + "learning_rate": 1.0373049432024484e-05, + "logits/chosen": -2.2393221855163574, + "logits/rejected": -3.0099146366119385, + "logps/chosen": -127.64634704589844, + "logps/rejected": -181.86227416992188, + "loss": 0.0499, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1937713623046875, + "rewards/margins": 3.0080761909484863, + "rewards/rejected": -4.201847553253174, + "step": 5146 + }, + { + "epoch": 0.8, + "learning_rate": 1.0372315991493336e-05, + "logits/chosen": -1.784135341644287, + "logits/rejected": -3.1877994537353516, + "logps/chosen": -248.98428344726562, + "logps/rejected": -789.6726684570312, + "loss": 1.7276, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.0152812004089355, + "rewards/margins": 2.2485249042510986, + "rewards/rejected": -7.263806343078613, + "step": 5147 + }, + { + "epoch": 0.8, + "learning_rate": 1.0371582550962187e-05, + "logits/chosen": -2.2835257053375244, + "logits/rejected": -2.744638204574585, + "logps/chosen": -216.45895385742188, + "logps/rejected": -355.3404541015625, + "loss": 0.1031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4201741218566895, + "rewards/margins": 4.276068687438965, + "rewards/rejected": -6.696242332458496, + "step": 5148 + }, + { + "epoch": 0.8, + "learning_rate": 1.037084911043104e-05, + "logits/chosen": -2.1116390228271484, + "logits/rejected": -3.0461554527282715, + "logps/chosen": -205.63919067382812, + "logps/rejected": -347.663818359375, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9655758142471313, + "rewards/margins": 4.948222637176514, + "rewards/rejected": -5.9137983322143555, + "step": 5149 + }, + { + "epoch": 0.8, + "learning_rate": 1.0370115669899891e-05, + "logits/chosen": -1.653791069984436, + "logits/rejected": -2.9041037559509277, + "logps/chosen": -187.57559204101562, + "logps/rejected": -345.97161865234375, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08838959038257599, + "rewards/margins": 5.948579788208008, + "rewards/rejected": -5.860189914703369, + "step": 5150 + }, + { + "epoch": 0.8, + "learning_rate": 1.0369382229368743e-05, + "logits/chosen": -3.1472275257110596, + "logits/rejected": -2.967797040939331, + "logps/chosen": -343.055908203125, + "logps/rejected": -139.47877502441406, + "loss": 0.039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2515884339809418, + "rewards/margins": 4.875133037567139, + "rewards/rejected": -4.623544692993164, + "step": 5151 + }, + { + "epoch": 0.8, + "learning_rate": 1.0368648788837595e-05, + "logits/chosen": -2.5314829349517822, + "logits/rejected": -2.8177695274353027, + "logps/chosen": -60.69085693359375, + "logps/rejected": -177.9379425048828, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3231241703033447, + "rewards/margins": 4.248247146606445, + "rewards/rejected": -5.571371555328369, + "step": 5152 + }, + { + "epoch": 0.8, + "learning_rate": 1.0367915348306449e-05, + "logits/chosen": -2.2009646892547607, + "logits/rejected": -2.6113502979278564, + "logps/chosen": -83.14842224121094, + "logps/rejected": -144.5714874267578, + "loss": 0.0602, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7432942390441895, + "rewards/margins": 2.783090591430664, + "rewards/rejected": -5.5263848304748535, + "step": 5153 + }, + { + "epoch": 0.8, + "learning_rate": 1.03671819077753e-05, + "logits/chosen": -3.209606647491455, + "logits/rejected": -2.528881788253784, + "logps/chosen": -684.0413208007812, + "logps/rejected": -806.82470703125, + "loss": 4.5426, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.990849494934082, + "rewards/margins": -0.8488421440124512, + "rewards/rejected": -4.142007350921631, + "step": 5154 + }, + { + "epoch": 0.8, + "learning_rate": 1.0366448467244152e-05, + "logits/chosen": -2.9643640518188477, + "logits/rejected": -1.4083077907562256, + "logps/chosen": -462.7438659667969, + "logps/rejected": -203.0670928955078, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30113527178764343, + "rewards/margins": 7.42691707611084, + "rewards/rejected": -7.125782012939453, + "step": 5155 + }, + { + "epoch": 0.8, + "learning_rate": 1.0365715026713004e-05, + "logits/chosen": -2.9425253868103027, + "logits/rejected": -3.196512460708618, + "logps/chosen": -50.688629150390625, + "logps/rejected": -190.5502166748047, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8424637317657471, + "rewards/margins": 5.250751495361328, + "rewards/rejected": -6.093214988708496, + "step": 5156 + }, + { + "epoch": 0.8, + "learning_rate": 1.0364981586181856e-05, + "logits/chosen": -3.107734203338623, + "logits/rejected": -3.009201765060425, + "logps/chosen": -173.42510986328125, + "logps/rejected": -204.6926727294922, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7909576892852783, + "rewards/margins": 5.050865650177002, + "rewards/rejected": -6.841823577880859, + "step": 5157 + }, + { + "epoch": 0.8, + "learning_rate": 1.0364248145650708e-05, + "logits/chosen": -2.1012990474700928, + "logits/rejected": -2.9514942169189453, + "logps/chosen": -48.51286697387695, + "logps/rejected": -200.29978942871094, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8503290414810181, + "rewards/margins": 5.520317077636719, + "rewards/rejected": -6.3706464767456055, + "step": 5158 + }, + { + "epoch": 0.8, + "learning_rate": 1.036351470511956e-05, + "logits/chosen": -2.726252317428589, + "logits/rejected": -2.939788818359375, + "logps/chosen": -66.33656311035156, + "logps/rejected": -173.94898986816406, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3855105638504028, + "rewards/margins": 5.3574981689453125, + "rewards/rejected": -6.743008613586426, + "step": 5159 + }, + { + "epoch": 0.8, + "learning_rate": 1.0362781264588412e-05, + "logits/chosen": -3.0137598514556885, + "logits/rejected": -2.7062032222747803, + "logps/chosen": -153.84642028808594, + "logps/rejected": -209.2772216796875, + "loss": 0.0596, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.278101682662964, + "rewards/margins": 4.681066989898682, + "rewards/rejected": -7.959168434143066, + "step": 5160 + }, + { + "epoch": 0.8, + "learning_rate": 1.0362047824057264e-05, + "logits/chosen": -2.8879201412200928, + "logits/rejected": -2.982936143875122, + "logps/chosen": -151.79864501953125, + "logps/rejected": -325.55517578125, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.904092788696289, + "rewards/margins": 5.761381149291992, + "rewards/rejected": -7.665473937988281, + "step": 5161 + }, + { + "epoch": 0.8, + "learning_rate": 1.0361314383526117e-05, + "logits/chosen": -2.911684036254883, + "logits/rejected": -2.566194772720337, + "logps/chosen": -442.2914123535156, + "logps/rejected": -187.99229431152344, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8554298877716064, + "rewards/margins": 6.1395978927612305, + "rewards/rejected": -6.995028018951416, + "step": 5162 + }, + { + "epoch": 0.8, + "learning_rate": 1.0360580942994969e-05, + "logits/chosen": -2.8997817039489746, + "logits/rejected": -3.1526730060577393, + "logps/chosen": -66.58800506591797, + "logps/rejected": -204.95755004882812, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7105122804641724, + "rewards/margins": 7.165755271911621, + "rewards/rejected": -8.876267433166504, + "step": 5163 + }, + { + "epoch": 0.8, + "learning_rate": 1.0359847502463821e-05, + "logits/chosen": -2.089513063430786, + "logits/rejected": -3.0275652408599854, + "logps/chosen": -63.97050476074219, + "logps/rejected": -310.26068115234375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.464078903198242, + "rewards/margins": 6.598627090454102, + "rewards/rejected": -9.062705993652344, + "step": 5164 + }, + { + "epoch": 0.8, + "learning_rate": 1.0359114061932673e-05, + "logits/chosen": -2.2583208084106445, + "logits/rejected": -2.8815054893493652, + "logps/chosen": -309.39312744140625, + "logps/rejected": -518.082275390625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7951675653457642, + "rewards/margins": 7.123866558074951, + "rewards/rejected": -8.919034004211426, + "step": 5165 + }, + { + "epoch": 0.8, + "learning_rate": 1.0358380621401525e-05, + "logits/chosen": -2.514699697494507, + "logits/rejected": -3.2043840885162354, + "logps/chosen": -131.77020263671875, + "logps/rejected": -280.9898376464844, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1883347034454346, + "rewards/margins": 7.350237846374512, + "rewards/rejected": -8.538572311401367, + "step": 5166 + }, + { + "epoch": 0.8, + "learning_rate": 1.0357647180870377e-05, + "logits/chosen": -1.587762713432312, + "logits/rejected": -2.5365898609161377, + "logps/chosen": -71.08068084716797, + "logps/rejected": -319.9527587890625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0554680824279785, + "rewards/margins": 7.182091236114502, + "rewards/rejected": -9.23755931854248, + "step": 5167 + }, + { + "epoch": 0.8, + "learning_rate": 1.0356913740339228e-05, + "logits/chosen": -1.4362131357192993, + "logits/rejected": -2.863517999649048, + "logps/chosen": -103.84583282470703, + "logps/rejected": -196.5259246826172, + "loss": 1.2832, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3339927196502686, + "rewards/margins": 2.0703678131103516, + "rewards/rejected": -5.404360771179199, + "step": 5168 + }, + { + "epoch": 0.8, + "learning_rate": 1.035618029980808e-05, + "logits/chosen": -3.1027915477752686, + "logits/rejected": -1.2309057712554932, + "logps/chosen": -350.5434875488281, + "logps/rejected": -87.69120025634766, + "loss": 2.6144, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.994699001312256, + "rewards/margins": -2.529005527496338, + "rewards/rejected": -1.465693712234497, + "step": 5169 + }, + { + "epoch": 0.8, + "learning_rate": 1.0355446859276932e-05, + "logits/chosen": -1.8889073133468628, + "logits/rejected": -3.225037097930908, + "logps/chosen": -38.482948303222656, + "logps/rejected": -339.80389404296875, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5261532068252563, + "rewards/margins": 5.326236248016357, + "rewards/rejected": -6.852389335632324, + "step": 5170 + }, + { + "epoch": 0.8, + "learning_rate": 1.0354713418745786e-05, + "logits/chosen": -3.1897408962249756, + "logits/rejected": -3.2129220962524414, + "logps/chosen": -354.1282043457031, + "logps/rejected": -342.276123046875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8353363275527954, + "rewards/margins": 7.208414554595947, + "rewards/rejected": -8.043750762939453, + "step": 5171 + }, + { + "epoch": 0.8, + "learning_rate": 1.0353979978214638e-05, + "logits/chosen": -2.6222801208496094, + "logits/rejected": -3.078758716583252, + "logps/chosen": -296.5538024902344, + "logps/rejected": -357.387451171875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028926849365234375, + "rewards/margins": 7.297623634338379, + "rewards/rejected": -7.2686967849731445, + "step": 5172 + }, + { + "epoch": 0.8, + "learning_rate": 1.035324653768349e-05, + "logits/chosen": -1.9080253839492798, + "logits/rejected": -3.1555285453796387, + "logps/chosen": -81.61383056640625, + "logps/rejected": -369.51287841796875, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7258682250976562, + "rewards/margins": 4.444164276123047, + "rewards/rejected": -5.170032501220703, + "step": 5173 + }, + { + "epoch": 0.8, + "learning_rate": 1.0352513097152341e-05, + "logits/chosen": -1.8892005681991577, + "logits/rejected": -3.123077154159546, + "logps/chosen": -40.531044006347656, + "logps/rejected": -250.9990692138672, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9038496017456055, + "rewards/margins": 5.3299455642700195, + "rewards/rejected": -8.233795166015625, + "step": 5174 + }, + { + "epoch": 0.8, + "learning_rate": 1.0351779656621195e-05, + "logits/chosen": -2.9322242736816406, + "logits/rejected": -3.1757140159606934, + "logps/chosen": -111.29981994628906, + "logps/rejected": -292.1899108886719, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7536571025848389, + "rewards/margins": 5.211899757385254, + "rewards/rejected": -6.965557098388672, + "step": 5175 + }, + { + "epoch": 0.8, + "learning_rate": 1.0351046216090047e-05, + "logits/chosen": -1.828748106956482, + "logits/rejected": -2.022542953491211, + "logps/chosen": -389.5072021484375, + "logps/rejected": -130.19049072265625, + "loss": 3.1485, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.766645908355713, + "rewards/margins": 0.010469436645507812, + "rewards/rejected": -4.777115345001221, + "step": 5176 + }, + { + "epoch": 0.81, + "learning_rate": 1.0350312775558899e-05, + "logits/chosen": -3.2168054580688477, + "logits/rejected": -2.671743392944336, + "logps/chosen": -189.98353576660156, + "logps/rejected": -238.00360107421875, + "loss": 1.4802, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7221970558166504, + "rewards/margins": 0.9348357915878296, + "rewards/rejected": -4.6570329666137695, + "step": 5177 + }, + { + "epoch": 0.81, + "learning_rate": 1.034957933502775e-05, + "logits/chosen": -2.8833184242248535, + "logits/rejected": -2.6648149490356445, + "logps/chosen": -174.88693237304688, + "logps/rejected": -343.74468994140625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8285102844238281, + "rewards/margins": 5.555022239685059, + "rewards/rejected": -6.383532524108887, + "step": 5178 + }, + { + "epoch": 0.81, + "learning_rate": 1.0348845894496602e-05, + "logits/chosen": -3.2022864818573, + "logits/rejected": -3.167093515396118, + "logps/chosen": -179.87246704101562, + "logps/rejected": -284.79168701171875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6728935241699219, + "rewards/margins": 5.511449813842773, + "rewards/rejected": -6.184343338012695, + "step": 5179 + }, + { + "epoch": 0.81, + "learning_rate": 1.0348112453965456e-05, + "logits/chosen": -2.8920094966888428, + "logits/rejected": -2.656604528427124, + "logps/chosen": -336.8231201171875, + "logps/rejected": -218.102294921875, + "loss": 6.1681, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.458314895629883, + "rewards/margins": -3.6389682292938232, + "rewards/rejected": -3.8193466663360596, + "step": 5180 + }, + { + "epoch": 0.81, + "learning_rate": 1.0347379013434308e-05, + "logits/chosen": -3.03920841217041, + "logits/rejected": -1.5034067630767822, + "logps/chosen": -244.51678466796875, + "logps/rejected": -110.86225891113281, + "loss": 0.6453, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.906642436981201, + "rewards/margins": 1.9385039806365967, + "rewards/rejected": -6.845146179199219, + "step": 5181 + }, + { + "epoch": 0.81, + "learning_rate": 1.034664557290316e-05, + "logits/chosen": -2.6721644401550293, + "logits/rejected": -2.6009743213653564, + "logps/chosen": -180.98410034179688, + "logps/rejected": -280.5229797363281, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.784912109375, + "rewards/margins": 8.447065353393555, + "rewards/rejected": -9.231977462768555, + "step": 5182 + }, + { + "epoch": 0.81, + "learning_rate": 1.0345912132372012e-05, + "logits/chosen": -1.0872858762741089, + "logits/rejected": -2.8941092491149902, + "logps/chosen": -39.33489227294922, + "logps/rejected": -204.0574951171875, + "loss": 0.0756, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8936518430709839, + "rewards/margins": 4.312311172485352, + "rewards/rejected": -6.205963134765625, + "step": 5183 + }, + { + "epoch": 0.81, + "learning_rate": 1.0345178691840864e-05, + "logits/chosen": -2.6770169734954834, + "logits/rejected": -2.8112118244171143, + "logps/chosen": -161.9451141357422, + "logps/rejected": -249.71890258789062, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7142720222473145, + "rewards/margins": 5.48081111907959, + "rewards/rejected": -8.195082664489746, + "step": 5184 + }, + { + "epoch": 0.81, + "learning_rate": 1.0344445251309715e-05, + "logits/chosen": -2.883169174194336, + "logits/rejected": -3.015808343887329, + "logps/chosen": -113.07723999023438, + "logps/rejected": -143.90634155273438, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0890135765075684, + "rewards/margins": 3.9931373596191406, + "rewards/rejected": -6.082150936126709, + "step": 5185 + }, + { + "epoch": 0.81, + "learning_rate": 1.0343711810778567e-05, + "logits/chosen": -3.173456907272339, + "logits/rejected": -2.2701237201690674, + "logps/chosen": -469.4208679199219, + "logps/rejected": -96.56610870361328, + "loss": 4.0007, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.191394329071045, + "rewards/margins": -3.947415351867676, + "rewards/rejected": -1.2439789772033691, + "step": 5186 + }, + { + "epoch": 0.81, + "learning_rate": 1.034297837024742e-05, + "logits/chosen": -2.994046926498413, + "logits/rejected": -3.21058988571167, + "logps/chosen": -134.2882843017578, + "logps/rejected": -94.39175415039062, + "loss": 2.9266, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.694187641143799, + "rewards/margins": -0.5707311630249023, + "rewards/rejected": -4.123456954956055, + "step": 5187 + }, + { + "epoch": 0.81, + "learning_rate": 1.0342244929716271e-05, + "logits/chosen": -3.189603090286255, + "logits/rejected": -3.1678619384765625, + "logps/chosen": -144.37281799316406, + "logps/rejected": -73.79913330078125, + "loss": 2.0143, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.60334587097168, + "rewards/margins": -0.31997478008270264, + "rewards/rejected": -5.2833709716796875, + "step": 5188 + }, + { + "epoch": 0.81, + "learning_rate": 1.0341511489185125e-05, + "logits/chosen": -2.551870346069336, + "logits/rejected": -2.831780195236206, + "logps/chosen": -93.81832885742188, + "logps/rejected": -255.13742065429688, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7067166566848755, + "rewards/margins": 5.6652069091796875, + "rewards/rejected": -7.371923446655273, + "step": 5189 + }, + { + "epoch": 0.81, + "learning_rate": 1.0340778048653977e-05, + "logits/chosen": -2.805293560028076, + "logits/rejected": -2.88025164604187, + "logps/chosen": -126.05430603027344, + "logps/rejected": -310.8998718261719, + "loss": 0.3352, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2291760444641113, + "rewards/margins": 6.1600565910339355, + "rewards/rejected": -9.389232635498047, + "step": 5190 + }, + { + "epoch": 0.81, + "learning_rate": 1.0340044608122828e-05, + "logits/chosen": -1.7883952856063843, + "logits/rejected": -3.085378885269165, + "logps/chosen": -87.31005096435547, + "logps/rejected": -318.8863525390625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7732784748077393, + "rewards/margins": 6.606203079223633, + "rewards/rejected": -8.37948226928711, + "step": 5191 + }, + { + "epoch": 0.81, + "learning_rate": 1.033931116759168e-05, + "logits/chosen": -2.3816678524017334, + "logits/rejected": -3.1011056900024414, + "logps/chosen": -175.2850799560547, + "logps/rejected": -315.4504699707031, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3440189361572266, + "rewards/margins": 5.083314895629883, + "rewards/rejected": -6.427333831787109, + "step": 5192 + }, + { + "epoch": 0.81, + "learning_rate": 1.0338577727060532e-05, + "logits/chosen": -2.1536617279052734, + "logits/rejected": -3.1117653846740723, + "logps/chosen": -167.18829345703125, + "logps/rejected": -361.1984558105469, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02162894979119301, + "rewards/margins": 6.462007522583008, + "rewards/rejected": -6.483636856079102, + "step": 5193 + }, + { + "epoch": 0.81, + "learning_rate": 1.0337844286529384e-05, + "logits/chosen": -1.490469217300415, + "logits/rejected": -2.583958387374878, + "logps/chosen": -235.62026977539062, + "logps/rejected": -290.6431884765625, + "loss": 1.7897, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.577322959899902, + "rewards/margins": -0.9537303447723389, + "rewards/rejected": -3.6235923767089844, + "step": 5194 + }, + { + "epoch": 0.81, + "learning_rate": 1.0337110845998236e-05, + "logits/chosen": -2.9242331981658936, + "logits/rejected": -3.1503326892852783, + "logps/chosen": -93.33822631835938, + "logps/rejected": -205.73385620117188, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.004547357559204, + "rewards/margins": 4.621373653411865, + "rewards/rejected": -5.625921249389648, + "step": 5195 + }, + { + "epoch": 0.81, + "learning_rate": 1.0336377405467088e-05, + "logits/chosen": -2.4780616760253906, + "logits/rejected": -2.963252067565918, + "logps/chosen": -213.3076629638672, + "logps/rejected": -253.4342498779297, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7428039908409119, + "rewards/margins": 5.706729888916016, + "rewards/rejected": -6.449533939361572, + "step": 5196 + }, + { + "epoch": 0.81, + "learning_rate": 1.033564396493594e-05, + "logits/chosen": -2.8558149337768555, + "logits/rejected": -1.6445214748382568, + "logps/chosen": -475.4825744628906, + "logps/rejected": -459.32269287109375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.022322177886963, + "rewards/margins": 6.266684055328369, + "rewards/rejected": -10.289006233215332, + "step": 5197 + }, + { + "epoch": 0.81, + "learning_rate": 1.0334910524404793e-05, + "logits/chosen": -2.8619728088378906, + "logits/rejected": -2.590392827987671, + "logps/chosen": -283.005859375, + "logps/rejected": -386.75262451171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1067781448364258, + "rewards/margins": 7.886135101318359, + "rewards/rejected": -8.992914199829102, + "step": 5198 + }, + { + "epoch": 0.81, + "learning_rate": 1.0334177083873645e-05, + "logits/chosen": -1.6975786685943604, + "logits/rejected": -2.9198460578918457, + "logps/chosen": -65.8561782836914, + "logps/rejected": -238.66061401367188, + "loss": 0.0485, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.853814125061035, + "rewards/margins": 3.996858835220337, + "rewards/rejected": -7.850672721862793, + "step": 5199 + }, + { + "epoch": 0.81, + "learning_rate": 1.0333443643342497e-05, + "logits/chosen": -1.5389372110366821, + "logits/rejected": -2.7302563190460205, + "logps/chosen": -104.49951934814453, + "logps/rejected": -360.477783203125, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.47239351272583, + "rewards/margins": 6.082784175872803, + "rewards/rejected": -8.555177688598633, + "step": 5200 + }, + { + "epoch": 0.81, + "learning_rate": 1.0332710202811349e-05, + "logits/chosen": -2.630350351333618, + "logits/rejected": -3.0947482585906982, + "logps/chosen": -578.3917236328125, + "logps/rejected": -621.0256958007812, + "loss": 0.0707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.786511242389679, + "rewards/margins": 4.819775581359863, + "rewards/rejected": -5.606287002563477, + "step": 5201 + }, + { + "epoch": 0.81, + "learning_rate": 1.03319767622802e-05, + "logits/chosen": -1.9835574626922607, + "logits/rejected": -2.8591866493225098, + "logps/chosen": -71.66114807128906, + "logps/rejected": -257.4963073730469, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4221646785736084, + "rewards/margins": 5.597903251647949, + "rewards/rejected": -9.02006721496582, + "step": 5202 + }, + { + "epoch": 0.81, + "learning_rate": 1.0331243321749053e-05, + "logits/chosen": -1.9743183851242065, + "logits/rejected": -2.9878714084625244, + "logps/chosen": -214.53042602539062, + "logps/rejected": -400.8148193359375, + "loss": 3.5766, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.494666576385498, + "rewards/margins": -0.7785391807556152, + "rewards/rejected": -2.716127395629883, + "step": 5203 + }, + { + "epoch": 0.81, + "learning_rate": 1.0330509881217904e-05, + "logits/chosen": -2.832643985748291, + "logits/rejected": -2.955497980117798, + "logps/chosen": -150.94100952148438, + "logps/rejected": -366.4786376953125, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.812596082687378, + "rewards/margins": 7.216757774353027, + "rewards/rejected": -9.029354095458984, + "step": 5204 + }, + { + "epoch": 0.81, + "learning_rate": 1.0329776440686756e-05, + "logits/chosen": -2.7764110565185547, + "logits/rejected": -3.026486396789551, + "logps/chosen": -223.4725799560547, + "logps/rejected": -349.3314208984375, + "loss": 0.0602, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5114033222198486, + "rewards/margins": 4.397483825683594, + "rewards/rejected": -4.908886909484863, + "step": 5205 + }, + { + "epoch": 0.81, + "learning_rate": 1.032904300015561e-05, + "logits/chosen": -3.064105272293091, + "logits/rejected": -3.113131046295166, + "logps/chosen": -550.0872192382812, + "logps/rejected": -469.4283447265625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36220091581344604, + "rewards/margins": 5.478100776672363, + "rewards/rejected": -5.840301513671875, + "step": 5206 + }, + { + "epoch": 0.81, + "learning_rate": 1.0328309559624462e-05, + "logits/chosen": -3.1091859340667725, + "logits/rejected": -2.862492084503174, + "logps/chosen": -751.129150390625, + "logps/rejected": -604.456787109375, + "loss": 4.2072, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.4550933837890625, + "rewards/margins": -1.1540114879608154, + "rewards/rejected": -3.301081895828247, + "step": 5207 + }, + { + "epoch": 0.81, + "learning_rate": 1.0327576119093314e-05, + "logits/chosen": -2.970745801925659, + "logits/rejected": -3.1456098556518555, + "logps/chosen": -175.95069885253906, + "logps/rejected": -316.3287048339844, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7700991034507751, + "rewards/margins": 6.992307662963867, + "rewards/rejected": -7.762407302856445, + "step": 5208 + }, + { + "epoch": 0.81, + "learning_rate": 1.0326842678562167e-05, + "logits/chosen": -2.8081483840942383, + "logits/rejected": -3.2565758228302, + "logps/chosen": -29.91779327392578, + "logps/rejected": -145.44293212890625, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9683419466018677, + "rewards/margins": 4.262363910675049, + "rewards/rejected": -6.230705738067627, + "step": 5209 + }, + { + "epoch": 0.81, + "learning_rate": 1.032610923803102e-05, + "logits/chosen": -3.15887713432312, + "logits/rejected": -2.934401750564575, + "logps/chosen": -156.6192626953125, + "logps/rejected": -96.97571563720703, + "loss": 2.9435, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.343757629394531, + "rewards/margins": -2.880436897277832, + "rewards/rejected": -2.4633212089538574, + "step": 5210 + }, + { + "epoch": 0.81, + "learning_rate": 1.0325375797499871e-05, + "logits/chosen": -2.5321054458618164, + "logits/rejected": -2.9407951831817627, + "logps/chosen": -92.79913330078125, + "logps/rejected": -296.3926696777344, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6859543323516846, + "rewards/margins": 6.239886283874512, + "rewards/rejected": -7.925840854644775, + "step": 5211 + }, + { + "epoch": 0.81, + "learning_rate": 1.0324642356968723e-05, + "logits/chosen": -2.783334493637085, + "logits/rejected": -3.006533622741699, + "logps/chosen": -252.5050811767578, + "logps/rejected": -372.986083984375, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8965258598327637, + "rewards/margins": 6.527834892272949, + "rewards/rejected": -8.424361228942871, + "step": 5212 + }, + { + "epoch": 0.81, + "learning_rate": 1.0323908916437575e-05, + "logits/chosen": -3.04666805267334, + "logits/rejected": -3.149930477142334, + "logps/chosen": -141.24635314941406, + "logps/rejected": -218.53591918945312, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8752617239952087, + "rewards/margins": 5.142669200897217, + "rewards/rejected": -6.01793098449707, + "step": 5213 + }, + { + "epoch": 0.81, + "learning_rate": 1.0323175475906427e-05, + "logits/chosen": -3.1254515647888184, + "logits/rejected": -3.2117395401000977, + "logps/chosen": -452.55078125, + "logps/rejected": -438.56884765625, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.516448974609375, + "rewards/margins": 5.973818778991699, + "rewards/rejected": -8.490267753601074, + "step": 5214 + }, + { + "epoch": 0.81, + "learning_rate": 1.032244203537528e-05, + "logits/chosen": -3.230024814605713, + "logits/rejected": -2.4755210876464844, + "logps/chosen": -206.15904235839844, + "logps/rejected": -144.22230529785156, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9639520049095154, + "rewards/margins": 5.916385650634766, + "rewards/rejected": -6.880337715148926, + "step": 5215 + }, + { + "epoch": 0.81, + "learning_rate": 1.0321708594844132e-05, + "logits/chosen": -2.9528982639312744, + "logits/rejected": -2.798699140548706, + "logps/chosen": -395.2051696777344, + "logps/rejected": -446.9020080566406, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0870990753173828, + "rewards/margins": 5.954004287719727, + "rewards/rejected": -7.041103363037109, + "step": 5216 + }, + { + "epoch": 0.81, + "learning_rate": 1.0320975154312984e-05, + "logits/chosen": -2.763561725616455, + "logits/rejected": -3.0367841720581055, + "logps/chosen": -195.19400024414062, + "logps/rejected": -289.0885009765625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10490532219409943, + "rewards/margins": 6.88967227935791, + "rewards/rejected": -6.994577407836914, + "step": 5217 + }, + { + "epoch": 0.81, + "learning_rate": 1.0320241713781836e-05, + "logits/chosen": -2.2858192920684814, + "logits/rejected": -2.970539093017578, + "logps/chosen": -160.2064208984375, + "logps/rejected": -272.76959228515625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5617591738700867, + "rewards/margins": 7.461521148681641, + "rewards/rejected": -8.02328109741211, + "step": 5218 + }, + { + "epoch": 0.81, + "learning_rate": 1.0319508273250688e-05, + "logits/chosen": -3.083336591720581, + "logits/rejected": -3.207221746444702, + "logps/chosen": -520.51904296875, + "logps/rejected": -640.7456665039062, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4729981422424316, + "rewards/margins": 6.509432792663574, + "rewards/rejected": -7.982430934906006, + "step": 5219 + }, + { + "epoch": 0.81, + "learning_rate": 1.031877483271954e-05, + "logits/chosen": -2.676024913787842, + "logits/rejected": -3.0841081142425537, + "logps/chosen": -115.31919860839844, + "logps/rejected": -180.28094482421875, + "loss": 0.0562, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.241626501083374, + "rewards/margins": 4.04063606262207, + "rewards/rejected": -6.282262802124023, + "step": 5220 + }, + { + "epoch": 0.81, + "learning_rate": 1.0318041392188392e-05, + "logits/chosen": -2.8977067470550537, + "logits/rejected": -2.3256237506866455, + "logps/chosen": -287.91278076171875, + "logps/rejected": -430.322998046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.529904842376709, + "rewards/margins": 8.635364532470703, + "rewards/rejected": -10.165268898010254, + "step": 5221 + }, + { + "epoch": 0.81, + "learning_rate": 1.0317307951657243e-05, + "logits/chosen": -2.9896240234375, + "logits/rejected": -3.0642521381378174, + "logps/chosen": -225.5759735107422, + "logps/rejected": -340.9523620605469, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.184390664100647, + "rewards/margins": 6.410879135131836, + "rewards/rejected": -7.595269680023193, + "step": 5222 + }, + { + "epoch": 0.81, + "learning_rate": 1.0316574511126095e-05, + "logits/chosen": -1.3601802587509155, + "logits/rejected": -2.6562626361846924, + "logps/chosen": -116.51990509033203, + "logps/rejected": -162.0948486328125, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7250242233276367, + "rewards/margins": 5.478366374969482, + "rewards/rejected": -7.203390598297119, + "step": 5223 + }, + { + "epoch": 0.81, + "learning_rate": 1.0315841070594949e-05, + "logits/chosen": -2.642894744873047, + "logits/rejected": -3.0094025135040283, + "logps/chosen": -121.14077758789062, + "logps/rejected": -231.9166259765625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1579132080078125, + "rewards/margins": 8.829813957214355, + "rewards/rejected": -10.987727165222168, + "step": 5224 + }, + { + "epoch": 0.81, + "learning_rate": 1.03151076300638e-05, + "logits/chosen": -1.3908170461654663, + "logits/rejected": -2.2115132808685303, + "logps/chosen": -303.2015075683594, + "logps/rejected": -466.6378173828125, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.339517116546631, + "rewards/margins": 5.018963813781738, + "rewards/rejected": -7.358481407165527, + "step": 5225 + }, + { + "epoch": 0.81, + "learning_rate": 1.0314374189532653e-05, + "logits/chosen": -3.176797866821289, + "logits/rejected": -2.7329554557800293, + "logps/chosen": -151.25245666503906, + "logps/rejected": -172.5784454345703, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1987144947052002, + "rewards/margins": 5.558327674865723, + "rewards/rejected": -6.757042407989502, + "step": 5226 + }, + { + "epoch": 0.81, + "learning_rate": 1.0313640749001504e-05, + "logits/chosen": -2.893127679824829, + "logits/rejected": -3.006789207458496, + "logps/chosen": -192.13165283203125, + "logps/rejected": -311.48175048828125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5877747535705566, + "rewards/margins": 8.234209060668945, + "rewards/rejected": -9.82198429107666, + "step": 5227 + }, + { + "epoch": 0.81, + "learning_rate": 1.0312907308470356e-05, + "logits/chosen": -1.791426658630371, + "logits/rejected": -3.1725234985351562, + "logps/chosen": -343.1538391113281, + "logps/rejected": -487.07867431640625, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31190186738967896, + "rewards/margins": 6.080569267272949, + "rewards/rejected": -6.3924713134765625, + "step": 5228 + }, + { + "epoch": 0.81, + "learning_rate": 1.0312173867939208e-05, + "logits/chosen": -2.815202474594116, + "logits/rejected": -3.0819551944732666, + "logps/chosen": -108.48262023925781, + "logps/rejected": -240.54107666015625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.18500554561615, + "rewards/margins": 7.823777675628662, + "rewards/rejected": -9.008783340454102, + "step": 5229 + }, + { + "epoch": 0.81, + "learning_rate": 1.031144042740806e-05, + "logits/chosen": -3.2235755920410156, + "logits/rejected": -2.3773505687713623, + "logps/chosen": -361.8641052246094, + "logps/rejected": -274.953857421875, + "loss": 1.4736, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4022622108459473, + "rewards/margins": 0.24543237686157227, + "rewards/rejected": -3.6476945877075195, + "step": 5230 + }, + { + "epoch": 0.81, + "learning_rate": 1.0310706986876912e-05, + "logits/chosen": -1.1193137168884277, + "logits/rejected": -2.7027623653411865, + "logps/chosen": -234.61138916015625, + "logps/rejected": -396.0534362792969, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.149013876914978, + "rewards/margins": 6.132753849029541, + "rewards/rejected": -7.281767845153809, + "step": 5231 + }, + { + "epoch": 0.81, + "learning_rate": 1.0309973546345764e-05, + "logits/chosen": -1.9271681308746338, + "logits/rejected": -2.981316566467285, + "logps/chosen": -108.79463195800781, + "logps/rejected": -314.02813720703125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30334436893463135, + "rewards/margins": 6.126398086547852, + "rewards/rejected": -6.429742813110352, + "step": 5232 + }, + { + "epoch": 0.81, + "learning_rate": 1.0309240105814617e-05, + "logits/chosen": -3.0872528553009033, + "logits/rejected": -2.2439606189727783, + "logps/chosen": -340.8792724609375, + "logps/rejected": -346.21356201171875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5354225039482117, + "rewards/margins": 6.278846740722656, + "rewards/rejected": -6.814269065856934, + "step": 5233 + }, + { + "epoch": 0.81, + "learning_rate": 1.030850666528347e-05, + "logits/chosen": -2.7820513248443604, + "logits/rejected": -2.686522960662842, + "logps/chosen": -423.99639892578125, + "logps/rejected": -456.94256591796875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0795321464538574, + "rewards/margins": 6.47825813293457, + "rewards/rejected": -8.557790756225586, + "step": 5234 + }, + { + "epoch": 0.81, + "learning_rate": 1.0307773224752321e-05, + "logits/chosen": -2.7677500247955322, + "logits/rejected": -3.184816837310791, + "logps/chosen": -91.43280792236328, + "logps/rejected": -155.17037963867188, + "loss": 1.2516, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.400261878967285, + "rewards/margins": 2.612700939178467, + "rewards/rejected": -7.012962341308594, + "step": 5235 + }, + { + "epoch": 0.81, + "learning_rate": 1.0307039784221173e-05, + "logits/chosen": -2.1100401878356934, + "logits/rejected": -3.175386905670166, + "logps/chosen": -322.5295104980469, + "logps/rejected": -486.26739501953125, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17386740446090698, + "rewards/margins": 5.660486221313477, + "rewards/rejected": -5.834353446960449, + "step": 5236 + }, + { + "epoch": 0.81, + "learning_rate": 1.0306306343690025e-05, + "logits/chosen": -2.2411890029907227, + "logits/rejected": -3.1079773902893066, + "logps/chosen": -88.08433532714844, + "logps/rejected": -272.4406433105469, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0989792346954346, + "rewards/margins": 6.215956687927246, + "rewards/rejected": -7.31493616104126, + "step": 5237 + }, + { + "epoch": 0.81, + "learning_rate": 1.0305572903158877e-05, + "logits/chosen": -3.1938281059265137, + "logits/rejected": -3.2979607582092285, + "logps/chosen": -87.90946960449219, + "logps/rejected": -156.492431640625, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0682694911956787, + "rewards/margins": 4.5040459632873535, + "rewards/rejected": -5.572315216064453, + "step": 5238 + }, + { + "epoch": 0.81, + "learning_rate": 1.0304839462627729e-05, + "logits/chosen": -2.7652671337127686, + "logits/rejected": -3.085820436477661, + "logps/chosen": -549.8391723632812, + "logps/rejected": -477.5726623535156, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8063161373138428, + "rewards/margins": 6.421710968017578, + "rewards/rejected": -7.228026866912842, + "step": 5239 + }, + { + "epoch": 0.81, + "learning_rate": 1.030410602209658e-05, + "logits/chosen": -2.2813351154327393, + "logits/rejected": -3.17449951171875, + "logps/chosen": -253.80963134765625, + "logps/rejected": -469.92578125, + "loss": 2.7724, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6437199115753174, + "rewards/margins": 0.0784754753112793, + "rewards/rejected": -3.7221953868865967, + "step": 5240 + }, + { + "epoch": 0.82, + "learning_rate": 1.0303372581565434e-05, + "logits/chosen": -1.272059679031372, + "logits/rejected": -3.0404880046844482, + "logps/chosen": -107.33039855957031, + "logps/rejected": -438.3363342285156, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.25396990776062, + "rewards/margins": 7.335158348083496, + "rewards/rejected": -9.589128494262695, + "step": 5241 + }, + { + "epoch": 0.82, + "learning_rate": 1.0302639141034286e-05, + "logits/chosen": -3.120285987854004, + "logits/rejected": -2.980053663253784, + "logps/chosen": -162.9482879638672, + "logps/rejected": -164.87542724609375, + "loss": 2.4662, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.618529319763184, + "rewards/margins": 0.1541438102722168, + "rewards/rejected": -4.7726731300354, + "step": 5242 + }, + { + "epoch": 0.82, + "learning_rate": 1.030190570050314e-05, + "logits/chosen": -3.047894239425659, + "logits/rejected": -1.9238648414611816, + "logps/chosen": -578.774169921875, + "logps/rejected": -402.31768798828125, + "loss": 2.3882, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9304184913635254, + "rewards/margins": 2.3362183570861816, + "rewards/rejected": -5.266636371612549, + "step": 5243 + }, + { + "epoch": 0.82, + "learning_rate": 1.0301172259971991e-05, + "logits/chosen": -2.6323580741882324, + "logits/rejected": -3.1070759296417236, + "logps/chosen": -78.15859985351562, + "logps/rejected": -221.56768798828125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7109614610671997, + "rewards/margins": 5.949606895446777, + "rewards/rejected": -6.6605682373046875, + "step": 5244 + }, + { + "epoch": 0.82, + "learning_rate": 1.0300438819440843e-05, + "logits/chosen": -2.7739272117614746, + "logits/rejected": -3.2845988273620605, + "logps/chosen": -42.85432434082031, + "logps/rejected": -162.9092559814453, + "loss": 0.0777, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7759205102920532, + "rewards/margins": 3.6046323776245117, + "rewards/rejected": -5.380552768707275, + "step": 5245 + }, + { + "epoch": 0.82, + "learning_rate": 1.0299705378909695e-05, + "logits/chosen": -1.702894926071167, + "logits/rejected": -2.8824663162231445, + "logps/chosen": -80.47872161865234, + "logps/rejected": -249.72772216796875, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0762935876846313, + "rewards/margins": 5.637275218963623, + "rewards/rejected": -6.713568687438965, + "step": 5246 + }, + { + "epoch": 0.82, + "learning_rate": 1.0298971938378547e-05, + "logits/chosen": -1.2339587211608887, + "logits/rejected": -1.5859572887420654, + "logps/chosen": -63.72644805908203, + "logps/rejected": -306.218017578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3381330966949463, + "rewards/margins": 9.36789321899414, + "rewards/rejected": -10.706026077270508, + "step": 5247 + }, + { + "epoch": 0.82, + "learning_rate": 1.0298238497847399e-05, + "logits/chosen": -2.48771333694458, + "logits/rejected": -2.827791213989258, + "logps/chosen": -491.87347412109375, + "logps/rejected": -454.56219482421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37817078828811646, + "rewards/margins": 8.24908447265625, + "rewards/rejected": -8.6272554397583, + "step": 5248 + }, + { + "epoch": 0.82, + "learning_rate": 1.0297505057316251e-05, + "logits/chosen": -1.5856081247329712, + "logits/rejected": -2.841456413269043, + "logps/chosen": -80.4508056640625, + "logps/rejected": -425.593017578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1209572553634644, + "rewards/margins": 11.864086151123047, + "rewards/rejected": -12.9850435256958, + "step": 5249 + }, + { + "epoch": 0.82, + "learning_rate": 1.0296771616785103e-05, + "logits/chosen": -1.889274001121521, + "logits/rejected": -3.0811033248901367, + "logps/chosen": -147.52313232421875, + "logps/rejected": -424.8768005371094, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20814095437526703, + "rewards/margins": 9.349418640136719, + "rewards/rejected": -9.557559967041016, + "step": 5250 + }, + { + "epoch": 0.82, + "learning_rate": 1.0296038176253956e-05, + "logits/chosen": -1.7983003854751587, + "logits/rejected": -2.8663930892944336, + "logps/chosen": -48.29584503173828, + "logps/rejected": -249.48965454101562, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5981854200363159, + "rewards/margins": 6.695853233337402, + "rewards/rejected": -7.294038772583008, + "step": 5251 + }, + { + "epoch": 0.82, + "learning_rate": 1.0295304735722808e-05, + "logits/chosen": -1.7944114208221436, + "logits/rejected": -2.464287281036377, + "logps/chosen": -344.8475341796875, + "logps/rejected": -459.61273193359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8863083124160767, + "rewards/margins": 10.28443431854248, + "rewards/rejected": -11.170742988586426, + "step": 5252 + }, + { + "epoch": 0.82, + "learning_rate": 1.029457129519166e-05, + "logits/chosen": -2.8879239559173584, + "logits/rejected": -3.1440093517303467, + "logps/chosen": -34.745277404785156, + "logps/rejected": -134.91514587402344, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4216556549072266, + "rewards/margins": 4.412649631500244, + "rewards/rejected": -5.834305286407471, + "step": 5253 + }, + { + "epoch": 0.82, + "learning_rate": 1.0293837854660512e-05, + "logits/chosen": -3.035658121109009, + "logits/rejected": -2.205920457839966, + "logps/chosen": -158.79013061523438, + "logps/rejected": -128.93643188476562, + "loss": 0.3489, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.514232158660889, + "rewards/margins": 1.2327873706817627, + "rewards/rejected": -6.7470197677612305, + "step": 5254 + }, + { + "epoch": 0.82, + "learning_rate": 1.0293104414129364e-05, + "logits/chosen": -1.861271619796753, + "logits/rejected": -2.9939279556274414, + "logps/chosen": -309.88189697265625, + "logps/rejected": -417.1464538574219, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.160359263420105, + "rewards/margins": 7.750759124755859, + "rewards/rejected": -8.911118507385254, + "step": 5255 + }, + { + "epoch": 0.82, + "learning_rate": 1.0292370973598216e-05, + "logits/chosen": -3.0420987606048584, + "logits/rejected": -1.2956284284591675, + "logps/chosen": -459.80010986328125, + "logps/rejected": -198.0557861328125, + "loss": 1.9783, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.776419162750244, + "rewards/margins": 1.7077393531799316, + "rewards/rejected": -6.484158515930176, + "step": 5256 + }, + { + "epoch": 0.82, + "learning_rate": 1.0291637533067068e-05, + "logits/chosen": -2.637507915496826, + "logits/rejected": -2.9707865715026855, + "logps/chosen": -416.9535217285156, + "logps/rejected": -348.7182922363281, + "loss": 0.0528, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7716480493545532, + "rewards/margins": 4.607273101806641, + "rewards/rejected": -5.378920555114746, + "step": 5257 + }, + { + "epoch": 0.82, + "learning_rate": 1.029090409253592e-05, + "logits/chosen": -1.48587965965271, + "logits/rejected": -2.655441999435425, + "logps/chosen": -197.2230224609375, + "logps/rejected": -393.59063720703125, + "loss": 0.0558, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3486130237579346, + "rewards/margins": 4.380007266998291, + "rewards/rejected": -5.728620529174805, + "step": 5258 + }, + { + "epoch": 0.82, + "learning_rate": 1.0290170652004771e-05, + "logits/chosen": -2.5404129028320312, + "logits/rejected": -3.066826820373535, + "logps/chosen": -78.59085083007812, + "logps/rejected": -92.26443481445312, + "loss": 3.9732, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.441015243530273, + "rewards/margins": 0.4467041492462158, + "rewards/rejected": -4.88771915435791, + "step": 5259 + }, + { + "epoch": 0.82, + "learning_rate": 1.0289437211473625e-05, + "logits/chosen": -2.9698257446289062, + "logits/rejected": -3.203789234161377, + "logps/chosen": -140.50628662109375, + "logps/rejected": -290.64141845703125, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.982395589351654, + "rewards/margins": 4.59181547164917, + "rewards/rejected": -5.574211120605469, + "step": 5260 + }, + { + "epoch": 0.82, + "learning_rate": 1.0288703770942477e-05, + "logits/chosen": -1.8533819913864136, + "logits/rejected": -2.3346564769744873, + "logps/chosen": -88.45310974121094, + "logps/rejected": -267.8846130371094, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4767690598964691, + "rewards/margins": 8.476362228393555, + "rewards/rejected": -8.953131675720215, + "step": 5261 + }, + { + "epoch": 0.82, + "learning_rate": 1.0287970330411329e-05, + "logits/chosen": -3.0031020641326904, + "logits/rejected": -2.50529408454895, + "logps/chosen": -451.1943054199219, + "logps/rejected": -371.87103271484375, + "loss": 0.0253, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5371464490890503, + "rewards/margins": 5.597997188568115, + "rewards/rejected": -7.135143280029297, + "step": 5262 + }, + { + "epoch": 0.82, + "learning_rate": 1.028723688988018e-05, + "logits/chosen": -2.7345144748687744, + "logits/rejected": -3.021448850631714, + "logps/chosen": -351.1573486328125, + "logps/rejected": -417.135009765625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.843239963054657, + "rewards/margins": 5.947154998779297, + "rewards/rejected": -6.7903947830200195, + "step": 5263 + }, + { + "epoch": 0.82, + "learning_rate": 1.0286503449349032e-05, + "logits/chosen": -2.876946210861206, + "logits/rejected": -3.0379080772399902, + "logps/chosen": -192.04945373535156, + "logps/rejected": -417.0860290527344, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9929153919219971, + "rewards/margins": 7.277162551879883, + "rewards/rejected": -8.2700777053833, + "step": 5264 + }, + { + "epoch": 0.82, + "learning_rate": 1.0285770008817884e-05, + "logits/chosen": -2.158860445022583, + "logits/rejected": -3.0179102420806885, + "logps/chosen": -335.3354187011719, + "logps/rejected": -843.360107421875, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2512240409851074, + "rewards/margins": 6.60526704788208, + "rewards/rejected": -8.856491088867188, + "step": 5265 + }, + { + "epoch": 0.82, + "learning_rate": 1.0285036568286736e-05, + "logits/chosen": -2.8818140029907227, + "logits/rejected": -2.5270988941192627, + "logps/chosen": -242.7763671875, + "logps/rejected": -189.26052856445312, + "loss": 0.6731, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.07379150390625, + "rewards/margins": 2.7057700157165527, + "rewards/rejected": -6.779561519622803, + "step": 5266 + }, + { + "epoch": 0.82, + "learning_rate": 1.0284303127755588e-05, + "logits/chosen": -3.2067530155181885, + "logits/rejected": -3.2849676609039307, + "logps/chosen": -30.741058349609375, + "logps/rejected": -116.37312316894531, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3719826936721802, + "rewards/margins": 6.367773056030273, + "rewards/rejected": -7.739756107330322, + "step": 5267 + }, + { + "epoch": 0.82, + "learning_rate": 1.028356968722444e-05, + "logits/chosen": -1.6030367612838745, + "logits/rejected": -3.0023512840270996, + "logps/chosen": -341.00921630859375, + "logps/rejected": -493.0982360839844, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4864106178283691, + "rewards/margins": 4.498501777648926, + "rewards/rejected": -5.984911918640137, + "step": 5268 + }, + { + "epoch": 0.82, + "learning_rate": 1.0282836246693294e-05, + "logits/chosen": -3.0117580890655518, + "logits/rejected": -2.66378116607666, + "logps/chosen": -795.1178588867188, + "logps/rejected": -694.9072265625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0805786848068237, + "rewards/margins": 6.9763689041137695, + "rewards/rejected": -8.056947708129883, + "step": 5269 + }, + { + "epoch": 0.82, + "learning_rate": 1.0282102806162145e-05, + "logits/chosen": -3.0314478874206543, + "logits/rejected": -2.8004848957061768, + "logps/chosen": -632.0780029296875, + "logps/rejected": -572.8331298828125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.74947589635849, + "rewards/margins": 7.014527320861816, + "rewards/rejected": -7.764003276824951, + "step": 5270 + }, + { + "epoch": 0.82, + "learning_rate": 1.0281369365630997e-05, + "logits/chosen": -1.5224512815475464, + "logits/rejected": -2.4215543270111084, + "logps/chosen": -282.017822265625, + "logps/rejected": -718.214111328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6609108448028564, + "rewards/margins": 9.852161407470703, + "rewards/rejected": -11.51307201385498, + "step": 5271 + }, + { + "epoch": 0.82, + "learning_rate": 1.028063592509985e-05, + "logits/chosen": -2.92586612701416, + "logits/rejected": -2.1527819633483887, + "logps/chosen": -359.00714111328125, + "logps/rejected": -187.16448974609375, + "loss": 1.268, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8145720958709717, + "rewards/margins": 0.5726391077041626, + "rewards/rejected": -3.387211322784424, + "step": 5272 + }, + { + "epoch": 0.82, + "learning_rate": 1.0279902484568701e-05, + "logits/chosen": -1.322763204574585, + "logits/rejected": -2.8959007263183594, + "logps/chosen": -117.7288818359375, + "logps/rejected": -288.5733642578125, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.880247712135315, + "rewards/margins": 5.406440734863281, + "rewards/rejected": -7.286688327789307, + "step": 5273 + }, + { + "epoch": 0.82, + "learning_rate": 1.0279169044037553e-05, + "logits/chosen": -1.8870385885238647, + "logits/rejected": -2.739382743835449, + "logps/chosen": -203.56201171875, + "logps/rejected": -393.6481628417969, + "loss": 4.8594, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.046535968780518, + "rewards/margins": -1.5692617893218994, + "rewards/rejected": -4.477274417877197, + "step": 5274 + }, + { + "epoch": 0.82, + "learning_rate": 1.0278435603506406e-05, + "logits/chosen": -3.003171443939209, + "logits/rejected": -1.5159112215042114, + "logps/chosen": -652.701171875, + "logps/rejected": -256.7723083496094, + "loss": 0.7595, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9823358058929443, + "rewards/margins": 3.3484318256378174, + "rewards/rejected": -6.330767631530762, + "step": 5275 + }, + { + "epoch": 0.82, + "learning_rate": 1.0277702162975258e-05, + "logits/chosen": -2.648810386657715, + "logits/rejected": -2.9437365531921387, + "logps/chosen": -113.6655044555664, + "logps/rejected": -496.0312194824219, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7474312782287598, + "rewards/margins": 8.825748443603516, + "rewards/rejected": -11.573179244995117, + "step": 5276 + }, + { + "epoch": 0.82, + "learning_rate": 1.027696872244411e-05, + "logits/chosen": -2.8910319805145264, + "logits/rejected": -2.295063018798828, + "logps/chosen": -139.4102783203125, + "logps/rejected": -244.4780731201172, + "loss": 1.9086, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.2972612380981445, + "rewards/margins": 3.836060047149658, + "rewards/rejected": -8.133321762084961, + "step": 5277 + }, + { + "epoch": 0.82, + "learning_rate": 1.0276235281912964e-05, + "logits/chosen": -2.680637836456299, + "logits/rejected": -1.5432920455932617, + "logps/chosen": -320.7080383300781, + "logps/rejected": -345.2866516113281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8850715756416321, + "rewards/margins": 9.901032447814941, + "rewards/rejected": -10.786104202270508, + "step": 5278 + }, + { + "epoch": 0.82, + "learning_rate": 1.0275501841381816e-05, + "logits/chosen": -2.0593690872192383, + "logits/rejected": -2.028312921524048, + "logps/chosen": -143.56494140625, + "logps/rejected": -291.3955078125, + "loss": 0.075, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3091299533843994, + "rewards/margins": 4.618537425994873, + "rewards/rejected": -6.927667140960693, + "step": 5279 + }, + { + "epoch": 0.82, + "learning_rate": 1.0274768400850668e-05, + "logits/chosen": -2.771167516708374, + "logits/rejected": -2.984968900680542, + "logps/chosen": -378.3983459472656, + "logps/rejected": -488.06317138671875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6760051846504211, + "rewards/margins": 6.083278656005859, + "rewards/rejected": -6.759283542633057, + "step": 5280 + }, + { + "epoch": 0.82, + "learning_rate": 1.027403496031952e-05, + "logits/chosen": -1.7147034406661987, + "logits/rejected": -2.5036158561706543, + "logps/chosen": -111.42717742919922, + "logps/rejected": -239.58587646484375, + "loss": 1.1219, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.648314476013184, + "rewards/margins": 3.295342445373535, + "rewards/rejected": -7.943656921386719, + "step": 5281 + }, + { + "epoch": 0.82, + "learning_rate": 1.0273301519788371e-05, + "logits/chosen": -2.7408268451690674, + "logits/rejected": -2.908158540725708, + "logps/chosen": -357.05938720703125, + "logps/rejected": -288.8193054199219, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.200292944908142, + "rewards/margins": 7.955085754394531, + "rewards/rejected": -9.155378341674805, + "step": 5282 + }, + { + "epoch": 0.82, + "learning_rate": 1.0272568079257223e-05, + "logits/chosen": -2.8800230026245117, + "logits/rejected": -3.191220760345459, + "logps/chosen": -132.52288818359375, + "logps/rejected": -264.00933837890625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3618922233581543, + "rewards/margins": 6.909363746643066, + "rewards/rejected": -9.271255493164062, + "step": 5283 + }, + { + "epoch": 0.82, + "learning_rate": 1.0271834638726075e-05, + "logits/chosen": -2.943373680114746, + "logits/rejected": -2.3829500675201416, + "logps/chosen": -267.63720703125, + "logps/rejected": -231.72982788085938, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5302734375, + "rewards/margins": 7.174054145812988, + "rewards/rejected": -7.704327583312988, + "step": 5284 + }, + { + "epoch": 0.82, + "learning_rate": 1.0271101198194927e-05, + "logits/chosen": -1.877618670463562, + "logits/rejected": -2.691439151763916, + "logps/chosen": -86.73490905761719, + "logps/rejected": -299.5447998046875, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6224911212921143, + "rewards/margins": 6.385018825531006, + "rewards/rejected": -9.0075101852417, + "step": 5285 + }, + { + "epoch": 0.82, + "learning_rate": 1.0270367757663779e-05, + "logits/chosen": -2.0098631381988525, + "logits/rejected": -2.930683135986328, + "logps/chosen": -76.98726654052734, + "logps/rejected": -254.11312866210938, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9705414175987244, + "rewards/margins": 5.623064041137695, + "rewards/rejected": -6.593605995178223, + "step": 5286 + }, + { + "epoch": 0.82, + "learning_rate": 1.0269634317132632e-05, + "logits/chosen": -0.579646110534668, + "logits/rejected": -2.5338783264160156, + "logps/chosen": -77.76802825927734, + "logps/rejected": -452.0916748046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.428410768508911, + "rewards/margins": 11.469406127929688, + "rewards/rejected": -13.89781665802002, + "step": 5287 + }, + { + "epoch": 0.82, + "learning_rate": 1.0268900876601484e-05, + "logits/chosen": -1.8563743829727173, + "logits/rejected": -2.816045045852661, + "logps/chosen": -98.77423858642578, + "logps/rejected": -188.27003479003906, + "loss": 1.3601, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.740964412689209, + "rewards/margins": 2.134136438369751, + "rewards/rejected": -6.875101089477539, + "step": 5288 + }, + { + "epoch": 0.82, + "learning_rate": 1.0268167436070336e-05, + "logits/chosen": -3.053903102874756, + "logits/rejected": -3.057835340499878, + "logps/chosen": -609.7665405273438, + "logps/rejected": -474.07623291015625, + "loss": 1.8886, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9421792030334473, + "rewards/margins": 0.6091383695602417, + "rewards/rejected": -4.5513176918029785, + "step": 5289 + }, + { + "epoch": 0.82, + "learning_rate": 1.0267433995539188e-05, + "logits/chosen": -2.565455198287964, + "logits/rejected": -3.0710580348968506, + "logps/chosen": -140.7213592529297, + "logps/rejected": -263.73980712890625, + "loss": 2.8109, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.5312628746032715, + "rewards/margins": 0.4341566562652588, + "rewards/rejected": -4.965419769287109, + "step": 5290 + }, + { + "epoch": 0.82, + "learning_rate": 1.026670055500804e-05, + "logits/chosen": -1.955536127090454, + "logits/rejected": -2.8125054836273193, + "logps/chosen": -92.60462188720703, + "logps/rejected": -197.81781005859375, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3082334995269775, + "rewards/margins": 4.162418365478516, + "rewards/rejected": -7.470652103424072, + "step": 5291 + }, + { + "epoch": 0.82, + "learning_rate": 1.0265967114476892e-05, + "logits/chosen": -2.6742756366729736, + "logits/rejected": -2.978421688079834, + "logps/chosen": -340.3683166503906, + "logps/rejected": -334.4566345214844, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7750707864761353, + "rewards/margins": 7.959633827209473, + "rewards/rejected": -9.734704971313477, + "step": 5292 + }, + { + "epoch": 0.82, + "learning_rate": 1.0265233673945744e-05, + "logits/chosen": -2.924699068069458, + "logits/rejected": -3.1880075931549072, + "logps/chosen": -675.1799926757812, + "logps/rejected": -645.0205078125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4018983840942383, + "rewards/margins": 7.3320465087890625, + "rewards/rejected": -9.7339448928833, + "step": 5293 + }, + { + "epoch": 0.82, + "learning_rate": 1.0264500233414596e-05, + "logits/chosen": -2.856670379638672, + "logits/rejected": -3.1235005855560303, + "logps/chosen": -51.73340606689453, + "logps/rejected": -143.90843200683594, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.549591064453125, + "rewards/margins": 4.142070770263672, + "rewards/rejected": -6.691661834716797, + "step": 5294 + }, + { + "epoch": 0.82, + "learning_rate": 1.0263766792883447e-05, + "logits/chosen": -2.669997453689575, + "logits/rejected": -2.852804660797119, + "logps/chosen": -35.10044860839844, + "logps/rejected": -225.3104705810547, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5011215209960938, + "rewards/margins": 4.620724678039551, + "rewards/rejected": -7.1218461990356445, + "step": 5295 + }, + { + "epoch": 0.82, + "learning_rate": 1.0263033352352301e-05, + "logits/chosen": -1.9043530225753784, + "logits/rejected": -2.9256606101989746, + "logps/chosen": -205.8447265625, + "logps/rejected": -346.042724609375, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6604255437850952, + "rewards/margins": 5.687427520751953, + "rewards/rejected": -6.347853183746338, + "step": 5296 + }, + { + "epoch": 0.82, + "learning_rate": 1.0262299911821153e-05, + "logits/chosen": -2.3519365787506104, + "logits/rejected": -2.0185372829437256, + "logps/chosen": -225.48057556152344, + "logps/rejected": -373.0368347167969, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1233338117599487, + "rewards/margins": 7.773371696472168, + "rewards/rejected": -8.896705627441406, + "step": 5297 + }, + { + "epoch": 0.82, + "learning_rate": 1.0261566471290005e-05, + "logits/chosen": -2.8972365856170654, + "logits/rejected": -2.786665678024292, + "logps/chosen": -173.35174560546875, + "logps/rejected": -199.8504180908203, + "loss": 2.5126, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.82403564453125, + "rewards/margins": 1.337815284729004, + "rewards/rejected": -4.161850929260254, + "step": 5298 + }, + { + "epoch": 0.82, + "learning_rate": 1.0260833030758857e-05, + "logits/chosen": -0.9281587600708008, + "logits/rejected": -2.8997161388397217, + "logps/chosen": -137.94314575195312, + "logps/rejected": -568.0604248046875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9305728673934937, + "rewards/margins": 7.131276607513428, + "rewards/rejected": -8.061849594116211, + "step": 5299 + }, + { + "epoch": 0.82, + "learning_rate": 1.0260099590227709e-05, + "logits/chosen": -2.233651638031006, + "logits/rejected": -3.012636661529541, + "logps/chosen": -49.4611701965332, + "logps/rejected": -309.0521240234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4388541877269745, + "rewards/margins": 8.073014259338379, + "rewards/rejected": -8.511868476867676, + "step": 5300 + }, + { + "epoch": 0.82, + "learning_rate": 1.025936614969656e-05, + "logits/chosen": -2.8030896186828613, + "logits/rejected": -2.9775173664093018, + "logps/chosen": -88.23210144042969, + "logps/rejected": -273.9381103515625, + "loss": 0.0432, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6021969318389893, + "rewards/margins": 4.834222793579102, + "rewards/rejected": -8.436419486999512, + "step": 5301 + }, + { + "epoch": 0.82, + "learning_rate": 1.0258632709165412e-05, + "logits/chosen": -2.559887170791626, + "logits/rejected": -2.872840642929077, + "logps/chosen": -300.09454345703125, + "logps/rejected": -334.0274658203125, + "loss": 2.8186, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.6471662521362305, + "rewards/margins": -1.928734302520752, + "rewards/rejected": -2.7184319496154785, + "step": 5302 + }, + { + "epoch": 0.82, + "learning_rate": 1.0257899268634264e-05, + "logits/chosen": -2.730891466140747, + "logits/rejected": -2.966810703277588, + "logps/chosen": -475.0880126953125, + "logps/rejected": -485.7396240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16746215522289276, + "rewards/margins": 8.944650650024414, + "rewards/rejected": -9.112112045288086, + "step": 5303 + }, + { + "epoch": 0.82, + "learning_rate": 1.0257165828103118e-05, + "logits/chosen": -2.2717506885528564, + "logits/rejected": -2.820931911468506, + "logps/chosen": -140.7352294921875, + "logps/rejected": -431.24163818359375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1907966136932373, + "rewards/margins": 8.087663650512695, + "rewards/rejected": -10.278460502624512, + "step": 5304 + }, + { + "epoch": 0.83, + "learning_rate": 1.025643238757197e-05, + "logits/chosen": -3.076011896133423, + "logits/rejected": -2.683112144470215, + "logps/chosen": -137.56674194335938, + "logps/rejected": -141.28848266601562, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4808377027511597, + "rewards/margins": 4.156379699707031, + "rewards/rejected": -5.6372175216674805, + "step": 5305 + }, + { + "epoch": 0.83, + "learning_rate": 1.0255698947040821e-05, + "logits/chosen": -1.5256829261779785, + "logits/rejected": -2.894669771194458, + "logps/chosen": -150.9479217529297, + "logps/rejected": -457.7566833496094, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8169211149215698, + "rewards/margins": 7.818608283996582, + "rewards/rejected": -9.635528564453125, + "step": 5306 + }, + { + "epoch": 0.83, + "learning_rate": 1.0254965506509673e-05, + "logits/chosen": -3.0454022884368896, + "logits/rejected": -2.505037784576416, + "logps/chosen": -112.01445770263672, + "logps/rejected": -168.54098510742188, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7201488614082336, + "rewards/margins": 6.654443264007568, + "rewards/rejected": -7.374592304229736, + "step": 5307 + }, + { + "epoch": 0.83, + "learning_rate": 1.0254232065978525e-05, + "logits/chosen": -2.699566125869751, + "logits/rejected": -2.2244551181793213, + "logps/chosen": -307.87042236328125, + "logps/rejected": -201.72178649902344, + "loss": 4.5876, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.101498126983643, + "rewards/margins": -1.1107406616210938, + "rewards/rejected": -3.990757703781128, + "step": 5308 + }, + { + "epoch": 0.83, + "learning_rate": 1.0253498625447377e-05, + "logits/chosen": -2.620634078979492, + "logits/rejected": -3.044341802597046, + "logps/chosen": -234.47994995117188, + "logps/rejected": -340.38934326171875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4379771947860718, + "rewards/margins": 6.506421089172363, + "rewards/rejected": -7.944398880004883, + "step": 5309 + }, + { + "epoch": 0.83, + "learning_rate": 1.025276518491623e-05, + "logits/chosen": -2.723768472671509, + "logits/rejected": -2.7887392044067383, + "logps/chosen": -39.5537109375, + "logps/rejected": -250.16384887695312, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.181958794593811, + "rewards/margins": 4.326966285705566, + "rewards/rejected": -5.508925437927246, + "step": 5310 + }, + { + "epoch": 0.83, + "learning_rate": 1.0252031744385083e-05, + "logits/chosen": -2.575132369995117, + "logits/rejected": -2.572265148162842, + "logps/chosen": -338.18145751953125, + "logps/rejected": -274.7813720703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2708069086074829, + "rewards/margins": 8.27597427368164, + "rewards/rejected": -8.546780586242676, + "step": 5311 + }, + { + "epoch": 0.83, + "learning_rate": 1.0251298303853934e-05, + "logits/chosen": -2.880481719970703, + "logits/rejected": -3.026456356048584, + "logps/chosen": -107.38514709472656, + "logps/rejected": -279.80615234375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24363350868225098, + "rewards/margins": 7.828773498535156, + "rewards/rejected": -8.072407722473145, + "step": 5312 + }, + { + "epoch": 0.83, + "learning_rate": 1.0250564863322788e-05, + "logits/chosen": -1.3243221044540405, + "logits/rejected": -2.5193405151367188, + "logps/chosen": -44.40515899658203, + "logps/rejected": -268.97479248046875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.165454149246216, + "rewards/margins": 7.285581588745117, + "rewards/rejected": -9.451035499572754, + "step": 5313 + }, + { + "epoch": 0.83, + "learning_rate": 1.024983142279164e-05, + "logits/chosen": -1.8477692604064941, + "logits/rejected": -2.6450586318969727, + "logps/chosen": -120.23285675048828, + "logps/rejected": -362.771728515625, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.01542329788208, + "rewards/margins": 7.583579063415527, + "rewards/rejected": -9.599002838134766, + "step": 5314 + }, + { + "epoch": 0.83, + "learning_rate": 1.0249097982260492e-05, + "logits/chosen": -2.4596641063690186, + "logits/rejected": -3.0888538360595703, + "logps/chosen": -181.2641143798828, + "logps/rejected": -310.5303955078125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3197401762008667, + "rewards/margins": 7.381679534912109, + "rewards/rejected": -8.701419830322266, + "step": 5315 + }, + { + "epoch": 0.83, + "learning_rate": 1.0248364541729344e-05, + "logits/chosen": -3.0700809955596924, + "logits/rejected": -2.5120644569396973, + "logps/chosen": -373.6140441894531, + "logps/rejected": -132.140625, + "loss": 4.4724, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.011326313018799, + "rewards/margins": -1.7479743957519531, + "rewards/rejected": -4.263351917266846, + "step": 5316 + }, + { + "epoch": 0.83, + "learning_rate": 1.0247631101198196e-05, + "logits/chosen": -2.814854383468628, + "logits/rejected": -2.3137850761413574, + "logps/chosen": -228.52198791503906, + "logps/rejected": -225.37234497070312, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6249462366104126, + "rewards/margins": 5.64277458190918, + "rewards/rejected": -6.2677202224731445, + "step": 5317 + }, + { + "epoch": 0.83, + "learning_rate": 1.0246897660667047e-05, + "logits/chosen": -3.0770695209503174, + "logits/rejected": -2.347837448120117, + "logps/chosen": -377.54742431640625, + "logps/rejected": -174.93736267089844, + "loss": 5.7337, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.940696716308594, + "rewards/margins": -5.72920560836792, + "rewards/rejected": -1.211491346359253, + "step": 5318 + }, + { + "epoch": 0.83, + "learning_rate": 1.02461642201359e-05, + "logits/chosen": -1.223246693611145, + "logits/rejected": -2.9319305419921875, + "logps/chosen": -158.902587890625, + "logps/rejected": -588.1945190429688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6070560812950134, + "rewards/margins": 10.686422348022461, + "rewards/rejected": -11.293478965759277, + "step": 5319 + }, + { + "epoch": 0.83, + "learning_rate": 1.0245430779604751e-05, + "logits/chosen": -1.7993953227996826, + "logits/rejected": -2.954897403717041, + "logps/chosen": -107.69307708740234, + "logps/rejected": -477.1972961425781, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9041485786437988, + "rewards/margins": 8.114446640014648, + "rewards/rejected": -10.018595695495605, + "step": 5320 + }, + { + "epoch": 0.83, + "learning_rate": 1.0244697339073603e-05, + "logits/chosen": -2.347843885421753, + "logits/rejected": -2.990065813064575, + "logps/chosen": -174.91555786132812, + "logps/rejected": -216.48934936523438, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3278533220291138, + "rewards/margins": 6.258140563964844, + "rewards/rejected": -7.585993766784668, + "step": 5321 + }, + { + "epoch": 0.83, + "learning_rate": 1.0243963898542457e-05, + "logits/chosen": -2.9399232864379883, + "logits/rejected": -2.4240663051605225, + "logps/chosen": -521.327880859375, + "logps/rejected": -384.06695556640625, + "loss": 4.3036, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.496896266937256, + "rewards/margins": -1.1732923984527588, + "rewards/rejected": -3.323603868484497, + "step": 5322 + }, + { + "epoch": 0.83, + "learning_rate": 1.0243230458011309e-05, + "logits/chosen": -2.263927459716797, + "logits/rejected": -2.911968231201172, + "logps/chosen": -282.4892578125, + "logps/rejected": -498.7045593261719, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3865325450897217, + "rewards/margins": 8.109475135803223, + "rewards/rejected": -10.496007919311523, + "step": 5323 + }, + { + "epoch": 0.83, + "learning_rate": 1.024249701748016e-05, + "logits/chosen": -3.0385148525238037, + "logits/rejected": -2.9356443881988525, + "logps/chosen": -64.94232177734375, + "logps/rejected": -128.06422424316406, + "loss": 2.634, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1161391735076904, + "rewards/margins": 0.47028541564941406, + "rewards/rejected": -3.5864245891571045, + "step": 5324 + }, + { + "epoch": 0.83, + "learning_rate": 1.0241763576949012e-05, + "logits/chosen": -2.7650046348571777, + "logits/rejected": -3.0896084308624268, + "logps/chosen": -311.1461486816406, + "logps/rejected": -252.84149169921875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3919547200202942, + "rewards/margins": 7.1085615158081055, + "rewards/rejected": -7.500515937805176, + "step": 5325 + }, + { + "epoch": 0.83, + "learning_rate": 1.0241030136417864e-05, + "logits/chosen": -2.497671365737915, + "logits/rejected": -2.9716269969940186, + "logps/chosen": -124.25318908691406, + "logps/rejected": -121.54466247558594, + "loss": 2.3963, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.805239200592041, + "rewards/margins": 2.1659388542175293, + "rewards/rejected": -5.971177577972412, + "step": 5326 + }, + { + "epoch": 0.83, + "learning_rate": 1.0240296695886716e-05, + "logits/chosen": -2.445483922958374, + "logits/rejected": -2.6989641189575195, + "logps/chosen": -200.57473754882812, + "logps/rejected": -289.58660888671875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0206148624420166, + "rewards/margins": 6.224143028259277, + "rewards/rejected": -8.244758605957031, + "step": 5327 + }, + { + "epoch": 0.83, + "learning_rate": 1.0239563255355568e-05, + "logits/chosen": -1.8082674741744995, + "logits/rejected": -2.576115369796753, + "logps/chosen": -202.67047119140625, + "logps/rejected": -179.64410400390625, + "loss": 4.8242, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.04289436340332, + "rewards/margins": -1.6737446784973145, + "rewards/rejected": -4.369149684906006, + "step": 5328 + }, + { + "epoch": 0.83, + "learning_rate": 1.023882981482442e-05, + "logits/chosen": -2.546722173690796, + "logits/rejected": -3.0948336124420166, + "logps/chosen": -355.5177917480469, + "logps/rejected": -484.11492919921875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01583707332611084, + "rewards/margins": 6.5465288162231445, + "rewards/rejected": -6.530691623687744, + "step": 5329 + }, + { + "epoch": 0.83, + "learning_rate": 1.0238096374293272e-05, + "logits/chosen": -2.442626953125, + "logits/rejected": -3.0451247692108154, + "logps/chosen": -51.7742919921875, + "logps/rejected": -348.0970458984375, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8119778633117676, + "rewards/margins": 5.100580215454102, + "rewards/rejected": -6.912558078765869, + "step": 5330 + }, + { + "epoch": 0.83, + "learning_rate": 1.0237362933762125e-05, + "logits/chosen": -1.926125407218933, + "logits/rejected": -2.4548277854919434, + "logps/chosen": -54.52088928222656, + "logps/rejected": -150.95831298828125, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.602980375289917, + "rewards/margins": 4.259317874908447, + "rewards/rejected": -7.862298011779785, + "step": 5331 + }, + { + "epoch": 0.83, + "learning_rate": 1.0236629493230977e-05, + "logits/chosen": -2.27154278755188, + "logits/rejected": -3.032475233078003, + "logps/chosen": -190.91815185546875, + "logps/rejected": -378.2252197265625, + "loss": 6.202, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.837503433227539, + "rewards/margins": -3.703911542892456, + "rewards/rejected": -4.133592128753662, + "step": 5332 + }, + { + "epoch": 0.83, + "learning_rate": 1.0235896052699829e-05, + "logits/chosen": -2.4150826930999756, + "logits/rejected": -3.0384328365325928, + "logps/chosen": -388.0788269042969, + "logps/rejected": -625.8438110351562, + "loss": 3.1642, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.595170974731445, + "rewards/margins": 0.5000772476196289, + "rewards/rejected": -6.095248222351074, + "step": 5333 + }, + { + "epoch": 0.83, + "learning_rate": 1.0235162612168681e-05, + "logits/chosen": -2.951784610748291, + "logits/rejected": -3.0485284328460693, + "logps/chosen": -111.82596588134766, + "logps/rejected": -242.09213256835938, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.990106999874115, + "rewards/margins": 4.793099403381348, + "rewards/rejected": -5.783205986022949, + "step": 5334 + }, + { + "epoch": 0.83, + "learning_rate": 1.0234429171637533e-05, + "logits/chosen": -2.835268259048462, + "logits/rejected": -3.001405954360962, + "logps/chosen": -204.8253936767578, + "logps/rejected": -176.48390197753906, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5266772508621216, + "rewards/margins": 6.231206893920898, + "rewards/rejected": -7.7578840255737305, + "step": 5335 + }, + { + "epoch": 0.83, + "learning_rate": 1.0233695731106385e-05, + "logits/chosen": -3.0325496196746826, + "logits/rejected": -1.9942668676376343, + "logps/chosen": -728.44677734375, + "logps/rejected": -278.831298828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.680981457233429, + "rewards/margins": 8.266529083251953, + "rewards/rejected": -8.947510719299316, + "step": 5336 + }, + { + "epoch": 0.83, + "learning_rate": 1.0232962290575236e-05, + "logits/chosen": -2.456561803817749, + "logits/rejected": -3.1180248260498047, + "logps/chosen": -69.19422912597656, + "logps/rejected": -374.09893798828125, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3981833457946777, + "rewards/margins": 6.21983528137207, + "rewards/rejected": -7.61801815032959, + "step": 5337 + }, + { + "epoch": 0.83, + "learning_rate": 1.0232228850044088e-05, + "logits/chosen": -2.142270803451538, + "logits/rejected": -3.007279872894287, + "logps/chosen": -139.7626190185547, + "logps/rejected": -221.27346801757812, + "loss": 2.5407, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.161293983459473, + "rewards/margins": 0.19058990478515625, + "rewards/rejected": -5.351883888244629, + "step": 5338 + }, + { + "epoch": 0.83, + "learning_rate": 1.023149540951294e-05, + "logits/chosen": -3.1091511249542236, + "logits/rejected": -1.945742130279541, + "logps/chosen": -186.85623168945312, + "logps/rejected": -122.81095886230469, + "loss": 1.7351, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.827744483947754, + "rewards/margins": 2.384194850921631, + "rewards/rejected": -5.211939334869385, + "step": 5339 + }, + { + "epoch": 0.83, + "learning_rate": 1.0230761968981794e-05, + "logits/chosen": -2.1235313415527344, + "logits/rejected": -2.4663443565368652, + "logps/chosen": -392.16888427734375, + "logps/rejected": -523.2241821289062, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8503463864326477, + "rewards/margins": 9.392465591430664, + "rewards/rejected": -10.24281120300293, + "step": 5340 + }, + { + "epoch": 0.83, + "learning_rate": 1.0230028528450646e-05, + "logits/chosen": -3.045219659805298, + "logits/rejected": -3.006895065307617, + "logps/chosen": -294.86614990234375, + "logps/rejected": -220.966552734375, + "loss": 0.0216, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6223770380020142, + "rewards/margins": 4.572623252868652, + "rewards/rejected": -6.195000648498535, + "step": 5341 + }, + { + "epoch": 0.83, + "learning_rate": 1.0229295087919498e-05, + "logits/chosen": -3.007606267929077, + "logits/rejected": -2.8450891971588135, + "logps/chosen": -126.41788482666016, + "logps/rejected": -92.61708068847656, + "loss": 1.3273, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.794023036956787, + "rewards/margins": 0.8613696098327637, + "rewards/rejected": -4.655392646789551, + "step": 5342 + }, + { + "epoch": 0.83, + "learning_rate": 1.022856164738835e-05, + "logits/chosen": -2.400139331817627, + "logits/rejected": -2.8705806732177734, + "logps/chosen": -226.3192138671875, + "logps/rejected": -208.1775665283203, + "loss": 4.0615, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.435499668121338, + "rewards/margins": -0.6619887351989746, + "rewards/rejected": -3.773510694503784, + "step": 5343 + }, + { + "epoch": 0.83, + "learning_rate": 1.0227828206857203e-05, + "logits/chosen": -2.3612265586853027, + "logits/rejected": -3.0585827827453613, + "logps/chosen": -182.24276733398438, + "logps/rejected": -405.8939208984375, + "loss": 3.0695, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.396513938903809, + "rewards/margins": 4.341964244842529, + "rewards/rejected": -8.738478660583496, + "step": 5344 + }, + { + "epoch": 0.83, + "learning_rate": 1.0227094766326055e-05, + "logits/chosen": -3.0744621753692627, + "logits/rejected": -2.589533567428589, + "logps/chosen": -421.0296936035156, + "logps/rejected": -203.84341430664062, + "loss": 5.6807, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.634382247924805, + "rewards/margins": -2.719169855117798, + "rewards/rejected": -4.915212631225586, + "step": 5345 + }, + { + "epoch": 0.83, + "learning_rate": 1.0226361325794907e-05, + "logits/chosen": -2.6595160961151123, + "logits/rejected": -2.9663350582122803, + "logps/chosen": -197.59259033203125, + "logps/rejected": -123.6825942993164, + "loss": 0.2716, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.822981357574463, + "rewards/margins": 2.890094757080078, + "rewards/rejected": -6.713076591491699, + "step": 5346 + }, + { + "epoch": 0.83, + "learning_rate": 1.0225627885263759e-05, + "logits/chosen": -1.4096248149871826, + "logits/rejected": -1.349795937538147, + "logps/chosen": -247.5000762939453, + "logps/rejected": -471.7774658203125, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.324335813522339, + "rewards/margins": 4.539545059204102, + "rewards/rejected": -7.8638811111450195, + "step": 5347 + }, + { + "epoch": 0.83, + "learning_rate": 1.022489444473261e-05, + "logits/chosen": -3.1791810989379883, + "logits/rejected": -3.2035417556762695, + "logps/chosen": -58.77342224121094, + "logps/rejected": -129.29815673828125, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2513781785964966, + "rewards/margins": 5.733123779296875, + "rewards/rejected": -6.984501838684082, + "step": 5348 + }, + { + "epoch": 0.83, + "learning_rate": 1.0224161004201464e-05, + "logits/chosen": -1.5651472806930542, + "logits/rejected": -2.9877591133117676, + "logps/chosen": -148.6033935546875, + "logps/rejected": -262.3698425292969, + "loss": 1.8161, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.317039489746094, + "rewards/margins": 1.3343749046325684, + "rewards/rejected": -5.651414394378662, + "step": 5349 + }, + { + "epoch": 0.83, + "learning_rate": 1.0223427563670316e-05, + "logits/chosen": -2.675245523452759, + "logits/rejected": -2.843989610671997, + "logps/chosen": -161.761962890625, + "logps/rejected": -176.3809814453125, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29585590958595276, + "rewards/margins": 5.777040004730225, + "rewards/rejected": -6.0728960037231445, + "step": 5350 + }, + { + "epoch": 0.83, + "learning_rate": 1.0222694123139168e-05, + "logits/chosen": -2.53779935836792, + "logits/rejected": -2.892688274383545, + "logps/chosen": -48.184566497802734, + "logps/rejected": -195.1336212158203, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0877195596694946, + "rewards/margins": 5.4881744384765625, + "rewards/rejected": -6.575893878936768, + "step": 5351 + }, + { + "epoch": 0.83, + "learning_rate": 1.022196068260802e-05, + "logits/chosen": -2.8318958282470703, + "logits/rejected": -2.2243893146514893, + "logps/chosen": -485.7889709472656, + "logps/rejected": -309.46331787109375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9000183343887329, + "rewards/margins": 6.164973735809326, + "rewards/rejected": -7.0649919509887695, + "step": 5352 + }, + { + "epoch": 0.83, + "learning_rate": 1.0221227242076872e-05, + "logits/chosen": -2.8243370056152344, + "logits/rejected": -2.973423719406128, + "logps/chosen": -126.97793579101562, + "logps/rejected": -241.41819763183594, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.700286388397217, + "rewards/margins": 5.8311567306518555, + "rewards/rejected": -8.531442642211914, + "step": 5353 + }, + { + "epoch": 0.83, + "learning_rate": 1.0220493801545724e-05, + "logits/chosen": -1.6855626106262207, + "logits/rejected": -2.0365519523620605, + "logps/chosen": -318.24591064453125, + "logps/rejected": -498.76409912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0922905206680298, + "rewards/margins": 11.125965118408203, + "rewards/rejected": -12.218255996704102, + "step": 5354 + }, + { + "epoch": 0.83, + "learning_rate": 1.0219760361014575e-05, + "logits/chosen": -2.704824686050415, + "logits/rejected": -3.1432511806488037, + "logps/chosen": -41.125770568847656, + "logps/rejected": -262.46917724609375, + "loss": 0.0456, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.767688274383545, + "rewards/margins": 4.29824686050415, + "rewards/rejected": -6.065935134887695, + "step": 5355 + }, + { + "epoch": 0.83, + "learning_rate": 1.0219026920483427e-05, + "logits/chosen": -3.0339460372924805, + "logits/rejected": -1.9507927894592285, + "logps/chosen": -219.45932006835938, + "logps/rejected": -315.0292053222656, + "loss": 2.6949, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6328818798065186, + "rewards/margins": 2.576829671859741, + "rewards/rejected": -6.20971155166626, + "step": 5356 + }, + { + "epoch": 0.83, + "learning_rate": 1.0218293479952279e-05, + "logits/chosen": -1.8880059719085693, + "logits/rejected": -2.8238840103149414, + "logps/chosen": -197.99066162109375, + "logps/rejected": -361.51324462890625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7027894258499146, + "rewards/margins": 6.1653242111206055, + "rewards/rejected": -7.8681135177612305, + "step": 5357 + }, + { + "epoch": 0.83, + "learning_rate": 1.0217560039421133e-05, + "logits/chosen": -3.020014524459839, + "logits/rejected": -2.849937677383423, + "logps/chosen": -206.9012451171875, + "logps/rejected": -243.08126831054688, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2849518060684204, + "rewards/margins": 5.700403213500977, + "rewards/rejected": -5.985354900360107, + "step": 5358 + }, + { + "epoch": 0.83, + "learning_rate": 1.0216826598889985e-05, + "logits/chosen": -2.947885036468506, + "logits/rejected": -3.011810302734375, + "logps/chosen": -65.70548248291016, + "logps/rejected": -253.23904418945312, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0178344249725342, + "rewards/margins": 8.344029426574707, + "rewards/rejected": -9.36186408996582, + "step": 5359 + }, + { + "epoch": 0.83, + "learning_rate": 1.0216093158358836e-05, + "logits/chosen": -3.2530620098114014, + "logits/rejected": -3.1852831840515137, + "logps/chosen": -94.275390625, + "logps/rejected": -135.5165252685547, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.162510395050049, + "rewards/margins": 6.426626205444336, + "rewards/rejected": -8.589137077331543, + "step": 5360 + }, + { + "epoch": 0.83, + "learning_rate": 1.0215359717827688e-05, + "logits/chosen": -3.031877279281616, + "logits/rejected": -3.159557580947876, + "logps/chosen": -71.25822448730469, + "logps/rejected": -152.59774780273438, + "loss": 1.0732, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.460475444793701, + "rewards/margins": 1.9241359233856201, + "rewards/rejected": -7.384611129760742, + "step": 5361 + }, + { + "epoch": 0.83, + "learning_rate": 1.021462627729654e-05, + "logits/chosen": -3.092643976211548, + "logits/rejected": -3.0963289737701416, + "logps/chosen": -61.46814727783203, + "logps/rejected": -117.27899932861328, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6238040924072266, + "rewards/margins": 4.175403118133545, + "rewards/rejected": -5.799206733703613, + "step": 5362 + }, + { + "epoch": 0.83, + "learning_rate": 1.0213892836765392e-05, + "logits/chosen": -1.61012864112854, + "logits/rejected": -1.8408654928207397, + "logps/chosen": -86.01171875, + "logps/rejected": -330.1651611328125, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3652982711791992, + "rewards/margins": 5.4396467208862305, + "rewards/rejected": -6.8049445152282715, + "step": 5363 + }, + { + "epoch": 0.83, + "learning_rate": 1.0213159396234244e-05, + "logits/chosen": -2.2354700565338135, + "logits/rejected": -2.7568373680114746, + "logps/chosen": -214.39407348632812, + "logps/rejected": -299.46099853515625, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5493435263633728, + "rewards/margins": 8.675980567932129, + "rewards/rejected": -9.225323677062988, + "step": 5364 + }, + { + "epoch": 0.83, + "learning_rate": 1.0212425955703096e-05, + "logits/chosen": -2.8319754600524902, + "logits/rejected": -2.9891953468322754, + "logps/chosen": -477.9921569824219, + "logps/rejected": -915.8289794921875, + "loss": 2.9768, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.000096797943115, + "rewards/margins": 1.0908212661743164, + "rewards/rejected": -5.090918064117432, + "step": 5365 + }, + { + "epoch": 0.83, + "learning_rate": 1.0211692515171948e-05, + "logits/chosen": -3.2200493812561035, + "logits/rejected": -3.1452364921569824, + "logps/chosen": -420.1171569824219, + "logps/rejected": -264.9963684082031, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08031311631202698, + "rewards/margins": 6.936494827270508, + "rewards/rejected": -6.856181621551514, + "step": 5366 + }, + { + "epoch": 0.83, + "learning_rate": 1.0210959074640801e-05, + "logits/chosen": -3.1189491748809814, + "logits/rejected": -2.788877248764038, + "logps/chosen": -262.27587890625, + "logps/rejected": -255.05068969726562, + "loss": 1.8422, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.339449405670166, + "rewards/margins": 1.969160556793213, + "rewards/rejected": -5.308609962463379, + "step": 5367 + }, + { + "epoch": 0.83, + "learning_rate": 1.0210225634109653e-05, + "logits/chosen": -2.1570370197296143, + "logits/rejected": -3.1000287532806396, + "logps/chosen": -398.5950012207031, + "logps/rejected": -607.135986328125, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4788288474082947, + "rewards/margins": 7.110273838043213, + "rewards/rejected": -7.589102268218994, + "step": 5368 + }, + { + "epoch": 0.83, + "learning_rate": 1.0209492193578505e-05, + "logits/chosen": -3.0056517124176025, + "logits/rejected": -2.5607211589813232, + "logps/chosen": -116.42218780517578, + "logps/rejected": -250.7281494140625, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5671809911727905, + "rewards/margins": 6.525602340698242, + "rewards/rejected": -8.092782974243164, + "step": 5369 + }, + { + "epoch": 0.84, + "learning_rate": 1.0208758753047357e-05, + "logits/chosen": -3.1821956634521484, + "logits/rejected": -2.8619022369384766, + "logps/chosen": -385.87188720703125, + "logps/rejected": -467.34234619140625, + "loss": 4.4988, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.073797225952148, + "rewards/margins": -4.4691619873046875, + "rewards/rejected": -1.6046351194381714, + "step": 5370 + }, + { + "epoch": 0.84, + "learning_rate": 1.0208025312516209e-05, + "logits/chosen": -1.97133469581604, + "logits/rejected": -2.751554489135742, + "logps/chosen": -185.8123779296875, + "logps/rejected": -401.1495056152344, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.408902883529663, + "rewards/margins": 6.633708953857422, + "rewards/rejected": -9.042612075805664, + "step": 5371 + }, + { + "epoch": 0.84, + "learning_rate": 1.020729187198506e-05, + "logits/chosen": -2.833648681640625, + "logits/rejected": -1.6922041177749634, + "logps/chosen": -163.62127685546875, + "logps/rejected": -152.85865783691406, + "loss": 1.0469, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.912158012390137, + "rewards/margins": 4.365686416625977, + "rewards/rejected": -9.277844429016113, + "step": 5372 + }, + { + "epoch": 0.84, + "learning_rate": 1.0206558431453913e-05, + "logits/chosen": -3.1494362354278564, + "logits/rejected": -3.036484718322754, + "logps/chosen": -360.8035888671875, + "logps/rejected": -188.5869903564453, + "loss": 2.4176, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9242851734161377, + "rewards/margins": -1.1984188556671143, + "rewards/rejected": -2.7258663177490234, + "step": 5373 + }, + { + "epoch": 0.84, + "learning_rate": 1.0205824990922764e-05, + "logits/chosen": -1.8509929180145264, + "logits/rejected": -3.0929338932037354, + "logps/chosen": -289.20623779296875, + "logps/rejected": -376.8746337890625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7961366176605225, + "rewards/margins": 6.336679458618164, + "rewards/rejected": -8.132816314697266, + "step": 5374 + }, + { + "epoch": 0.84, + "learning_rate": 1.0205091550391616e-05, + "logits/chosen": -3.347093343734741, + "logits/rejected": -3.087937831878662, + "logps/chosen": -101.61781311035156, + "logps/rejected": -183.32901000976562, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0476468801498413, + "rewards/margins": 5.459559440612793, + "rewards/rejected": -6.507205963134766, + "step": 5375 + }, + { + "epoch": 0.84, + "learning_rate": 1.020435810986047e-05, + "logits/chosen": -2.9114990234375, + "logits/rejected": -2.88604736328125, + "logps/chosen": -173.97488403320312, + "logps/rejected": -220.5607452392578, + "loss": 0.0548, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7305973768234253, + "rewards/margins": 4.719224452972412, + "rewards/rejected": -6.449821949005127, + "step": 5376 + }, + { + "epoch": 0.84, + "learning_rate": 1.0203624669329322e-05, + "logits/chosen": -2.0146961212158203, + "logits/rejected": -3.1223230361938477, + "logps/chosen": -159.5148162841797, + "logps/rejected": -405.2771301269531, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.138283610343933, + "rewards/margins": 5.649588584899902, + "rewards/rejected": -6.787872314453125, + "step": 5377 + }, + { + "epoch": 0.84, + "learning_rate": 1.0202891228798175e-05, + "logits/chosen": -1.603177785873413, + "logits/rejected": -2.7015321254730225, + "logps/chosen": -305.628662109375, + "logps/rejected": -307.8820495605469, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.556178331375122, + "rewards/margins": 5.142502307891846, + "rewards/rejected": -7.698680877685547, + "step": 5378 + }, + { + "epoch": 0.84, + "learning_rate": 1.0202157788267027e-05, + "logits/chosen": -2.8816959857940674, + "logits/rejected": -1.9513983726501465, + "logps/chosen": -171.71939086914062, + "logps/rejected": -163.40122985839844, + "loss": 0.0493, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2622005939483643, + "rewards/margins": 3.9665169715881348, + "rewards/rejected": -6.228717803955078, + "step": 5379 + }, + { + "epoch": 0.84, + "learning_rate": 1.0201424347735879e-05, + "logits/chosen": -1.8221300840377808, + "logits/rejected": -2.904752492904663, + "logps/chosen": -99.3487548828125, + "logps/rejected": -255.54190063476562, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1168020963668823, + "rewards/margins": 5.569781303405762, + "rewards/rejected": -6.686583518981934, + "step": 5380 + }, + { + "epoch": 0.84, + "learning_rate": 1.0200690907204731e-05, + "logits/chosen": -2.7781779766082764, + "logits/rejected": -3.1768643856048584, + "logps/chosen": -626.6881103515625, + "logps/rejected": -513.8244018554688, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9698814749717712, + "rewards/margins": 5.882036209106445, + "rewards/rejected": -6.8519182205200195, + "step": 5381 + }, + { + "epoch": 0.84, + "learning_rate": 1.0199957466673583e-05, + "logits/chosen": -2.724313974380493, + "logits/rejected": -2.669848918914795, + "logps/chosen": -503.55767822265625, + "logps/rejected": -463.73492431640625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26132047176361084, + "rewards/margins": 7.036195755004883, + "rewards/rejected": -7.297515869140625, + "step": 5382 + }, + { + "epoch": 0.84, + "learning_rate": 1.0199224026142435e-05, + "logits/chosen": -2.7309699058532715, + "logits/rejected": -2.908357858657837, + "logps/chosen": -40.19953155517578, + "logps/rejected": -128.54153442382812, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1581814289093018, + "rewards/margins": 4.957372188568115, + "rewards/rejected": -7.115553855895996, + "step": 5383 + }, + { + "epoch": 0.84, + "learning_rate": 1.0198490585611287e-05, + "logits/chosen": -2.7891883850097656, + "logits/rejected": -2.974945545196533, + "logps/chosen": -147.31956481933594, + "logps/rejected": -211.1939697265625, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6127411127090454, + "rewards/margins": 4.861157417297363, + "rewards/rejected": -5.473898410797119, + "step": 5384 + }, + { + "epoch": 0.84, + "learning_rate": 1.019775714508014e-05, + "logits/chosen": -2.5930120944976807, + "logits/rejected": -3.0608856678009033, + "logps/chosen": -123.5513916015625, + "logps/rejected": -405.78424072265625, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4940032958984375, + "rewards/margins": 7.404653072357178, + "rewards/rejected": -8.898655891418457, + "step": 5385 + }, + { + "epoch": 0.84, + "learning_rate": 1.0197023704548992e-05, + "logits/chosen": -2.512061357498169, + "logits/rejected": -3.054810047149658, + "logps/chosen": -193.88436889648438, + "logps/rejected": -282.29278564453125, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8004181385040283, + "rewards/margins": 5.316390514373779, + "rewards/rejected": -7.116808891296387, + "step": 5386 + }, + { + "epoch": 0.84, + "learning_rate": 1.0196290264017844e-05, + "logits/chosen": -2.5624310970306396, + "logits/rejected": -3.090125560760498, + "logps/chosen": -256.8941955566406, + "logps/rejected": -403.08245849609375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7462596893310547, + "rewards/margins": 6.756048202514648, + "rewards/rejected": -8.502307891845703, + "step": 5387 + }, + { + "epoch": 0.84, + "learning_rate": 1.0195556823486696e-05, + "logits/chosen": -2.1232426166534424, + "logits/rejected": -2.9151999950408936, + "logps/chosen": -281.779541015625, + "logps/rejected": -310.56085205078125, + "loss": 2.7378, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.161267042160034, + "rewards/margins": 0.8189706802368164, + "rewards/rejected": -3.9802377223968506, + "step": 5388 + }, + { + "epoch": 0.84, + "learning_rate": 1.0194823382955548e-05, + "logits/chosen": -2.6042799949645996, + "logits/rejected": -3.059953212738037, + "logps/chosen": -85.69599151611328, + "logps/rejected": -247.42051696777344, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4653249979019165, + "rewards/margins": 5.011307716369629, + "rewards/rejected": -6.476633071899414, + "step": 5389 + }, + { + "epoch": 0.84, + "learning_rate": 1.01940899424244e-05, + "logits/chosen": -1.9364399909973145, + "logits/rejected": -3.0151166915893555, + "logps/chosen": -79.6052017211914, + "logps/rejected": -347.34259033203125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4760527610778809, + "rewards/margins": 7.370488166809082, + "rewards/rejected": -8.846541404724121, + "step": 5390 + }, + { + "epoch": 0.84, + "learning_rate": 1.0193356501893251e-05, + "logits/chosen": -3.054811954498291, + "logits/rejected": -2.97408127784729, + "logps/chosen": -181.92422485351562, + "logps/rejected": -335.0848693847656, + "loss": 0.0338, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.538813829421997, + "rewards/margins": 3.3754656314849854, + "rewards/rejected": -4.914279460906982, + "step": 5391 + }, + { + "epoch": 0.84, + "learning_rate": 1.0192623061362103e-05, + "logits/chosen": -2.8172028064727783, + "logits/rejected": -2.7041022777557373, + "logps/chosen": -237.20330810546875, + "logps/rejected": -319.0034484863281, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2256721258163452, + "rewards/margins": 7.400518417358398, + "rewards/rejected": -8.626190185546875, + "step": 5392 + }, + { + "epoch": 0.84, + "learning_rate": 1.0191889620830957e-05, + "logits/chosen": -2.9302642345428467, + "logits/rejected": -2.6668143272399902, + "logps/chosen": -170.80078125, + "logps/rejected": -245.03695678710938, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8702735900878906, + "rewards/margins": 4.974066734313965, + "rewards/rejected": -6.8443403244018555, + "step": 5393 + }, + { + "epoch": 0.84, + "learning_rate": 1.0191156180299809e-05, + "logits/chosen": -3.0284194946289062, + "logits/rejected": -2.7926626205444336, + "logps/chosen": -233.43014526367188, + "logps/rejected": -224.76901245117188, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9432892203330994, + "rewards/margins": 7.002414703369141, + "rewards/rejected": -7.945703983306885, + "step": 5394 + }, + { + "epoch": 0.84, + "learning_rate": 1.019042273976866e-05, + "logits/chosen": -1.7608097791671753, + "logits/rejected": -3.0056912899017334, + "logps/chosen": -127.44296264648438, + "logps/rejected": -413.4097900390625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1482068300247192, + "rewards/margins": 7.576760768890381, + "rewards/rejected": -8.724967956542969, + "step": 5395 + }, + { + "epoch": 0.84, + "learning_rate": 1.0189689299237513e-05, + "logits/chosen": -2.9228978157043457, + "logits/rejected": -2.49847674369812, + "logps/chosen": -236.99282836914062, + "logps/rejected": -270.3663024902344, + "loss": 0.217, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1737651824951172, + "rewards/margins": 5.259012222290039, + "rewards/rejected": -6.432777404785156, + "step": 5396 + }, + { + "epoch": 0.84, + "learning_rate": 1.0188955858706364e-05, + "logits/chosen": -2.8122801780700684, + "logits/rejected": -2.9879088401794434, + "logps/chosen": -311.0294189453125, + "logps/rejected": -410.8099365234375, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6321628093719482, + "rewards/margins": 5.475828170776367, + "rewards/rejected": -9.107991218566895, + "step": 5397 + }, + { + "epoch": 0.84, + "learning_rate": 1.0188222418175216e-05, + "logits/chosen": -2.125098705291748, + "logits/rejected": -2.4686009883880615, + "logps/chosen": -212.25872802734375, + "logps/rejected": -430.8627624511719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5404640436172485, + "rewards/margins": 10.457717895507812, + "rewards/rejected": -11.998181343078613, + "step": 5398 + }, + { + "epoch": 0.84, + "learning_rate": 1.0187488977644068e-05, + "logits/chosen": -3.118152379989624, + "logits/rejected": -1.8309099674224854, + "logps/chosen": -676.4642944335938, + "logps/rejected": -287.8662414550781, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2590876817703247, + "rewards/margins": 5.980292320251465, + "rewards/rejected": -7.2393798828125, + "step": 5399 + }, + { + "epoch": 0.84, + "learning_rate": 1.018675553711292e-05, + "logits/chosen": -2.7419164180755615, + "logits/rejected": -2.9691503047943115, + "logps/chosen": -173.51266479492188, + "logps/rejected": -164.61769104003906, + "loss": 1.3003, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.597650527954102, + "rewards/margins": 2.359386444091797, + "rewards/rejected": -6.957036972045898, + "step": 5400 + }, + { + "epoch": 0.84, + "learning_rate": 1.0186022096581772e-05, + "logits/chosen": -0.9845415353775024, + "logits/rejected": -2.755326986312866, + "logps/chosen": -68.96330261230469, + "logps/rejected": -178.98046875, + "loss": 0.9461, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.613927364349365, + "rewards/margins": 1.8127343654632568, + "rewards/rejected": -7.426661968231201, + "step": 5401 + }, + { + "epoch": 0.84, + "learning_rate": 1.0185288656050626e-05, + "logits/chosen": -2.995577812194824, + "logits/rejected": -1.4157415628433228, + "logps/chosen": -256.1697082519531, + "logps/rejected": -168.52578735351562, + "loss": 0.3905, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2386012077331543, + "rewards/margins": 3.056854724884033, + "rewards/rejected": -6.295456409454346, + "step": 5402 + }, + { + "epoch": 0.84, + "learning_rate": 1.0184555215519477e-05, + "logits/chosen": -1.7508723735809326, + "logits/rejected": -2.6262261867523193, + "logps/chosen": -121.86744689941406, + "logps/rejected": -317.575927734375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.072131633758545, + "rewards/margins": 8.643220901489258, + "rewards/rejected": -10.715352058410645, + "step": 5403 + }, + { + "epoch": 0.84, + "learning_rate": 1.018382177498833e-05, + "logits/chosen": -1.826149821281433, + "logits/rejected": -3.1163647174835205, + "logps/chosen": -118.15655517578125, + "logps/rejected": -486.5264892578125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2866564989089966, + "rewards/margins": 7.75390625, + "rewards/rejected": -9.040562629699707, + "step": 5404 + }, + { + "epoch": 0.84, + "learning_rate": 1.0183088334457181e-05, + "logits/chosen": -2.7240564823150635, + "logits/rejected": -3.05837082862854, + "logps/chosen": -209.99002075195312, + "logps/rejected": -289.8877868652344, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1390414237976074, + "rewards/margins": 6.536013603210449, + "rewards/rejected": -9.675054550170898, + "step": 5405 + }, + { + "epoch": 0.84, + "learning_rate": 1.0182354893926033e-05, + "logits/chosen": -2.7048513889312744, + "logits/rejected": -2.5636661052703857, + "logps/chosen": -186.79464721679688, + "logps/rejected": -395.588623046875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.148606300354004, + "rewards/margins": 9.426641464233398, + "rewards/rejected": -10.575246810913086, + "step": 5406 + }, + { + "epoch": 0.84, + "learning_rate": 1.0181621453394885e-05, + "logits/chosen": -2.923579454421997, + "logits/rejected": -1.7861562967300415, + "logps/chosen": -428.1292724609375, + "logps/rejected": -331.9708557128906, + "loss": 0.2463, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.562073230743408, + "rewards/margins": 2.8901920318603516, + "rewards/rejected": -5.45226526260376, + "step": 5407 + }, + { + "epoch": 0.84, + "learning_rate": 1.0180888012863737e-05, + "logits/chosen": -2.7414450645446777, + "logits/rejected": -2.490696668624878, + "logps/chosen": -125.72119140625, + "logps/rejected": -75.99594116210938, + "loss": 2.3109, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.815054416656494, + "rewards/margins": -1.5996599197387695, + "rewards/rejected": -5.215394496917725, + "step": 5408 + }, + { + "epoch": 0.84, + "learning_rate": 1.0180154572332589e-05, + "logits/chosen": -3.1817002296447754, + "logits/rejected": -2.50146746635437, + "logps/chosen": -219.299560546875, + "logps/rejected": -167.92591857910156, + "loss": 3.9309, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.0821852684021, + "rewards/margins": -0.1061861515045166, + "rewards/rejected": -4.975999355316162, + "step": 5409 + }, + { + "epoch": 0.84, + "learning_rate": 1.0179421131801442e-05, + "logits/chosen": -2.0914604663848877, + "logits/rejected": -2.6526076793670654, + "logps/chosen": -83.7098388671875, + "logps/rejected": -496.5172119140625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1138861179351807, + "rewards/margins": 6.335577011108398, + "rewards/rejected": -8.449462890625, + "step": 5410 + }, + { + "epoch": 0.84, + "learning_rate": 1.0178687691270294e-05, + "logits/chosen": -2.8455398082733154, + "logits/rejected": -2.531966209411621, + "logps/chosen": -139.65179443359375, + "logps/rejected": -183.319091796875, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2374589443206787, + "rewards/margins": 4.387997627258301, + "rewards/rejected": -6.6254563331604, + "step": 5411 + }, + { + "epoch": 0.84, + "learning_rate": 1.0177954250739148e-05, + "logits/chosen": -2.689634323120117, + "logits/rejected": -2.804354429244995, + "logps/chosen": -145.96722412109375, + "logps/rejected": -316.5777282714844, + "loss": 0.0552, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2918663024902344, + "rewards/margins": 4.451406002044678, + "rewards/rejected": -5.74327278137207, + "step": 5412 + }, + { + "epoch": 0.84, + "learning_rate": 1.0177220810208e-05, + "logits/chosen": -1.9656656980514526, + "logits/rejected": -2.807743549346924, + "logps/chosen": -267.99127197265625, + "logps/rejected": -268.51226806640625, + "loss": 0.5039, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.339911699295044, + "rewards/margins": 3.03568959236145, + "rewards/rejected": -5.375601291656494, + "step": 5413 + }, + { + "epoch": 0.84, + "learning_rate": 1.0176487369676851e-05, + "logits/chosen": -2.738243341445923, + "logits/rejected": -2.8516130447387695, + "logps/chosen": -59.159828186035156, + "logps/rejected": -306.2967529296875, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.919398307800293, + "rewards/margins": 5.9704670906066895, + "rewards/rejected": -8.889864921569824, + "step": 5414 + }, + { + "epoch": 0.84, + "learning_rate": 1.0175753929145703e-05, + "logits/chosen": -2.618350028991699, + "logits/rejected": -3.0059921741485596, + "logps/chosen": -51.952125549316406, + "logps/rejected": -163.71847534179688, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7051596641540527, + "rewards/margins": 5.514244079589844, + "rewards/rejected": -8.219404220581055, + "step": 5415 + }, + { + "epoch": 0.84, + "learning_rate": 1.0175020488614555e-05, + "logits/chosen": -3.143362283706665, + "logits/rejected": -2.9790890216827393, + "logps/chosen": -1066.7479248046875, + "logps/rejected": -698.6495971679688, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20503845810890198, + "rewards/margins": 8.04052734375, + "rewards/rejected": -8.245566368103027, + "step": 5416 + }, + { + "epoch": 0.84, + "learning_rate": 1.0174287048083407e-05, + "logits/chosen": -3.1840555667877197, + "logits/rejected": -2.445730209350586, + "logps/chosen": -648.0577392578125, + "logps/rejected": -219.30618286132812, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6413646936416626, + "rewards/margins": 7.433831214904785, + "rewards/rejected": -8.075196266174316, + "step": 5417 + }, + { + "epoch": 0.84, + "learning_rate": 1.0173553607552259e-05, + "logits/chosen": -3.234544038772583, + "logits/rejected": -2.626946210861206, + "logps/chosen": -362.8546142578125, + "logps/rejected": -176.25653076171875, + "loss": 4.3218, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.094671249389648, + "rewards/margins": -1.4024004936218262, + "rewards/rejected": -4.692270755767822, + "step": 5418 + }, + { + "epoch": 0.84, + "learning_rate": 1.017282016702111e-05, + "logits/chosen": -3.0737972259521484, + "logits/rejected": -3.0702319145202637, + "logps/chosen": -143.48776245117188, + "logps/rejected": -234.6213836669922, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5193191766738892, + "rewards/margins": 7.191642761230469, + "rewards/rejected": -8.710962295532227, + "step": 5419 + }, + { + "epoch": 0.84, + "learning_rate": 1.0172086726489964e-05, + "logits/chosen": -2.1115670204162598, + "logits/rejected": -2.9056708812713623, + "logps/chosen": -228.94317626953125, + "logps/rejected": -348.153076171875, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07651060819625854, + "rewards/margins": 5.887948989868164, + "rewards/rejected": -5.964459419250488, + "step": 5420 + }, + { + "epoch": 0.84, + "learning_rate": 1.0171353285958816e-05, + "logits/chosen": -1.2729846239089966, + "logits/rejected": -1.7593648433685303, + "logps/chosen": -119.82304382324219, + "logps/rejected": -347.25115966796875, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8091596364974976, + "rewards/margins": 5.464591026306152, + "rewards/rejected": -6.273750305175781, + "step": 5421 + }, + { + "epoch": 0.84, + "learning_rate": 1.0170619845427668e-05, + "logits/chosen": -3.052734851837158, + "logits/rejected": -2.7477078437805176, + "logps/chosen": -258.09674072265625, + "logps/rejected": -74.22311401367188, + "loss": 4.7814, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.483303070068359, + "rewards/margins": -2.3449547290802, + "rewards/rejected": -3.13834810256958, + "step": 5422 + }, + { + "epoch": 0.84, + "learning_rate": 1.016988640489652e-05, + "logits/chosen": -2.196202516555786, + "logits/rejected": -2.9579477310180664, + "logps/chosen": -50.92976379394531, + "logps/rejected": -165.39195251464844, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.45477294921875, + "rewards/margins": 5.753330707550049, + "rewards/rejected": -8.20810317993164, + "step": 5423 + }, + { + "epoch": 0.84, + "learning_rate": 1.0169152964365372e-05, + "logits/chosen": -1.6451207399368286, + "logits/rejected": -3.0393450260162354, + "logps/chosen": -119.93217468261719, + "logps/rejected": -286.90625, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.397212028503418, + "rewards/margins": 4.224360942840576, + "rewards/rejected": -6.621572971343994, + "step": 5424 + }, + { + "epoch": 0.84, + "learning_rate": 1.0168419523834224e-05, + "logits/chosen": -2.7033846378326416, + "logits/rejected": -2.8905532360076904, + "logps/chosen": -243.2708740234375, + "logps/rejected": -234.48130798339844, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.297065258026123, + "rewards/margins": 6.200709819793701, + "rewards/rejected": -8.497775077819824, + "step": 5425 + }, + { + "epoch": 0.84, + "learning_rate": 1.0167686083303076e-05, + "logits/chosen": -2.7141425609588623, + "logits/rejected": -2.9173760414123535, + "logps/chosen": -90.97270202636719, + "logps/rejected": -273.42559814453125, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3997678756713867, + "rewards/margins": 3.9755213260650635, + "rewards/rejected": -7.375288963317871, + "step": 5426 + }, + { + "epoch": 0.84, + "learning_rate": 1.0166952642771928e-05, + "logits/chosen": -1.5694750547409058, + "logits/rejected": -2.810060739517212, + "logps/chosen": -90.51432800292969, + "logps/rejected": -329.81781005859375, + "loss": 0.4122, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.709038734436035, + "rewards/margins": 5.168020248413086, + "rewards/rejected": -9.877058982849121, + "step": 5427 + }, + { + "epoch": 0.84, + "learning_rate": 1.016621920224078e-05, + "logits/chosen": -1.4162797927856445, + "logits/rejected": -2.248508930206299, + "logps/chosen": -145.230224609375, + "logps/rejected": -305.0260009765625, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1740531921386719, + "rewards/margins": 5.722400665283203, + "rewards/rejected": -6.896453857421875, + "step": 5428 + }, + { + "epoch": 0.84, + "learning_rate": 1.0165485761709633e-05, + "logits/chosen": -2.2457056045532227, + "logits/rejected": -2.8189821243286133, + "logps/chosen": -264.0222473144531, + "logps/rejected": -527.0362548828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0769439935684204, + "rewards/margins": 10.453043937683105, + "rewards/rejected": -11.529988288879395, + "step": 5429 + }, + { + "epoch": 0.84, + "learning_rate": 1.0164752321178485e-05, + "logits/chosen": -3.0060718059539795, + "logits/rejected": -2.7806177139282227, + "logps/chosen": -127.34481048583984, + "logps/rejected": -151.5736541748047, + "loss": 0.7257, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.4767255783081055, + "rewards/margins": 2.006091833114624, + "rewards/rejected": -7.48281717300415, + "step": 5430 + }, + { + "epoch": 0.84, + "learning_rate": 1.0164018880647337e-05, + "logits/chosen": -2.0512173175811768, + "logits/rejected": -2.7431137561798096, + "logps/chosen": -106.65879821777344, + "logps/rejected": -339.74188232421875, + "loss": 0.0403, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.052845001220703, + "rewards/margins": 3.830679416656494, + "rewards/rejected": -8.883523941040039, + "step": 5431 + }, + { + "epoch": 0.84, + "learning_rate": 1.0163285440116189e-05, + "logits/chosen": -1.7380698919296265, + "logits/rejected": -2.1969070434570312, + "logps/chosen": -155.30917358398438, + "logps/rejected": -382.64483642578125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3856050968170166, + "rewards/margins": 8.75626277923584, + "rewards/rejected": -11.141867637634277, + "step": 5432 + }, + { + "epoch": 0.84, + "learning_rate": 1.016255199958504e-05, + "logits/chosen": -2.825819492340088, + "logits/rejected": -2.649895429611206, + "logps/chosen": -298.80487060546875, + "logps/rejected": -451.18865966796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.724956750869751, + "rewards/margins": 8.889366149902344, + "rewards/rejected": -11.614322662353516, + "step": 5433 + }, + { + "epoch": 0.85, + "learning_rate": 1.0161818559053892e-05, + "logits/chosen": -2.7932684421539307, + "logits/rejected": -3.2113747596740723, + "logps/chosen": -32.625144958496094, + "logps/rejected": -245.0865478515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3352288007736206, + "rewards/margins": 8.913188934326172, + "rewards/rejected": -10.248416900634766, + "step": 5434 + }, + { + "epoch": 0.85, + "learning_rate": 1.0161085118522744e-05, + "logits/chosen": -2.9351837635040283, + "logits/rejected": -1.8981621265411377, + "logps/chosen": -211.00570678710938, + "logps/rejected": -157.89297485351562, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1073296070098877, + "rewards/margins": 5.911643028259277, + "rewards/rejected": -8.018973350524902, + "step": 5435 + }, + { + "epoch": 0.85, + "learning_rate": 1.0160351677991596e-05, + "logits/chosen": -1.724536418914795, + "logits/rejected": -2.9172298908233643, + "logps/chosen": -136.40786743164062, + "logps/rejected": -331.14642333984375, + "loss": 0.2565, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.211467742919922, + "rewards/margins": 3.836193084716797, + "rewards/rejected": -8.047660827636719, + "step": 5436 + }, + { + "epoch": 0.85, + "learning_rate": 1.0159618237460448e-05, + "logits/chosen": -1.8479893207550049, + "logits/rejected": -2.733450174331665, + "logps/chosen": -214.47146606445312, + "logps/rejected": -294.904052734375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5808366537094116, + "rewards/margins": 8.722410202026367, + "rewards/rejected": -10.303247451782227, + "step": 5437 + }, + { + "epoch": 0.85, + "learning_rate": 1.0158884796929302e-05, + "logits/chosen": -2.9329497814178467, + "logits/rejected": -2.204110622406006, + "logps/chosen": -231.420166015625, + "logps/rejected": -105.1690673828125, + "loss": 2.887, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.405952453613281, + "rewards/margins": -2.8046538829803467, + "rewards/rejected": -5.601297855377197, + "step": 5438 + }, + { + "epoch": 0.85, + "learning_rate": 1.0158151356398153e-05, + "logits/chosen": -1.981382131576538, + "logits/rejected": -2.7770333290100098, + "logps/chosen": -192.48095703125, + "logps/rejected": -630.4972534179688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6067452430725098, + "rewards/margins": 14.395576477050781, + "rewards/rejected": -17.002321243286133, + "step": 5439 + }, + { + "epoch": 0.85, + "learning_rate": 1.0157417915867005e-05, + "logits/chosen": -1.91368567943573, + "logits/rejected": -2.9077646732330322, + "logps/chosen": -78.903076171875, + "logps/rejected": -327.8058776855469, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6961796283721924, + "rewards/margins": 8.691015243530273, + "rewards/rejected": -10.387195587158203, + "step": 5440 + }, + { + "epoch": 0.85, + "learning_rate": 1.0156684475335857e-05, + "logits/chosen": -2.0235695838928223, + "logits/rejected": -2.675492525100708, + "logps/chosen": -169.967041015625, + "logps/rejected": -281.7840881347656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6746361255645752, + "rewards/margins": 10.200441360473633, + "rewards/rejected": -11.875078201293945, + "step": 5441 + }, + { + "epoch": 0.85, + "learning_rate": 1.0155951034804709e-05, + "logits/chosen": -1.496664047241211, + "logits/rejected": -2.7846181392669678, + "logps/chosen": -180.3715057373047, + "logps/rejected": -331.0299072265625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0416386127471924, + "rewards/margins": 7.794975280761719, + "rewards/rejected": -10.836613655090332, + "step": 5442 + }, + { + "epoch": 0.85, + "learning_rate": 1.0155217594273561e-05, + "logits/chosen": -3.022393226623535, + "logits/rejected": -1.946690559387207, + "logps/chosen": -208.2635498046875, + "logps/rejected": -137.3189239501953, + "loss": 2.1459, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.0092453956604, + "rewards/margins": 0.2389070987701416, + "rewards/rejected": -6.248152256011963, + "step": 5443 + }, + { + "epoch": 0.85, + "learning_rate": 1.0154484153742415e-05, + "logits/chosen": -2.2324066162109375, + "logits/rejected": -3.02726149559021, + "logps/chosen": -88.82334899902344, + "logps/rejected": -268.125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5009877681732178, + "rewards/margins": 6.408620834350586, + "rewards/rejected": -8.909608840942383, + "step": 5444 + }, + { + "epoch": 0.85, + "learning_rate": 1.0153750713211266e-05, + "logits/chosen": -1.5523301362991333, + "logits/rejected": -2.6740288734436035, + "logps/chosen": -139.59176635742188, + "logps/rejected": -304.4101867675781, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2189056873321533, + "rewards/margins": 8.207159042358398, + "rewards/rejected": -11.426065444946289, + "step": 5445 + }, + { + "epoch": 0.85, + "learning_rate": 1.0153017272680118e-05, + "logits/chosen": -2.3397750854492188, + "logits/rejected": -2.961967706680298, + "logps/chosen": -93.98765563964844, + "logps/rejected": -359.4562072753906, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2599014043807983, + "rewards/margins": 9.57677936553955, + "rewards/rejected": -10.83668041229248, + "step": 5446 + }, + { + "epoch": 0.85, + "learning_rate": 1.0152283832148972e-05, + "logits/chosen": -2.0822384357452393, + "logits/rejected": -2.8719756603240967, + "logps/chosen": -100.33726501464844, + "logps/rejected": -275.8886413574219, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6578354835510254, + "rewards/margins": 4.7128753662109375, + "rewards/rejected": -7.370710372924805, + "step": 5447 + }, + { + "epoch": 0.85, + "learning_rate": 1.0151550391617824e-05, + "logits/chosen": -2.4217453002929688, + "logits/rejected": -3.0243430137634277, + "logps/chosen": -37.783180236816406, + "logps/rejected": -294.72528076171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4253873825073242, + "rewards/margins": 9.682966232299805, + "rewards/rejected": -11.108352661132812, + "step": 5448 + }, + { + "epoch": 0.85, + "learning_rate": 1.0150816951086676e-05, + "logits/chosen": -3.1620676517486572, + "logits/rejected": -2.789072275161743, + "logps/chosen": -201.90530395507812, + "logps/rejected": -121.49322509765625, + "loss": 3.0946, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.737765789031982, + "rewards/margins": -2.9956281185150146, + "rewards/rejected": -3.7421374320983887, + "step": 5449 + }, + { + "epoch": 0.85, + "learning_rate": 1.0150083510555528e-05, + "logits/chosen": -2.8276281356811523, + "logits/rejected": -2.907259941101074, + "logps/chosen": -149.16673278808594, + "logps/rejected": -230.41696166992188, + "loss": 1.6522, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.595526695251465, + "rewards/margins": 2.5431666374206543, + "rewards/rejected": -8.138692855834961, + "step": 5450 + }, + { + "epoch": 0.85, + "learning_rate": 1.014935007002438e-05, + "logits/chosen": -2.969905376434326, + "logits/rejected": -2.7303500175476074, + "logps/chosen": -396.6571044921875, + "logps/rejected": -361.3084716796875, + "loss": 0.0315, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4464402198791504, + "rewards/margins": 6.694413661956787, + "rewards/rejected": -9.140853881835938, + "step": 5451 + }, + { + "epoch": 0.85, + "learning_rate": 1.0148616629493231e-05, + "logits/chosen": -2.4588820934295654, + "logits/rejected": -3.0460731983184814, + "logps/chosen": -76.00932312011719, + "logps/rejected": -202.70248413085938, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9984562397003174, + "rewards/margins": 6.216527462005615, + "rewards/rejected": -9.214983940124512, + "step": 5452 + }, + { + "epoch": 0.85, + "learning_rate": 1.0147883188962083e-05, + "logits/chosen": -2.9870474338531494, + "logits/rejected": -1.8349429368972778, + "logps/chosen": -372.4406433105469, + "logps/rejected": -298.0274963378906, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5147652626037598, + "rewards/margins": 4.986942291259766, + "rewards/rejected": -7.501707553863525, + "step": 5453 + }, + { + "epoch": 0.85, + "learning_rate": 1.0147149748430935e-05, + "logits/chosen": -2.840191125869751, + "logits/rejected": -2.1447436809539795, + "logps/chosen": -241.32208251953125, + "logps/rejected": -179.90231323242188, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.131633758544922, + "rewards/margins": 4.472064018249512, + "rewards/rejected": -6.603697776794434, + "step": 5454 + }, + { + "epoch": 0.85, + "learning_rate": 1.0146416307899787e-05, + "logits/chosen": -2.9453721046447754, + "logits/rejected": -2.8917341232299805, + "logps/chosen": -196.37640380859375, + "logps/rejected": -220.0047607421875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.003219127655029, + "rewards/margins": 6.158463478088379, + "rewards/rejected": -10.161683082580566, + "step": 5455 + }, + { + "epoch": 0.85, + "learning_rate": 1.014568286736864e-05, + "logits/chosen": -2.9418578147888184, + "logits/rejected": -2.735872983932495, + "logps/chosen": -491.9341735839844, + "logps/rejected": -574.5565185546875, + "loss": 4.2793, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.682950496673584, + "rewards/margins": -0.38117074966430664, + "rewards/rejected": -5.301779747009277, + "step": 5456 + }, + { + "epoch": 0.85, + "learning_rate": 1.0144949426837492e-05, + "logits/chosen": -2.9831418991088867, + "logits/rejected": -3.1419334411621094, + "logps/chosen": -212.6610107421875, + "logps/rejected": -318.6158142089844, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3044562339782715, + "rewards/margins": 4.989077568054199, + "rewards/rejected": -8.293534278869629, + "step": 5457 + }, + { + "epoch": 0.85, + "learning_rate": 1.0144215986306344e-05, + "logits/chosen": -2.1443262100219727, + "logits/rejected": -2.7033205032348633, + "logps/chosen": -196.52163696289062, + "logps/rejected": -287.66949462890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.144169807434082, + "rewards/margins": 9.699687004089355, + "rewards/rejected": -11.843856811523438, + "step": 5458 + }, + { + "epoch": 0.85, + "learning_rate": 1.0143482545775196e-05, + "logits/chosen": -2.369593620300293, + "logits/rejected": -2.636821746826172, + "logps/chosen": -260.8063049316406, + "logps/rejected": -456.1903076171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.085997581481934, + "rewards/margins": 8.680131912231445, + "rewards/rejected": -12.766129493713379, + "step": 5459 + }, + { + "epoch": 0.85, + "learning_rate": 1.0142749105244048e-05, + "logits/chosen": -2.383735179901123, + "logits/rejected": -1.8925124406814575, + "logps/chosen": -304.1471252441406, + "logps/rejected": -285.6070251464844, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7969391345977783, + "rewards/margins": 7.797656059265137, + "rewards/rejected": -11.594594955444336, + "step": 5460 + }, + { + "epoch": 0.85, + "learning_rate": 1.01420156647129e-05, + "logits/chosen": -2.8533244132995605, + "logits/rejected": -2.8105721473693848, + "logps/chosen": -243.1376953125, + "logps/rejected": -414.3512268066406, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6497802734375, + "rewards/margins": 9.922635078430176, + "rewards/rejected": -12.572415351867676, + "step": 5461 + }, + { + "epoch": 0.85, + "learning_rate": 1.0141282224181752e-05, + "logits/chosen": -2.036425828933716, + "logits/rejected": -2.9358255863189697, + "logps/chosen": -314.0628967285156, + "logps/rejected": -348.25390625, + "loss": 3.8164, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.390090942382812, + "rewards/margins": -0.2928028106689453, + "rewards/rejected": -8.097288131713867, + "step": 5462 + }, + { + "epoch": 0.85, + "learning_rate": 1.0140548783650604e-05, + "logits/chosen": -1.7018377780914307, + "logits/rejected": -2.8849568367004395, + "logps/chosen": -194.21124267578125, + "logps/rejected": -352.86004638671875, + "loss": 0.1397, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.708066940307617, + "rewards/margins": 2.770519256591797, + "rewards/rejected": -7.478586196899414, + "step": 5463 + }, + { + "epoch": 0.85, + "learning_rate": 1.0139815343119456e-05, + "logits/chosen": -2.5080671310424805, + "logits/rejected": -2.73240065574646, + "logps/chosen": -90.36255645751953, + "logps/rejected": -233.67926025390625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.373288631439209, + "rewards/margins": 8.922090530395508, + "rewards/rejected": -13.295378684997559, + "step": 5464 + }, + { + "epoch": 0.85, + "learning_rate": 1.0139081902588309e-05, + "logits/chosen": -2.0898303985595703, + "logits/rejected": -2.825871706008911, + "logps/chosen": -125.08289337158203, + "logps/rejected": -280.2114562988281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.20086669921875, + "rewards/margins": 8.383570671081543, + "rewards/rejected": -11.58443832397461, + "step": 5465 + }, + { + "epoch": 0.85, + "learning_rate": 1.0138348462057161e-05, + "logits/chosen": -3.0216143131256104, + "logits/rejected": -2.950340747833252, + "logps/chosen": -419.58160400390625, + "logps/rejected": -551.02294921875, + "loss": 2.3955, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.25780725479126, + "rewards/margins": 1.6089658737182617, + "rewards/rejected": -6.866772651672363, + "step": 5466 + }, + { + "epoch": 0.85, + "learning_rate": 1.0137615021526013e-05, + "logits/chosen": -1.7038158178329468, + "logits/rejected": -2.9108967781066895, + "logps/chosen": -129.45272827148438, + "logps/rejected": -615.2516479492188, + "loss": 5.5627, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.6342453956604, + "rewards/margins": -1.2398223876953125, + "rewards/rejected": -4.394423007965088, + "step": 5467 + }, + { + "epoch": 0.85, + "learning_rate": 1.0136881580994865e-05, + "logits/chosen": -2.6874961853027344, + "logits/rejected": -3.047813892364502, + "logps/chosen": -143.75267028808594, + "logps/rejected": -371.04443359375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.183131217956543, + "rewards/margins": 7.0520830154418945, + "rewards/rejected": -12.235214233398438, + "step": 5468 + }, + { + "epoch": 0.85, + "learning_rate": 1.0136148140463717e-05, + "logits/chosen": -1.5182647705078125, + "logits/rejected": -3.1390223503112793, + "logps/chosen": -567.6727294921875, + "logps/rejected": -727.6184692382812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.691204786300659, + "rewards/margins": 9.276351928710938, + "rewards/rejected": -11.967556953430176, + "step": 5469 + }, + { + "epoch": 0.85, + "learning_rate": 1.0135414699932568e-05, + "logits/chosen": -2.147123098373413, + "logits/rejected": -2.794930934906006, + "logps/chosen": -430.8580627441406, + "logps/rejected": -743.0142822265625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3388473987579346, + "rewards/margins": 8.815254211425781, + "rewards/rejected": -11.154102325439453, + "step": 5470 + }, + { + "epoch": 0.85, + "learning_rate": 1.013468125940142e-05, + "logits/chosen": -1.7094155550003052, + "logits/rejected": -2.583660125732422, + "logps/chosen": -116.00271606445312, + "logps/rejected": -263.3117980957031, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.640408515930176, + "rewards/margins": 6.723457336425781, + "rewards/rejected": -10.363865852355957, + "step": 5471 + }, + { + "epoch": 0.85, + "learning_rate": 1.0133947818870272e-05, + "logits/chosen": -2.66230845451355, + "logits/rejected": -3.1484529972076416, + "logps/chosen": -301.62646484375, + "logps/rejected": -285.2934875488281, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.880035400390625, + "rewards/margins": 4.459012031555176, + "rewards/rejected": -8.3390474319458, + "step": 5472 + }, + { + "epoch": 0.85, + "learning_rate": 1.0133214378339124e-05, + "logits/chosen": -1.443532943725586, + "logits/rejected": -2.97444224357605, + "logps/chosen": -84.04161071777344, + "logps/rejected": -372.00714111328125, + "loss": 0.216, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.853984355926514, + "rewards/margins": 2.6758618354797363, + "rewards/rejected": -7.52984619140625, + "step": 5473 + }, + { + "epoch": 0.85, + "learning_rate": 1.0132480937807978e-05, + "logits/chosen": -1.2222477197647095, + "logits/rejected": -2.7000536918640137, + "logps/chosen": -148.0530548095703, + "logps/rejected": -324.91900634765625, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.552824020385742, + "rewards/margins": 6.593323707580566, + "rewards/rejected": -11.146147727966309, + "step": 5474 + }, + { + "epoch": 0.85, + "learning_rate": 1.013174749727683e-05, + "logits/chosen": -2.9022459983825684, + "logits/rejected": -3.0494234561920166, + "logps/chosen": -253.5355224609375, + "logps/rejected": -455.36279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7605301141738892, + "rewards/margins": 10.159364700317383, + "rewards/rejected": -10.91989517211914, + "step": 5475 + }, + { + "epoch": 0.85, + "learning_rate": 1.0131014056745681e-05, + "logits/chosen": -2.760681629180908, + "logits/rejected": -2.928532361984253, + "logps/chosen": -152.5782012939453, + "logps/rejected": -294.2484130859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.691516399383545, + "rewards/margins": 8.420499801635742, + "rewards/rejected": -12.112016677856445, + "step": 5476 + }, + { + "epoch": 0.85, + "learning_rate": 1.0130280616214533e-05, + "logits/chosen": -2.8269882202148438, + "logits/rejected": -1.8284027576446533, + "logps/chosen": -274.914794921875, + "logps/rejected": -316.8660888671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.625584363937378, + "rewards/margins": 8.044602394104004, + "rewards/rejected": -10.670186996459961, + "step": 5477 + }, + { + "epoch": 0.85, + "learning_rate": 1.0129547175683387e-05, + "logits/chosen": -2.796323299407959, + "logits/rejected": -3.0430967807769775, + "logps/chosen": -516.7481079101562, + "logps/rejected": -501.5675048828125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8105621337890625, + "rewards/margins": 5.458047866821289, + "rewards/rejected": -10.268610000610352, + "step": 5478 + }, + { + "epoch": 0.85, + "learning_rate": 1.0128813735152239e-05, + "logits/chosen": -2.845179796218872, + "logits/rejected": -1.8710352182388306, + "logps/chosen": -698.928466796875, + "logps/rejected": -448.665771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6455535888671875, + "rewards/margins": 11.574146270751953, + "rewards/rejected": -12.21969985961914, + "step": 5479 + }, + { + "epoch": 0.85, + "learning_rate": 1.012808029462109e-05, + "logits/chosen": -2.9028282165527344, + "logits/rejected": -3.034825086593628, + "logps/chosen": -140.20315551757812, + "logps/rejected": -271.36663818359375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1837856769561768, + "rewards/margins": 8.329974174499512, + "rewards/rejected": -11.51375961303711, + "step": 5480 + }, + { + "epoch": 0.85, + "learning_rate": 1.0127346854089943e-05, + "logits/chosen": -2.0398778915405273, + "logits/rejected": -2.9180455207824707, + "logps/chosen": -277.24627685546875, + "logps/rejected": -722.1358642578125, + "loss": 3.5363, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.390087127685547, + "rewards/margins": 1.0683507919311523, + "rewards/rejected": -9.4584379196167, + "step": 5481 + }, + { + "epoch": 0.85, + "learning_rate": 1.0126613413558794e-05, + "logits/chosen": -2.4148805141448975, + "logits/rejected": -2.717287302017212, + "logps/chosen": -234.4062957763672, + "logps/rejected": -350.4347839355469, + "loss": 0.1284, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.317806243896484, + "rewards/margins": 3.5352327823638916, + "rewards/rejected": -7.853038787841797, + "step": 5482 + }, + { + "epoch": 0.85, + "learning_rate": 1.0125879973027648e-05, + "logits/chosen": -1.9447035789489746, + "logits/rejected": -2.623019218444824, + "logps/chosen": -185.77493286132812, + "logps/rejected": -453.3811950683594, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.539145469665527, + "rewards/margins": 7.904272079467773, + "rewards/rejected": -12.4434175491333, + "step": 5483 + }, + { + "epoch": 0.85, + "learning_rate": 1.01251465324965e-05, + "logits/chosen": -2.9823994636535645, + "logits/rejected": -2.5848588943481445, + "logps/chosen": -686.6436157226562, + "logps/rejected": -630.503173828125, + "loss": 0.1017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.821920871734619, + "rewards/margins": 5.353597640991211, + "rewards/rejected": -10.175518989562988, + "step": 5484 + }, + { + "epoch": 0.85, + "learning_rate": 1.0124413091965352e-05, + "logits/chosen": -1.5868715047836304, + "logits/rejected": -2.8850479125976562, + "logps/chosen": -188.3704376220703, + "logps/rejected": -412.19122314453125, + "loss": 3.3524, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.6281538009643555, + "rewards/margins": 1.586327075958252, + "rewards/rejected": -8.214481353759766, + "step": 5485 + }, + { + "epoch": 0.85, + "learning_rate": 1.0123679651434204e-05, + "logits/chosen": -2.942469596862793, + "logits/rejected": -3.030764102935791, + "logps/chosen": -432.76806640625, + "logps/rejected": -258.7142333984375, + "loss": 5.3419, + "rewards/accuracies": 0.0, + "rewards/chosen": -10.911518096923828, + "rewards/margins": -5.319606304168701, + "rewards/rejected": -5.591911315917969, + "step": 5486 + }, + { + "epoch": 0.85, + "learning_rate": 1.0122946210903056e-05, + "logits/chosen": -1.620290994644165, + "logits/rejected": -2.9878835678100586, + "logps/chosen": -69.23603057861328, + "logps/rejected": -502.4337463378906, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.742300033569336, + "rewards/margins": 8.866873741149902, + "rewards/rejected": -11.609173774719238, + "step": 5487 + }, + { + "epoch": 0.85, + "learning_rate": 1.0122212770371907e-05, + "logits/chosen": -3.011472463607788, + "logits/rejected": -2.7037065029144287, + "logps/chosen": -302.0940246582031, + "logps/rejected": -388.5205993652344, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.047314643859863, + "rewards/margins": 9.301016807556152, + "rewards/rejected": -13.348331451416016, + "step": 5488 + }, + { + "epoch": 0.85, + "learning_rate": 1.012147932984076e-05, + "logits/chosen": -3.031055450439453, + "logits/rejected": -2.50424861907959, + "logps/chosen": -314.1684265136719, + "logps/rejected": -132.8931121826172, + "loss": 5.646, + "rewards/accuracies": 0.0, + "rewards/chosen": -11.479050636291504, + "rewards/margins": -5.642430305480957, + "rewards/rejected": -5.836620330810547, + "step": 5489 + }, + { + "epoch": 0.85, + "learning_rate": 1.0120745889309611e-05, + "logits/chosen": -1.6150263547897339, + "logits/rejected": -1.967200517654419, + "logps/chosen": -215.0019073486328, + "logps/rejected": -330.4983825683594, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.356907844543457, + "rewards/margins": 5.227839469909668, + "rewards/rejected": -11.584747314453125, + "step": 5490 + }, + { + "epoch": 0.85, + "learning_rate": 1.0120012448778465e-05, + "logits/chosen": -2.091303586959839, + "logits/rejected": -2.8398890495300293, + "logps/chosen": -111.17106628417969, + "logps/rejected": -248.542236328125, + "loss": 3.1958, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.379788398742676, + "rewards/margins": 0.8485157489776611, + "rewards/rejected": -8.228303909301758, + "step": 5491 + }, + { + "epoch": 0.85, + "learning_rate": 1.0119279008247317e-05, + "logits/chosen": -0.8233060836791992, + "logits/rejected": -2.773733615875244, + "logps/chosen": -52.944366455078125, + "logps/rejected": -275.317138671875, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.277630805969238, + "rewards/margins": 6.2068634033203125, + "rewards/rejected": -10.48449420928955, + "step": 5492 + }, + { + "epoch": 0.85, + "learning_rate": 1.0118545567716168e-05, + "logits/chosen": -3.047748327255249, + "logits/rejected": -2.284074544906616, + "logps/chosen": -195.2470245361328, + "logps/rejected": -236.8660430908203, + "loss": 0.0261, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.098845958709717, + "rewards/margins": 7.474880218505859, + "rewards/rejected": -10.573726654052734, + "step": 5493 + }, + { + "epoch": 0.85, + "learning_rate": 1.011781212718502e-05, + "logits/chosen": -1.0461671352386475, + "logits/rejected": -2.429511785507202, + "logps/chosen": -87.08462524414062, + "logps/rejected": -291.4705810546875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.997859239578247, + "rewards/margins": 6.473921775817871, + "rewards/rejected": -10.471780776977539, + "step": 5494 + }, + { + "epoch": 0.85, + "learning_rate": 1.0117078686653872e-05, + "logits/chosen": -2.170271158218384, + "logits/rejected": -2.4414682388305664, + "logps/chosen": -125.47938537597656, + "logps/rejected": -417.71429443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6749637126922607, + "rewards/margins": 12.960809707641602, + "rewards/rejected": -15.635772705078125, + "step": 5495 + }, + { + "epoch": 0.85, + "learning_rate": 1.0116345246122724e-05, + "logits/chosen": -3.0502638816833496, + "logits/rejected": -2.688473701477051, + "logps/chosen": -285.20458984375, + "logps/rejected": -235.43753051757812, + "loss": 0.1023, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.049262285232544, + "rewards/margins": 5.07008171081543, + "rewards/rejected": -8.119344711303711, + "step": 5496 + }, + { + "epoch": 0.85, + "learning_rate": 1.0115611805591576e-05, + "logits/chosen": -1.0962806940078735, + "logits/rejected": -2.7874414920806885, + "logps/chosen": -89.58407592773438, + "logps/rejected": -353.7985534667969, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6824491024017334, + "rewards/margins": 7.426337718963623, + "rewards/rejected": -11.108786582946777, + "step": 5497 + }, + { + "epoch": 0.86, + "learning_rate": 1.0114878365060428e-05, + "logits/chosen": -2.3476481437683105, + "logits/rejected": -2.8766157627105713, + "logps/chosen": -61.48236846923828, + "logps/rejected": -257.5076904296875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.370988368988037, + "rewards/margins": 7.4709296226501465, + "rewards/rejected": -11.841917991638184, + "step": 5498 + }, + { + "epoch": 0.86, + "learning_rate": 1.011414492452928e-05, + "logits/chosen": -1.7957403659820557, + "logits/rejected": -2.7669126987457275, + "logps/chosen": -107.03239440917969, + "logps/rejected": -280.24810791015625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4402143955230713, + "rewards/margins": 6.755127906799316, + "rewards/rejected": -10.195342063903809, + "step": 5499 + }, + { + "epoch": 0.86, + "learning_rate": 1.0113411483998133e-05, + "logits/chosen": -2.9771337509155273, + "logits/rejected": -3.0782229900360107, + "logps/chosen": -139.33741760253906, + "logps/rejected": -230.71278381347656, + "loss": 0.0423, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9704341888427734, + "rewards/margins": 4.733404159545898, + "rewards/rejected": -8.703838348388672, + "step": 5500 + }, + { + "epoch": 0.86, + "learning_rate": 1.0112678043466985e-05, + "logits/chosen": -2.963390827178955, + "logits/rejected": -2.496716022491455, + "logps/chosen": -630.9777221679688, + "logps/rejected": -578.598876953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.767906188964844, + "rewards/margins": 8.52370834350586, + "rewards/rejected": -14.291614532470703, + "step": 5501 + }, + { + "epoch": 0.86, + "learning_rate": 1.0111944602935837e-05, + "logits/chosen": -3.1608798503875732, + "logits/rejected": -1.9859832525253296, + "logps/chosen": -275.1866149902344, + "logps/rejected": -160.81668090820312, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.181579351425171, + "rewards/margins": 5.3137617111206055, + "rewards/rejected": -6.4953413009643555, + "step": 5502 + }, + { + "epoch": 0.86, + "learning_rate": 1.0111211162404689e-05, + "logits/chosen": -1.5466631650924683, + "logits/rejected": -3.0456035137176514, + "logps/chosen": -190.58978271484375, + "logps/rejected": -350.5672607421875, + "loss": 0.123, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.361967086791992, + "rewards/margins": 3.269615888595581, + "rewards/rejected": -9.631582260131836, + "step": 5503 + }, + { + "epoch": 0.86, + "learning_rate": 1.011047772187354e-05, + "logits/chosen": -1.9200866222381592, + "logits/rejected": -1.8499343395233154, + "logps/chosen": -124.62474822998047, + "logps/rejected": -217.53524780273438, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.158520698547363, + "rewards/margins": 5.324262619018555, + "rewards/rejected": -9.482782363891602, + "step": 5504 + }, + { + "epoch": 0.86, + "learning_rate": 1.0109744281342393e-05, + "logits/chosen": -2.735581398010254, + "logits/rejected": -3.0265069007873535, + "logps/chosen": -165.9155731201172, + "logps/rejected": -113.26953125, + "loss": 5.5055, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.53209114074707, + "rewards/margins": -2.0355043411254883, + "rewards/rejected": -6.496587753295898, + "step": 5505 + }, + { + "epoch": 0.86, + "learning_rate": 1.0109010840811245e-05, + "logits/chosen": -2.6528351306915283, + "logits/rejected": -2.955822706222534, + "logps/chosen": -80.17061614990234, + "logps/rejected": -170.54476928710938, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4585137367248535, + "rewards/margins": 6.9765825271606445, + "rewards/rejected": -10.435096740722656, + "step": 5506 + }, + { + "epoch": 0.86, + "learning_rate": 1.0108277400280096e-05, + "logits/chosen": -3.1102702617645264, + "logits/rejected": -2.5855369567871094, + "logps/chosen": -376.99481201171875, + "logps/rejected": -113.25215148925781, + "loss": 6.897, + "rewards/accuracies": 0.0, + "rewards/chosen": -10.282381057739258, + "rewards/margins": -6.895861625671387, + "rewards/rejected": -3.3865184783935547, + "step": 5507 + }, + { + "epoch": 0.86, + "learning_rate": 1.0107543959748948e-05, + "logits/chosen": -1.572029948234558, + "logits/rejected": -3.1205291748046875, + "logps/chosen": -78.10865020751953, + "logps/rejected": -658.410888671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9147024154663086, + "rewards/margins": 9.393694877624512, + "rewards/rejected": -12.30839729309082, + "step": 5508 + }, + { + "epoch": 0.86, + "learning_rate": 1.0106810519217802e-05, + "logits/chosen": -1.3486517667770386, + "logits/rejected": -2.5208401679992676, + "logps/chosen": -61.82645797729492, + "logps/rejected": -371.546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.927173137664795, + "rewards/margins": 9.329619407653809, + "rewards/rejected": -12.256792068481445, + "step": 5509 + }, + { + "epoch": 0.86, + "learning_rate": 1.0106077078686654e-05, + "logits/chosen": -2.9277801513671875, + "logits/rejected": -2.896463394165039, + "logps/chosen": -307.58642578125, + "logps/rejected": -214.71099853515625, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3479745388031006, + "rewards/margins": 6.29932975769043, + "rewards/rejected": -9.64730453491211, + "step": 5510 + }, + { + "epoch": 0.86, + "learning_rate": 1.0105343638155506e-05, + "logits/chosen": -2.9507193565368652, + "logits/rejected": -3.01739239692688, + "logps/chosen": -233.8526611328125, + "logps/rejected": -529.4649047851562, + "loss": 3.0609, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.40132474899292, + "rewards/margins": 3.1551990509033203, + "rewards/rejected": -10.556524276733398, + "step": 5511 + }, + { + "epoch": 0.86, + "learning_rate": 1.010461019762436e-05, + "logits/chosen": -2.677222490310669, + "logits/rejected": -3.0314781665802, + "logps/chosen": -78.47505187988281, + "logps/rejected": -209.0680694580078, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4661917686462402, + "rewards/margins": 7.079446792602539, + "rewards/rejected": -10.545639038085938, + "step": 5512 + }, + { + "epoch": 0.86, + "learning_rate": 1.0103876757093211e-05, + "logits/chosen": -2.68371319770813, + "logits/rejected": -2.8625659942626953, + "logps/chosen": -150.16104125976562, + "logps/rejected": -151.94235229492188, + "loss": 1.4885, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.269987106323242, + "rewards/margins": 1.9506494998931885, + "rewards/rejected": -8.220636367797852, + "step": 5513 + }, + { + "epoch": 0.86, + "learning_rate": 1.0103143316562063e-05, + "logits/chosen": -1.8079668283462524, + "logits/rejected": -2.990708112716675, + "logps/chosen": -127.22854614257812, + "logps/rejected": -357.280517578125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.027799129486084, + "rewards/margins": 6.449612617492676, + "rewards/rejected": -10.477411270141602, + "step": 5514 + }, + { + "epoch": 0.86, + "learning_rate": 1.0102409876030915e-05, + "logits/chosen": -2.799145460128784, + "logits/rejected": -2.976290225982666, + "logps/chosen": -119.64840698242188, + "logps/rejected": -222.35845947265625, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.684075832366943, + "rewards/margins": 5.144495010375977, + "rewards/rejected": -9.828571319580078, + "step": 5515 + }, + { + "epoch": 0.86, + "learning_rate": 1.0101676435499767e-05, + "logits/chosen": -1.3916207551956177, + "logits/rejected": -2.8111634254455566, + "logps/chosen": -90.0994644165039, + "logps/rejected": -267.05328369140625, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.459244251251221, + "rewards/margins": 5.907670021057129, + "rewards/rejected": -11.366914749145508, + "step": 5516 + }, + { + "epoch": 0.86, + "learning_rate": 1.0100942994968619e-05, + "logits/chosen": -2.8618075847625732, + "logits/rejected": -2.0844929218292236, + "logps/chosen": -377.45721435546875, + "logps/rejected": -374.085693359375, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.973316192626953, + "rewards/margins": 6.071916580200195, + "rewards/rejected": -11.045232772827148, + "step": 5517 + }, + { + "epoch": 0.86, + "learning_rate": 1.0100209554437472e-05, + "logits/chosen": -2.803690195083618, + "logits/rejected": -3.0301783084869385, + "logps/chosen": -369.6436767578125, + "logps/rejected": -487.269287109375, + "loss": 3.7936, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.839409351348877, + "rewards/margins": -0.4000980854034424, + "rewards/rejected": -6.4393110275268555, + "step": 5518 + }, + { + "epoch": 0.86, + "learning_rate": 1.0099476113906324e-05, + "logits/chosen": -2.994943618774414, + "logits/rejected": -3.1083340644836426, + "logps/chosen": -246.7552032470703, + "logps/rejected": -296.4044189453125, + "loss": 0.0598, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.522134780883789, + "rewards/margins": 3.131021022796631, + "rewards/rejected": -8.653155326843262, + "step": 5519 + }, + { + "epoch": 0.86, + "learning_rate": 1.0098742673375176e-05, + "logits/chosen": -1.802382469177246, + "logits/rejected": -2.9323153495788574, + "logps/chosen": -220.11376953125, + "logps/rejected": -370.4207763671875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7353105545043945, + "rewards/margins": 6.820147514343262, + "rewards/rejected": -11.555458068847656, + "step": 5520 + }, + { + "epoch": 0.86, + "learning_rate": 1.0098009232844028e-05, + "logits/chosen": -2.858522653579712, + "logits/rejected": -3.104280948638916, + "logps/chosen": -194.71298217773438, + "logps/rejected": -355.84832763671875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1307711601257324, + "rewards/margins": 7.8465070724487305, + "rewards/rejected": -10.977277755737305, + "step": 5521 + }, + { + "epoch": 0.86, + "learning_rate": 1.009727579231288e-05, + "logits/chosen": -1.9764494895935059, + "logits/rejected": -2.3505518436431885, + "logps/chosen": -126.29367065429688, + "logps/rejected": -332.48583984375, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.367837429046631, + "rewards/margins": 7.218161106109619, + "rewards/rejected": -10.58599853515625, + "step": 5522 + }, + { + "epoch": 0.86, + "learning_rate": 1.0096542351781732e-05, + "logits/chosen": -2.3557612895965576, + "logits/rejected": -2.8766565322875977, + "logps/chosen": -110.57068634033203, + "logps/rejected": -286.33001708984375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.863637924194336, + "rewards/margins": 7.76253604888916, + "rewards/rejected": -12.626174926757812, + "step": 5523 + }, + { + "epoch": 0.86, + "learning_rate": 1.0095808911250583e-05, + "logits/chosen": -2.8083431720733643, + "logits/rejected": -2.4996721744537354, + "logps/chosen": -369.8391418457031, + "logps/rejected": -503.2645263671875, + "loss": 0.0434, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.318803310394287, + "rewards/margins": 6.031562805175781, + "rewards/rejected": -10.350366592407227, + "step": 5524 + }, + { + "epoch": 0.86, + "learning_rate": 1.0095075470719435e-05, + "logits/chosen": -3.0906460285186768, + "logits/rejected": -2.772233009338379, + "logps/chosen": -539.9534912109375, + "logps/rejected": -354.9528503417969, + "loss": 0.5486, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.524414300918579, + "rewards/margins": 5.283353328704834, + "rewards/rejected": -8.807767868041992, + "step": 5525 + }, + { + "epoch": 0.86, + "learning_rate": 1.0094342030188287e-05, + "logits/chosen": -3.0512449741363525, + "logits/rejected": -2.9610486030578613, + "logps/chosen": -87.22472381591797, + "logps/rejected": -289.5055236816406, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8946712017059326, + "rewards/margins": 6.033073425292969, + "rewards/rejected": -8.92774486541748, + "step": 5526 + }, + { + "epoch": 0.86, + "learning_rate": 1.009360858965714e-05, + "logits/chosen": -1.6675986051559448, + "logits/rejected": -3.0674691200256348, + "logps/chosen": -60.76698303222656, + "logps/rejected": -306.36138916015625, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.159786701202393, + "rewards/margins": 4.537445068359375, + "rewards/rejected": -8.697232246398926, + "step": 5527 + }, + { + "epoch": 0.86, + "learning_rate": 1.0092875149125993e-05, + "logits/chosen": -2.6951844692230225, + "logits/rejected": -3.074760913848877, + "logps/chosen": -820.387939453125, + "logps/rejected": -733.1532592773438, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.044572591781616, + "rewards/margins": 8.523043632507324, + "rewards/rejected": -11.56761646270752, + "step": 5528 + }, + { + "epoch": 0.86, + "learning_rate": 1.0092141708594845e-05, + "logits/chosen": -2.9098188877105713, + "logits/rejected": -2.1803646087646484, + "logps/chosen": -421.8434753417969, + "logps/rejected": -393.67010498046875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8048171997070312, + "rewards/margins": 7.710309028625488, + "rewards/rejected": -11.51512622833252, + "step": 5529 + }, + { + "epoch": 0.86, + "learning_rate": 1.0091408268063696e-05, + "logits/chosen": -2.8891732692718506, + "logits/rejected": -2.8300094604492188, + "logps/chosen": -662.6982421875, + "logps/rejected": -560.8297119140625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2314858436584473, + "rewards/margins": 7.071314334869385, + "rewards/rejected": -10.302800178527832, + "step": 5530 + }, + { + "epoch": 0.86, + "learning_rate": 1.0090674827532548e-05, + "logits/chosen": -1.9591951370239258, + "logits/rejected": -3.141927719116211, + "logps/chosen": -173.82032775878906, + "logps/rejected": -510.01043701171875, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.213369846343994, + "rewards/margins": 7.158205986022949, + "rewards/rejected": -10.371576309204102, + "step": 5531 + }, + { + "epoch": 0.86, + "learning_rate": 1.00899413870014e-05, + "logits/chosen": -1.873950719833374, + "logits/rejected": -2.7946627140045166, + "logps/chosen": -109.37583923339844, + "logps/rejected": -305.3011474609375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9952282905578613, + "rewards/margins": 6.602728843688965, + "rewards/rejected": -10.597957611083984, + "step": 5532 + }, + { + "epoch": 0.86, + "learning_rate": 1.0089207946470252e-05, + "logits/chosen": -2.93619966506958, + "logits/rejected": -3.016488790512085, + "logps/chosen": -164.3458251953125, + "logps/rejected": -281.3435974121094, + "loss": 3.4223, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.296714782714844, + "rewards/margins": 2.466069221496582, + "rewards/rejected": -9.762784004211426, + "step": 5533 + }, + { + "epoch": 0.86, + "learning_rate": 1.0088474505939104e-05, + "logits/chosen": -2.934232711791992, + "logits/rejected": -1.3607885837554932, + "logps/chosen": -400.328125, + "logps/rejected": -88.38565063476562, + "loss": 4.8186, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.508549690246582, + "rewards/margins": -4.809727191925049, + "rewards/rejected": -4.698822498321533, + "step": 5534 + }, + { + "epoch": 0.86, + "learning_rate": 1.0087741065407956e-05, + "logits/chosen": -2.386291265487671, + "logits/rejected": -2.8564867973327637, + "logps/chosen": -459.26373291015625, + "logps/rejected": -371.9956970214844, + "loss": 3.99, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.214841842651367, + "rewards/margins": 0.8452911376953125, + "rewards/rejected": -8.06013298034668, + "step": 5535 + }, + { + "epoch": 0.86, + "learning_rate": 1.008700762487681e-05, + "logits/chosen": -2.869081497192383, + "logits/rejected": -3.0266048908233643, + "logps/chosen": -171.72584533691406, + "logps/rejected": -349.3990173339844, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.993469715118408, + "rewards/margins": 7.365547180175781, + "rewards/rejected": -12.359016418457031, + "step": 5536 + }, + { + "epoch": 0.86, + "learning_rate": 1.0086274184345661e-05, + "logits/chosen": -3.164532423019409, + "logits/rejected": -1.8623234033584595, + "logps/chosen": -421.72686767578125, + "logps/rejected": -376.3109130859375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.565432548522949, + "rewards/margins": 5.915033340454102, + "rewards/rejected": -11.48046588897705, + "step": 5537 + }, + { + "epoch": 0.86, + "learning_rate": 1.0085540743814513e-05, + "logits/chosen": -2.9007723331451416, + "logits/rejected": -2.0456340312957764, + "logps/chosen": -647.262451171875, + "logps/rejected": -561.2933349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.322084426879883, + "rewards/margins": 10.886857986450195, + "rewards/rejected": -15.208942413330078, + "step": 5538 + }, + { + "epoch": 0.86, + "learning_rate": 1.0084807303283365e-05, + "logits/chosen": -2.3198885917663574, + "logits/rejected": -2.7210004329681396, + "logps/chosen": -237.25660705566406, + "logps/rejected": -373.4294128417969, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2839033603668213, + "rewards/margins": 8.573396682739258, + "rewards/rejected": -11.8572998046875, + "step": 5539 + }, + { + "epoch": 0.86, + "learning_rate": 1.0084073862752217e-05, + "logits/chosen": -2.7504005432128906, + "logits/rejected": -2.617008686065674, + "logps/chosen": -395.9205322265625, + "logps/rejected": -259.2929382324219, + "loss": 2.4574, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.121762275695801, + "rewards/margins": -2.0403852462768555, + "rewards/rejected": -5.081377029418945, + "step": 5540 + }, + { + "epoch": 0.86, + "learning_rate": 1.0083340422221069e-05, + "logits/chosen": -2.3831186294555664, + "logits/rejected": -2.230229377746582, + "logps/chosen": -148.89169311523438, + "logps/rejected": -266.41510009765625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.520140171051025, + "rewards/margins": 7.935990333557129, + "rewards/rejected": -12.456130981445312, + "step": 5541 + }, + { + "epoch": 0.86, + "learning_rate": 1.008260698168992e-05, + "logits/chosen": -2.9388222694396973, + "logits/rejected": -2.9367313385009766, + "logps/chosen": -92.80494689941406, + "logps/rejected": -140.28712463378906, + "loss": 0.0484, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9752330780029297, + "rewards/margins": 3.660597801208496, + "rewards/rejected": -7.635830879211426, + "step": 5542 + }, + { + "epoch": 0.86, + "learning_rate": 1.0081873541158773e-05, + "logits/chosen": -2.952958106994629, + "logits/rejected": -2.3013603687286377, + "logps/chosen": -463.3581237792969, + "logps/rejected": -328.4591369628906, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2694411277770996, + "rewards/margins": 6.919022560119629, + "rewards/rejected": -9.188464164733887, + "step": 5543 + }, + { + "epoch": 0.86, + "learning_rate": 1.0081140100627626e-05, + "logits/chosen": -2.4732813835144043, + "logits/rejected": -3.0322906970977783, + "logps/chosen": -166.0314178466797, + "logps/rejected": -304.42779541015625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.803286075592041, + "rewards/margins": 6.775636672973633, + "rewards/rejected": -10.578922271728516, + "step": 5544 + }, + { + "epoch": 0.86, + "learning_rate": 1.0080406660096478e-05, + "logits/chosen": -3.1648285388946533, + "logits/rejected": -2.6559221744537354, + "logps/chosen": -253.25172424316406, + "logps/rejected": -290.97039794921875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6470179557800293, + "rewards/margins": 6.037015438079834, + "rewards/rejected": -8.684033393859863, + "step": 5545 + }, + { + "epoch": 0.86, + "learning_rate": 1.0079673219565332e-05, + "logits/chosen": -2.8653581142425537, + "logits/rejected": -3.079800844192505, + "logps/chosen": -320.3285217285156, + "logps/rejected": -337.4519958496094, + "loss": 4.7209, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.949628829956055, + "rewards/margins": -3.2179551124572754, + "rewards/rejected": -5.7316741943359375, + "step": 5546 + }, + { + "epoch": 0.86, + "learning_rate": 1.0078939779034183e-05, + "logits/chosen": -2.9751405715942383, + "logits/rejected": -2.7282321453094482, + "logps/chosen": -483.4808349609375, + "logps/rejected": -383.1026611328125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8435730934143066, + "rewards/margins": 9.274970054626465, + "rewards/rejected": -12.11854362487793, + "step": 5547 + }, + { + "epoch": 0.86, + "learning_rate": 1.0078206338503035e-05, + "logits/chosen": -2.86250638961792, + "logits/rejected": -3.110487461090088, + "logps/chosen": -133.33981323242188, + "logps/rejected": -267.9609375, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.707033157348633, + "rewards/margins": 5.455239295959473, + "rewards/rejected": -9.162273406982422, + "step": 5548 + }, + { + "epoch": 0.86, + "learning_rate": 1.0077472897971887e-05, + "logits/chosen": -1.4992716312408447, + "logits/rejected": -1.9818744659423828, + "logps/chosen": -137.6588592529297, + "logps/rejected": -364.9356994628906, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.035003662109375, + "rewards/margins": 6.801435947418213, + "rewards/rejected": -11.83643913269043, + "step": 5549 + }, + { + "epoch": 0.86, + "learning_rate": 1.0076739457440739e-05, + "logits/chosen": -3.0770480632781982, + "logits/rejected": -2.005127191543579, + "logps/chosen": -480.98150634765625, + "logps/rejected": -391.67828369140625, + "loss": 2.9005, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.618254661560059, + "rewards/margins": 3.2363624572753906, + "rewards/rejected": -10.85461711883545, + "step": 5550 + }, + { + "epoch": 0.86, + "learning_rate": 1.0076006016909591e-05, + "logits/chosen": -2.852548360824585, + "logits/rejected": -2.7709147930145264, + "logps/chosen": -263.03338623046875, + "logps/rejected": -212.0115203857422, + "loss": 5.0471, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.538474082946777, + "rewards/margins": -5.033463478088379, + "rewards/rejected": -3.5050110816955566, + "step": 5551 + }, + { + "epoch": 0.86, + "learning_rate": 1.0075272576378443e-05, + "logits/chosen": -2.8485796451568604, + "logits/rejected": -2.141136884689331, + "logps/chosen": -481.2533264160156, + "logps/rejected": -390.30303955078125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5205016136169434, + "rewards/margins": 6.293670654296875, + "rewards/rejected": -9.81417179107666, + "step": 5552 + }, + { + "epoch": 0.86, + "learning_rate": 1.0074539135847295e-05, + "logits/chosen": -2.6393706798553467, + "logits/rejected": -1.6225709915161133, + "logps/chosen": -238.2661590576172, + "logps/rejected": -105.05785369873047, + "loss": 3.158, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.295335292816162, + "rewards/margins": -0.34055352210998535, + "rewards/rejected": -4.954782009124756, + "step": 5553 + }, + { + "epoch": 0.86, + "learning_rate": 1.0073805695316148e-05, + "logits/chosen": -1.9153103828430176, + "logits/rejected": -2.8209240436553955, + "logps/chosen": -68.486328125, + "logps/rejected": -447.0724182128906, + "loss": 0.0216, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.691194772720337, + "rewards/margins": 6.304952621459961, + "rewards/rejected": -9.996147155761719, + "step": 5554 + }, + { + "epoch": 0.86, + "learning_rate": 1.0073072254785e-05, + "logits/chosen": -2.7380526065826416, + "logits/rejected": -3.098175525665283, + "logps/chosen": -97.60457611083984, + "logps/rejected": -213.25657653808594, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.604470729827881, + "rewards/margins": 5.362194061279297, + "rewards/rejected": -8.966665267944336, + "step": 5555 + }, + { + "epoch": 0.86, + "learning_rate": 1.0072338814253852e-05, + "logits/chosen": -1.568662166595459, + "logits/rejected": -3.044139862060547, + "logps/chosen": -288.3048400878906, + "logps/rejected": -578.1722412109375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8871657848358154, + "rewards/margins": 7.328242301940918, + "rewards/rejected": -11.215408325195312, + "step": 5556 + }, + { + "epoch": 0.86, + "learning_rate": 1.0071605373722704e-05, + "logits/chosen": -1.4807020425796509, + "logits/rejected": -3.008035182952881, + "logps/chosen": -211.76014709472656, + "logps/rejected": -415.1500549316406, + "loss": 0.3349, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.489182949066162, + "rewards/margins": 2.384685516357422, + "rewards/rejected": -7.873868465423584, + "step": 5557 + }, + { + "epoch": 0.86, + "learning_rate": 1.0070871933191556e-05, + "logits/chosen": -2.8457999229431152, + "logits/rejected": -2.423762798309326, + "logps/chosen": -374.59454345703125, + "logps/rejected": -296.81243896484375, + "loss": 3.978, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.91441535949707, + "rewards/margins": -0.42505550384521484, + "rewards/rejected": -6.4893598556518555, + "step": 5558 + }, + { + "epoch": 0.86, + "learning_rate": 1.0070138492660408e-05, + "logits/chosen": -1.1310228109359741, + "logits/rejected": -2.760913848876953, + "logps/chosen": -162.04266357421875, + "logps/rejected": -420.29864501953125, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4190759658813477, + "rewards/margins": 5.710713863372803, + "rewards/rejected": -9.129789352416992, + "step": 5559 + }, + { + "epoch": 0.86, + "learning_rate": 1.006940505212926e-05, + "logits/chosen": -3.099602460861206, + "logits/rejected": -3.1038618087768555, + "logps/chosen": -62.2037353515625, + "logps/rejected": -126.39239501953125, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.297946453094482, + "rewards/margins": 4.617464542388916, + "rewards/rejected": -8.915410995483398, + "step": 5560 + }, + { + "epoch": 0.86, + "learning_rate": 1.0068671611598111e-05, + "logits/chosen": -2.582881212234497, + "logits/rejected": -3.0552704334259033, + "logps/chosen": -177.17913818359375, + "logps/rejected": -507.4620361328125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.236572742462158, + "rewards/margins": 8.386366844177246, + "rewards/rejected": -12.622940063476562, + "step": 5561 + }, + { + "epoch": 0.87, + "learning_rate": 1.0067938171066963e-05, + "logits/chosen": -2.004798412322998, + "logits/rejected": -2.591224431991577, + "logps/chosen": -139.77220153808594, + "logps/rejected": -335.9187316894531, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.210834503173828, + "rewards/margins": 6.0963544845581055, + "rewards/rejected": -9.307188987731934, + "step": 5562 + }, + { + "epoch": 0.87, + "learning_rate": 1.0067204730535817e-05, + "logits/chosen": -1.8822680711746216, + "logits/rejected": -2.832526683807373, + "logps/chosen": -152.7909393310547, + "logps/rejected": -280.42047119140625, + "loss": 1.5697, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.284927845001221, + "rewards/margins": 0.9593322277069092, + "rewards/rejected": -7.244259834289551, + "step": 5563 + }, + { + "epoch": 0.87, + "learning_rate": 1.0066471290004669e-05, + "logits/chosen": -2.6719770431518555, + "logits/rejected": -2.738863229751587, + "logps/chosen": -159.418701171875, + "logps/rejected": -306.599853515625, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.534973621368408, + "rewards/margins": 6.700615406036377, + "rewards/rejected": -9.235589027404785, + "step": 5564 + }, + { + "epoch": 0.87, + "learning_rate": 1.006573784947352e-05, + "logits/chosen": -2.7520034313201904, + "logits/rejected": -1.98435378074646, + "logps/chosen": -185.94216918945312, + "logps/rejected": -431.31146240234375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4394404888153076, + "rewards/margins": 9.30238151550293, + "rewards/rejected": -11.741822242736816, + "step": 5565 + }, + { + "epoch": 0.87, + "learning_rate": 1.0065004408942373e-05, + "logits/chosen": -2.990682363510132, + "logits/rejected": -3.174828290939331, + "logps/chosen": -205.10086059570312, + "logps/rejected": -241.7861785888672, + "loss": 0.3301, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9029135704040527, + "rewards/margins": 4.844544410705566, + "rewards/rejected": -8.747457504272461, + "step": 5566 + }, + { + "epoch": 0.87, + "learning_rate": 1.0064270968411224e-05, + "logits/chosen": -2.133934736251831, + "logits/rejected": -2.767540454864502, + "logps/chosen": -381.6265869140625, + "logps/rejected": -318.0422058105469, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3604493141174316, + "rewards/margins": 8.064231872558594, + "rewards/rejected": -9.424680709838867, + "step": 5567 + }, + { + "epoch": 0.87, + "learning_rate": 1.0063537527880076e-05, + "logits/chosen": -3.0995285511016846, + "logits/rejected": -1.1795848608016968, + "logps/chosen": -375.6083984375, + "logps/rejected": -96.51348114013672, + "loss": 0.4058, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.595350742340088, + "rewards/margins": 1.30287766456604, + "rewards/rejected": -7.898228645324707, + "step": 5568 + }, + { + "epoch": 0.87, + "learning_rate": 1.0062804087348928e-05, + "logits/chosen": -2.8606724739074707, + "logits/rejected": -2.197690486907959, + "logps/chosen": -524.6032104492188, + "logps/rejected": -403.470458984375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4245011806488037, + "rewards/margins": 7.2925567626953125, + "rewards/rejected": -9.717058181762695, + "step": 5569 + }, + { + "epoch": 0.87, + "learning_rate": 1.006207064681778e-05, + "logits/chosen": -2.908740997314453, + "logits/rejected": -1.9818073511123657, + "logps/chosen": -304.3807373046875, + "logps/rejected": -177.23403930664062, + "loss": 4.4846, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.697512626647949, + "rewards/margins": -0.8016057014465332, + "rewards/rejected": -6.895906925201416, + "step": 5570 + }, + { + "epoch": 0.87, + "learning_rate": 1.0061337206286632e-05, + "logits/chosen": -2.4832024574279785, + "logits/rejected": -3.1411075592041016, + "logps/chosen": -243.81517028808594, + "logps/rejected": -412.7395935058594, + "loss": 3.026, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.3597893714904785, + "rewards/margins": 1.4247057437896729, + "rewards/rejected": -7.7844953536987305, + "step": 5571 + }, + { + "epoch": 0.87, + "learning_rate": 1.0060603765755485e-05, + "logits/chosen": -2.643622636795044, + "logits/rejected": -3.1260457038879395, + "logps/chosen": -714.8828735351562, + "logps/rejected": -786.3702392578125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6831176280975342, + "rewards/margins": 6.245092868804932, + "rewards/rejected": -7.928210258483887, + "step": 5572 + }, + { + "epoch": 0.87, + "learning_rate": 1.0059870325224337e-05, + "logits/chosen": -2.555856943130493, + "logits/rejected": -2.842679500579834, + "logps/chosen": -118.78434753417969, + "logps/rejected": -252.78860473632812, + "loss": 0.077, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.850624084472656, + "rewards/margins": 5.453831672668457, + "rewards/rejected": -10.304455757141113, + "step": 5573 + }, + { + "epoch": 0.87, + "learning_rate": 1.005913688469319e-05, + "logits/chosen": -2.693971872329712, + "logits/rejected": -1.4852906465530396, + "logps/chosen": -209.37014770507812, + "logps/rejected": -149.50656127929688, + "loss": 3.0955, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.975916862487793, + "rewards/margins": 0.3485841751098633, + "rewards/rejected": -6.324501037597656, + "step": 5574 + }, + { + "epoch": 0.87, + "learning_rate": 1.0058403444162041e-05, + "logits/chosen": -1.7509701251983643, + "logits/rejected": -2.8834590911865234, + "logps/chosen": -172.9233856201172, + "logps/rejected": -333.0304870605469, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3273820877075195, + "rewards/margins": 5.87339973449707, + "rewards/rejected": -9.200780868530273, + "step": 5575 + }, + { + "epoch": 0.87, + "learning_rate": 1.0057670003630893e-05, + "logits/chosen": -1.7018036842346191, + "logits/rejected": -3.149447441101074, + "logps/chosen": -109.96253967285156, + "logps/rejected": -455.75140380859375, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7629005908966064, + "rewards/margins": 6.17038631439209, + "rewards/rejected": -9.933286666870117, + "step": 5576 + }, + { + "epoch": 0.87, + "learning_rate": 1.0056936563099745e-05, + "logits/chosen": -2.8360307216644287, + "logits/rejected": -2.4983572959899902, + "logps/chosen": -324.65179443359375, + "logps/rejected": -394.8542785644531, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6070908308029175, + "rewards/margins": 7.828799247741699, + "rewards/rejected": -9.435890197753906, + "step": 5577 + }, + { + "epoch": 0.87, + "learning_rate": 1.0056203122568598e-05, + "logits/chosen": -3.1501824855804443, + "logits/rejected": -3.0680527687072754, + "logps/chosen": -108.44981384277344, + "logps/rejected": -250.05422973632812, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.005981683731079, + "rewards/margins": 3.937767744064331, + "rewards/rejected": -6.94374942779541, + "step": 5578 + }, + { + "epoch": 0.87, + "learning_rate": 1.005546968203745e-05, + "logits/chosen": -2.9966964721679688, + "logits/rejected": -2.9184200763702393, + "logps/chosen": -126.84156799316406, + "logps/rejected": -190.76229858398438, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.866079807281494, + "rewards/margins": 5.141641616821289, + "rewards/rejected": -9.007721900939941, + "step": 5579 + }, + { + "epoch": 0.87, + "learning_rate": 1.0054736241506304e-05, + "logits/chosen": -2.3431475162506104, + "logits/rejected": -3.0370848178863525, + "logps/chosen": -108.74324798583984, + "logps/rejected": -355.0873107910156, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8603615760803223, + "rewards/margins": 7.401514530181885, + "rewards/rejected": -9.261876106262207, + "step": 5580 + }, + { + "epoch": 0.87, + "learning_rate": 1.0054002800975156e-05, + "logits/chosen": -3.091069459915161, + "logits/rejected": -2.449028253555298, + "logps/chosen": -768.1375732421875, + "logps/rejected": -474.58563232421875, + "loss": 0.2825, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.464656829833984, + "rewards/margins": 1.850330114364624, + "rewards/rejected": -7.3149871826171875, + "step": 5581 + }, + { + "epoch": 0.87, + "learning_rate": 1.0053269360444008e-05, + "logits/chosen": -3.147141456604004, + "logits/rejected": -2.77530837059021, + "logps/chosen": -161.6223602294922, + "logps/rejected": -341.346923828125, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.957273483276367, + "rewards/margins": 6.457921981811523, + "rewards/rejected": -11.41519546508789, + "step": 5582 + }, + { + "epoch": 0.87, + "learning_rate": 1.005253591991286e-05, + "logits/chosen": -1.3511701822280884, + "logits/rejected": -2.892747163772583, + "logps/chosen": -119.3204116821289, + "logps/rejected": -420.6622009277344, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4302330017089844, + "rewards/margins": 7.056634902954102, + "rewards/rejected": -8.486867904663086, + "step": 5583 + }, + { + "epoch": 0.87, + "learning_rate": 1.0051802479381711e-05, + "logits/chosen": -2.184790849685669, + "logits/rejected": -2.8487846851348877, + "logps/chosen": -120.92045593261719, + "logps/rejected": -279.1791687011719, + "loss": 0.6707, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.126706123352051, + "rewards/margins": 3.474231243133545, + "rewards/rejected": -8.600936889648438, + "step": 5584 + }, + { + "epoch": 0.87, + "learning_rate": 1.0051069038850563e-05, + "logits/chosen": -2.862391710281372, + "logits/rejected": -3.029348134994507, + "logps/chosen": -613.9058227539062, + "logps/rejected": -491.8650817871094, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9862968921661377, + "rewards/margins": 5.959704399108887, + "rewards/rejected": -7.946001052856445, + "step": 5585 + }, + { + "epoch": 0.87, + "learning_rate": 1.0050335598319415e-05, + "logits/chosen": -2.232433319091797, + "logits/rejected": -3.006533145904541, + "logps/chosen": -395.3749694824219, + "logps/rejected": -393.9082336425781, + "loss": 1.6173, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3015449047088623, + "rewards/margins": 2.671290397644043, + "rewards/rejected": -5.972835540771484, + "step": 5586 + }, + { + "epoch": 0.87, + "learning_rate": 1.0049602157788267e-05, + "logits/chosen": -2.7536470890045166, + "logits/rejected": -2.997058153152466, + "logps/chosen": -49.43840408325195, + "logps/rejected": -213.4425048828125, + "loss": 0.0206, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.986914873123169, + "rewards/margins": 3.9637451171875, + "rewards/rejected": -6.950660228729248, + "step": 5587 + }, + { + "epoch": 0.87, + "learning_rate": 1.0048868717257119e-05, + "logits/chosen": -2.8067803382873535, + "logits/rejected": -3.313204765319824, + "logps/chosen": -445.10101318359375, + "logps/rejected": -474.2879333496094, + "loss": 0.0441, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8998351097106934, + "rewards/margins": 4.0215864181518555, + "rewards/rejected": -7.921422004699707, + "step": 5588 + }, + { + "epoch": 0.87, + "learning_rate": 1.0048135276725973e-05, + "logits/chosen": -3.1419708728790283, + "logits/rejected": -3.1588504314422607, + "logps/chosen": -301.1531982421875, + "logps/rejected": -201.02053833007812, + "loss": 2.7998, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.236769676208496, + "rewards/margins": -0.029596328735351562, + "rewards/rejected": -5.2071733474731445, + "step": 5589 + }, + { + "epoch": 0.87, + "learning_rate": 1.0047401836194824e-05, + "logits/chosen": -2.8400797843933105, + "logits/rejected": -3.056813955307007, + "logps/chosen": -146.80755615234375, + "logps/rejected": -341.97381591796875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4656119346618652, + "rewards/margins": 7.106261730194092, + "rewards/rejected": -10.571873664855957, + "step": 5590 + }, + { + "epoch": 0.87, + "learning_rate": 1.0046668395663676e-05, + "logits/chosen": -2.7426695823669434, + "logits/rejected": -3.1818974018096924, + "logps/chosen": -133.41696166992188, + "logps/rejected": -241.52389526367188, + "loss": 0.047, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.976325273513794, + "rewards/margins": 3.350471019744873, + "rewards/rejected": -7.326796531677246, + "step": 5591 + }, + { + "epoch": 0.87, + "learning_rate": 1.0045934955132528e-05, + "logits/chosen": -3.0784010887145996, + "logits/rejected": -2.621083974838257, + "logps/chosen": -566.0082397460938, + "logps/rejected": -405.7203674316406, + "loss": 2.6493, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.929004192352295, + "rewards/margins": -0.3299434185028076, + "rewards/rejected": -4.599060535430908, + "step": 5592 + }, + { + "epoch": 0.87, + "learning_rate": 1.004520151460138e-05, + "logits/chosen": -2.8614368438720703, + "logits/rejected": -2.4296021461486816, + "logps/chosen": -144.93984985351562, + "logps/rejected": -208.64743041992188, + "loss": 2.2794, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.588138103485107, + "rewards/margins": 2.8650450706481934, + "rewards/rejected": -7.453183174133301, + "step": 5593 + }, + { + "epoch": 0.87, + "learning_rate": 1.0044468074070232e-05, + "logits/chosen": -2.9526185989379883, + "logits/rejected": -2.8508059978485107, + "logps/chosen": -399.612060546875, + "logps/rejected": -365.8931884765625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8666177988052368, + "rewards/margins": 6.367919921875, + "rewards/rejected": -8.234537124633789, + "step": 5594 + }, + { + "epoch": 0.87, + "learning_rate": 1.0043734633539084e-05, + "logits/chosen": -2.651031494140625, + "logits/rejected": -3.018803119659424, + "logps/chosen": -136.25927734375, + "logps/rejected": -321.6229248046875, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4077014923095703, + "rewards/margins": 4.642730236053467, + "rewards/rejected": -8.050432205200195, + "step": 5595 + }, + { + "epoch": 0.87, + "learning_rate": 1.0043001193007936e-05, + "logits/chosen": -2.4499738216400146, + "logits/rejected": -3.147308826446533, + "logps/chosen": -51.028297424316406, + "logps/rejected": -241.92210388183594, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2158548831939697, + "rewards/margins": 5.688335418701172, + "rewards/rejected": -8.904190063476562, + "step": 5596 + }, + { + "epoch": 0.87, + "learning_rate": 1.0042267752476788e-05, + "logits/chosen": -2.974079132080078, + "logits/rejected": -3.093341588973999, + "logps/chosen": -445.79266357421875, + "logps/rejected": -368.310302734375, + "loss": 4.6459, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.098463535308838, + "rewards/margins": -1.38919997215271, + "rewards/rejected": -4.709263801574707, + "step": 5597 + }, + { + "epoch": 0.87, + "learning_rate": 1.0041534311945641e-05, + "logits/chosen": -1.8289192914962769, + "logits/rejected": -2.7956886291503906, + "logps/chosen": -187.62411499023438, + "logps/rejected": -430.3597412109375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5929954051971436, + "rewards/margins": 7.127413272857666, + "rewards/rejected": -9.720409393310547, + "step": 5598 + }, + { + "epoch": 0.87, + "learning_rate": 1.0040800871414493e-05, + "logits/chosen": -1.4917265176773071, + "logits/rejected": -2.8864500522613525, + "logps/chosen": -115.5498046875, + "logps/rejected": -297.05108642578125, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2899041175842285, + "rewards/margins": 4.408623695373535, + "rewards/rejected": -8.698528289794922, + "step": 5599 + }, + { + "epoch": 0.87, + "learning_rate": 1.0040067430883345e-05, + "logits/chosen": -1.395762324333191, + "logits/rejected": -3.004747152328491, + "logps/chosen": -155.64639282226562, + "logps/rejected": -342.5345153808594, + "loss": 0.2316, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.46525239944458, + "rewards/margins": 1.8422672748565674, + "rewards/rejected": -6.307519912719727, + "step": 5600 + }, + { + "epoch": 0.87, + "learning_rate": 1.0039333990352197e-05, + "logits/chosen": -2.3230907917022705, + "logits/rejected": -3.0620977878570557, + "logps/chosen": -219.29014587402344, + "logps/rejected": -377.7227783203125, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.108431339263916, + "rewards/margins": 5.166417598724365, + "rewards/rejected": -8.274848937988281, + "step": 5601 + }, + { + "epoch": 0.87, + "learning_rate": 1.0038600549821049e-05, + "logits/chosen": -2.2693192958831787, + "logits/rejected": -3.036097288131714, + "logps/chosen": -155.23147583007812, + "logps/rejected": -366.09234619140625, + "loss": 0.1068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9060287475585938, + "rewards/margins": 4.418705940246582, + "rewards/rejected": -7.324734687805176, + "step": 5602 + }, + { + "epoch": 0.87, + "learning_rate": 1.00378671092899e-05, + "logits/chosen": -3.1227407455444336, + "logits/rejected": -2.8984434604644775, + "logps/chosen": -91.68893432617188, + "logps/rejected": -124.40609741210938, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1297829151153564, + "rewards/margins": 4.689404010772705, + "rewards/rejected": -7.819187164306641, + "step": 5603 + }, + { + "epoch": 0.87, + "learning_rate": 1.0037133668758752e-05, + "logits/chosen": -3.0088016986846924, + "logits/rejected": -2.9325754642486572, + "logps/chosen": -134.84449768066406, + "logps/rejected": -156.8516387939453, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5199193954467773, + "rewards/margins": 6.502789497375488, + "rewards/rejected": -9.022708892822266, + "step": 5604 + }, + { + "epoch": 0.87, + "learning_rate": 1.0036400228227604e-05, + "logits/chosen": -2.966732978820801, + "logits/rejected": -2.6771366596221924, + "logps/chosen": -386.66473388671875, + "logps/rejected": -356.3049011230469, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.480609178543091, + "rewards/margins": 4.466277122497559, + "rewards/rejected": -7.94688606262207, + "step": 5605 + }, + { + "epoch": 0.87, + "learning_rate": 1.0035666787696456e-05, + "logits/chosen": -1.9515964984893799, + "logits/rejected": -2.5899102687835693, + "logps/chosen": -199.0560302734375, + "logps/rejected": -438.2165222167969, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.903435230255127, + "rewards/margins": 7.945159912109375, + "rewards/rejected": -10.848594665527344, + "step": 5606 + }, + { + "epoch": 0.87, + "learning_rate": 1.003493334716531e-05, + "logits/chosen": -3.161240816116333, + "logits/rejected": -3.2455532550811768, + "logps/chosen": -255.19259643554688, + "logps/rejected": -229.15736389160156, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9626755714416504, + "rewards/margins": 6.269742012023926, + "rewards/rejected": -9.232418060302734, + "step": 5607 + }, + { + "epoch": 0.87, + "learning_rate": 1.0034199906634162e-05, + "logits/chosen": -2.148853302001953, + "logits/rejected": -2.815476417541504, + "logps/chosen": -215.7744903564453, + "logps/rejected": -372.02667236328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4934234619140625, + "rewards/margins": 11.281339645385742, + "rewards/rejected": -12.774763107299805, + "step": 5608 + }, + { + "epoch": 0.87, + "learning_rate": 1.0033466466103013e-05, + "logits/chosen": -0.8199224472045898, + "logits/rejected": -2.5789942741394043, + "logps/chosen": -133.46791076660156, + "logps/rejected": -299.4627990722656, + "loss": 1.5501, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.981940746307373, + "rewards/margins": 1.3507763147354126, + "rewards/rejected": -6.332717418670654, + "step": 5609 + }, + { + "epoch": 0.87, + "learning_rate": 1.0032733025571865e-05, + "logits/chosen": -3.0411007404327393, + "logits/rejected": -2.9400439262390137, + "logps/chosen": -88.0994873046875, + "logps/rejected": -291.9708251953125, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.125842571258545, + "rewards/margins": 6.163623332977295, + "rewards/rejected": -10.28946590423584, + "step": 5610 + }, + { + "epoch": 0.87, + "learning_rate": 1.0031999585040717e-05, + "logits/chosen": -2.641864538192749, + "logits/rejected": -3.002821445465088, + "logps/chosen": -366.439453125, + "logps/rejected": -448.1031799316406, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.450439453125, + "rewards/margins": 6.312016487121582, + "rewards/rejected": -8.762455940246582, + "step": 5611 + }, + { + "epoch": 0.87, + "learning_rate": 1.003126614450957e-05, + "logits/chosen": -2.7214794158935547, + "logits/rejected": -3.0743660926818848, + "logps/chosen": -180.43447875976562, + "logps/rejected": -203.3729248046875, + "loss": 0.76, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.547510623931885, + "rewards/margins": 1.8608217239379883, + "rewards/rejected": -8.408332824707031, + "step": 5612 + }, + { + "epoch": 0.87, + "learning_rate": 1.0030532703978423e-05, + "logits/chosen": -2.8161158561706543, + "logits/rejected": -3.1961491107940674, + "logps/chosen": -164.3309326171875, + "logps/rejected": -243.23623657226562, + "loss": 0.0498, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2575747966766357, + "rewards/margins": 3.239516258239746, + "rewards/rejected": -6.497091293334961, + "step": 5613 + }, + { + "epoch": 0.87, + "learning_rate": 1.0029799263447275e-05, + "logits/chosen": -3.08065128326416, + "logits/rejected": -2.0379021167755127, + "logps/chosen": -580.6063842773438, + "logps/rejected": -295.6199951171875, + "loss": 1.442, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.196615695953369, + "rewards/margins": 2.078643560409546, + "rewards/rejected": -7.275259017944336, + "step": 5614 + }, + { + "epoch": 0.87, + "learning_rate": 1.0029065822916126e-05, + "logits/chosen": -2.095864772796631, + "logits/rejected": -2.80922794342041, + "logps/chosen": -152.53358459472656, + "logps/rejected": -414.1729736328125, + "loss": 0.2994, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.028041124343872, + "rewards/margins": 2.231410026550293, + "rewards/rejected": -5.259451389312744, + "step": 5615 + }, + { + "epoch": 0.87, + "learning_rate": 1.002833238238498e-05, + "logits/chosen": -2.3200552463531494, + "logits/rejected": -2.646674871444702, + "logps/chosen": -236.58322143554688, + "logps/rejected": -404.37548828125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7896995544433594, + "rewards/margins": 8.396787643432617, + "rewards/rejected": -11.186487197875977, + "step": 5616 + }, + { + "epoch": 0.87, + "learning_rate": 1.0027598941853832e-05, + "logits/chosen": -1.2775627374649048, + "logits/rejected": -2.905850887298584, + "logps/chosen": -101.24162292480469, + "logps/rejected": -305.70721435546875, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.78713321685791, + "rewards/margins": 4.848356246948242, + "rewards/rejected": -7.635489463806152, + "step": 5617 + }, + { + "epoch": 0.87, + "learning_rate": 1.0026865501322684e-05, + "logits/chosen": -2.7479684352874756, + "logits/rejected": -2.9312658309936523, + "logps/chosen": -226.5878143310547, + "logps/rejected": -335.38580322265625, + "loss": 4.4682, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.6643385887146, + "rewards/margins": -0.7047514915466309, + "rewards/rejected": -5.959587097167969, + "step": 5618 + }, + { + "epoch": 0.87, + "learning_rate": 1.0026132060791536e-05, + "logits/chosen": -2.9251368045806885, + "logits/rejected": -2.4115684032440186, + "logps/chosen": -311.34686279296875, + "logps/rejected": -556.227294921875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.400740146636963, + "rewards/margins": 8.440899848937988, + "rewards/rejected": -10.84164047241211, + "step": 5619 + }, + { + "epoch": 0.87, + "learning_rate": 1.0025398620260388e-05, + "logits/chosen": -2.0945403575897217, + "logits/rejected": -3.0378081798553467, + "logps/chosen": -127.48799133300781, + "logps/rejected": -363.14202880859375, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2554192543029785, + "rewards/margins": 5.014028072357178, + "rewards/rejected": -7.269447326660156, + "step": 5620 + }, + { + "epoch": 0.87, + "learning_rate": 1.002466517972924e-05, + "logits/chosen": -2.511803388595581, + "logits/rejected": -3.128265142440796, + "logps/chosen": -329.8095703125, + "logps/rejected": -525.4158935546875, + "loss": 3.575, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.807735443115234, + "rewards/margins": -1.2178611755371094, + "rewards/rejected": -5.589874267578125, + "step": 5621 + }, + { + "epoch": 0.87, + "learning_rate": 1.0023931739198091e-05, + "logits/chosen": -3.0879602432250977, + "logits/rejected": -2.354138135910034, + "logps/chosen": -286.1181640625, + "logps/rejected": -254.2586669921875, + "loss": 1.0027, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.937896251678467, + "rewards/margins": 3.5263073444366455, + "rewards/rejected": -8.464203834533691, + "step": 5622 + }, + { + "epoch": 0.87, + "learning_rate": 1.0023198298666943e-05, + "logits/chosen": -1.3318729400634766, + "logits/rejected": -2.772899866104126, + "logps/chosen": -100.0439453125, + "logps/rejected": -410.44488525390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.086683988571167, + "rewards/margins": 9.904706954956055, + "rewards/rejected": -11.9913911819458, + "step": 5623 + }, + { + "epoch": 0.87, + "learning_rate": 1.0022464858135795e-05, + "logits/chosen": -2.798513650894165, + "logits/rejected": -2.4888973236083984, + "logps/chosen": -165.07557678222656, + "logps/rejected": -345.3415222167969, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3041162490844727, + "rewards/margins": 7.809106826782227, + "rewards/rejected": -10.113224029541016, + "step": 5624 + }, + { + "epoch": 0.87, + "learning_rate": 1.0021731417604649e-05, + "logits/chosen": -3.0437986850738525, + "logits/rejected": -1.4049417972564697, + "logps/chosen": -546.5899658203125, + "logps/rejected": -274.5450134277344, + "loss": 1.148, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.534196376800537, + "rewards/margins": 2.430546522140503, + "rewards/rejected": -7.964742660522461, + "step": 5625 + }, + { + "epoch": 0.87, + "learning_rate": 1.00209979770735e-05, + "logits/chosen": -1.537372350692749, + "logits/rejected": -2.8403666019439697, + "logps/chosen": -61.31455993652344, + "logps/rejected": -360.2607421875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8432068824768066, + "rewards/margins": 8.196253776550293, + "rewards/rejected": -11.039461135864258, + "step": 5626 + }, + { + "epoch": 0.88, + "learning_rate": 1.0020264536542352e-05, + "logits/chosen": -2.0076749324798584, + "logits/rejected": -3.000854015350342, + "logps/chosen": -140.98681640625, + "logps/rejected": -366.16839599609375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2401461601257324, + "rewards/margins": 7.2398834228515625, + "rewards/rejected": -10.480030059814453, + "step": 5627 + }, + { + "epoch": 0.88, + "learning_rate": 1.0019531096011204e-05, + "logits/chosen": -2.182262420654297, + "logits/rejected": -2.9713521003723145, + "logps/chosen": -94.11754608154297, + "logps/rejected": -238.40420532226562, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.822868824005127, + "rewards/margins": 4.496358871459961, + "rewards/rejected": -7.319227695465088, + "step": 5628 + }, + { + "epoch": 0.88, + "learning_rate": 1.0018797655480056e-05, + "logits/chosen": -2.911295175552368, + "logits/rejected": -2.965195655822754, + "logps/chosen": -345.87481689453125, + "logps/rejected": -428.65924072265625, + "loss": 1.8911, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.243048191070557, + "rewards/margins": 1.0582138299942017, + "rewards/rejected": -6.301261901855469, + "step": 5629 + }, + { + "epoch": 0.88, + "learning_rate": 1.0018064214948908e-05, + "logits/chosen": -3.1233127117156982, + "logits/rejected": -2.1083438396453857, + "logps/chosen": -409.65234375, + "logps/rejected": -407.0862121582031, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.315036296844482, + "rewards/margins": 7.954936504364014, + "rewards/rejected": -12.26997184753418, + "step": 5630 + }, + { + "epoch": 0.88, + "learning_rate": 1.001733077441776e-05, + "logits/chosen": -1.3537667989730835, + "logits/rejected": -2.5709996223449707, + "logps/chosen": -295.8042907714844, + "logps/rejected": -435.17462158203125, + "loss": 1.0249, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.079106330871582, + "rewards/margins": 5.603304862976074, + "rewards/rejected": -11.682411193847656, + "step": 5631 + }, + { + "epoch": 0.88, + "learning_rate": 1.0016597333886612e-05, + "logits/chosen": -3.1787269115448, + "logits/rejected": -2.852734088897705, + "logps/chosen": -726.1497192382812, + "logps/rejected": -608.807861328125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.847258448600769, + "rewards/margins": 5.784299373626709, + "rewards/rejected": -7.631557464599609, + "step": 5632 + }, + { + "epoch": 0.88, + "learning_rate": 1.0015863893355464e-05, + "logits/chosen": -2.583801746368408, + "logits/rejected": -2.7960753440856934, + "logps/chosen": -278.56878662109375, + "logps/rejected": -538.6140747070312, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.265139102935791, + "rewards/margins": 7.944315433502197, + "rewards/rejected": -10.209454536437988, + "step": 5633 + }, + { + "epoch": 0.88, + "learning_rate": 1.0015130452824317e-05, + "logits/chosen": -2.8603084087371826, + "logits/rejected": -2.9056508541107178, + "logps/chosen": -325.4286804199219, + "logps/rejected": -295.4952392578125, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8170108795166016, + "rewards/margins": 6.545246601104736, + "rewards/rejected": -9.36225700378418, + "step": 5634 + }, + { + "epoch": 0.88, + "learning_rate": 1.0014397012293169e-05, + "logits/chosen": -3.076343059539795, + "logits/rejected": -3.16400146484375, + "logps/chosen": -264.88104248046875, + "logps/rejected": -162.8788299560547, + "loss": 2.6514, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.0535888671875, + "rewards/margins": -2.571640729904175, + "rewards/rejected": -4.481947898864746, + "step": 5635 + }, + { + "epoch": 0.88, + "learning_rate": 1.0013663571762021e-05, + "logits/chosen": -2.973304510116577, + "logits/rejected": -3.1119613647460938, + "logps/chosen": -54.657432556152344, + "logps/rejected": -161.25045776367188, + "loss": 0.0986, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.426091194152832, + "rewards/margins": 4.341549873352051, + "rewards/rejected": -8.767641067504883, + "step": 5636 + }, + { + "epoch": 0.88, + "learning_rate": 1.0012930131230873e-05, + "logits/chosen": -2.5984745025634766, + "logits/rejected": -3.116196393966675, + "logps/chosen": -370.84552001953125, + "logps/rejected": -427.22015380859375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8584473133087158, + "rewards/margins": 8.535292625427246, + "rewards/rejected": -10.393739700317383, + "step": 5637 + }, + { + "epoch": 0.88, + "learning_rate": 1.0012196690699725e-05, + "logits/chosen": -1.6179885864257812, + "logits/rejected": -2.638289451599121, + "logps/chosen": -130.15322875976562, + "logps/rejected": -338.4629821777344, + "loss": 2.8014, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.692150115966797, + "rewards/margins": 2.6248908042907715, + "rewards/rejected": -8.31704044342041, + "step": 5638 + }, + { + "epoch": 0.88, + "learning_rate": 1.0011463250168577e-05, + "logits/chosen": -2.599360227584839, + "logits/rejected": -3.0532517433166504, + "logps/chosen": -124.52250671386719, + "logps/rejected": -260.8845520019531, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6101202964782715, + "rewards/margins": 7.2074995040893555, + "rewards/rejected": -9.817619323730469, + "step": 5639 + }, + { + "epoch": 0.88, + "learning_rate": 1.0010729809637428e-05, + "logits/chosen": -2.8834047317504883, + "logits/rejected": -2.9624979496002197, + "logps/chosen": -450.2040100097656, + "logps/rejected": -348.0941162109375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8850159645080566, + "rewards/margins": 7.363995552062988, + "rewards/rejected": -9.249011993408203, + "step": 5640 + }, + { + "epoch": 0.88, + "learning_rate": 1.000999636910628e-05, + "logits/chosen": -2.1287930011749268, + "logits/rejected": -2.6607954502105713, + "logps/chosen": -178.84019470214844, + "logps/rejected": -273.8697509765625, + "loss": 2.9595, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.987092018127441, + "rewards/margins": -0.49889349937438965, + "rewards/rejected": -5.488198280334473, + "step": 5641 + }, + { + "epoch": 0.88, + "learning_rate": 1.0009262928575132e-05, + "logits/chosen": -1.6610866785049438, + "logits/rejected": -2.832904815673828, + "logps/chosen": -131.1490936279297, + "logps/rejected": -234.77305603027344, + "loss": 2.5377, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.3310465812683105, + "rewards/margins": 1.5862913131713867, + "rewards/rejected": -6.917337894439697, + "step": 5642 + }, + { + "epoch": 0.88, + "learning_rate": 1.0008529488043986e-05, + "logits/chosen": -2.975252628326416, + "logits/rejected": -3.0552144050598145, + "logps/chosen": -83.23326110839844, + "logps/rejected": -333.90423583984375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9370150566101074, + "rewards/margins": 5.847989082336426, + "rewards/rejected": -8.785004615783691, + "step": 5643 + }, + { + "epoch": 0.88, + "learning_rate": 1.0007796047512838e-05, + "logits/chosen": -2.0209178924560547, + "logits/rejected": -3.1884496212005615, + "logps/chosen": -541.18896484375, + "logps/rejected": -726.8397216796875, + "loss": 0.0566, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4939303398132324, + "rewards/margins": 3.927769184112549, + "rewards/rejected": -7.421699523925781, + "step": 5644 + }, + { + "epoch": 0.88, + "learning_rate": 1.000706260698169e-05, + "logits/chosen": -2.268374443054199, + "logits/rejected": -2.9415905475616455, + "logps/chosen": -112.46353149414062, + "logps/rejected": -353.79302978515625, + "loss": 0.7492, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.155821800231934, + "rewards/margins": 2.545499801635742, + "rewards/rejected": -7.701321601867676, + "step": 5645 + }, + { + "epoch": 0.88, + "learning_rate": 1.0006329166450543e-05, + "logits/chosen": -2.8625295162200928, + "logits/rejected": -1.906745433807373, + "logps/chosen": -236.40562438964844, + "logps/rejected": -212.8628692626953, + "loss": 3.0749, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.657090187072754, + "rewards/margins": -0.5140337944030762, + "rewards/rejected": -5.143056392669678, + "step": 5646 + }, + { + "epoch": 0.88, + "learning_rate": 1.0005595725919395e-05, + "logits/chosen": -1.1558130979537964, + "logits/rejected": -3.1228559017181396, + "logps/chosen": -262.7489318847656, + "logps/rejected": -478.79364013671875, + "loss": 0.9271, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.685314655303955, + "rewards/margins": 1.9399816989898682, + "rewards/rejected": -7.625296592712402, + "step": 5647 + }, + { + "epoch": 0.88, + "learning_rate": 1.0004862285388247e-05, + "logits/chosen": -3.148513078689575, + "logits/rejected": -1.8315027952194214, + "logps/chosen": -641.8466796875, + "logps/rejected": -396.40655517578125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9531581997871399, + "rewards/margins": 8.9440279006958, + "rewards/rejected": -9.897186279296875, + "step": 5648 + }, + { + "epoch": 0.88, + "learning_rate": 1.0004128844857099e-05, + "logits/chosen": -2.48604416847229, + "logits/rejected": -1.3256163597106934, + "logps/chosen": -386.67919921875, + "logps/rejected": -358.94879150390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2810291051864624, + "rewards/margins": 9.377324104309082, + "rewards/rejected": -10.658352851867676, + "step": 5649 + }, + { + "epoch": 0.88, + "learning_rate": 1.000339540432595e-05, + "logits/chosen": -2.948457717895508, + "logits/rejected": -3.131194829940796, + "logps/chosen": -98.64987182617188, + "logps/rejected": -224.60549926757812, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.569457769393921, + "rewards/margins": 6.67531156539917, + "rewards/rejected": -9.244769096374512, + "step": 5650 + }, + { + "epoch": 0.88, + "learning_rate": 1.0002661963794803e-05, + "logits/chosen": -3.2236437797546387, + "logits/rejected": -2.4617011547088623, + "logps/chosen": -512.497802734375, + "logps/rejected": -931.9000244140625, + "loss": 2.8676, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.648166656494141, + "rewards/margins": 0.6334071159362793, + "rewards/rejected": -5.28157377243042, + "step": 5651 + }, + { + "epoch": 0.88, + "learning_rate": 1.0001928523263656e-05, + "logits/chosen": -2.437704563140869, + "logits/rejected": -3.146331310272217, + "logps/chosen": -399.1003723144531, + "logps/rejected": -502.3809814453125, + "loss": 1.7999, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4231529235839844, + "rewards/margins": 3.5219016075134277, + "rewards/rejected": -5.945054531097412, + "step": 5652 + }, + { + "epoch": 0.88, + "learning_rate": 1.0001195082732508e-05, + "logits/chosen": -2.491964817047119, + "logits/rejected": -3.1348657608032227, + "logps/chosen": -150.71389770507812, + "logps/rejected": -252.32647705078125, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6196322441101074, + "rewards/margins": 5.516201019287109, + "rewards/rejected": -8.135833740234375, + "step": 5653 + }, + { + "epoch": 0.88, + "learning_rate": 1.000046164220136e-05, + "logits/chosen": -1.8921985626220703, + "logits/rejected": -3.0360875129699707, + "logps/chosen": -328.14788818359375, + "logps/rejected": -464.2886047363281, + "loss": 2.1326, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.818016052246094, + "rewards/margins": 3.1795248985290527, + "rewards/rejected": -7.9975409507751465, + "step": 5654 + }, + { + "epoch": 0.88, + "learning_rate": 9.999728201670212e-06, + "logits/chosen": -2.8754794597625732, + "logits/rejected": -2.9889161586761475, + "logps/chosen": -66.04138946533203, + "logps/rejected": -175.2606658935547, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.544001579284668, + "rewards/margins": 5.103469371795654, + "rewards/rejected": -8.647470474243164, + "step": 5655 + }, + { + "epoch": 0.88, + "learning_rate": 9.998994761139064e-06, + "logits/chosen": -2.012338638305664, + "logits/rejected": -2.5748064517974854, + "logps/chosen": -219.79689025878906, + "logps/rejected": -506.6431884765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2440807819366455, + "rewards/margins": 9.752601623535156, + "rewards/rejected": -12.996683120727539, + "step": 5656 + }, + { + "epoch": 0.88, + "learning_rate": 9.998261320607915e-06, + "logits/chosen": -3.0582435131073, + "logits/rejected": -3.046335458755493, + "logps/chosen": -147.09710693359375, + "logps/rejected": -294.6789855957031, + "loss": 1.9003, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.414094924926758, + "rewards/margins": 2.0799098014831543, + "rewards/rejected": -7.49400520324707, + "step": 5657 + }, + { + "epoch": 0.88, + "learning_rate": 9.997527880076767e-06, + "logits/chosen": -2.9705569744110107, + "logits/rejected": -3.0964465141296387, + "logps/chosen": -74.48004913330078, + "logps/rejected": -383.5692138671875, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6336233615875244, + "rewards/margins": 7.440389633178711, + "rewards/rejected": -11.074012756347656, + "step": 5658 + }, + { + "epoch": 0.88, + "learning_rate": 9.99679443954562e-06, + "logits/chosen": -2.7576253414154053, + "logits/rejected": -3.0112433433532715, + "logps/chosen": -154.84988403320312, + "logps/rejected": -379.84747314453125, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2766077518463135, + "rewards/margins": 6.140927314758301, + "rewards/rejected": -8.417535781860352, + "step": 5659 + }, + { + "epoch": 0.88, + "learning_rate": 9.996060999014471e-06, + "logits/chosen": -2.1397042274475098, + "logits/rejected": -2.9609978199005127, + "logps/chosen": -410.3431396484375, + "logps/rejected": -443.4462585449219, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9803009033203125, + "rewards/margins": 6.833348274230957, + "rewards/rejected": -8.81364917755127, + "step": 5660 + }, + { + "epoch": 0.88, + "learning_rate": 9.995327558483325e-06, + "logits/chosen": -2.0490026473999023, + "logits/rejected": -2.8269646167755127, + "logps/chosen": -238.68072509765625, + "logps/rejected": -546.69482421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0438201427459717, + "rewards/margins": 9.446681022644043, + "rewards/rejected": -11.490501403808594, + "step": 5661 + }, + { + "epoch": 0.88, + "learning_rate": 9.994594117952177e-06, + "logits/chosen": -2.7280495166778564, + "logits/rejected": -3.116804599761963, + "logps/chosen": -214.93060302734375, + "logps/rejected": -182.11746215820312, + "loss": 2.695, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.038435935974121, + "rewards/margins": 0.6603419780731201, + "rewards/rejected": -5.69877815246582, + "step": 5662 + }, + { + "epoch": 0.88, + "learning_rate": 9.993860677421028e-06, + "logits/chosen": -2.4482083320617676, + "logits/rejected": -2.9003868103027344, + "logps/chosen": -298.8531188964844, + "logps/rejected": -359.88775634765625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6510000228881836, + "rewards/margins": 7.036553382873535, + "rewards/rejected": -9.687553405761719, + "step": 5663 + }, + { + "epoch": 0.88, + "learning_rate": 9.99312723688988e-06, + "logits/chosen": -2.9388267993927, + "logits/rejected": -3.0819251537323, + "logps/chosen": -84.97161102294922, + "logps/rejected": -278.13458251953125, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6810519695281982, + "rewards/margins": 6.201290130615234, + "rewards/rejected": -9.882341384887695, + "step": 5664 + }, + { + "epoch": 0.88, + "learning_rate": 9.992393796358732e-06, + "logits/chosen": -2.332167863845825, + "logits/rejected": -2.956963062286377, + "logps/chosen": -140.22833251953125, + "logps/rejected": -428.374755859375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.139056444168091, + "rewards/margins": 7.8836164474487305, + "rewards/rejected": -10.022672653198242, + "step": 5665 + }, + { + "epoch": 0.88, + "learning_rate": 9.991660355827584e-06, + "logits/chosen": -1.3342756032943726, + "logits/rejected": -2.9307639598846436, + "logps/chosen": -53.6149787902832, + "logps/rejected": -231.2550506591797, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9619791507720947, + "rewards/margins": 4.826855182647705, + "rewards/rejected": -7.788834571838379, + "step": 5666 + }, + { + "epoch": 0.88, + "learning_rate": 9.990926915296436e-06, + "logits/chosen": -2.9886419773101807, + "logits/rejected": -3.0013372898101807, + "logps/chosen": -93.66433715820312, + "logps/rejected": -82.73017883300781, + "loss": 2.6166, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.224512577056885, + "rewards/margins": 0.3026251792907715, + "rewards/rejected": -5.527137756347656, + "step": 5667 + }, + { + "epoch": 0.88, + "learning_rate": 9.990193474765288e-06, + "logits/chosen": -2.9253149032592773, + "logits/rejected": -3.245748281478882, + "logps/chosen": -582.853271484375, + "logps/rejected": -473.174072265625, + "loss": 0.0619, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.551337480545044, + "rewards/margins": 4.024912357330322, + "rewards/rejected": -6.576250076293945, + "step": 5668 + }, + { + "epoch": 0.88, + "learning_rate": 9.98946003423414e-06, + "logits/chosen": -2.7303359508514404, + "logits/rejected": -3.1122045516967773, + "logps/chosen": -174.12936401367188, + "logps/rejected": -268.83135986328125, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.974696636199951, + "rewards/margins": 4.904166221618652, + "rewards/rejected": -7.8788628578186035, + "step": 5669 + }, + { + "epoch": 0.88, + "learning_rate": 9.988726593702993e-06, + "logits/chosen": -2.211747884750366, + "logits/rejected": -2.995939254760742, + "logps/chosen": -131.1287384033203, + "logps/rejected": -235.01547241210938, + "loss": 0.0503, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5123958587646484, + "rewards/margins": 4.938540458679199, + "rewards/rejected": -7.450936317443848, + "step": 5670 + }, + { + "epoch": 0.88, + "learning_rate": 9.987993153171845e-06, + "logits/chosen": -2.4339101314544678, + "logits/rejected": -3.0014069080352783, + "logps/chosen": -295.07208251953125, + "logps/rejected": -353.6164855957031, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5426040887832642, + "rewards/margins": 5.126493453979492, + "rewards/rejected": -6.669097900390625, + "step": 5671 + }, + { + "epoch": 0.88, + "learning_rate": 9.987259712640697e-06, + "logits/chosen": -2.9068586826324463, + "logits/rejected": -2.96779727935791, + "logps/chosen": -118.10298156738281, + "logps/rejected": -361.5724792480469, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1859216690063477, + "rewards/margins": 5.273759841918945, + "rewards/rejected": -7.459681510925293, + "step": 5672 + }, + { + "epoch": 0.88, + "learning_rate": 9.986526272109549e-06, + "logits/chosen": -3.0338642597198486, + "logits/rejected": -2.9438281059265137, + "logps/chosen": -164.1827392578125, + "logps/rejected": -64.32633209228516, + "loss": 5.1276, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.23196029663086, + "rewards/margins": -5.091997146606445, + "rewards/rejected": -3.1399636268615723, + "step": 5673 + }, + { + "epoch": 0.88, + "learning_rate": 9.9857928315784e-06, + "logits/chosen": -2.988981246948242, + "logits/rejected": -1.398357629776001, + "logps/chosen": -566.6397705078125, + "logps/rejected": -292.9051513671875, + "loss": 2.5357, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.992866039276123, + "rewards/margins": -0.8566133975982666, + "rewards/rejected": -5.136252403259277, + "step": 5674 + }, + { + "epoch": 0.88, + "learning_rate": 9.985059391047253e-06, + "logits/chosen": -2.860570192337036, + "logits/rejected": -2.8591442108154297, + "logps/chosen": -203.79150390625, + "logps/rejected": -275.2906188964844, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7566299438476562, + "rewards/margins": 3.6785037517547607, + "rewards/rejected": -6.435133934020996, + "step": 5675 + }, + { + "epoch": 0.88, + "learning_rate": 9.984325950516105e-06, + "logits/chosen": -3.144561767578125, + "logits/rejected": -2.7609615325927734, + "logps/chosen": -318.24334716796875, + "logps/rejected": -219.82765197753906, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8789215087890625, + "rewards/margins": 7.212470054626465, + "rewards/rejected": -9.091391563415527, + "step": 5676 + }, + { + "epoch": 0.88, + "learning_rate": 9.983592509984956e-06, + "logits/chosen": -2.9666216373443604, + "logits/rejected": -2.593393325805664, + "logps/chosen": -511.54254150390625, + "logps/rejected": -426.3712463378906, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2075302600860596, + "rewards/margins": 4.3132643699646, + "rewards/rejected": -7.520794868469238, + "step": 5677 + }, + { + "epoch": 0.88, + "learning_rate": 9.98285906945381e-06, + "logits/chosen": -2.191987991333008, + "logits/rejected": -3.007453203201294, + "logps/chosen": -138.0633544921875, + "logps/rejected": -386.6361999511719, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7961435317993164, + "rewards/margins": 5.54891300201416, + "rewards/rejected": -8.345056533813477, + "step": 5678 + }, + { + "epoch": 0.88, + "learning_rate": 9.982125628922662e-06, + "logits/chosen": -2.0682849884033203, + "logits/rejected": -3.0653157234191895, + "logps/chosen": -166.47659301757812, + "logps/rejected": -322.720703125, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.619919776916504, + "rewards/margins": 5.305948257446289, + "rewards/rejected": -8.925868034362793, + "step": 5679 + }, + { + "epoch": 0.88, + "learning_rate": 9.981392188391515e-06, + "logits/chosen": -2.8395156860351562, + "logits/rejected": -3.0134527683258057, + "logps/chosen": -41.67283248901367, + "logps/rejected": -125.05168151855469, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2709622383117676, + "rewards/margins": 4.924343109130859, + "rewards/rejected": -8.195304870605469, + "step": 5680 + }, + { + "epoch": 0.88, + "learning_rate": 9.980658747860367e-06, + "logits/chosen": -1.5588757991790771, + "logits/rejected": -2.6496591567993164, + "logps/chosen": -211.60394287109375, + "logps/rejected": -561.9501953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3491883277893066, + "rewards/margins": 9.875531196594238, + "rewards/rejected": -12.224720001220703, + "step": 5681 + }, + { + "epoch": 0.88, + "learning_rate": 9.97992530732922e-06, + "logits/chosen": -2.9578306674957275, + "logits/rejected": -3.0904078483581543, + "logps/chosen": -101.47655487060547, + "logps/rejected": -174.23117065429688, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3517704010009766, + "rewards/margins": 6.2880659103393555, + "rewards/rejected": -9.639835357666016, + "step": 5682 + }, + { + "epoch": 0.88, + "learning_rate": 9.979191866798071e-06, + "logits/chosen": -2.9041855335235596, + "logits/rejected": -3.0658910274505615, + "logps/chosen": -92.47126770019531, + "logps/rejected": -254.817626953125, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9570670127868652, + "rewards/margins": 5.517270088195801, + "rewards/rejected": -8.474337577819824, + "step": 5683 + }, + { + "epoch": 0.88, + "learning_rate": 9.978458426266923e-06, + "logits/chosen": -2.652107000350952, + "logits/rejected": -2.3903236389160156, + "logps/chosen": -122.42854309082031, + "logps/rejected": -230.0482940673828, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.648052215576172, + "rewards/margins": 4.759092330932617, + "rewards/rejected": -8.407144546508789, + "step": 5684 + }, + { + "epoch": 0.88, + "learning_rate": 9.977724985735775e-06, + "logits/chosen": -3.0853159427642822, + "logits/rejected": -3.046316623687744, + "logps/chosen": -565.8974609375, + "logps/rejected": -289.78778076171875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7142364978790283, + "rewards/margins": 6.812302589416504, + "rewards/rejected": -8.526538848876953, + "step": 5685 + }, + { + "epoch": 0.88, + "learning_rate": 9.976991545204627e-06, + "logits/chosen": -2.836655855178833, + "logits/rejected": -2.905397891998291, + "logps/chosen": -197.65802001953125, + "logps/rejected": -360.3000183105469, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0206031799316406, + "rewards/margins": 8.989738464355469, + "rewards/rejected": -11.01034164428711, + "step": 5686 + }, + { + "epoch": 0.88, + "learning_rate": 9.97625810467348e-06, + "logits/chosen": -2.8534252643585205, + "logits/rejected": -2.4143378734588623, + "logps/chosen": -129.67630004882812, + "logps/rejected": -268.0289001464844, + "loss": 1.3328, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.628294467926025, + "rewards/margins": 2.356114149093628, + "rewards/rejected": -7.984408378601074, + "step": 5687 + }, + { + "epoch": 0.88, + "learning_rate": 9.975524664142332e-06, + "logits/chosen": -1.9794094562530518, + "logits/rejected": -2.7389864921569824, + "logps/chosen": -189.21304321289062, + "logps/rejected": -313.46575927734375, + "loss": 0.0814, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.711483955383301, + "rewards/margins": 2.8863415718078613, + "rewards/rejected": -8.59782600402832, + "step": 5688 + }, + { + "epoch": 0.88, + "learning_rate": 9.974791223611184e-06, + "logits/chosen": -2.813748359680176, + "logits/rejected": -2.967052698135376, + "logps/chosen": -105.57160949707031, + "logps/rejected": -238.17575073242188, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3316636085510254, + "rewards/margins": 8.253317832946777, + "rewards/rejected": -10.584981918334961, + "step": 5689 + }, + { + "epoch": 0.88, + "learning_rate": 9.974057783080036e-06, + "logits/chosen": -2.799548387527466, + "logits/rejected": -2.6359710693359375, + "logps/chosen": -151.04930114746094, + "logps/rejected": -182.5069580078125, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4478061199188232, + "rewards/margins": 6.300501823425293, + "rewards/rejected": -7.748307704925537, + "step": 5690 + }, + { + "epoch": 0.89, + "learning_rate": 9.973324342548888e-06, + "logits/chosen": -3.032827854156494, + "logits/rejected": -2.191843032836914, + "logps/chosen": -837.8819580078125, + "logps/rejected": -506.6059265136719, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2770018577575684, + "rewards/margins": 6.299656867980957, + "rewards/rejected": -9.576658248901367, + "step": 5691 + }, + { + "epoch": 0.89, + "learning_rate": 9.97259090201774e-06, + "logits/chosen": -0.5186129212379456, + "logits/rejected": -2.4420230388641357, + "logps/chosen": -103.05964660644531, + "logps/rejected": -477.0997009277344, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.570444107055664, + "rewards/margins": 6.292965888977051, + "rewards/rejected": -9.863410949707031, + "step": 5692 + }, + { + "epoch": 0.89, + "learning_rate": 9.971857461486592e-06, + "logits/chosen": -3.1105077266693115, + "logits/rejected": -2.967397928237915, + "logps/chosen": -165.32522583007812, + "logps/rejected": -338.5477294921875, + "loss": 1.3783, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3987064361572266, + "rewards/margins": 4.938311576843262, + "rewards/rejected": -8.337018966674805, + "step": 5693 + }, + { + "epoch": 0.89, + "learning_rate": 9.971124020955443e-06, + "logits/chosen": -2.279418468475342, + "logits/rejected": -2.7495994567871094, + "logps/chosen": -258.2996520996094, + "logps/rejected": -538.836181640625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9110541343688965, + "rewards/margins": 8.198003768920898, + "rewards/rejected": -12.109057426452637, + "step": 5694 + }, + { + "epoch": 0.89, + "learning_rate": 9.970390580424295e-06, + "logits/chosen": -1.9231644868850708, + "logits/rejected": -2.9159529209136963, + "logps/chosen": -155.3247528076172, + "logps/rejected": -333.7569885253906, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.638756513595581, + "rewards/margins": 7.792544364929199, + "rewards/rejected": -11.43130111694336, + "step": 5695 + }, + { + "epoch": 0.89, + "learning_rate": 9.969657139893149e-06, + "logits/chosen": -1.4221574068069458, + "logits/rejected": -2.9219396114349365, + "logps/chosen": -245.76412963867188, + "logps/rejected": -501.3033142089844, + "loss": 0.0805, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.347599744796753, + "rewards/margins": 4.985320091247559, + "rewards/rejected": -7.332919597625732, + "step": 5696 + }, + { + "epoch": 0.89, + "learning_rate": 9.968923699362e-06, + "logits/chosen": -2.8141112327575684, + "logits/rejected": -2.7749061584472656, + "logps/chosen": -360.8255615234375, + "logps/rejected": -464.47747802734375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.216172218322754, + "rewards/margins": 8.541399002075195, + "rewards/rejected": -11.757572174072266, + "step": 5697 + }, + { + "epoch": 0.89, + "learning_rate": 9.968190258830853e-06, + "logits/chosen": -0.680141031742096, + "logits/rejected": -2.8241994380950928, + "logps/chosen": -60.31414031982422, + "logps/rejected": -428.98138427734375, + "loss": 0.0235, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5964269638061523, + "rewards/margins": 6.1064348220825195, + "rewards/rejected": -9.702861785888672, + "step": 5698 + }, + { + "epoch": 0.89, + "learning_rate": 9.967456818299705e-06, + "logits/chosen": -3.074444055557251, + "logits/rejected": -2.141982078552246, + "logps/chosen": -282.4651794433594, + "logps/rejected": -187.2397003173828, + "loss": 4.5184, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.04029655456543, + "rewards/margins": 0.7154431343078613, + "rewards/rejected": -7.755739688873291, + "step": 5699 + }, + { + "epoch": 0.89, + "learning_rate": 9.966723377768556e-06, + "logits/chosen": -1.482085943222046, + "logits/rejected": -2.625864028930664, + "logps/chosen": -261.2339172363281, + "logps/rejected": -273.15582275390625, + "loss": 2.5455, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.321111679077148, + "rewards/margins": 0.6205472946166992, + "rewards/rejected": -6.941658973693848, + "step": 5700 + }, + { + "epoch": 0.89, + "learning_rate": 9.965989937237408e-06, + "logits/chosen": -2.849257707595825, + "logits/rejected": -2.907954216003418, + "logps/chosen": -308.02252197265625, + "logps/rejected": -476.082763671875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.471431016921997, + "rewards/margins": 6.717175483703613, + "rewards/rejected": -10.188606262207031, + "step": 5701 + }, + { + "epoch": 0.89, + "learning_rate": 9.96525649670626e-06, + "logits/chosen": -1.4051178693771362, + "logits/rejected": -2.298705577850342, + "logps/chosen": -178.553955078125, + "logps/rejected": -356.0998229980469, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1948227882385254, + "rewards/margins": 10.388105392456055, + "rewards/rejected": -12.582927703857422, + "step": 5702 + }, + { + "epoch": 0.89, + "learning_rate": 9.964523056175112e-06, + "logits/chosen": -2.3362009525299072, + "logits/rejected": -2.5076279640197754, + "logps/chosen": -216.402099609375, + "logps/rejected": -271.3104553222656, + "loss": 1.4516, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.541298866271973, + "rewards/margins": 3.4269471168518066, + "rewards/rejected": -8.968246459960938, + "step": 5703 + }, + { + "epoch": 0.89, + "learning_rate": 9.963789615643964e-06, + "logits/chosen": -2.4161555767059326, + "logits/rejected": -3.024709939956665, + "logps/chosen": -107.69712829589844, + "logps/rejected": -285.59259033203125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7494254112243652, + "rewards/margins": 6.856607913970947, + "rewards/rejected": -9.606033325195312, + "step": 5704 + }, + { + "epoch": 0.89, + "learning_rate": 9.963056175112817e-06, + "logits/chosen": -2.6985650062561035, + "logits/rejected": -3.0659124851226807, + "logps/chosen": -89.53733825683594, + "logps/rejected": -220.21351623535156, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5851311683654785, + "rewards/margins": 5.64966344833374, + "rewards/rejected": -8.234794616699219, + "step": 5705 + }, + { + "epoch": 0.89, + "learning_rate": 9.96232273458167e-06, + "logits/chosen": -1.6803009510040283, + "logits/rejected": -2.7870023250579834, + "logps/chosen": -190.7737579345703, + "logps/rejected": -387.4331970214844, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2990407943725586, + "rewards/margins": 8.065430641174316, + "rewards/rejected": -10.364471435546875, + "step": 5706 + }, + { + "epoch": 0.89, + "learning_rate": 9.961589294050521e-06, + "logits/chosen": -1.8997424840927124, + "logits/rejected": -2.8297836780548096, + "logps/chosen": -149.4051055908203, + "logps/rejected": -303.65655517578125, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1184067726135254, + "rewards/margins": 5.272285461425781, + "rewards/rejected": -8.390691757202148, + "step": 5707 + }, + { + "epoch": 0.89, + "learning_rate": 9.960855853519373e-06, + "logits/chosen": -1.7880908250808716, + "logits/rejected": -2.6183910369873047, + "logps/chosen": -72.380615234375, + "logps/rejected": -180.64300537109375, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.060112953186035, + "rewards/margins": 5.563057899475098, + "rewards/rejected": -8.623170852661133, + "step": 5708 + }, + { + "epoch": 0.89, + "learning_rate": 9.960122412988225e-06, + "logits/chosen": -2.2612836360931396, + "logits/rejected": -2.8354945182800293, + "logps/chosen": -116.21984100341797, + "logps/rejected": -198.33526611328125, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4784741401672363, + "rewards/margins": 5.402243614196777, + "rewards/rejected": -8.880718231201172, + "step": 5709 + }, + { + "epoch": 0.89, + "learning_rate": 9.959388972457077e-06, + "logits/chosen": -3.053662061691284, + "logits/rejected": -2.4335083961486816, + "logps/chosen": -301.2962646484375, + "logps/rejected": -159.553955078125, + "loss": 0.1988, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.977474212646484, + "rewards/margins": 3.157477378845215, + "rewards/rejected": -8.1349515914917, + "step": 5710 + }, + { + "epoch": 0.89, + "learning_rate": 9.958655531925929e-06, + "logits/chosen": -0.739762008190155, + "logits/rejected": -3.007197856903076, + "logps/chosen": -68.06391906738281, + "logps/rejected": -487.3288879394531, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.77345609664917, + "rewards/margins": 4.7755446434021, + "rewards/rejected": -8.54900074005127, + "step": 5711 + }, + { + "epoch": 0.89, + "learning_rate": 9.95792209139478e-06, + "logits/chosen": -2.009981155395508, + "logits/rejected": -2.614985704421997, + "logps/chosen": -74.46124267578125, + "logps/rejected": -213.8414764404297, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.455937385559082, + "rewards/margins": 6.641229152679443, + "rewards/rejected": -9.097167015075684, + "step": 5712 + }, + { + "epoch": 0.89, + "learning_rate": 9.957188650863634e-06, + "logits/chosen": -2.7525980472564697, + "logits/rejected": -1.9911658763885498, + "logps/chosen": -476.2826843261719, + "logps/rejected": -399.89080810546875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3344544172286987, + "rewards/margins": 7.677814960479736, + "rewards/rejected": -9.012269020080566, + "step": 5713 + }, + { + "epoch": 0.89, + "learning_rate": 9.956455210332486e-06, + "logits/chosen": -2.671849489212036, + "logits/rejected": -3.0499112606048584, + "logps/chosen": -348.28143310546875, + "logps/rejected": -530.7972412109375, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.823408842086792, + "rewards/margins": 5.652077674865723, + "rewards/rejected": -9.475486755371094, + "step": 5714 + }, + { + "epoch": 0.89, + "learning_rate": 9.95572176980134e-06, + "logits/chosen": -2.901106834411621, + "logits/rejected": -2.0310535430908203, + "logps/chosen": -486.7249450683594, + "logps/rejected": -549.4578857421875, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.312832832336426, + "rewards/margins": 7.4121174812316895, + "rewards/rejected": -9.724949836730957, + "step": 5715 + }, + { + "epoch": 0.89, + "learning_rate": 9.954988329270192e-06, + "logits/chosen": -2.098973274230957, + "logits/rejected": -2.660607099533081, + "logps/chosen": -365.383544921875, + "logps/rejected": -374.48626708984375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9658668041229248, + "rewards/margins": 7.433361530303955, + "rewards/rejected": -9.3992280960083, + "step": 5716 + }, + { + "epoch": 0.89, + "learning_rate": 9.954254888739043e-06, + "logits/chosen": -2.263038158416748, + "logits/rejected": -1.8101494312286377, + "logps/chosen": -196.38162231445312, + "logps/rejected": -251.89183044433594, + "loss": 0.0966, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.93759822845459, + "rewards/margins": 3.9209086894989014, + "rewards/rejected": -6.85850715637207, + "step": 5717 + }, + { + "epoch": 0.89, + "learning_rate": 9.953521448207895e-06, + "logits/chosen": -3.084841012954712, + "logits/rejected": -1.4930635690689087, + "logps/chosen": -308.2825012207031, + "logps/rejected": -174.5360565185547, + "loss": 1.5706, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.126905918121338, + "rewards/margins": 2.8716156482696533, + "rewards/rejected": -7.99852180480957, + "step": 5718 + }, + { + "epoch": 0.89, + "learning_rate": 9.952788007676747e-06, + "logits/chosen": -2.9589664936065674, + "logits/rejected": -3.155073642730713, + "logps/chosen": -51.741943359375, + "logps/rejected": -169.5196990966797, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9247255325317383, + "rewards/margins": 5.133946418762207, + "rewards/rejected": -8.058671951293945, + "step": 5719 + }, + { + "epoch": 0.89, + "learning_rate": 9.952054567145599e-06, + "logits/chosen": -2.3578877449035645, + "logits/rejected": -3.1199381351470947, + "logps/chosen": -125.65396881103516, + "logps/rejected": -341.7137145996094, + "loss": 0.0598, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.963963270187378, + "rewards/margins": 3.8420724868774414, + "rewards/rejected": -6.806035995483398, + "step": 5720 + }, + { + "epoch": 0.89, + "learning_rate": 9.951321126614451e-06, + "logits/chosen": -1.0859570503234863, + "logits/rejected": -2.7626185417175293, + "logps/chosen": -107.23582458496094, + "logps/rejected": -187.97476196289062, + "loss": 1.4094, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.416830539703369, + "rewards/margins": 2.4717583656311035, + "rewards/rejected": -7.888588905334473, + "step": 5721 + }, + { + "epoch": 0.89, + "learning_rate": 9.950587686083303e-06, + "logits/chosen": -2.359086751937866, + "logits/rejected": -2.8839612007141113, + "logps/chosen": -152.18661499023438, + "logps/rejected": -231.78573608398438, + "loss": 2.641, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.2537736892700195, + "rewards/margins": 0.8798074722290039, + "rewards/rejected": -7.133581161499023, + "step": 5722 + }, + { + "epoch": 0.89, + "learning_rate": 9.949854245552156e-06, + "logits/chosen": -2.1806821823120117, + "logits/rejected": -2.7259860038757324, + "logps/chosen": -292.5709228515625, + "logps/rejected": -636.3001708984375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7060747146606445, + "rewards/margins": 5.884242057800293, + "rewards/rejected": -11.590316772460938, + "step": 5723 + }, + { + "epoch": 0.89, + "learning_rate": 9.949120805021008e-06, + "logits/chosen": -2.815276861190796, + "logits/rejected": -3.067470073699951, + "logps/chosen": -213.94229125976562, + "logps/rejected": -247.190673828125, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.696106433868408, + "rewards/margins": 6.484475612640381, + "rewards/rejected": -9.180582046508789, + "step": 5724 + }, + { + "epoch": 0.89, + "learning_rate": 9.94838736448986e-06, + "logits/chosen": -2.7441625595092773, + "logits/rejected": -3.056058406829834, + "logps/chosen": -80.93325805664062, + "logps/rejected": -186.51092529296875, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4726319313049316, + "rewards/margins": 5.355470657348633, + "rewards/rejected": -8.828102111816406, + "step": 5725 + }, + { + "epoch": 0.89, + "learning_rate": 9.947653923958712e-06, + "logits/chosen": -2.9730305671691895, + "logits/rejected": -3.0101912021636963, + "logps/chosen": -48.65563201904297, + "logps/rejected": -116.53016662597656, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.737776517868042, + "rewards/margins": 4.460851669311523, + "rewards/rejected": -7.198627948760986, + "step": 5726 + }, + { + "epoch": 0.89, + "learning_rate": 9.946920483427564e-06, + "logits/chosen": -1.9378972053527832, + "logits/rejected": -1.954424262046814, + "logps/chosen": -75.9117202758789, + "logps/rejected": -173.67730712890625, + "loss": 0.1857, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.014890670776367, + "rewards/margins": 4.553922653198242, + "rewards/rejected": -8.56881332397461, + "step": 5727 + }, + { + "epoch": 0.89, + "learning_rate": 9.946187042896416e-06, + "logits/chosen": -3.061298131942749, + "logits/rejected": -2.6832034587860107, + "logps/chosen": -544.989990234375, + "logps/rejected": -546.82275390625, + "loss": 0.0359, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.586420774459839, + "rewards/margins": 4.615239143371582, + "rewards/rejected": -8.20166015625, + "step": 5728 + }, + { + "epoch": 0.89, + "learning_rate": 9.945453602365268e-06, + "logits/chosen": -2.297376871109009, + "logits/rejected": -3.0227277278900146, + "logps/chosen": -413.7275085449219, + "logps/rejected": -501.21893310546875, + "loss": 2.6113, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.540377140045166, + "rewards/margins": 1.011873722076416, + "rewards/rejected": -6.552250862121582, + "step": 5729 + }, + { + "epoch": 0.89, + "learning_rate": 9.94472016183412e-06, + "logits/chosen": -2.598515272140503, + "logits/rejected": -1.2357869148254395, + "logps/chosen": -231.65257263183594, + "logps/rejected": -206.8898468017578, + "loss": 4.3358, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.12635612487793, + "rewards/margins": 1.7644624710083008, + "rewards/rejected": -8.89081859588623, + "step": 5730 + }, + { + "epoch": 0.89, + "learning_rate": 9.943986721302971e-06, + "logits/chosen": -3.0383689403533936, + "logits/rejected": -2.2772367000579834, + "logps/chosen": -354.3334045410156, + "logps/rejected": -260.0830078125, + "loss": 2.1838, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.0540056228637695, + "rewards/margins": 0.9768557548522949, + "rewards/rejected": -6.030860900878906, + "step": 5731 + }, + { + "epoch": 0.89, + "learning_rate": 9.943253280771825e-06, + "logits/chosen": -2.734398603439331, + "logits/rejected": -3.2025928497314453, + "logps/chosen": -796.8856201171875, + "logps/rejected": -526.1964111328125, + "loss": 0.9574, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.351351737976074, + "rewards/margins": 2.5368001461029053, + "rewards/rejected": -6.888152122497559, + "step": 5732 + }, + { + "epoch": 0.89, + "learning_rate": 9.942519840240677e-06, + "logits/chosen": -2.5714457035064697, + "logits/rejected": -2.020258903503418, + "logps/chosen": -531.4306640625, + "logps/rejected": -555.3960571289062, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.278782844543457, + "rewards/margins": 5.583497524261475, + "rewards/rejected": -9.862279891967773, + "step": 5733 + }, + { + "epoch": 0.89, + "learning_rate": 9.941786399709529e-06, + "logits/chosen": -2.5062074661254883, + "logits/rejected": -2.0981497764587402, + "logps/chosen": -162.3140106201172, + "logps/rejected": -175.64273071289062, + "loss": 0.2107, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.944155216217041, + "rewards/margins": 2.0882787704467773, + "rewards/rejected": -7.032433986663818, + "step": 5734 + }, + { + "epoch": 0.89, + "learning_rate": 9.94105295917838e-06, + "logits/chosen": -2.8653101921081543, + "logits/rejected": -1.628866195678711, + "logps/chosen": -176.73410034179688, + "logps/rejected": -72.27241516113281, + "loss": 3.5561, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.181292533874512, + "rewards/margins": -3.373795509338379, + "rewards/rejected": -4.807497501373291, + "step": 5735 + }, + { + "epoch": 0.89, + "learning_rate": 9.940319518647232e-06, + "logits/chosen": -3.1509692668914795, + "logits/rejected": -2.411137342453003, + "logps/chosen": -236.6444091796875, + "logps/rejected": -218.15011596679688, + "loss": 1.9567, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.523433685302734, + "rewards/margins": -0.4748603105545044, + "rewards/rejected": -5.0485734939575195, + "step": 5736 + }, + { + "epoch": 0.89, + "learning_rate": 9.939586078116084e-06, + "logits/chosen": -2.6108157634735107, + "logits/rejected": -2.88179612159729, + "logps/chosen": -231.51885986328125, + "logps/rejected": -211.93206787109375, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.293248176574707, + "rewards/margins": 5.468995571136475, + "rewards/rejected": -8.762243270874023, + "step": 5737 + }, + { + "epoch": 0.89, + "learning_rate": 9.938852637584936e-06, + "logits/chosen": -2.920657157897949, + "logits/rejected": -2.829227924346924, + "logps/chosen": -284.7420654296875, + "logps/rejected": -329.59381103515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0804526805877686, + "rewards/margins": 8.604022979736328, + "rewards/rejected": -10.684475898742676, + "step": 5738 + }, + { + "epoch": 0.89, + "learning_rate": 9.938119197053788e-06, + "logits/chosen": -2.8035340309143066, + "logits/rejected": -2.0438811779022217, + "logps/chosen": -217.33932495117188, + "logps/rejected": -112.31863403320312, + "loss": 0.0742, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.633389472961426, + "rewards/margins": 3.1312255859375, + "rewards/rejected": -7.764615058898926, + "step": 5739 + }, + { + "epoch": 0.89, + "learning_rate": 9.93738575652264e-06, + "logits/chosen": -2.0078845024108887, + "logits/rejected": -2.8533215522766113, + "logps/chosen": -134.32635498046875, + "logps/rejected": -312.609375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.064152240753174, + "rewards/margins": 6.680811882019043, + "rewards/rejected": -9.744964599609375, + "step": 5740 + }, + { + "epoch": 0.89, + "learning_rate": 9.936652315991494e-06, + "logits/chosen": -2.5544912815093994, + "logits/rejected": -3.0942134857177734, + "logps/chosen": -114.45709991455078, + "logps/rejected": -276.6720886230469, + "loss": 0.4378, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.264601230621338, + "rewards/margins": 3.0427651405334473, + "rewards/rejected": -8.307366371154785, + "step": 5741 + }, + { + "epoch": 0.89, + "learning_rate": 9.935918875460345e-06, + "logits/chosen": -2.92517352104187, + "logits/rejected": -3.0934062004089355, + "logps/chosen": -76.2813720703125, + "logps/rejected": -290.6424865722656, + "loss": 0.2987, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.848894119262695, + "rewards/margins": 3.2191007137298584, + "rewards/rejected": -8.067995071411133, + "step": 5742 + }, + { + "epoch": 0.89, + "learning_rate": 9.935185434929197e-06, + "logits/chosen": -2.354295015335083, + "logits/rejected": -2.640678882598877, + "logps/chosen": -121.67151641845703, + "logps/rejected": -330.48992919921875, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.490460157394409, + "rewards/margins": 5.666572093963623, + "rewards/rejected": -9.157032012939453, + "step": 5743 + }, + { + "epoch": 0.89, + "learning_rate": 9.93445199439805e-06, + "logits/chosen": -2.2362210750579834, + "logits/rejected": -2.981313705444336, + "logps/chosen": -319.1418151855469, + "logps/rejected": -297.35821533203125, + "loss": 1.3328, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.75543212890625, + "rewards/margins": 1.3333213329315186, + "rewards/rejected": -6.088753700256348, + "step": 5744 + }, + { + "epoch": 0.89, + "learning_rate": 9.933718553866901e-06, + "logits/chosen": -2.150735855102539, + "logits/rejected": -2.794766426086426, + "logps/chosen": -111.89205932617188, + "logps/rejected": -456.6298828125, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9361240863800049, + "rewards/margins": 8.579764366149902, + "rewards/rejected": -10.515888214111328, + "step": 5745 + }, + { + "epoch": 0.89, + "learning_rate": 9.932985113335753e-06, + "logits/chosen": -2.333822727203369, + "logits/rejected": -2.8479533195495605, + "logps/chosen": -270.33251953125, + "logps/rejected": -420.0532531738281, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6733101606369019, + "rewards/margins": 6.622391700744629, + "rewards/rejected": -8.29570198059082, + "step": 5746 + }, + { + "epoch": 0.89, + "learning_rate": 9.932251672804607e-06, + "logits/chosen": -2.128915786743164, + "logits/rejected": -2.7944278717041016, + "logps/chosen": -352.7704162597656, + "logps/rejected": -426.4620361328125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9260971546173096, + "rewards/margins": 7.613327980041504, + "rewards/rejected": -9.539424896240234, + "step": 5747 + }, + { + "epoch": 0.89, + "learning_rate": 9.931518232273458e-06, + "logits/chosen": -1.827451467514038, + "logits/rejected": -3.0973691940307617, + "logps/chosen": -76.97984313964844, + "logps/rejected": -309.0401306152344, + "loss": 0.1652, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1101601123809814, + "rewards/margins": 3.752650737762451, + "rewards/rejected": -6.8628106117248535, + "step": 5748 + }, + { + "epoch": 0.89, + "learning_rate": 9.93078479174231e-06, + "logits/chosen": -2.9374563694000244, + "logits/rejected": -1.7381372451782227, + "logps/chosen": -258.2646179199219, + "logps/rejected": -305.36663818359375, + "loss": 3.7418, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.671019554138184, + "rewards/margins": 1.515815258026123, + "rewards/rejected": -7.186834812164307, + "step": 5749 + }, + { + "epoch": 0.89, + "learning_rate": 9.930051351211164e-06, + "logits/chosen": -2.8506758213043213, + "logits/rejected": -2.9575867652893066, + "logps/chosen": -75.16947937011719, + "logps/rejected": -163.1031036376953, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.28739595413208, + "rewards/margins": 5.170686721801758, + "rewards/rejected": -8.45808219909668, + "step": 5750 + }, + { + "epoch": 0.89, + "learning_rate": 9.929317910680016e-06, + "logits/chosen": -2.6063220500946045, + "logits/rejected": -2.2531321048736572, + "logps/chosen": -137.73974609375, + "logps/rejected": -161.5908203125, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9005303382873535, + "rewards/margins": 5.461302757263184, + "rewards/rejected": -9.361833572387695, + "step": 5751 + }, + { + "epoch": 0.89, + "learning_rate": 9.928584470148868e-06, + "logits/chosen": -2.3767197132110596, + "logits/rejected": -2.6717774868011475, + "logps/chosen": -127.91627502441406, + "logps/rejected": -334.57452392578125, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7982959747314453, + "rewards/margins": 4.635432243347168, + "rewards/rejected": -7.433728218078613, + "step": 5752 + }, + { + "epoch": 0.89, + "learning_rate": 9.92785102961772e-06, + "logits/chosen": -2.802144765853882, + "logits/rejected": -2.4572157859802246, + "logps/chosen": -240.98207092285156, + "logps/rejected": -263.5625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9516725540161133, + "rewards/margins": 6.440738677978516, + "rewards/rejected": -9.392411231994629, + "step": 5753 + }, + { + "epoch": 0.89, + "learning_rate": 9.927117589086571e-06, + "logits/chosen": -2.8746402263641357, + "logits/rejected": -2.2310659885406494, + "logps/chosen": -524.9956665039062, + "logps/rejected": -523.4583129882812, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.000467777252197, + "rewards/margins": 6.632606506347656, + "rewards/rejected": -10.633074760437012, + "step": 5754 + }, + { + "epoch": 0.9, + "learning_rate": 9.926384148555423e-06, + "logits/chosen": -2.4083688259124756, + "logits/rejected": -2.5353891849517822, + "logps/chosen": -206.48863220214844, + "logps/rejected": -339.8869323730469, + "loss": 2.7904, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.279201984405518, + "rewards/margins": 1.938035249710083, + "rewards/rejected": -7.2172369956970215, + "step": 5755 + }, + { + "epoch": 0.9, + "learning_rate": 9.925650708024275e-06, + "logits/chosen": -2.8931055068969727, + "logits/rejected": -3.1142427921295166, + "logps/chosen": -171.52484130859375, + "logps/rejected": -149.72044372558594, + "loss": 2.7961, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.338994979858398, + "rewards/margins": -1.5734918117523193, + "rewards/rejected": -4.7655029296875, + "step": 5756 + }, + { + "epoch": 0.9, + "learning_rate": 9.924917267493127e-06, + "logits/chosen": -2.6628010272979736, + "logits/rejected": -2.335340738296509, + "logps/chosen": -258.51177978515625, + "logps/rejected": -324.1805725097656, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.410524845123291, + "rewards/margins": 6.150517463684082, + "rewards/rejected": -10.561042785644531, + "step": 5757 + }, + { + "epoch": 0.9, + "learning_rate": 9.924183826961979e-06, + "logits/chosen": -2.851423978805542, + "logits/rejected": -2.0615508556365967, + "logps/chosen": -278.68890380859375, + "logps/rejected": -149.89816284179688, + "loss": 1.0975, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.143080711364746, + "rewards/margins": 0.30146706104278564, + "rewards/rejected": -5.4445481300354, + "step": 5758 + }, + { + "epoch": 0.9, + "learning_rate": 9.923450386430832e-06, + "logits/chosen": -2.2577662467956543, + "logits/rejected": -3.112788438796997, + "logps/chosen": -60.382598876953125, + "logps/rejected": -394.7697448730469, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6157193183898926, + "rewards/margins": 9.217828750610352, + "rewards/rejected": -12.833548545837402, + "step": 5759 + }, + { + "epoch": 0.9, + "learning_rate": 9.922716945899684e-06, + "logits/chosen": -2.8649110794067383, + "logits/rejected": -3.1085855960845947, + "logps/chosen": -319.3006591796875, + "logps/rejected": -315.10345458984375, + "loss": 1.0022, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.469017028808594, + "rewards/margins": 1.6594407558441162, + "rewards/rejected": -6.128458023071289, + "step": 5760 + }, + { + "epoch": 0.9, + "learning_rate": 9.921983505368536e-06, + "logits/chosen": -2.5616140365600586, + "logits/rejected": -2.8005616664886475, + "logps/chosen": -94.37411499023438, + "logps/rejected": -189.13748168945312, + "loss": 0.0291, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8080930709838867, + "rewards/margins": 4.122689247131348, + "rewards/rejected": -6.930782318115234, + "step": 5761 + }, + { + "epoch": 0.9, + "learning_rate": 9.921250064837388e-06, + "logits/chosen": -2.1419260501861572, + "logits/rejected": -2.7943713665008545, + "logps/chosen": -176.2024688720703, + "logps/rejected": -293.72369384765625, + "loss": 3.9284, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.187231540679932, + "rewards/margins": 1.218412160873413, + "rewards/rejected": -8.405643463134766, + "step": 5762 + }, + { + "epoch": 0.9, + "learning_rate": 9.92051662430624e-06, + "logits/chosen": -2.53590726852417, + "logits/rejected": -2.7282910346984863, + "logps/chosen": -194.06887817382812, + "logps/rejected": -240.1837158203125, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.673635244369507, + "rewards/margins": 3.5527184009552, + "rewards/rejected": -7.226353645324707, + "step": 5763 + }, + { + "epoch": 0.9, + "learning_rate": 9.919783183775092e-06, + "logits/chosen": -2.831454038619995, + "logits/rejected": -1.917206048965454, + "logps/chosen": -258.72686767578125, + "logps/rejected": -289.49090576171875, + "loss": 0.1933, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.262326955795288, + "rewards/margins": 2.660292625427246, + "rewards/rejected": -5.922619819641113, + "step": 5764 + }, + { + "epoch": 0.9, + "learning_rate": 9.919049743243944e-06, + "logits/chosen": -2.92624568939209, + "logits/rejected": -2.436424732208252, + "logps/chosen": -480.7975158691406, + "logps/rejected": -485.2628479003906, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.574456810951233, + "rewards/margins": 8.410663604736328, + "rewards/rejected": -9.98512077331543, + "step": 5765 + }, + { + "epoch": 0.9, + "learning_rate": 9.918316302712796e-06, + "logits/chosen": -2.0287015438079834, + "logits/rejected": -2.7951624393463135, + "logps/chosen": -221.16737365722656, + "logps/rejected": -329.26824951171875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.085263252258301, + "rewards/margins": 5.169743537902832, + "rewards/rejected": -8.255006790161133, + "step": 5766 + }, + { + "epoch": 0.9, + "learning_rate": 9.91758286218165e-06, + "logits/chosen": -2.7210114002227783, + "logits/rejected": -3.021510362625122, + "logps/chosen": -230.9927520751953, + "logps/rejected": -158.7146759033203, + "loss": 3.0158, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.560163497924805, + "rewards/margins": -0.23919296264648438, + "rewards/rejected": -6.32097053527832, + "step": 5767 + }, + { + "epoch": 0.9, + "learning_rate": 9.916849421650501e-06, + "logits/chosen": -2.402711868286133, + "logits/rejected": -3.0643150806427, + "logps/chosen": -198.93228149414062, + "logps/rejected": -390.5929260253906, + "loss": 0.2244, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.695068836212158, + "rewards/margins": 4.877355098724365, + "rewards/rejected": -11.572423934936523, + "step": 5768 + }, + { + "epoch": 0.9, + "learning_rate": 9.916115981119353e-06, + "logits/chosen": -3.073765277862549, + "logits/rejected": -2.868689775466919, + "logps/chosen": -633.2468872070312, + "logps/rejected": -321.2625732421875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0330688953399658, + "rewards/margins": 5.784684658050537, + "rewards/rejected": -6.817753791809082, + "step": 5769 + }, + { + "epoch": 0.9, + "learning_rate": 9.915382540588205e-06, + "logits/chosen": -2.603395462036133, + "logits/rejected": -2.341289758682251, + "logps/chosen": -131.6126708984375, + "logps/rejected": -128.49298095703125, + "loss": 2.4765, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.124515056610107, + "rewards/margins": -0.7618894577026367, + "rewards/rejected": -6.362625598907471, + "step": 5770 + }, + { + "epoch": 0.9, + "learning_rate": 9.914649100057057e-06, + "logits/chosen": -2.1821706295013428, + "logits/rejected": -2.920635938644409, + "logps/chosen": -222.83670043945312, + "logps/rejected": -341.97705078125, + "loss": 0.0269, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.274743556976318, + "rewards/margins": 4.366860389709473, + "rewards/rejected": -8.641603469848633, + "step": 5771 + }, + { + "epoch": 0.9, + "learning_rate": 9.913915659525909e-06, + "logits/chosen": -2.5042290687561035, + "logits/rejected": -2.7849373817443848, + "logps/chosen": -576.0127563476562, + "logps/rejected": -553.26220703125, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0023553371429443, + "rewards/margins": 5.532010078430176, + "rewards/rejected": -8.534364700317383, + "step": 5772 + }, + { + "epoch": 0.9, + "learning_rate": 9.91318221899476e-06, + "logits/chosen": -2.4261765480041504, + "logits/rejected": -2.9522178173065186, + "logps/chosen": -300.68865966796875, + "logps/rejected": -399.077392578125, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5217061042785645, + "rewards/margins": 4.50477933883667, + "rewards/rejected": -8.026485443115234, + "step": 5773 + }, + { + "epoch": 0.9, + "learning_rate": 9.912448778463612e-06, + "logits/chosen": -1.8920561075210571, + "logits/rejected": -2.8828694820404053, + "logps/chosen": -175.8416290283203, + "logps/rejected": -441.44390869140625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0415585041046143, + "rewards/margins": 7.1630024909973145, + "rewards/rejected": -10.204561233520508, + "step": 5774 + }, + { + "epoch": 0.9, + "learning_rate": 9.911715337932464e-06, + "logits/chosen": -1.0728766918182373, + "logits/rejected": -1.9396603107452393, + "logps/chosen": -228.3734130859375, + "logps/rejected": -576.4596557617188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.433387279510498, + "rewards/margins": 9.494354248046875, + "rewards/rejected": -12.927741050720215, + "step": 5775 + }, + { + "epoch": 0.9, + "learning_rate": 9.910981897401318e-06, + "logits/chosen": -1.9331012964248657, + "logits/rejected": -2.8100152015686035, + "logps/chosen": -244.54176330566406, + "logps/rejected": -338.7187194824219, + "loss": 0.0799, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5948922634124756, + "rewards/margins": 4.296972751617432, + "rewards/rejected": -6.891865253448486, + "step": 5776 + }, + { + "epoch": 0.9, + "learning_rate": 9.91024845687017e-06, + "logits/chosen": -2.9696359634399414, + "logits/rejected": -2.2140190601348877, + "logps/chosen": -360.58203125, + "logps/rejected": -281.8527526855469, + "loss": 3.1266, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.377994537353516, + "rewards/margins": -0.6567747592926025, + "rewards/rejected": -5.721220016479492, + "step": 5777 + }, + { + "epoch": 0.9, + "learning_rate": 9.909515016339022e-06, + "logits/chosen": -2.8080618381500244, + "logits/rejected": -2.7630436420440674, + "logps/chosen": -318.1382141113281, + "logps/rejected": -192.70370483398438, + "loss": 1.9586, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.452083110809326, + "rewards/margins": 0.10265684127807617, + "rewards/rejected": -5.554739952087402, + "step": 5778 + }, + { + "epoch": 0.9, + "learning_rate": 9.908781575807873e-06, + "logits/chosen": -2.9991137981414795, + "logits/rejected": -3.1220850944519043, + "logps/chosen": -274.83709716796875, + "logps/rejected": -403.78131103515625, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.139768362045288, + "rewards/margins": 6.490166664123535, + "rewards/rejected": -8.629934310913086, + "step": 5779 + }, + { + "epoch": 0.9, + "learning_rate": 9.908048135276725e-06, + "logits/chosen": -2.021165132522583, + "logits/rejected": -3.052187919616699, + "logps/chosen": -161.91311645507812, + "logps/rejected": -340.5150146484375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.515010118484497, + "rewards/margins": 7.715457439422607, + "rewards/rejected": -10.230467796325684, + "step": 5780 + }, + { + "epoch": 0.9, + "learning_rate": 9.907314694745579e-06, + "logits/chosen": -1.7761054039001465, + "logits/rejected": -2.68652081489563, + "logps/chosen": -107.79051208496094, + "logps/rejected": -397.07672119140625, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.726112961769104, + "rewards/margins": 6.525979042053223, + "rewards/rejected": -8.252092361450195, + "step": 5781 + }, + { + "epoch": 0.9, + "learning_rate": 9.90658125421443e-06, + "logits/chosen": -1.655145525932312, + "logits/rejected": -2.8517935276031494, + "logps/chosen": -98.76277160644531, + "logps/rejected": -227.82684326171875, + "loss": 0.5075, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.330066680908203, + "rewards/margins": 2.2521321773529053, + "rewards/rejected": -7.5821990966796875, + "step": 5782 + }, + { + "epoch": 0.9, + "learning_rate": 9.905847813683283e-06, + "logits/chosen": -1.878390908241272, + "logits/rejected": -3.08052659034729, + "logps/chosen": -146.4906005859375, + "logps/rejected": -264.9671936035156, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5070838928222656, + "rewards/margins": 4.082162857055664, + "rewards/rejected": -6.58924674987793, + "step": 5783 + }, + { + "epoch": 0.9, + "learning_rate": 9.905114373152135e-06, + "logits/chosen": -2.3563895225524902, + "logits/rejected": -2.9291980266571045, + "logps/chosen": -95.21925354003906, + "logps/rejected": -374.7881774902344, + "loss": 0.0502, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.571349620819092, + "rewards/margins": 5.737053871154785, + "rewards/rejected": -10.308403968811035, + "step": 5784 + }, + { + "epoch": 0.9, + "learning_rate": 9.904380932620988e-06, + "logits/chosen": -2.9453909397125244, + "logits/rejected": -2.224062919616699, + "logps/chosen": -325.3501281738281, + "logps/rejected": -207.77792358398438, + "loss": 0.6605, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.736997365951538, + "rewards/margins": 2.9898760318756104, + "rewards/rejected": -6.726873397827148, + "step": 5785 + }, + { + "epoch": 0.9, + "learning_rate": 9.90364749208984e-06, + "logits/chosen": -2.578612804412842, + "logits/rejected": -3.169642210006714, + "logps/chosen": -69.10197448730469, + "logps/rejected": -269.5796203613281, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9824625253677368, + "rewards/margins": 5.258705139160156, + "rewards/rejected": -7.2411675453186035, + "step": 5786 + }, + { + "epoch": 0.9, + "learning_rate": 9.902914051558692e-06, + "logits/chosen": -2.960904121398926, + "logits/rejected": -3.0882513523101807, + "logps/chosen": -118.39654541015625, + "logps/rejected": -302.4617614746094, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9079644680023193, + "rewards/margins": 7.189925193786621, + "rewards/rejected": -11.09788990020752, + "step": 5787 + }, + { + "epoch": 0.9, + "learning_rate": 9.902180611027544e-06, + "logits/chosen": -2.885770082473755, + "logits/rejected": -2.1338703632354736, + "logps/chosen": -219.80303955078125, + "logps/rejected": -197.6610107421875, + "loss": 2.7165, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.555169582366943, + "rewards/margins": -0.36460280418395996, + "rewards/rejected": -4.1905670166015625, + "step": 5788 + }, + { + "epoch": 0.9, + "learning_rate": 9.901447170496396e-06, + "logits/chosen": -2.266557455062866, + "logits/rejected": -2.9663853645324707, + "logps/chosen": -540.7928466796875, + "logps/rejected": -479.4779052734375, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3809540271759033, + "rewards/margins": 5.596423149108887, + "rewards/rejected": -6.977377414703369, + "step": 5789 + }, + { + "epoch": 0.9, + "learning_rate": 9.900713729965247e-06, + "logits/chosen": -2.7965035438537598, + "logits/rejected": -1.5420736074447632, + "logps/chosen": -212.65786743164062, + "logps/rejected": -362.0334167480469, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.670823335647583, + "rewards/margins": 9.682723999023438, + "rewards/rejected": -12.353546142578125, + "step": 5790 + }, + { + "epoch": 0.9, + "learning_rate": 9.8999802894341e-06, + "logits/chosen": -3.1476190090179443, + "logits/rejected": -2.5199670791625977, + "logps/chosen": -321.3967590332031, + "logps/rejected": -300.1310729980469, + "loss": 5.055, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.233902931213379, + "rewards/margins": -5.048040390014648, + "rewards/rejected": -3.1858625411987305, + "step": 5791 + }, + { + "epoch": 0.9, + "learning_rate": 9.899246848902951e-06, + "logits/chosen": -2.8765225410461426, + "logits/rejected": -2.6637039184570312, + "logps/chosen": -255.0871124267578, + "logps/rejected": -248.8611297607422, + "loss": 2.6494, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.254233360290527, + "rewards/margins": 0.029956340789794922, + "rewards/rejected": -5.284189701080322, + "step": 5792 + }, + { + "epoch": 0.9, + "learning_rate": 9.898513408371803e-06, + "logits/chosen": -2.9053525924682617, + "logits/rejected": -3.1045587062835693, + "logps/chosen": -216.80471801757812, + "logps/rejected": -480.4596252441406, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9025769233703613, + "rewards/margins": 7.011807918548584, + "rewards/rejected": -9.914384841918945, + "step": 5793 + }, + { + "epoch": 0.9, + "learning_rate": 9.897779967840657e-06, + "logits/chosen": -1.4589234590530396, + "logits/rejected": -2.998096227645874, + "logps/chosen": -139.5724334716797, + "logps/rejected": -484.27325439453125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7398624420166016, + "rewards/margins": 8.31033706665039, + "rewards/rejected": -12.050199508666992, + "step": 5794 + }, + { + "epoch": 0.9, + "learning_rate": 9.897046527309509e-06, + "logits/chosen": -2.6764631271362305, + "logits/rejected": -3.0791358947753906, + "logps/chosen": -98.30204010009766, + "logps/rejected": -141.4649658203125, + "loss": 0.0923, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.085693836212158, + "rewards/margins": 3.484710454940796, + "rewards/rejected": -7.570404529571533, + "step": 5795 + }, + { + "epoch": 0.9, + "learning_rate": 9.89631308677836e-06, + "logits/chosen": -1.5849560499191284, + "logits/rejected": -3.006196975708008, + "logps/chosen": -219.55828857421875, + "logps/rejected": -317.8260498046875, + "loss": 3.3965, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.41560697555542, + "rewards/margins": -1.667189121246338, + "rewards/rejected": -4.748417854309082, + "step": 5796 + }, + { + "epoch": 0.9, + "learning_rate": 9.895579646247212e-06, + "logits/chosen": -2.692336320877075, + "logits/rejected": -3.0271830558776855, + "logps/chosen": -49.00489807128906, + "logps/rejected": -144.46502685546875, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5858798027038574, + "rewards/margins": 3.7950901985168457, + "rewards/rejected": -7.380970001220703, + "step": 5797 + }, + { + "epoch": 0.9, + "learning_rate": 9.894846205716064e-06, + "logits/chosen": -2.4092888832092285, + "logits/rejected": -2.9469010829925537, + "logps/chosen": -368.20855712890625, + "logps/rejected": -502.4090576171875, + "loss": 0.0356, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.211724281311035, + "rewards/margins": 4.899918556213379, + "rewards/rejected": -8.111642837524414, + "step": 5798 + }, + { + "epoch": 0.9, + "learning_rate": 9.894112765184916e-06, + "logits/chosen": -2.3644464015960693, + "logits/rejected": -2.1416285037994385, + "logps/chosen": -175.06906127929688, + "logps/rejected": -311.03607177734375, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0391616821289062, + "rewards/margins": 8.21853256225586, + "rewards/rejected": -11.257694244384766, + "step": 5799 + }, + { + "epoch": 0.9, + "learning_rate": 9.893379324653768e-06, + "logits/chosen": -2.3762779235839844, + "logits/rejected": -2.8179781436920166, + "logps/chosen": -311.07891845703125, + "logps/rejected": -368.1328125, + "loss": 1.7051, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.045036315917969, + "rewards/margins": 2.9910216331481934, + "rewards/rejected": -8.03605842590332, + "step": 5800 + }, + { + "epoch": 0.9, + "learning_rate": 9.89264588412262e-06, + "logits/chosen": -2.1275150775909424, + "logits/rejected": -3.203155994415283, + "logps/chosen": -171.63064575195312, + "logps/rejected": -406.22650146484375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.076742649078369, + "rewards/margins": 6.287725448608398, + "rewards/rejected": -8.36446762084961, + "step": 5801 + }, + { + "epoch": 0.9, + "learning_rate": 9.891912443591472e-06, + "logits/chosen": -2.759819984436035, + "logits/rejected": -3.126800060272217, + "logps/chosen": -295.3704833984375, + "logps/rejected": -285.903564453125, + "loss": 2.3337, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.25731897354126, + "rewards/margins": 0.5552258491516113, + "rewards/rejected": -5.812544822692871, + "step": 5802 + }, + { + "epoch": 0.9, + "learning_rate": 9.891179003060325e-06, + "logits/chosen": -2.947470188140869, + "logits/rejected": -0.7341171503067017, + "logps/chosen": -1041.193359375, + "logps/rejected": -321.59991455078125, + "loss": 1.459, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9258241653442383, + "rewards/margins": 3.3178067207336426, + "rewards/rejected": -7.243630886077881, + "step": 5803 + }, + { + "epoch": 0.9, + "learning_rate": 9.890445562529177e-06, + "logits/chosen": -3.035343647003174, + "logits/rejected": -2.358947515487671, + "logps/chosen": -292.2998962402344, + "logps/rejected": -234.71034240722656, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0136475563049316, + "rewards/margins": 6.216063499450684, + "rewards/rejected": -8.229711532592773, + "step": 5804 + }, + { + "epoch": 0.9, + "learning_rate": 9.889712121998029e-06, + "logits/chosen": -2.3029136657714844, + "logits/rejected": -2.7744140625, + "logps/chosen": -109.07672119140625, + "logps/rejected": -404.6463623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.443103790283203, + "rewards/margins": 9.773024559020996, + "rewards/rejected": -14.216129302978516, + "step": 5805 + }, + { + "epoch": 0.9, + "learning_rate": 9.888978681466881e-06, + "logits/chosen": -2.6110329627990723, + "logits/rejected": -3.0476768016815186, + "logps/chosen": -224.33193969726562, + "logps/rejected": -180.56954956054688, + "loss": 1.9331, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.685385704040527, + "rewards/margins": -0.24914181232452393, + "rewards/rejected": -5.436244010925293, + "step": 5806 + }, + { + "epoch": 0.9, + "learning_rate": 9.888245240935733e-06, + "logits/chosen": -1.3795528411865234, + "logits/rejected": -2.6567115783691406, + "logps/chosen": -222.70159912109375, + "logps/rejected": -348.1043395996094, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4207310676574707, + "rewards/margins": 5.55426025390625, + "rewards/rejected": -7.974991798400879, + "step": 5807 + }, + { + "epoch": 0.9, + "learning_rate": 9.887511800404585e-06, + "logits/chosen": -1.7346471548080444, + "logits/rejected": -2.6047260761260986, + "logps/chosen": -100.87261962890625, + "logps/rejected": -206.75213623046875, + "loss": 0.2836, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.608375072479248, + "rewards/margins": 3.2070565223693848, + "rewards/rejected": -6.815431594848633, + "step": 5808 + }, + { + "epoch": 0.9, + "learning_rate": 9.886778359873437e-06, + "logits/chosen": -3.0145397186279297, + "logits/rejected": -1.6025159358978271, + "logps/chosen": -376.5457458496094, + "logps/rejected": -131.46543884277344, + "loss": 1.0971, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.049452781677246, + "rewards/margins": 0.8242971897125244, + "rewards/rejected": -4.873749732971191, + "step": 5809 + }, + { + "epoch": 0.9, + "learning_rate": 9.886044919342288e-06, + "logits/chosen": -2.7050442695617676, + "logits/rejected": -3.2360410690307617, + "logps/chosen": -433.19622802734375, + "logps/rejected": -362.4886779785156, + "loss": 0.0224, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8741841316223145, + "rewards/margins": 4.634148120880127, + "rewards/rejected": -7.508332252502441, + "step": 5810 + }, + { + "epoch": 0.9, + "learning_rate": 9.88531147881114e-06, + "logits/chosen": -2.2992749214172363, + "logits/rejected": -3.0741007328033447, + "logps/chosen": -122.2258529663086, + "logps/rejected": -305.0549621582031, + "loss": 0.0847, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4166481494903564, + "rewards/margins": 3.748002529144287, + "rewards/rejected": -6.164650917053223, + "step": 5811 + }, + { + "epoch": 0.9, + "learning_rate": 9.884578038279994e-06, + "logits/chosen": -2.686197519302368, + "logits/rejected": -3.0456502437591553, + "logps/chosen": -116.45240783691406, + "logps/rejected": -232.32994079589844, + "loss": 0.0294, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8559226989746094, + "rewards/margins": 4.5025634765625, + "rewards/rejected": -7.358486175537109, + "step": 5812 + }, + { + "epoch": 0.9, + "learning_rate": 9.883844597748846e-06, + "logits/chosen": -2.8392138481140137, + "logits/rejected": -3.111660957336426, + "logps/chosen": -82.5910873413086, + "logps/rejected": -182.05035400390625, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6698474884033203, + "rewards/margins": 5.4509196281433105, + "rewards/rejected": -8.120766639709473, + "step": 5813 + }, + { + "epoch": 0.9, + "learning_rate": 9.883111157217698e-06, + "logits/chosen": -1.8437778949737549, + "logits/rejected": -3.13745379447937, + "logps/chosen": -166.71902465820312, + "logps/rejected": -459.4713439941406, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5577621459960938, + "rewards/margins": 6.380202770233154, + "rewards/rejected": -8.937965393066406, + "step": 5814 + }, + { + "epoch": 0.9, + "learning_rate": 9.882377716686551e-06, + "logits/chosen": -3.2485570907592773, + "logits/rejected": -1.6087493896484375, + "logps/chosen": -1148.062255859375, + "logps/rejected": -815.3834838867188, + "loss": 2.4303, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7446320056915283, + "rewards/margins": -0.5403778553009033, + "rewards/rejected": -3.204254150390625, + "step": 5815 + }, + { + "epoch": 0.9, + "learning_rate": 9.881644276155403e-06, + "logits/chosen": -2.9890177249908447, + "logits/rejected": -2.090151071548462, + "logps/chosen": -633.61181640625, + "logps/rejected": -242.83343505859375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7645096182823181, + "rewards/margins": 5.556501388549805, + "rewards/rejected": -6.321010589599609, + "step": 5816 + }, + { + "epoch": 0.9, + "learning_rate": 9.880910835624255e-06, + "logits/chosen": -2.9529755115509033, + "logits/rejected": -1.9972221851348877, + "logps/chosen": -570.1384887695312, + "logps/rejected": -367.65008544921875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4970054626464844, + "rewards/margins": 5.763426780700684, + "rewards/rejected": -7.260432243347168, + "step": 5817 + }, + { + "epoch": 0.9, + "learning_rate": 9.880177395093107e-06, + "logits/chosen": -2.3473856449127197, + "logits/rejected": -2.796931743621826, + "logps/chosen": -98.11082458496094, + "logps/rejected": -368.1197509765625, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.953665256500244, + "rewards/margins": 6.54698371887207, + "rewards/rejected": -10.500648498535156, + "step": 5818 + }, + { + "epoch": 0.9, + "learning_rate": 9.879443954561959e-06, + "logits/chosen": -2.6864843368530273, + "logits/rejected": -3.167933940887451, + "logps/chosen": -250.2117919921875, + "logps/rejected": -271.07073974609375, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7014269828796387, + "rewards/margins": 5.6084513664245605, + "rewards/rejected": -7.309878349304199, + "step": 5819 + }, + { + "epoch": 0.91, + "learning_rate": 9.87871051403081e-06, + "logits/chosen": -2.24424409866333, + "logits/rejected": -3.181903600692749, + "logps/chosen": -167.70437622070312, + "logps/rejected": -251.45132446289062, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.885974168777466, + "rewards/margins": 4.976431369781494, + "rewards/rejected": -7.862405300140381, + "step": 5820 + }, + { + "epoch": 0.91, + "learning_rate": 9.877977073499664e-06, + "logits/chosen": -1.5172946453094482, + "logits/rejected": -2.895505666732788, + "logps/chosen": -70.72543334960938, + "logps/rejected": -288.102294921875, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.891470193862915, + "rewards/margins": 5.733355522155762, + "rewards/rejected": -8.624825477600098, + "step": 5821 + }, + { + "epoch": 0.91, + "learning_rate": 9.877243632968516e-06, + "logits/chosen": -2.6339635848999023, + "logits/rejected": -1.86457097530365, + "logps/chosen": -248.08419799804688, + "logps/rejected": -224.44956970214844, + "loss": 1.1359, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.658341884613037, + "rewards/margins": 2.6203441619873047, + "rewards/rejected": -6.2786865234375, + "step": 5822 + }, + { + "epoch": 0.91, + "learning_rate": 9.876510192437368e-06, + "logits/chosen": -2.0379638671875, + "logits/rejected": -2.749626398086548, + "logps/chosen": -119.46546936035156, + "logps/rejected": -204.5164794921875, + "loss": 1.6958, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.750030040740967, + "rewards/margins": 1.7420756816864014, + "rewards/rejected": -5.492105484008789, + "step": 5823 + }, + { + "epoch": 0.91, + "learning_rate": 9.87577675190622e-06, + "logits/chosen": -2.931272506713867, + "logits/rejected": -3.074986219406128, + "logps/chosen": -173.94515991210938, + "logps/rejected": -270.9403076171875, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6865463256835938, + "rewards/margins": 4.547711372375488, + "rewards/rejected": -7.234257698059082, + "step": 5824 + }, + { + "epoch": 0.91, + "learning_rate": 9.875043311375072e-06, + "logits/chosen": -2.2297487258911133, + "logits/rejected": -2.7735671997070312, + "logps/chosen": -192.2994842529297, + "logps/rejected": -234.62033081054688, + "loss": 2.0787, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.194282531738281, + "rewards/margins": 2.1344704627990723, + "rewards/rejected": -7.3287529945373535, + "step": 5825 + }, + { + "epoch": 0.91, + "learning_rate": 9.874309870843924e-06, + "logits/chosen": -2.7229459285736084, + "logits/rejected": -2.7095887660980225, + "logps/chosen": -185.41326904296875, + "logps/rejected": -251.20355224609375, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7970657348632812, + "rewards/margins": 6.563056468963623, + "rewards/rejected": -8.360122680664062, + "step": 5826 + }, + { + "epoch": 0.91, + "learning_rate": 9.873576430312775e-06, + "logits/chosen": -2.561666965484619, + "logits/rejected": -2.7325682640075684, + "logps/chosen": -149.70648193359375, + "logps/rejected": -241.5569305419922, + "loss": 1.1998, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.933751106262207, + "rewards/margins": 1.6435649394989014, + "rewards/rejected": -7.5773162841796875, + "step": 5827 + }, + { + "epoch": 0.91, + "learning_rate": 9.872842989781627e-06, + "logits/chosen": -2.7052412033081055, + "logits/rejected": -2.593484878540039, + "logps/chosen": -141.96705627441406, + "logps/rejected": -244.2838897705078, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5875985622406006, + "rewards/margins": 6.822467803955078, + "rewards/rejected": -9.410066604614258, + "step": 5828 + }, + { + "epoch": 0.91, + "learning_rate": 9.87210954925048e-06, + "logits/chosen": -2.452869415283203, + "logits/rejected": -2.7740671634674072, + "logps/chosen": -156.17117309570312, + "logps/rejected": -150.43319702148438, + "loss": 0.9646, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.7730207443237305, + "rewards/margins": 1.1396520137786865, + "rewards/rejected": -5.912672519683838, + "step": 5829 + }, + { + "epoch": 0.91, + "learning_rate": 9.871376108719333e-06, + "logits/chosen": -2.307849645614624, + "logits/rejected": -3.045081853866577, + "logps/chosen": -268.5248107910156, + "logps/rejected": -227.27749633789062, + "loss": 1.6433, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3367128372192383, + "rewards/margins": 1.0545682907104492, + "rewards/rejected": -4.3912811279296875, + "step": 5830 + }, + { + "epoch": 0.91, + "learning_rate": 9.870642668188185e-06, + "logits/chosen": -1.7644321918487549, + "logits/rejected": -2.8726770877838135, + "logps/chosen": -78.59669494628906, + "logps/rejected": -208.37603759765625, + "loss": 0.2276, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.663778305053711, + "rewards/margins": 2.7194859981536865, + "rewards/rejected": -8.383264541625977, + "step": 5831 + }, + { + "epoch": 0.91, + "learning_rate": 9.869909227657037e-06, + "logits/chosen": -0.8700936436653137, + "logits/rejected": -2.7958602905273438, + "logps/chosen": -55.19814682006836, + "logps/rejected": -321.7564697265625, + "loss": 0.1083, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.319570302963257, + "rewards/margins": 4.7694525718688965, + "rewards/rejected": -8.089022636413574, + "step": 5832 + }, + { + "epoch": 0.91, + "learning_rate": 9.869175787125888e-06, + "logits/chosen": -2.8729088306427, + "logits/rejected": -2.307232141494751, + "logps/chosen": -114.63606262207031, + "logps/rejected": -134.54507446289062, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.044342517852783, + "rewards/margins": 3.7148118019104004, + "rewards/rejected": -6.759154319763184, + "step": 5833 + }, + { + "epoch": 0.91, + "learning_rate": 9.86844234659474e-06, + "logits/chosen": -1.059987187385559, + "logits/rejected": -2.5974605083465576, + "logps/chosen": -244.127685546875, + "logps/rejected": -379.9951171875, + "loss": 0.3676, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.529362440109253, + "rewards/margins": 4.808869361877441, + "rewards/rejected": -7.338231086730957, + "step": 5834 + }, + { + "epoch": 0.91, + "learning_rate": 9.867708906063592e-06, + "logits/chosen": -2.5881755352020264, + "logits/rejected": -3.1800334453582764, + "logps/chosen": -535.5120849609375, + "logps/rejected": -536.5582275390625, + "loss": 0.0556, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8152282238006592, + "rewards/margins": 3.4865236282348633, + "rewards/rejected": -5.301752090454102, + "step": 5835 + }, + { + "epoch": 0.91, + "learning_rate": 9.866975465532444e-06, + "logits/chosen": -1.4776687622070312, + "logits/rejected": -2.619328737258911, + "logps/chosen": -191.8204345703125, + "logps/rejected": -339.8768615722656, + "loss": 0.0356, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.086578369140625, + "rewards/margins": 5.079236030578613, + "rewards/rejected": -8.165814399719238, + "step": 5836 + }, + { + "epoch": 0.91, + "learning_rate": 9.866242025001296e-06, + "logits/chosen": -2.9619810581207275, + "logits/rejected": -2.9833309650421143, + "logps/chosen": -172.83091735839844, + "logps/rejected": -209.73252868652344, + "loss": 2.1945, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.895060062408447, + "rewards/margins": 0.4351780414581299, + "rewards/rejected": -5.330237865447998, + "step": 5837 + }, + { + "epoch": 0.91, + "learning_rate": 9.865508584470148e-06, + "logits/chosen": -3.104585647583008, + "logits/rejected": -3.169569730758667, + "logps/chosen": -381.9111022949219, + "logps/rejected": -291.83734130859375, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.374253511428833, + "rewards/margins": 4.173626899719238, + "rewards/rejected": -7.547880172729492, + "step": 5838 + }, + { + "epoch": 0.91, + "learning_rate": 9.864775143939001e-06, + "logits/chosen": -1.6880868673324585, + "logits/rejected": -2.842968225479126, + "logps/chosen": -94.29518127441406, + "logps/rejected": -235.4791717529297, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.69675612449646, + "rewards/margins": 3.7165822982788086, + "rewards/rejected": -6.413338661193848, + "step": 5839 + }, + { + "epoch": 0.91, + "learning_rate": 9.864041703407853e-06, + "logits/chosen": -1.3776932954788208, + "logits/rejected": -2.770090103149414, + "logps/chosen": -400.4971923828125, + "logps/rejected": -623.1145629882812, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.076312303543091, + "rewards/margins": 7.984026908874512, + "rewards/rejected": -10.060338973999023, + "step": 5840 + }, + { + "epoch": 0.91, + "learning_rate": 9.863308262876705e-06, + "logits/chosen": -2.9543070793151855, + "logits/rejected": -2.037520170211792, + "logps/chosen": -369.03778076171875, + "logps/rejected": -269.8511962890625, + "loss": 0.0797, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.014920711517334, + "rewards/margins": 3.9678685665130615, + "rewards/rejected": -8.982789993286133, + "step": 5841 + }, + { + "epoch": 0.91, + "learning_rate": 9.862574822345557e-06, + "logits/chosen": -0.9448422789573669, + "logits/rejected": -2.745096206665039, + "logps/chosen": -109.44682312011719, + "logps/rejected": -315.9620666503906, + "loss": 0.0851, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.421884059906006, + "rewards/margins": 3.6935410499572754, + "rewards/rejected": -8.115425109863281, + "step": 5842 + }, + { + "epoch": 0.91, + "learning_rate": 9.861841381814409e-06, + "logits/chosen": -1.542956829071045, + "logits/rejected": -2.8797314167022705, + "logps/chosen": -324.3842468261719, + "logps/rejected": -470.276611328125, + "loss": 3.8181, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.3398919105529785, + "rewards/margins": -1.4635369777679443, + "rewards/rejected": -4.876355171203613, + "step": 5843 + }, + { + "epoch": 0.91, + "learning_rate": 9.86110794128326e-06, + "logits/chosen": -2.5773277282714844, + "logits/rejected": -2.7722299098968506, + "logps/chosen": -126.19818115234375, + "logps/rejected": -237.1016387939453, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.073338031768799, + "rewards/margins": 5.827512741088867, + "rewards/rejected": -8.900850296020508, + "step": 5844 + }, + { + "epoch": 0.91, + "learning_rate": 9.860374500752113e-06, + "logits/chosen": -2.9205071926116943, + "logits/rejected": -2.464015483856201, + "logps/chosen": -188.97213745117188, + "logps/rejected": -155.2911376953125, + "loss": 1.7705, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.728188991546631, + "rewards/margins": 0.7210240364074707, + "rewards/rejected": -4.449213027954102, + "step": 5845 + }, + { + "epoch": 0.91, + "learning_rate": 9.859641060220965e-06, + "logits/chosen": -2.7119529247283936, + "logits/rejected": -3.0999629497528076, + "logps/chosen": -67.00213623046875, + "logps/rejected": -179.27816772460938, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6669986248016357, + "rewards/margins": 4.385452747344971, + "rewards/rejected": -8.052452087402344, + "step": 5846 + }, + { + "epoch": 0.91, + "learning_rate": 9.858907619689818e-06, + "logits/chosen": -2.6258933544158936, + "logits/rejected": -2.977186679840088, + "logps/chosen": -146.91342163085938, + "logps/rejected": -291.59295654296875, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.042604923248291, + "rewards/margins": 5.296409606933594, + "rewards/rejected": -10.339014053344727, + "step": 5847 + }, + { + "epoch": 0.91, + "learning_rate": 9.85817417915867e-06, + "logits/chosen": -2.973754644393921, + "logits/rejected": -2.931858539581299, + "logps/chosen": -108.047119140625, + "logps/rejected": -117.38381958007812, + "loss": 1.771, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.746035575866699, + "rewards/margins": 0.10335719585418701, + "rewards/rejected": -6.849392890930176, + "step": 5848 + }, + { + "epoch": 0.91, + "learning_rate": 9.857440738627524e-06, + "logits/chosen": -3.0356786251068115, + "logits/rejected": -2.7301723957061768, + "logps/chosen": -455.937255859375, + "logps/rejected": -467.6726989746094, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.24662184715271, + "rewards/margins": 7.357020378112793, + "rewards/rejected": -10.603642463684082, + "step": 5849 + }, + { + "epoch": 0.91, + "learning_rate": 9.856707298096375e-06, + "logits/chosen": -2.0659406185150146, + "logits/rejected": -3.1066930294036865, + "logps/chosen": -162.34857177734375, + "logps/rejected": -467.683837890625, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.730138063430786, + "rewards/margins": 5.853615760803223, + "rewards/rejected": -8.58375358581543, + "step": 5850 + }, + { + "epoch": 0.91, + "learning_rate": 9.855973857565227e-06, + "logits/chosen": -2.6010637283325195, + "logits/rejected": -2.7809462547302246, + "logps/chosen": -301.648193359375, + "logps/rejected": -317.14794921875, + "loss": 0.0415, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2532926797866821, + "rewards/margins": 5.18088960647583, + "rewards/rejected": -6.434182167053223, + "step": 5851 + }, + { + "epoch": 0.91, + "learning_rate": 9.85524041703408e-06, + "logits/chosen": -2.8361802101135254, + "logits/rejected": -2.909514904022217, + "logps/chosen": -435.8341979980469, + "logps/rejected": -501.7525634765625, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1338539123535156, + "rewards/margins": 5.585092544555664, + "rewards/rejected": -8.71894645690918, + "step": 5852 + }, + { + "epoch": 0.91, + "learning_rate": 9.854506976502931e-06, + "logits/chosen": -3.0748915672302246, + "logits/rejected": -1.9240821599960327, + "logps/chosen": -276.733154296875, + "logps/rejected": -247.0998992919922, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.471677780151367, + "rewards/margins": 6.8858771324157715, + "rewards/rejected": -10.357555389404297, + "step": 5853 + }, + { + "epoch": 0.91, + "learning_rate": 9.853773535971783e-06, + "logits/chosen": -1.4635682106018066, + "logits/rejected": -2.6794960498809814, + "logps/chosen": -146.42007446289062, + "logps/rejected": -347.2364501953125, + "loss": 0.2645, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.972983360290527, + "rewards/margins": 5.5289435386657715, + "rewards/rejected": -10.501927375793457, + "step": 5854 + }, + { + "epoch": 0.91, + "learning_rate": 9.853040095440635e-06, + "logits/chosen": -2.1844382286071777, + "logits/rejected": -2.5759665966033936, + "logps/chosen": -126.4804916381836, + "logps/rejected": -309.7522277832031, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.868955135345459, + "rewards/margins": 4.797750473022461, + "rewards/rejected": -8.666706085205078, + "step": 5855 + }, + { + "epoch": 0.91, + "learning_rate": 9.852306654909487e-06, + "logits/chosen": -2.753091335296631, + "logits/rejected": -3.096656322479248, + "logps/chosen": -147.5283203125, + "logps/rejected": -177.20394897460938, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.735612392425537, + "rewards/margins": 5.195837020874023, + "rewards/rejected": -7.931448936462402, + "step": 5856 + }, + { + "epoch": 0.91, + "learning_rate": 9.85157321437834e-06, + "logits/chosen": -2.9140114784240723, + "logits/rejected": -2.400991201400757, + "logps/chosen": -415.51336669921875, + "logps/rejected": -277.01470947265625, + "loss": 1.9451, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.834031105041504, + "rewards/margins": -0.07648611068725586, + "rewards/rejected": -4.757544994354248, + "step": 5857 + }, + { + "epoch": 0.91, + "learning_rate": 9.850839773847192e-06, + "logits/chosen": -1.5629353523254395, + "logits/rejected": -2.772599458694458, + "logps/chosen": -204.78086853027344, + "logps/rejected": -403.5390625, + "loss": 2.3826, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.952410697937012, + "rewards/margins": 0.9507231712341309, + "rewards/rejected": -5.903134346008301, + "step": 5858 + }, + { + "epoch": 0.91, + "learning_rate": 9.850106333316044e-06, + "logits/chosen": -2.685398817062378, + "logits/rejected": -3.2549381256103516, + "logps/chosen": -207.86082458496094, + "logps/rejected": -341.69378662109375, + "loss": 0.0479, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5791192054748535, + "rewards/margins": 3.456674814224243, + "rewards/rejected": -7.035794258117676, + "step": 5859 + }, + { + "epoch": 0.91, + "learning_rate": 9.849372892784896e-06, + "logits/chosen": -2.5090341567993164, + "logits/rejected": -1.6563026905059814, + "logps/chosen": -203.67288208007812, + "logps/rejected": -142.28533935546875, + "loss": 2.3726, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.144237518310547, + "rewards/margins": 0.05682253837585449, + "rewards/rejected": -5.201059818267822, + "step": 5860 + }, + { + "epoch": 0.91, + "learning_rate": 9.848639452253748e-06, + "logits/chosen": -2.48262095451355, + "logits/rejected": -2.8349452018737793, + "logps/chosen": -146.886962890625, + "logps/rejected": -268.9825439453125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.177135467529297, + "rewards/margins": 6.577142715454102, + "rewards/rejected": -8.754278182983398, + "step": 5861 + }, + { + "epoch": 0.91, + "learning_rate": 9.8479060117226e-06, + "logits/chosen": -1.9224987030029297, + "logits/rejected": -2.477675676345825, + "logps/chosen": -240.20098876953125, + "logps/rejected": -331.940185546875, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8861284255981445, + "rewards/margins": 4.634219169616699, + "rewards/rejected": -7.520347595214844, + "step": 5862 + }, + { + "epoch": 0.91, + "learning_rate": 9.847172571191452e-06, + "logits/chosen": -2.70697283744812, + "logits/rejected": -1.5799368619918823, + "logps/chosen": -215.59349060058594, + "logps/rejected": -103.43592834472656, + "loss": 0.2127, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.861489772796631, + "rewards/margins": 2.6169416904449463, + "rewards/rejected": -6.478431701660156, + "step": 5863 + }, + { + "epoch": 0.91, + "learning_rate": 9.846439130660303e-06, + "logits/chosen": -1.2475610971450806, + "logits/rejected": -2.9854111671447754, + "logps/chosen": -135.08258056640625, + "logps/rejected": -395.5950927734375, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.608027696609497, + "rewards/margins": 5.303293228149414, + "rewards/rejected": -7.911320686340332, + "step": 5864 + }, + { + "epoch": 0.91, + "learning_rate": 9.845705690129157e-06, + "logits/chosen": -2.409911632537842, + "logits/rejected": -2.7700109481811523, + "logps/chosen": -279.02630615234375, + "logps/rejected": -290.21685791015625, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.428433656692505, + "rewards/margins": 5.601771354675293, + "rewards/rejected": -8.030204772949219, + "step": 5865 + }, + { + "epoch": 0.91, + "learning_rate": 9.844972249598009e-06, + "logits/chosen": -2.6212124824523926, + "logits/rejected": -2.9302303791046143, + "logps/chosen": -124.12644958496094, + "logps/rejected": -281.6134033203125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0112171173095703, + "rewards/margins": 5.839512825012207, + "rewards/rejected": -7.850729942321777, + "step": 5866 + }, + { + "epoch": 0.91, + "learning_rate": 9.84423880906686e-06, + "logits/chosen": -2.970233917236328, + "logits/rejected": -2.773574113845825, + "logps/chosen": -431.50433349609375, + "logps/rejected": -438.0028076171875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7300946712493896, + "rewards/margins": 7.354628562927246, + "rewards/rejected": -10.084722518920898, + "step": 5867 + }, + { + "epoch": 0.91, + "learning_rate": 9.843505368535713e-06, + "logits/chosen": -3.098951816558838, + "logits/rejected": -2.8611068725585938, + "logps/chosen": -388.2210693359375, + "logps/rejected": -227.80357360839844, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.101693034172058, + "rewards/margins": 6.436504364013672, + "rewards/rejected": -7.5381975173950195, + "step": 5868 + }, + { + "epoch": 0.91, + "learning_rate": 9.842771928004564e-06, + "logits/chosen": -2.8715736865997314, + "logits/rejected": -2.2314302921295166, + "logps/chosen": -157.56951904296875, + "logps/rejected": -272.4284362792969, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1700778007507324, + "rewards/margins": 5.894374370574951, + "rewards/rejected": -9.064452171325684, + "step": 5869 + }, + { + "epoch": 0.91, + "learning_rate": 9.842038487473416e-06, + "logits/chosen": -2.2910008430480957, + "logits/rejected": -3.0186917781829834, + "logps/chosen": -82.708984375, + "logps/rejected": -240.72743225097656, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6431806087493896, + "rewards/margins": 5.58304500579834, + "rewards/rejected": -8.226225852966309, + "step": 5870 + }, + { + "epoch": 0.91, + "learning_rate": 9.841305046942268e-06, + "logits/chosen": -2.7933056354522705, + "logits/rejected": -3.0664870738983154, + "logps/chosen": -376.63482666015625, + "logps/rejected": -402.4884033203125, + "loss": 0.0407, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5155994892120361, + "rewards/margins": 3.9390408992767334, + "rewards/rejected": -5.4546403884887695, + "step": 5871 + }, + { + "epoch": 0.91, + "learning_rate": 9.84057160641112e-06, + "logits/chosen": -1.5166189670562744, + "logits/rejected": -2.6535816192626953, + "logps/chosen": -132.5899658203125, + "logps/rejected": -285.627685546875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.772615432739258, + "rewards/margins": 7.3849334716796875, + "rewards/rejected": -10.157548904418945, + "step": 5872 + }, + { + "epoch": 0.91, + "learning_rate": 9.839838165879972e-06, + "logits/chosen": -2.879901170730591, + "logits/rejected": -2.0299935340881348, + "logps/chosen": -422.6837158203125, + "logps/rejected": -258.3931884765625, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.519552707672119, + "rewards/margins": 4.909800052642822, + "rewards/rejected": -7.429352760314941, + "step": 5873 + }, + { + "epoch": 0.91, + "learning_rate": 9.839104725348826e-06, + "logits/chosen": -2.897095203399658, + "logits/rejected": -2.0957040786743164, + "logps/chosen": -419.574462890625, + "logps/rejected": -548.8446655273438, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9318933486938477, + "rewards/margins": 6.195556163787842, + "rewards/rejected": -10.127449035644531, + "step": 5874 + }, + { + "epoch": 0.91, + "learning_rate": 9.838371284817677e-06, + "logits/chosen": -1.1453431844711304, + "logits/rejected": -2.203016996383667, + "logps/chosen": -87.05767059326172, + "logps/rejected": -229.82052612304688, + "loss": 0.5367, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.045867919921875, + "rewards/margins": 3.5443031787872314, + "rewards/rejected": -8.590170860290527, + "step": 5875 + }, + { + "epoch": 0.91, + "learning_rate": 9.83763784428653e-06, + "logits/chosen": -3.085221290588379, + "logits/rejected": -2.6144087314605713, + "logps/chosen": -202.95167541503906, + "logps/rejected": -237.70245361328125, + "loss": 0.2205, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.759885311126709, + "rewards/margins": 3.374518632888794, + "rewards/rejected": -9.134403228759766, + "step": 5876 + }, + { + "epoch": 0.91, + "learning_rate": 9.836904403755381e-06, + "logits/chosen": -2.911363363265991, + "logits/rejected": -3.1722774505615234, + "logps/chosen": -51.60813522338867, + "logps/rejected": -211.05557250976562, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0802500247955322, + "rewards/margins": 4.357532501220703, + "rewards/rejected": -7.4377827644348145, + "step": 5877 + }, + { + "epoch": 0.91, + "learning_rate": 9.836170963224233e-06, + "logits/chosen": -2.849898338317871, + "logits/rejected": -3.0339558124542236, + "logps/chosen": -550.1021728515625, + "logps/rejected": -656.6324462890625, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3153467178344727, + "rewards/margins": 5.095006465911865, + "rewards/rejected": -8.41035270690918, + "step": 5878 + }, + { + "epoch": 0.91, + "learning_rate": 9.835437522693085e-06, + "logits/chosen": -2.2540628910064697, + "logits/rejected": -3.018380880355835, + "logps/chosen": -305.73114013671875, + "logps/rejected": -405.8140869140625, + "loss": 3.3771, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.024486541748047, + "rewards/margins": 1.7041926383972168, + "rewards/rejected": -7.728679180145264, + "step": 5879 + }, + { + "epoch": 0.91, + "learning_rate": 9.834704082161937e-06, + "logits/chosen": -1.4300564527511597, + "logits/rejected": -2.496229410171509, + "logps/chosen": -161.44468688964844, + "logps/rejected": -427.94781494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9407825469970703, + "rewards/margins": 11.494832992553711, + "rewards/rejected": -14.435615539550781, + "step": 5880 + }, + { + "epoch": 0.91, + "learning_rate": 9.83397064163079e-06, + "logits/chosen": -2.6952896118164062, + "logits/rejected": -2.8888144493103027, + "logps/chosen": -105.76620483398438, + "logps/rejected": -263.24786376953125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8097105026245117, + "rewards/margins": 6.049314498901367, + "rewards/rejected": -8.859025955200195, + "step": 5881 + }, + { + "epoch": 0.91, + "learning_rate": 9.833237201099642e-06, + "logits/chosen": -2.3518192768096924, + "logits/rejected": -3.044382333755493, + "logps/chosen": -295.9390869140625, + "logps/rejected": -429.2766418457031, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.952612280845642, + "rewards/margins": 4.395302772521973, + "rewards/rejected": -6.347914695739746, + "step": 5882 + }, + { + "epoch": 0.91, + "learning_rate": 9.832503760568496e-06, + "logits/chosen": -2.677872896194458, + "logits/rejected": -3.0516624450683594, + "logps/chosen": -627.1589965820312, + "logps/rejected": -590.8737182617188, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.686497688293457, + "rewards/margins": 5.398126125335693, + "rewards/rejected": -9.084623336791992, + "step": 5883 + }, + { + "epoch": 0.92, + "learning_rate": 9.831770320037348e-06, + "logits/chosen": -1.6708617210388184, + "logits/rejected": -2.817976236343384, + "logps/chosen": -113.25638580322266, + "logps/rejected": -288.3470458984375, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.429248332977295, + "rewards/margins": 4.11030387878418, + "rewards/rejected": -6.539552211761475, + "step": 5884 + }, + { + "epoch": 0.92, + "learning_rate": 9.8310368795062e-06, + "logits/chosen": -2.319896936416626, + "logits/rejected": -2.764249801635742, + "logps/chosen": -121.70350646972656, + "logps/rejected": -247.347900390625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8782691955566406, + "rewards/margins": 5.577410697937012, + "rewards/rejected": -8.455679893493652, + "step": 5885 + }, + { + "epoch": 0.92, + "learning_rate": 9.830303438975052e-06, + "logits/chosen": -2.3586947917938232, + "logits/rejected": -3.0984175205230713, + "logps/chosen": -583.2298583984375, + "logps/rejected": -580.47021484375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.948094129562378, + "rewards/margins": 7.069125652313232, + "rewards/rejected": -9.017219543457031, + "step": 5886 + }, + { + "epoch": 0.92, + "learning_rate": 9.829569998443903e-06, + "logits/chosen": -2.6303110122680664, + "logits/rejected": -1.6130696535110474, + "logps/chosen": -440.08782958984375, + "logps/rejected": -286.7542419433594, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9607272148132324, + "rewards/margins": 6.632515907287598, + "rewards/rejected": -9.593242645263672, + "step": 5887 + }, + { + "epoch": 0.92, + "learning_rate": 9.828836557912755e-06, + "logits/chosen": -1.4366655349731445, + "logits/rejected": -2.42543625831604, + "logps/chosen": -208.63427734375, + "logps/rejected": -417.3837890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.842892646789551, + "rewards/margins": 9.887151718139648, + "rewards/rejected": -12.730045318603516, + "step": 5888 + }, + { + "epoch": 0.92, + "learning_rate": 9.828103117381607e-06, + "logits/chosen": -2.066575050354004, + "logits/rejected": -2.9994945526123047, + "logps/chosen": -280.4644775390625, + "logps/rejected": -538.6107177734375, + "loss": 2.8325, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.771015644073486, + "rewards/margins": -0.2912726402282715, + "rewards/rejected": -5.479743003845215, + "step": 5889 + }, + { + "epoch": 0.92, + "learning_rate": 9.827369676850459e-06, + "logits/chosen": -2.941596269607544, + "logits/rejected": -3.1374382972717285, + "logps/chosen": -89.95036315917969, + "logps/rejected": -328.5078430175781, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9191758632659912, + "rewards/margins": 7.4869585037231445, + "rewards/rejected": -9.406134605407715, + "step": 5890 + }, + { + "epoch": 0.92, + "learning_rate": 9.826636236319311e-06, + "logits/chosen": -2.242574453353882, + "logits/rejected": -3.0490894317626953, + "logps/chosen": -217.58709716796875, + "logps/rejected": -459.2098388671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.824139416217804, + "rewards/margins": 10.102912902832031, + "rewards/rejected": -10.92705249786377, + "step": 5891 + }, + { + "epoch": 0.92, + "learning_rate": 9.825902795788164e-06, + "logits/chosen": -3.075843095779419, + "logits/rejected": -2.3128578662872314, + "logps/chosen": -228.42019653320312, + "logps/rejected": -81.79391479492188, + "loss": 0.9177, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.368870258331299, + "rewards/margins": 2.004197359085083, + "rewards/rejected": -6.373067855834961, + "step": 5892 + }, + { + "epoch": 0.92, + "learning_rate": 9.825169355257016e-06, + "logits/chosen": -2.6472737789154053, + "logits/rejected": -3.0621628761291504, + "logps/chosen": -233.13380432128906, + "logps/rejected": -446.68048095703125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5153563022613525, + "rewards/margins": 7.289844989776611, + "rewards/rejected": -9.805201530456543, + "step": 5893 + }, + { + "epoch": 0.92, + "learning_rate": 9.824435914725868e-06, + "logits/chosen": -2.4839298725128174, + "logits/rejected": -2.3732991218566895, + "logps/chosen": -244.5426788330078, + "logps/rejected": -350.235107421875, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.281560182571411, + "rewards/margins": 5.534780502319336, + "rewards/rejected": -8.816341400146484, + "step": 5894 + }, + { + "epoch": 0.92, + "learning_rate": 9.82370247419472e-06, + "logits/chosen": -2.830948829650879, + "logits/rejected": -2.8410160541534424, + "logps/chosen": -277.81671142578125, + "logps/rejected": -470.9974365234375, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.770431637763977, + "rewards/margins": 5.876686096191406, + "rewards/rejected": -7.647117614746094, + "step": 5895 + }, + { + "epoch": 0.92, + "learning_rate": 9.822969033663572e-06, + "logits/chosen": -2.215029001235962, + "logits/rejected": -3.135948419570923, + "logps/chosen": -354.20684814453125, + "logps/rejected": -597.29638671875, + "loss": 0.0404, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3164310455322266, + "rewards/margins": 3.437859535217285, + "rewards/rejected": -5.754290580749512, + "step": 5896 + }, + { + "epoch": 0.92, + "learning_rate": 9.822235593132424e-06, + "logits/chosen": -3.05244517326355, + "logits/rejected": -2.8482794761657715, + "logps/chosen": -567.9730224609375, + "logps/rejected": -427.63671875, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1827895641326904, + "rewards/margins": 6.965830326080322, + "rewards/rejected": -9.14862060546875, + "step": 5897 + }, + { + "epoch": 0.92, + "learning_rate": 9.821502152601276e-06, + "logits/chosen": -2.9387319087982178, + "logits/rejected": -1.7918394804000854, + "logps/chosen": -365.0245361328125, + "logps/rejected": -230.74972534179688, + "loss": 3.3537, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.821191787719727, + "rewards/margins": 0.5448105335235596, + "rewards/rejected": -7.366002559661865, + "step": 5898 + }, + { + "epoch": 0.92, + "learning_rate": 9.820768712070128e-06, + "logits/chosen": -3.1116766929626465, + "logits/rejected": -1.6462122201919556, + "logps/chosen": -490.10174560546875, + "logps/rejected": -390.65966796875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.348820209503174, + "rewards/margins": 7.478265762329102, + "rewards/rejected": -11.827085494995117, + "step": 5899 + }, + { + "epoch": 0.92, + "learning_rate": 9.82003527153898e-06, + "logits/chosen": -3.146158218383789, + "logits/rejected": -2.0955581665039062, + "logps/chosen": -318.4147033691406, + "logps/rejected": -103.42398834228516, + "loss": 0.7065, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.168184280395508, + "rewards/margins": 2.14333176612854, + "rewards/rejected": -7.311515808105469, + "step": 5900 + }, + { + "epoch": 0.92, + "learning_rate": 9.819301831007833e-06, + "logits/chosen": -2.246479034423828, + "logits/rejected": -2.904733896255493, + "logps/chosen": -325.2341613769531, + "logps/rejected": -329.2261962890625, + "loss": 4.4557, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.448071002960205, + "rewards/margins": -1.2229104042053223, + "rewards/rejected": -4.225161075592041, + "step": 5901 + }, + { + "epoch": 0.92, + "learning_rate": 9.818568390476685e-06, + "logits/chosen": -2.9660470485687256, + "logits/rejected": -3.097179412841797, + "logps/chosen": -113.18669128417969, + "logps/rejected": -181.30856323242188, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8236149549484253, + "rewards/margins": 5.828914642333984, + "rewards/rejected": -7.652529239654541, + "step": 5902 + }, + { + "epoch": 0.92, + "learning_rate": 9.817834949945537e-06, + "logits/chosen": -2.872506618499756, + "logits/rejected": -1.5002367496490479, + "logps/chosen": -531.5469970703125, + "logps/rejected": -300.255859375, + "loss": 2.6785, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8241965770721436, + "rewards/margins": -0.5828938484191895, + "rewards/rejected": -3.241302728652954, + "step": 5903 + }, + { + "epoch": 0.92, + "learning_rate": 9.817101509414389e-06, + "logits/chosen": -1.9787307977676392, + "logits/rejected": -3.0004611015319824, + "logps/chosen": -58.2360954284668, + "logps/rejected": -179.55859375, + "loss": 0.0488, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.915508508682251, + "rewards/margins": 3.778008460998535, + "rewards/rejected": -6.693516731262207, + "step": 5904 + }, + { + "epoch": 0.92, + "learning_rate": 9.81636806888324e-06, + "logits/chosen": -2.9487547874450684, + "logits/rejected": -3.143949031829834, + "logps/chosen": -96.92007446289062, + "logps/rejected": -330.66461181640625, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6156461238861084, + "rewards/margins": 6.220461845397949, + "rewards/rejected": -7.836108207702637, + "step": 5905 + }, + { + "epoch": 0.92, + "learning_rate": 9.815634628352092e-06, + "logits/chosen": -2.9418909549713135, + "logits/rejected": -2.487877368927002, + "logps/chosen": -137.60560607910156, + "logps/rejected": -257.0338439941406, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4819889068603516, + "rewards/margins": 6.229493618011475, + "rewards/rejected": -8.711483001708984, + "step": 5906 + }, + { + "epoch": 0.92, + "learning_rate": 9.814901187820944e-06, + "logits/chosen": -2.683115243911743, + "logits/rejected": -2.859541654586792, + "logps/chosen": -145.91989135742188, + "logps/rejected": -211.06149291992188, + "loss": 0.0837, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.827996253967285, + "rewards/margins": 3.3266706466674805, + "rewards/rejected": -6.154666900634766, + "step": 5907 + }, + { + "epoch": 0.92, + "learning_rate": 9.814167747289796e-06, + "logits/chosen": -1.9286935329437256, + "logits/rejected": -2.757331609725952, + "logps/chosen": -125.42249298095703, + "logps/rejected": -232.74917602539062, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.772274971008301, + "rewards/margins": 3.716121196746826, + "rewards/rejected": -8.488395690917969, + "step": 5908 + }, + { + "epoch": 0.92, + "learning_rate": 9.813434306758648e-06, + "logits/chosen": -1.554526448249817, + "logits/rejected": -2.7592432498931885, + "logps/chosen": -140.38479614257812, + "logps/rejected": -400.884765625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8708882331848145, + "rewards/margins": 6.616915225982666, + "rewards/rejected": -10.48780345916748, + "step": 5909 + }, + { + "epoch": 0.92, + "learning_rate": 9.812700866227502e-06, + "logits/chosen": -2.957688570022583, + "logits/rejected": -3.074880599975586, + "logps/chosen": -169.6512451171875, + "logps/rejected": -159.73199462890625, + "loss": 3.6925, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.215329647064209, + "rewards/margins": -0.49591875076293945, + "rewards/rejected": -5.7194108963012695, + "step": 5910 + }, + { + "epoch": 0.92, + "learning_rate": 9.811967425696354e-06, + "logits/chosen": -2.221078395843506, + "logits/rejected": -3.113215923309326, + "logps/chosen": -201.79830932617188, + "logps/rejected": -273.4444580078125, + "loss": 1.783, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.920971393585205, + "rewards/margins": 0.43016624450683594, + "rewards/rejected": -4.351137638092041, + "step": 5911 + }, + { + "epoch": 0.92, + "learning_rate": 9.811233985165205e-06, + "logits/chosen": -2.9202399253845215, + "logits/rejected": -2.596121072769165, + "logps/chosen": -340.19781494140625, + "logps/rejected": -301.9481506347656, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.857602119445801, + "rewards/margins": 5.420609951019287, + "rewards/rejected": -8.27821159362793, + "step": 5912 + }, + { + "epoch": 0.92, + "learning_rate": 9.810500544634057e-06, + "logits/chosen": -3.024052143096924, + "logits/rejected": -2.7495994567871094, + "logps/chosen": -248.56497192382812, + "logps/rejected": -397.32489013671875, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.645439863204956, + "rewards/margins": 7.75833797454834, + "rewards/rejected": -9.403778076171875, + "step": 5913 + }, + { + "epoch": 0.92, + "learning_rate": 9.80976710410291e-06, + "logits/chosen": -2.8183884620666504, + "logits/rejected": -2.0279524326324463, + "logps/chosen": -249.49879455566406, + "logps/rejected": -198.27392578125, + "loss": 2.6956, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.205649375915527, + "rewards/margins": 0.03362631797790527, + "rewards/rejected": -5.239275932312012, + "step": 5914 + }, + { + "epoch": 0.92, + "learning_rate": 9.809033663571763e-06, + "logits/chosen": -2.7213664054870605, + "logits/rejected": -2.8990604877471924, + "logps/chosen": -266.3566589355469, + "logps/rejected": -328.2994384765625, + "loss": 3.0796, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.640473365783691, + "rewards/margins": -0.2691025733947754, + "rewards/rejected": -5.371370792388916, + "step": 5915 + }, + { + "epoch": 0.92, + "learning_rate": 9.808300223040615e-06, + "logits/chosen": -2.643785238265991, + "logits/rejected": -2.948657989501953, + "logps/chosen": -78.90171813964844, + "logps/rejected": -211.95399475097656, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.239212989807129, + "rewards/margins": 5.009486198425293, + "rewards/rejected": -7.248699188232422, + "step": 5916 + }, + { + "epoch": 0.92, + "learning_rate": 9.807566782509467e-06, + "logits/chosen": -2.781848907470703, + "logits/rejected": -2.691636562347412, + "logps/chosen": -144.06509399414062, + "logps/rejected": -338.9515075683594, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.555786609649658, + "rewards/margins": 5.962186336517334, + "rewards/rejected": -8.517972946166992, + "step": 5917 + }, + { + "epoch": 0.92, + "learning_rate": 9.806833341978318e-06, + "logits/chosen": -1.850388526916504, + "logits/rejected": -2.7920899391174316, + "logps/chosen": -157.6560821533203, + "logps/rejected": -401.1190490722656, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.50584077835083, + "rewards/margins": 8.031022071838379, + "rewards/rejected": -9.53686237335205, + "step": 5918 + }, + { + "epoch": 0.92, + "learning_rate": 9.806099901447172e-06, + "logits/chosen": -2.873763084411621, + "logits/rejected": -2.434927463531494, + "logps/chosen": -189.1407470703125, + "logps/rejected": -183.50762939453125, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6271796226501465, + "rewards/margins": 5.960994720458984, + "rewards/rejected": -8.588174819946289, + "step": 5919 + }, + { + "epoch": 0.92, + "learning_rate": 9.805366460916024e-06, + "logits/chosen": -2.791193723678589, + "logits/rejected": -2.9717559814453125, + "logps/chosen": -335.6045837402344, + "logps/rejected": -331.94012451171875, + "loss": 1.1444, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.290638446807861, + "rewards/margins": 2.2230606079101562, + "rewards/rejected": -6.513699054718018, + "step": 5920 + }, + { + "epoch": 0.92, + "learning_rate": 9.804633020384876e-06, + "logits/chosen": -3.23352313041687, + "logits/rejected": -3.037684917449951, + "logps/chosen": -301.797119140625, + "logps/rejected": -184.95556640625, + "loss": 6.9282, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.312294006347656, + "rewards/margins": -6.925011157989502, + "rewards/rejected": -2.3872830867767334, + "step": 5921 + }, + { + "epoch": 0.92, + "learning_rate": 9.803899579853728e-06, + "logits/chosen": -3.173384666442871, + "logits/rejected": -2.966550588607788, + "logps/chosen": -161.3912353515625, + "logps/rejected": -313.5565185546875, + "loss": 2.3755, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.536375045776367, + "rewards/margins": -0.28879356384277344, + "rewards/rejected": -3.2475814819335938, + "step": 5922 + }, + { + "epoch": 0.92, + "learning_rate": 9.80316613932258e-06, + "logits/chosen": -1.7802389860153198, + "logits/rejected": -2.74113130569458, + "logps/chosen": -149.29446411132812, + "logps/rejected": -348.4273681640625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.520572304725647, + "rewards/margins": 8.030343055725098, + "rewards/rejected": -9.550914764404297, + "step": 5923 + }, + { + "epoch": 0.92, + "learning_rate": 9.802432698791431e-06, + "logits/chosen": -2.2700047492980957, + "logits/rejected": -3.0401573181152344, + "logps/chosen": -179.2181854248047, + "logps/rejected": -287.97027587890625, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4437267780303955, + "rewards/margins": 5.6467790603637695, + "rewards/rejected": -8.090505599975586, + "step": 5924 + }, + { + "epoch": 0.92, + "learning_rate": 9.801699258260283e-06, + "logits/chosen": -2.756183624267578, + "logits/rejected": -2.0062098503112793, + "logps/chosen": -407.29046630859375, + "logps/rejected": -218.39454650878906, + "loss": 1.8596, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7185609340667725, + "rewards/margins": 1.6779279708862305, + "rewards/rejected": -5.396488666534424, + "step": 5925 + }, + { + "epoch": 0.92, + "learning_rate": 9.800965817729135e-06, + "logits/chosen": -2.8361783027648926, + "logits/rejected": -2.904510259628296, + "logps/chosen": -376.00286865234375, + "logps/rejected": -257.74322509765625, + "loss": 0.0478, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.591252088546753, + "rewards/margins": 3.9642491340637207, + "rewards/rejected": -7.5555009841918945, + "step": 5926 + }, + { + "epoch": 0.92, + "learning_rate": 9.800232377197987e-06, + "logits/chosen": -2.9762351512908936, + "logits/rejected": -3.015223741531372, + "logps/chosen": -308.24517822265625, + "logps/rejected": -266.59490966796875, + "loss": 1.7242, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.042167663574219, + "rewards/margins": -0.6540429592132568, + "rewards/rejected": -3.388124942779541, + "step": 5927 + }, + { + "epoch": 0.92, + "learning_rate": 9.79949893666684e-06, + "logits/chosen": -2.4406187534332275, + "logits/rejected": -3.0806150436401367, + "logps/chosen": -104.64350891113281, + "logps/rejected": -273.17254638671875, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8132586479187012, + "rewards/margins": 5.57755184173584, + "rewards/rejected": -7.390810012817383, + "step": 5928 + }, + { + "epoch": 0.92, + "learning_rate": 9.798765496135692e-06, + "logits/chosen": -2.2034943103790283, + "logits/rejected": -2.8414406776428223, + "logps/chosen": -249.9974365234375, + "logps/rejected": -209.56443786621094, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7085886001586914, + "rewards/margins": 4.508249282836914, + "rewards/rejected": -7.2168378829956055, + "step": 5929 + }, + { + "epoch": 0.92, + "learning_rate": 9.798032055604544e-06, + "logits/chosen": -2.672823429107666, + "logits/rejected": -2.8905556201934814, + "logps/chosen": -69.97968292236328, + "logps/rejected": -107.65029907226562, + "loss": 0.7681, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.241598129272461, + "rewards/margins": 2.3690671920776367, + "rewards/rejected": -6.610665321350098, + "step": 5930 + }, + { + "epoch": 0.92, + "learning_rate": 9.797298615073396e-06, + "logits/chosen": -3.0827603340148926, + "logits/rejected": -2.525897979736328, + "logps/chosen": -488.01611328125, + "logps/rejected": -564.65869140625, + "loss": 2.2194, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.248215913772583, + "rewards/margins": 1.3580965995788574, + "rewards/rejected": -4.6063127517700195, + "step": 5931 + }, + { + "epoch": 0.92, + "learning_rate": 9.796565174542248e-06, + "logits/chosen": -1.4934264421463013, + "logits/rejected": -3.020656108856201, + "logps/chosen": -103.77019500732422, + "logps/rejected": -312.30694580078125, + "loss": 1.1832, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.649036884307861, + "rewards/margins": 1.7017488479614258, + "rewards/rejected": -7.350785732269287, + "step": 5932 + }, + { + "epoch": 0.92, + "learning_rate": 9.7958317340111e-06, + "logits/chosen": -2.326510429382324, + "logits/rejected": -2.812739133834839, + "logps/chosen": -98.29428100585938, + "logps/rejected": -407.30474853515625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9945359230041504, + "rewards/margins": 7.268255233764648, + "rewards/rejected": -10.262791633605957, + "step": 5933 + }, + { + "epoch": 0.92, + "learning_rate": 9.795098293479952e-06, + "logits/chosen": -2.6003963947296143, + "logits/rejected": -2.6047544479370117, + "logps/chosen": -71.71902465820312, + "logps/rejected": -195.98321533203125, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8828842639923096, + "rewards/margins": 5.752619743347168, + "rewards/rejected": -7.635504245758057, + "step": 5934 + }, + { + "epoch": 0.92, + "learning_rate": 9.794364852948804e-06, + "logits/chosen": -2.729621171951294, + "logits/rejected": -3.0651731491088867, + "logps/chosen": -151.25697326660156, + "logps/rejected": -113.8294906616211, + "loss": 2.1944, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.8041276931762695, + "rewards/margins": 0.8148801326751709, + "rewards/rejected": -5.619007587432861, + "step": 5935 + }, + { + "epoch": 0.92, + "learning_rate": 9.793631412417656e-06, + "logits/chosen": -2.7129929065704346, + "logits/rejected": -2.9508554935455322, + "logps/chosen": -131.88079833984375, + "logps/rejected": -237.30355834960938, + "loss": 1.165, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0639004707336426, + "rewards/margins": 1.7907828092575073, + "rewards/rejected": -4.8546833992004395, + "step": 5936 + }, + { + "epoch": 0.92, + "learning_rate": 9.79289797188651e-06, + "logits/chosen": -2.8947837352752686, + "logits/rejected": -2.3247056007385254, + "logps/chosen": -104.2816162109375, + "logps/rejected": -87.24886322021484, + "loss": 1.805, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.501760005950928, + "rewards/margins": 1.1360598802566528, + "rewards/rejected": -5.637820243835449, + "step": 5937 + }, + { + "epoch": 0.92, + "learning_rate": 9.792164531355361e-06, + "logits/chosen": -2.1241109371185303, + "logits/rejected": -2.664018154144287, + "logps/chosen": -181.23800659179688, + "logps/rejected": -446.48565673828125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8259849548339844, + "rewards/margins": 6.507375717163086, + "rewards/rejected": -8.33336067199707, + "step": 5938 + }, + { + "epoch": 0.92, + "learning_rate": 9.791431090824213e-06, + "logits/chosen": -2.614610195159912, + "logits/rejected": -2.7529468536376953, + "logps/chosen": -119.15924835205078, + "logps/rejected": -380.51800537109375, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.668229341506958, + "rewards/margins": 4.916293144226074, + "rewards/rejected": -7.584522724151611, + "step": 5939 + }, + { + "epoch": 0.92, + "learning_rate": 9.790697650293065e-06, + "logits/chosen": -3.07779860496521, + "logits/rejected": -3.1714751720428467, + "logps/chosen": -80.3524169921875, + "logps/rejected": -185.94940185546875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.098078966140747, + "rewards/margins": 6.348482608795166, + "rewards/rejected": -8.446561813354492, + "step": 5940 + }, + { + "epoch": 0.92, + "learning_rate": 9.789964209761917e-06, + "logits/chosen": -1.8825536966323853, + "logits/rejected": -2.6788086891174316, + "logps/chosen": -108.20075988769531, + "logps/rejected": -409.7079772949219, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3016983270645142, + "rewards/margins": 9.67876148223877, + "rewards/rejected": -10.980459213256836, + "step": 5941 + }, + { + "epoch": 0.92, + "learning_rate": 9.789230769230769e-06, + "logits/chosen": -2.9703598022460938, + "logits/rejected": -3.1278769969940186, + "logps/chosen": -309.4443664550781, + "logps/rejected": -477.33892822265625, + "loss": 1.4772, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.385487079620361, + "rewards/margins": 1.6236202716827393, + "rewards/rejected": -6.0091071128845215, + "step": 5942 + }, + { + "epoch": 0.92, + "learning_rate": 9.78849732869962e-06, + "logits/chosen": -2.8076977729797363, + "logits/rejected": -2.278062105178833, + "logps/chosen": -176.19009399414062, + "logps/rejected": -270.88958740234375, + "loss": 2.7045, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.539196014404297, + "rewards/margins": 0.09044575691223145, + "rewards/rejected": -4.629642009735107, + "step": 5943 + }, + { + "epoch": 0.92, + "learning_rate": 9.787763888168472e-06, + "logits/chosen": -2.729325294494629, + "logits/rejected": -2.409409284591675, + "logps/chosen": -103.8038101196289, + "logps/rejected": -221.482177734375, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.658735990524292, + "rewards/margins": 4.625971794128418, + "rewards/rejected": -6.284708023071289, + "step": 5944 + }, + { + "epoch": 0.92, + "learning_rate": 9.787030447637324e-06, + "logits/chosen": -0.9518744349479675, + "logits/rejected": -2.841278314590454, + "logps/chosen": -79.41619873046875, + "logps/rejected": -302.0872802734375, + "loss": 0.2837, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.161110877990723, + "rewards/margins": 4.04485559463501, + "rewards/rejected": -9.205965995788574, + "step": 5945 + }, + { + "epoch": 0.92, + "learning_rate": 9.786297007106178e-06, + "logits/chosen": -3.0382912158966064, + "logits/rejected": -3.05021333694458, + "logps/chosen": -88.35857391357422, + "logps/rejected": -235.86181640625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5546252727508545, + "rewards/margins": 7.516219615936279, + "rewards/rejected": -9.070844650268555, + "step": 5946 + }, + { + "epoch": 0.92, + "learning_rate": 9.78556356657503e-06, + "logits/chosen": -2.8719818592071533, + "logits/rejected": -2.0797507762908936, + "logps/chosen": -148.33004760742188, + "logps/rejected": -198.07260131835938, + "loss": 1.8358, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.241220951080322, + "rewards/margins": 2.6355628967285156, + "rewards/rejected": -6.876783847808838, + "step": 5947 + }, + { + "epoch": 0.93, + "learning_rate": 9.784830126043882e-06, + "logits/chosen": -2.898310899734497, + "logits/rejected": -2.5741567611694336, + "logps/chosen": -94.45823669433594, + "logps/rejected": -172.15333557128906, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.366586446762085, + "rewards/margins": 6.1307220458984375, + "rewards/rejected": -7.497308731079102, + "step": 5948 + }, + { + "epoch": 0.93, + "learning_rate": 9.784096685512735e-06, + "logits/chosen": -1.9480557441711426, + "logits/rejected": -3.10569429397583, + "logps/chosen": -558.2506103515625, + "logps/rejected": -675.0023193359375, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6076629161834717, + "rewards/margins": 4.661813735961914, + "rewards/rejected": -6.269476413726807, + "step": 5949 + }, + { + "epoch": 0.93, + "learning_rate": 9.783363244981587e-06, + "logits/chosen": -2.873788833618164, + "logits/rejected": -3.0277702808380127, + "logps/chosen": -157.1389923095703, + "logps/rejected": -117.55348205566406, + "loss": 2.1445, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6582024097442627, + "rewards/margins": 0.9048290252685547, + "rewards/rejected": -4.563031196594238, + "step": 5950 + }, + { + "epoch": 0.93, + "learning_rate": 9.782629804450439e-06, + "logits/chosen": -2.1733558177948, + "logits/rejected": -3.1686975955963135, + "logps/chosen": -178.8249969482422, + "logps/rejected": -382.08135986328125, + "loss": 0.0383, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6562819480895996, + "rewards/margins": 3.5732033252716064, + "rewards/rejected": -6.229485511779785, + "step": 5951 + }, + { + "epoch": 0.93, + "learning_rate": 9.78189636391929e-06, + "logits/chosen": -1.5916564464569092, + "logits/rejected": -2.9187796115875244, + "logps/chosen": -108.06781005859375, + "logps/rejected": -413.86016845703125, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.307904005050659, + "rewards/margins": 6.7689104080200195, + "rewards/rejected": -9.076814651489258, + "step": 5952 + }, + { + "epoch": 0.93, + "learning_rate": 9.781162923388143e-06, + "logits/chosen": -1.8734686374664307, + "logits/rejected": -2.5766398906707764, + "logps/chosen": -182.16665649414062, + "logps/rejected": -450.31842041015625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9424374103546143, + "rewards/margins": 6.135930061340332, + "rewards/rejected": -8.078367233276367, + "step": 5953 + }, + { + "epoch": 0.93, + "learning_rate": 9.780429482856996e-06, + "logits/chosen": -2.007958173751831, + "logits/rejected": -2.931969404220581, + "logps/chosen": -159.10166931152344, + "logps/rejected": -377.73455810546875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9718072414398193, + "rewards/margins": 6.396522521972656, + "rewards/rejected": -9.368330001831055, + "step": 5954 + }, + { + "epoch": 0.93, + "learning_rate": 9.779696042325848e-06, + "logits/chosen": -2.8068532943725586, + "logits/rejected": -3.158217191696167, + "logps/chosen": -51.780113220214844, + "logps/rejected": -219.60533142089844, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4215282201766968, + "rewards/margins": 6.12388801574707, + "rewards/rejected": -7.545415878295898, + "step": 5955 + }, + { + "epoch": 0.93, + "learning_rate": 9.7789626017947e-06, + "logits/chosen": -1.7197343111038208, + "logits/rejected": -2.8337793350219727, + "logps/chosen": -151.07485961914062, + "logps/rejected": -358.93035888671875, + "loss": 2.4015, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8455193042755127, + "rewards/margins": 0.5116708278656006, + "rewards/rejected": -4.357190132141113, + "step": 5956 + }, + { + "epoch": 0.93, + "learning_rate": 9.778229161263552e-06, + "logits/chosen": -3.009627342224121, + "logits/rejected": -2.4878251552581787, + "logps/chosen": -476.4214782714844, + "logps/rejected": -468.03131103515625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7198243141174316, + "rewards/margins": 7.119726181030273, + "rewards/rejected": -8.839550971984863, + "step": 5957 + }, + { + "epoch": 0.93, + "learning_rate": 9.777495720732404e-06, + "logits/chosen": -2.1144957542419434, + "logits/rejected": -2.5015344619750977, + "logps/chosen": -130.91012573242188, + "logps/rejected": -319.1226806640625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7827022075653076, + "rewards/margins": 6.0960845947265625, + "rewards/rejected": -7.878786563873291, + "step": 5958 + }, + { + "epoch": 0.93, + "learning_rate": 9.776762280201256e-06, + "logits/chosen": -2.975191831588745, + "logits/rejected": -1.6900291442871094, + "logps/chosen": -184.29173278808594, + "logps/rejected": -221.12106323242188, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4273592233657837, + "rewards/margins": 4.958510398864746, + "rewards/rejected": -6.385869979858398, + "step": 5959 + }, + { + "epoch": 0.93, + "learning_rate": 9.776028839670107e-06, + "logits/chosen": -3.0352015495300293, + "logits/rejected": -3.1850266456604004, + "logps/chosen": -114.53678894042969, + "logps/rejected": -260.7222900390625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.466947078704834, + "rewards/margins": 6.08983039855957, + "rewards/rejected": -8.556777954101562, + "step": 5960 + }, + { + "epoch": 0.93, + "learning_rate": 9.77529539913896e-06, + "logits/chosen": -1.8546758890151978, + "logits/rejected": -2.6972062587738037, + "logps/chosen": -173.28367614746094, + "logps/rejected": -485.1429138183594, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4290497303009033, + "rewards/margins": 6.129542827606201, + "rewards/rejected": -8.558592796325684, + "step": 5961 + }, + { + "epoch": 0.93, + "learning_rate": 9.774561958607811e-06, + "logits/chosen": -3.101348638534546, + "logits/rejected": -3.1592979431152344, + "logps/chosen": -197.8818359375, + "logps/rejected": -263.528564453125, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7368484735488892, + "rewards/margins": 4.798708915710449, + "rewards/rejected": -6.535557746887207, + "step": 5962 + }, + { + "epoch": 0.93, + "learning_rate": 9.773828518076665e-06, + "logits/chosen": -3.0530056953430176, + "logits/rejected": -2.659756898880005, + "logps/chosen": -181.698486328125, + "logps/rejected": -98.12071228027344, + "loss": 2.4472, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.526674270629883, + "rewards/margins": 0.1682896614074707, + "rewards/rejected": -4.6949639320373535, + "step": 5963 + }, + { + "epoch": 0.93, + "learning_rate": 9.773095077545517e-06, + "logits/chosen": -2.6741158962249756, + "logits/rejected": -2.6810855865478516, + "logps/chosen": -278.3416442871094, + "logps/rejected": -453.8347473144531, + "loss": 0.8914, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7843141555786133, + "rewards/margins": 3.2790980339050293, + "rewards/rejected": -7.063411712646484, + "step": 5964 + }, + { + "epoch": 0.93, + "learning_rate": 9.772361637014369e-06, + "logits/chosen": -2.7189605236053467, + "logits/rejected": -3.0168282985687256, + "logps/chosen": -325.6602478027344, + "logps/rejected": -249.1300048828125, + "loss": 3.8368, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.98266077041626, + "rewards/margins": -1.983860731124878, + "rewards/rejected": -3.9987998008728027, + "step": 5965 + }, + { + "epoch": 0.93, + "learning_rate": 9.77162819648322e-06, + "logits/chosen": -1.6407361030578613, + "logits/rejected": -3.039386749267578, + "logps/chosen": -233.8495330810547, + "logps/rejected": -258.92962646484375, + "loss": 1.93, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7539725303649902, + "rewards/margins": -0.5001256465911865, + "rewards/rejected": -3.2538466453552246, + "step": 5966 + }, + { + "epoch": 0.93, + "learning_rate": 9.770894755952072e-06, + "logits/chosen": -2.883068799972534, + "logits/rejected": -1.1268614530563354, + "logps/chosen": -219.901123046875, + "logps/rejected": -76.27055358886719, + "loss": 1.4584, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.53448486328125, + "rewards/margins": -0.19880497455596924, + "rewards/rejected": -6.33568000793457, + "step": 5967 + }, + { + "epoch": 0.93, + "learning_rate": 9.770161315420924e-06, + "logits/chosen": -2.4121923446655273, + "logits/rejected": -2.8284173011779785, + "logps/chosen": -185.64051818847656, + "logps/rejected": -385.8017883300781, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.038001298904419, + "rewards/margins": 6.316136837005615, + "rewards/rejected": -7.354138374328613, + "step": 5968 + }, + { + "epoch": 0.93, + "learning_rate": 9.769427874889776e-06, + "logits/chosen": -2.972442388534546, + "logits/rejected": -2.0264201164245605, + "logps/chosen": -301.32574462890625, + "logps/rejected": -104.07149505615234, + "loss": 0.8623, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.2239532470703125, + "rewards/margins": 0.2328411340713501, + "rewards/rejected": -4.456794261932373, + "step": 5969 + }, + { + "epoch": 0.93, + "learning_rate": 9.768694434358628e-06, + "logits/chosen": -2.0464768409729004, + "logits/rejected": -2.938587188720703, + "logps/chosen": -84.00465393066406, + "logps/rejected": -196.43275451660156, + "loss": 0.1079, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.472822427749634, + "rewards/margins": 2.7062649726867676, + "rewards/rejected": -6.1790876388549805, + "step": 5970 + }, + { + "epoch": 0.93, + "learning_rate": 9.76796099382748e-06, + "logits/chosen": -2.6899232864379883, + "logits/rejected": -3.0003621578216553, + "logps/chosen": -243.6400604248047, + "logps/rejected": -452.17315673828125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.379682183265686, + "rewards/margins": 7.18833589553833, + "rewards/rejected": -8.568017959594727, + "step": 5971 + }, + { + "epoch": 0.93, + "learning_rate": 9.767227553296333e-06, + "logits/chosen": -3.1415340900421143, + "logits/rejected": -2.9440436363220215, + "logps/chosen": -496.2911376953125, + "logps/rejected": -236.19180297851562, + "loss": 0.2856, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7289169430732727, + "rewards/margins": 4.914078712463379, + "rewards/rejected": -5.642995357513428, + "step": 5972 + }, + { + "epoch": 0.93, + "learning_rate": 9.766494112765185e-06, + "logits/chosen": -1.8422333002090454, + "logits/rejected": -2.117063522338867, + "logps/chosen": -262.1673889160156, + "logps/rejected": -479.4591064453125, + "loss": 2.6635, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.731973648071289, + "rewards/margins": -0.29226231575012207, + "rewards/rejected": -4.439711093902588, + "step": 5973 + }, + { + "epoch": 0.93, + "learning_rate": 9.765760672234037e-06, + "logits/chosen": -2.818054437637329, + "logits/rejected": -2.935224771499634, + "logps/chosen": -264.5191955566406, + "logps/rejected": -201.353271484375, + "loss": 2.3944, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.736143112182617, + "rewards/margins": 0.40517258644104004, + "rewards/rejected": -5.141315460205078, + "step": 5974 + }, + { + "epoch": 0.93, + "learning_rate": 9.765027231702889e-06, + "logits/chosen": -2.9439311027526855, + "logits/rejected": -2.8778367042541504, + "logps/chosen": -283.1641540527344, + "logps/rejected": -313.85552978515625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8487396240234375, + "rewards/margins": 6.755340099334717, + "rewards/rejected": -7.604079723358154, + "step": 5975 + }, + { + "epoch": 0.93, + "learning_rate": 9.764293791171741e-06, + "logits/chosen": -2.0095040798187256, + "logits/rejected": -2.8350837230682373, + "logps/chosen": -242.15794372558594, + "logps/rejected": -341.6767272949219, + "loss": 3.313, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.667811393737793, + "rewards/margins": -1.1824874877929688, + "rewards/rejected": -3.4853241443634033, + "step": 5976 + }, + { + "epoch": 0.93, + "learning_rate": 9.763560350640593e-06, + "logits/chosen": -2.1273670196533203, + "logits/rejected": -3.1139514446258545, + "logps/chosen": -469.4186096191406, + "logps/rejected": -535.6367797851562, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7072205543518066, + "rewards/margins": 6.232635498046875, + "rewards/rejected": -7.939856052398682, + "step": 5977 + }, + { + "epoch": 0.93, + "learning_rate": 9.762826910109445e-06, + "logits/chosen": -1.353529930114746, + "logits/rejected": -2.7489612102508545, + "logps/chosen": -100.584716796875, + "logps/rejected": -307.87994384765625, + "loss": 1.3404, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.945514678955078, + "rewards/margins": 1.9223511219024658, + "rewards/rejected": -6.867866039276123, + "step": 5978 + }, + { + "epoch": 0.93, + "learning_rate": 9.762093469578297e-06, + "logits/chosen": -2.8551530838012695, + "logits/rejected": -2.6629881858825684, + "logps/chosen": -214.34625244140625, + "logps/rejected": -167.039794921875, + "loss": 0.4373, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.961453437805176, + "rewards/margins": 2.150970458984375, + "rewards/rejected": -7.112423896789551, + "step": 5979 + }, + { + "epoch": 0.93, + "learning_rate": 9.761360029047148e-06, + "logits/chosen": -1.8275192975997925, + "logits/rejected": -2.8693020343780518, + "logps/chosen": -150.16363525390625, + "logps/rejected": -325.7223815917969, + "loss": 0.028, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3888230323791504, + "rewards/margins": 3.6014528274536133, + "rewards/rejected": -6.9902753829956055, + "step": 5980 + }, + { + "epoch": 0.93, + "learning_rate": 9.760626588516002e-06, + "logits/chosen": -2.1620309352874756, + "logits/rejected": -2.5391533374786377, + "logps/chosen": -60.797149658203125, + "logps/rejected": -162.22244262695312, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6116926670074463, + "rewards/margins": 4.793420791625977, + "rewards/rejected": -7.405113697052002, + "step": 5981 + }, + { + "epoch": 0.93, + "learning_rate": 9.759893147984854e-06, + "logits/chosen": -3.1103808879852295, + "logits/rejected": -3.059459686279297, + "logps/chosen": -237.2107391357422, + "logps/rejected": -216.97386169433594, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.715181827545166, + "rewards/margins": 4.7528839111328125, + "rewards/rejected": -8.46806526184082, + "step": 5982 + }, + { + "epoch": 0.93, + "learning_rate": 9.759159707453707e-06, + "logits/chosen": -3.128558397293091, + "logits/rejected": -2.212782621383667, + "logps/chosen": -453.2431640625, + "logps/rejected": -334.10565185546875, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0974334478378296, + "rewards/margins": 5.391012191772461, + "rewards/rejected": -6.488445281982422, + "step": 5983 + }, + { + "epoch": 0.93, + "learning_rate": 9.75842626692256e-06, + "logits/chosen": -2.980076313018799, + "logits/rejected": -2.9399189949035645, + "logps/chosen": -123.94309997558594, + "logps/rejected": -109.00144958496094, + "loss": 0.0296, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.249589920043945, + "rewards/margins": 3.6752123832702637, + "rewards/rejected": -7.924802303314209, + "step": 5984 + }, + { + "epoch": 0.93, + "learning_rate": 9.757692826391411e-06, + "logits/chosen": -2.610602617263794, + "logits/rejected": -3.170254707336426, + "logps/chosen": -327.1332702636719, + "logps/rejected": -475.0135498046875, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.416353464126587, + "rewards/margins": 3.783252716064453, + "rewards/rejected": -6.199605941772461, + "step": 5985 + }, + { + "epoch": 0.93, + "learning_rate": 9.756959385860263e-06, + "logits/chosen": -2.402278184890747, + "logits/rejected": -3.128484010696411, + "logps/chosen": -157.299072265625, + "logps/rejected": -267.15814208984375, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.967278242111206, + "rewards/margins": 5.205816268920898, + "rewards/rejected": -7.173094272613525, + "step": 5986 + }, + { + "epoch": 0.93, + "learning_rate": 9.756225945329115e-06, + "logits/chosen": -1.6995633840560913, + "logits/rejected": -2.7491729259490967, + "logps/chosen": -144.70790100097656, + "logps/rejected": -320.0325927734375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1081864833831787, + "rewards/margins": 5.954470634460449, + "rewards/rejected": -8.062657356262207, + "step": 5987 + }, + { + "epoch": 0.93, + "learning_rate": 9.755492504797967e-06, + "logits/chosen": -2.9634413719177246, + "logits/rejected": -2.5592072010040283, + "logps/chosen": -473.7913818359375, + "logps/rejected": -578.4912109375, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9535775184631348, + "rewards/margins": 5.093839168548584, + "rewards/rejected": -7.047416687011719, + "step": 5988 + }, + { + "epoch": 0.93, + "learning_rate": 9.754759064266819e-06, + "logits/chosen": -3.2197766304016113, + "logits/rejected": -3.0000951290130615, + "logps/chosen": -497.2309265136719, + "logps/rejected": -368.1593933105469, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0424730777740479, + "rewards/margins": 6.9015398025512695, + "rewards/rejected": -7.944012641906738, + "step": 5989 + }, + { + "epoch": 0.93, + "learning_rate": 9.754025623735672e-06, + "logits/chosen": -2.9153120517730713, + "logits/rejected": -2.9876456260681152, + "logps/chosen": -104.17625427246094, + "logps/rejected": -297.1435852050781, + "loss": 2.0998, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8953869342803955, + "rewards/margins": 2.1144137382507324, + "rewards/rejected": -6.009800434112549, + "step": 5990 + }, + { + "epoch": 0.93, + "learning_rate": 9.753292183204524e-06, + "logits/chosen": -1.262534499168396, + "logits/rejected": -2.9664230346679688, + "logps/chosen": -167.3170623779297, + "logps/rejected": -341.02178955078125, + "loss": 0.6406, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.225065231323242, + "rewards/margins": 0.7539083957672119, + "rewards/rejected": -3.978973388671875, + "step": 5991 + }, + { + "epoch": 0.93, + "learning_rate": 9.752558742673376e-06, + "logits/chosen": -3.212071180343628, + "logits/rejected": -2.282533884048462, + "logps/chosen": -574.5267944335938, + "logps/rejected": -249.44384765625, + "loss": 1.0692, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7168405055999756, + "rewards/margins": 2.055948495864868, + "rewards/rejected": -4.772789001464844, + "step": 5992 + }, + { + "epoch": 0.93, + "learning_rate": 9.751825302142228e-06, + "logits/chosen": -2.8691976070404053, + "logits/rejected": -3.021859884262085, + "logps/chosen": -968.33251953125, + "logps/rejected": -711.3385009765625, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15191650390625, + "rewards/margins": 6.043982028961182, + "rewards/rejected": -6.195898532867432, + "step": 5993 + }, + { + "epoch": 0.93, + "learning_rate": 9.75109186161108e-06, + "logits/chosen": -3.144695281982422, + "logits/rejected": -2.686396598815918, + "logps/chosen": -354.678955078125, + "logps/rejected": -166.59158325195312, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8183571100234985, + "rewards/margins": 5.570588111877441, + "rewards/rejected": -7.388944625854492, + "step": 5994 + }, + { + "epoch": 0.93, + "learning_rate": 9.750358421079932e-06, + "logits/chosen": -2.8578922748565674, + "logits/rejected": -3.11588191986084, + "logps/chosen": -80.46019744873047, + "logps/rejected": -179.1967010498047, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5979084968566895, + "rewards/margins": 4.26889705657959, + "rewards/rejected": -5.866805076599121, + "step": 5995 + }, + { + "epoch": 0.93, + "learning_rate": 9.749624980548784e-06, + "logits/chosen": -2.2540979385375977, + "logits/rejected": -2.9376213550567627, + "logps/chosen": -224.40792846679688, + "logps/rejected": -279.5663757324219, + "loss": 1.205, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9151031970977783, + "rewards/margins": 2.0032958984375, + "rewards/rejected": -5.918398857116699, + "step": 5996 + }, + { + "epoch": 0.93, + "learning_rate": 9.748891540017635e-06, + "logits/chosen": -3.0447490215301514, + "logits/rejected": -3.149026870727539, + "logps/chosen": -109.30731201171875, + "logps/rejected": -209.61639404296875, + "loss": 0.0677, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.620983600616455, + "rewards/margins": 3.662309408187866, + "rewards/rejected": -6.283292770385742, + "step": 5997 + }, + { + "epoch": 0.93, + "learning_rate": 9.748158099486487e-06, + "logits/chosen": -2.5358505249023438, + "logits/rejected": -2.9091060161590576, + "logps/chosen": -170.37718200683594, + "logps/rejected": -231.209228515625, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.708888292312622, + "rewards/margins": 4.983642578125, + "rewards/rejected": -6.692530632019043, + "step": 5998 + }, + { + "epoch": 0.93, + "learning_rate": 9.747424658955341e-06, + "logits/chosen": -2.3132693767547607, + "logits/rejected": -3.1294939517974854, + "logps/chosen": -120.97797393798828, + "logps/rejected": -415.12237548828125, + "loss": 0.1785, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6178252696990967, + "rewards/margins": 3.4780075550079346, + "rewards/rejected": -5.095832824707031, + "step": 5999 + }, + { + "epoch": 0.93, + "learning_rate": 9.746691218424193e-06, + "logits/chosen": -2.6479175090789795, + "logits/rejected": -3.0449435710906982, + "logps/chosen": -100.71940612792969, + "logps/rejected": -196.7366943359375, + "loss": 2.1128, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.251156806945801, + "rewards/margins": -0.4406123161315918, + "rewards/rejected": -4.810544967651367, + "step": 6000 + }, + { + "epoch": 0.93, + "learning_rate": 9.745957777893045e-06, + "logits/chosen": -1.07982337474823, + "logits/rejected": -1.8137803077697754, + "logps/chosen": -243.86041259765625, + "logps/rejected": -518.341064453125, + "loss": 2.4973, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.504055976867676, + "rewards/margins": 1.22102952003479, + "rewards/rejected": -4.725085258483887, + "step": 6001 + }, + { + "epoch": 0.93, + "learning_rate": 9.745224337361896e-06, + "logits/chosen": -1.2945462465286255, + "logits/rejected": -2.8250303268432617, + "logps/chosen": -92.45919799804688, + "logps/rejected": -343.4605712890625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.221790313720703, + "rewards/margins": 5.95802116394043, + "rewards/rejected": -8.179811477661133, + "step": 6002 + }, + { + "epoch": 0.93, + "learning_rate": 9.744490896830748e-06, + "logits/chosen": -3.012265205383301, + "logits/rejected": -2.5174503326416016, + "logps/chosen": -113.32344055175781, + "logps/rejected": -112.56118774414062, + "loss": 0.7342, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.272797107696533, + "rewards/margins": 2.41914439201355, + "rewards/rejected": -5.691941261291504, + "step": 6003 + }, + { + "epoch": 0.93, + "learning_rate": 9.7437574562996e-06, + "logits/chosen": -3.0031142234802246, + "logits/rejected": -2.3156578540802, + "logps/chosen": -162.0919189453125, + "logps/rejected": -227.9770050048828, + "loss": 1.6917, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.093724250793457, + "rewards/margins": 2.572835922241211, + "rewards/rejected": -5.666560173034668, + "step": 6004 + }, + { + "epoch": 0.93, + "learning_rate": 9.743024015768452e-06, + "logits/chosen": -2.232808828353882, + "logits/rejected": -3.0904171466827393, + "logps/chosen": -42.21508026123047, + "logps/rejected": -186.0693359375, + "loss": 0.0912, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.858937978744507, + "rewards/margins": 2.618936777114868, + "rewards/rejected": -5.477874755859375, + "step": 6005 + }, + { + "epoch": 0.93, + "learning_rate": 9.742290575237304e-06, + "logits/chosen": -2.6971240043640137, + "logits/rejected": -2.6153078079223633, + "logps/chosen": -318.84332275390625, + "logps/rejected": -308.7013854980469, + "loss": 0.0636, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.74566650390625, + "rewards/margins": 3.1421165466308594, + "rewards/rejected": -4.887783050537109, + "step": 6006 + }, + { + "epoch": 0.93, + "learning_rate": 9.741557134706156e-06, + "logits/chosen": -2.7334179878234863, + "logits/rejected": -3.1130645275115967, + "logps/chosen": -41.71598815917969, + "logps/rejected": -154.7863006591797, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.030583620071411, + "rewards/margins": 4.992609977722168, + "rewards/rejected": -7.023194313049316, + "step": 6007 + }, + { + "epoch": 0.93, + "learning_rate": 9.74082369417501e-06, + "logits/chosen": -3.0989766120910645, + "logits/rejected": -3.0155060291290283, + "logps/chosen": -182.5533447265625, + "logps/rejected": -203.50083923339844, + "loss": 1.3743, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4629716873168945, + "rewards/margins": 1.607058048248291, + "rewards/rejected": -4.070029258728027, + "step": 6008 + }, + { + "epoch": 0.93, + "learning_rate": 9.740090253643861e-06, + "logits/chosen": -3.066105604171753, + "logits/rejected": -3.0855460166931152, + "logps/chosen": -98.09724426269531, + "logps/rejected": -295.89544677734375, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.410907745361328, + "rewards/margins": 7.9041032791137695, + "rewards/rejected": -10.315011024475098, + "step": 6009 + }, + { + "epoch": 0.93, + "learning_rate": 9.739356813112713e-06, + "logits/chosen": -2.84645676612854, + "logits/rejected": -3.095587730407715, + "logps/chosen": -539.0699462890625, + "logps/rejected": -778.4608154296875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7860733270645142, + "rewards/margins": 6.5359392166137695, + "rewards/rejected": -8.322011947631836, + "step": 6010 + }, + { + "epoch": 0.93, + "learning_rate": 9.738623372581565e-06, + "logits/chosen": -3.0099189281463623, + "logits/rejected": -3.140105962753296, + "logps/chosen": -60.36172103881836, + "logps/rejected": -198.25384521484375, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2901207208633423, + "rewards/margins": 4.625679016113281, + "rewards/rejected": -5.915799617767334, + "step": 6011 + }, + { + "epoch": 0.93, + "learning_rate": 9.737889932050417e-06, + "logits/chosen": -3.093472480773926, + "logits/rejected": -2.9307374954223633, + "logps/chosen": -911.7101440429688, + "logps/rejected": -433.8192138671875, + "loss": 0.0409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6829497814178467, + "rewards/margins": 5.5371012687683105, + "rewards/rejected": -6.220050811767578, + "step": 6012 + }, + { + "epoch": 0.94, + "learning_rate": 9.737156491519269e-06, + "logits/chosen": -2.7108914852142334, + "logits/rejected": -3.2275867462158203, + "logps/chosen": -169.15240478515625, + "logps/rejected": -375.452392578125, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5387368202209473, + "rewards/margins": 6.411534309387207, + "rewards/rejected": -8.950271606445312, + "step": 6013 + }, + { + "epoch": 0.94, + "learning_rate": 9.73642305098812e-06, + "logits/chosen": -3.2670273780822754, + "logits/rejected": -2.853447198867798, + "logps/chosen": -206.8142547607422, + "logps/rejected": -318.2127685546875, + "loss": 1.9834, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5093424320220947, + "rewards/margins": 1.9960377216339111, + "rewards/rejected": -5.505380153656006, + "step": 6014 + }, + { + "epoch": 0.94, + "learning_rate": 9.735689610456974e-06, + "logits/chosen": -2.7946736812591553, + "logits/rejected": -2.944544792175293, + "logps/chosen": -111.9561538696289, + "logps/rejected": -183.81585693359375, + "loss": 0.2724, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.457589864730835, + "rewards/margins": 4.226078033447266, + "rewards/rejected": -7.6836676597595215, + "step": 6015 + }, + { + "epoch": 0.94, + "learning_rate": 9.734956169925826e-06, + "logits/chosen": -2.625781536102295, + "logits/rejected": -2.967236280441284, + "logps/chosen": -147.92147827148438, + "logps/rejected": -342.1669921875, + "loss": 0.0378, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9465248584747314, + "rewards/margins": 4.091723442077637, + "rewards/rejected": -6.038248538970947, + "step": 6016 + }, + { + "epoch": 0.94, + "learning_rate": 9.73422272939468e-06, + "logits/chosen": -1.828202486038208, + "logits/rejected": -2.8834166526794434, + "logps/chosen": -133.28311157226562, + "logps/rejected": -237.74267578125, + "loss": 0.0478, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.469106435775757, + "rewards/margins": 3.022251844406128, + "rewards/rejected": -5.491358280181885, + "step": 6017 + }, + { + "epoch": 0.94, + "learning_rate": 9.733489288863532e-06, + "logits/chosen": -3.0695276260375977, + "logits/rejected": -2.648169755935669, + "logps/chosen": -716.5678100585938, + "logps/rejected": -390.5166015625, + "loss": 2.0619, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5490541458129883, + "rewards/margins": -0.030062198638916016, + "rewards/rejected": -3.518991708755493, + "step": 6018 + }, + { + "epoch": 0.94, + "learning_rate": 9.732755848332384e-06, + "logits/chosen": -2.529207468032837, + "logits/rejected": -3.033876419067383, + "logps/chosen": -291.5774841308594, + "logps/rejected": -194.3605194091797, + "loss": 0.4982, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6512078046798706, + "rewards/margins": 2.9355313777923584, + "rewards/rejected": -4.5867390632629395, + "step": 6019 + }, + { + "epoch": 0.94, + "learning_rate": 9.732022407801235e-06, + "logits/chosen": -2.728912591934204, + "logits/rejected": -3.1652731895446777, + "logps/chosen": -868.251708984375, + "logps/rejected": -662.0717163085938, + "loss": 0.0598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6746978759765625, + "rewards/margins": 4.270122051239014, + "rewards/rejected": -4.944819450378418, + "step": 6020 + }, + { + "epoch": 0.94, + "learning_rate": 9.731288967270087e-06, + "logits/chosen": -2.8915810585021973, + "logits/rejected": -2.0868852138519287, + "logps/chosen": -420.03948974609375, + "logps/rejected": -402.15045166015625, + "loss": 2.0032, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4719085693359375, + "rewards/margins": 2.2798566818237305, + "rewards/rejected": -5.751765727996826, + "step": 6021 + }, + { + "epoch": 0.94, + "learning_rate": 9.730555526738939e-06, + "logits/chosen": -0.7193390727043152, + "logits/rejected": -2.924050807952881, + "logps/chosen": -94.25687408447266, + "logps/rejected": -689.463134765625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.602505922317505, + "rewards/margins": 8.284923553466797, + "rewards/rejected": -10.887430191040039, + "step": 6022 + }, + { + "epoch": 0.94, + "learning_rate": 9.729822086207791e-06, + "logits/chosen": -3.161757469177246, + "logits/rejected": -3.1776034832000732, + "logps/chosen": -562.3086547851562, + "logps/rejected": -463.588134765625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41580507159233093, + "rewards/margins": 6.455533027648926, + "rewards/rejected": -6.871337890625, + "step": 6023 + }, + { + "epoch": 0.94, + "learning_rate": 9.729088645676643e-06, + "logits/chosen": -3.0109715461730957, + "logits/rejected": -2.308311939239502, + "logps/chosen": -690.5903930664062, + "logps/rejected": -474.09661865234375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6206023693084717, + "rewards/margins": 7.379220008850098, + "rewards/rejected": -8.999822616577148, + "step": 6024 + }, + { + "epoch": 0.94, + "learning_rate": 9.728355205145495e-06, + "logits/chosen": -2.990337371826172, + "logits/rejected": -3.0587103366851807, + "logps/chosen": -428.31842041015625, + "logps/rejected": -608.7083740234375, + "loss": 1.5025, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7008209228515625, + "rewards/margins": 0.17376410961151123, + "rewards/rejected": -3.8745851516723633, + "step": 6025 + }, + { + "epoch": 0.94, + "learning_rate": 9.727621764614348e-06, + "logits/chosen": -2.4907736778259277, + "logits/rejected": -2.6598386764526367, + "logps/chosen": -148.75213623046875, + "logps/rejected": -238.52865600585938, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7632484436035156, + "rewards/margins": 5.4735918045043945, + "rewards/rejected": -7.236840724945068, + "step": 6026 + }, + { + "epoch": 0.94, + "learning_rate": 9.7268883240832e-06, + "logits/chosen": -2.8436574935913086, + "logits/rejected": -3.126171827316284, + "logps/chosen": -55.67416763305664, + "logps/rejected": -192.1395263671875, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3950862884521484, + "rewards/margins": 4.238203525543213, + "rewards/rejected": -6.6332902908325195, + "step": 6027 + }, + { + "epoch": 0.94, + "learning_rate": 9.726154883552052e-06, + "logits/chosen": -2.9474213123321533, + "logits/rejected": -3.121359348297119, + "logps/chosen": -173.36297607421875, + "logps/rejected": -213.45455932617188, + "loss": 0.9299, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.062093734741211, + "rewards/margins": 1.626888632774353, + "rewards/rejected": -5.6889824867248535, + "step": 6028 + }, + { + "epoch": 0.94, + "learning_rate": 9.725421443020904e-06, + "logits/chosen": -2.298814296722412, + "logits/rejected": -2.8381478786468506, + "logps/chosen": -455.95965576171875, + "logps/rejected": -542.2114868164062, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5979523658752441, + "rewards/margins": 8.101888656616211, + "rewards/rejected": -9.699840545654297, + "step": 6029 + }, + { + "epoch": 0.94, + "learning_rate": 9.724688002489756e-06, + "logits/chosen": -2.2568254470825195, + "logits/rejected": -3.057619094848633, + "logps/chosen": -71.49470520019531, + "logps/rejected": -413.97900390625, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9034910202026367, + "rewards/margins": 5.468295097351074, + "rewards/rejected": -8.371786117553711, + "step": 6030 + }, + { + "epoch": 0.94, + "learning_rate": 9.723954561958608e-06, + "logits/chosen": -2.5488641262054443, + "logits/rejected": -2.9616312980651855, + "logps/chosen": -387.4997863769531, + "logps/rejected": -666.2157592773438, + "loss": 3.0312, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.014890670776367, + "rewards/margins": -0.723944902420044, + "rewards/rejected": -4.290945529937744, + "step": 6031 + }, + { + "epoch": 0.94, + "learning_rate": 9.72322112142746e-06, + "logits/chosen": -2.3680243492126465, + "logits/rejected": -2.830946683883667, + "logps/chosen": -256.6468200683594, + "logps/rejected": -253.07398986816406, + "loss": 1.2982, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5116584300994873, + "rewards/margins": 2.5938549041748047, + "rewards/rejected": -6.105513572692871, + "step": 6032 + }, + { + "epoch": 0.94, + "learning_rate": 9.722487680896311e-06, + "logits/chosen": -2.165787935256958, + "logits/rejected": -2.7610223293304443, + "logps/chosen": -209.5731658935547, + "logps/rejected": -208.05218505859375, + "loss": 0.6762, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.475517272949219, + "rewards/margins": 1.6317698955535889, + "rewards/rejected": -6.1072869300842285, + "step": 6033 + }, + { + "epoch": 0.94, + "learning_rate": 9.721754240365163e-06, + "logits/chosen": -2.749659538269043, + "logits/rejected": -3.084026336669922, + "logps/chosen": -78.68724822998047, + "logps/rejected": -299.4652404785156, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8861958980560303, + "rewards/margins": 6.884040832519531, + "rewards/rejected": -8.77023696899414, + "step": 6034 + }, + { + "epoch": 0.94, + "learning_rate": 9.721020799834017e-06, + "logits/chosen": -2.7717878818511963, + "logits/rejected": -3.102367401123047, + "logps/chosen": -225.80348205566406, + "logps/rejected": -362.8962097167969, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.058074951171875, + "rewards/margins": 6.491716384887695, + "rewards/rejected": -7.54979133605957, + "step": 6035 + }, + { + "epoch": 0.94, + "learning_rate": 9.720287359302869e-06, + "logits/chosen": -2.175307035446167, + "logits/rejected": -2.7601540088653564, + "logps/chosen": -284.16314697265625, + "logps/rejected": -498.44268798828125, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3076438903808594, + "rewards/margins": 5.656349182128906, + "rewards/rejected": -6.963993072509766, + "step": 6036 + }, + { + "epoch": 0.94, + "learning_rate": 9.71955391877172e-06, + "logits/chosen": -2.278646469116211, + "logits/rejected": -3.063145637512207, + "logps/chosen": -96.26219177246094, + "logps/rejected": -428.2877502441406, + "loss": 0.041, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.610506534576416, + "rewards/margins": 5.057045936584473, + "rewards/rejected": -7.6675519943237305, + "step": 6037 + }, + { + "epoch": 0.94, + "learning_rate": 9.718820478240573e-06, + "logits/chosen": -2.163742780685425, + "logits/rejected": -2.8298518657684326, + "logps/chosen": -232.62396240234375, + "logps/rejected": -300.0584411621094, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6236624717712402, + "rewards/margins": 5.4825592041015625, + "rewards/rejected": -8.106222152709961, + "step": 6038 + }, + { + "epoch": 0.94, + "learning_rate": 9.718087037709424e-06, + "logits/chosen": -1.8729640245437622, + "logits/rejected": -2.716517686843872, + "logps/chosen": -168.39366149902344, + "logps/rejected": -405.47314453125, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.03299880027771, + "rewards/margins": 6.602212905883789, + "rewards/rejected": -8.635211944580078, + "step": 6039 + }, + { + "epoch": 0.94, + "learning_rate": 9.717353597178276e-06, + "logits/chosen": -2.5880696773529053, + "logits/rejected": -3.0253684520721436, + "logps/chosen": -375.7843017578125, + "logps/rejected": -563.1753540039062, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.457707643508911, + "rewards/margins": 5.032124042510986, + "rewards/rejected": -7.489831924438477, + "step": 6040 + }, + { + "epoch": 0.94, + "learning_rate": 9.716620156647128e-06, + "logits/chosen": -2.930086135864258, + "logits/rejected": -1.6397262811660767, + "logps/chosen": -223.67672729492188, + "logps/rejected": -124.66767883300781, + "loss": 0.2733, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9344048500061035, + "rewards/margins": 3.17952299118042, + "rewards/rejected": -7.113927841186523, + "step": 6041 + }, + { + "epoch": 0.94, + "learning_rate": 9.71588671611598e-06, + "logits/chosen": -3.0755136013031006, + "logits/rejected": -2.530698537826538, + "logps/chosen": -115.10279846191406, + "logps/rejected": -222.97718811035156, + "loss": 1.9421, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.196389198303223, + "rewards/margins": -1.5974756479263306, + "rewards/rejected": -2.5989134311676025, + "step": 6042 + }, + { + "epoch": 0.94, + "learning_rate": 9.715153275584832e-06, + "logits/chosen": -2.1004106998443604, + "logits/rejected": -2.948594093322754, + "logps/chosen": -183.16806030273438, + "logps/rejected": -302.0445251464844, + "loss": 1.7894, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.299560308456421, + "rewards/margins": -0.12710213661193848, + "rewards/rejected": -3.1724581718444824, + "step": 6043 + }, + { + "epoch": 0.94, + "learning_rate": 9.714419835053686e-06, + "logits/chosen": -2.4484102725982666, + "logits/rejected": -2.828378677368164, + "logps/chosen": -331.02130126953125, + "logps/rejected": -483.8328857421875, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6999611854553223, + "rewards/margins": 6.81095027923584, + "rewards/rejected": -8.51091194152832, + "step": 6044 + }, + { + "epoch": 0.94, + "learning_rate": 9.713686394522537e-06, + "logits/chosen": -1.3934319019317627, + "logits/rejected": -2.9674627780914307, + "logps/chosen": -112.89823913574219, + "logps/rejected": -351.017333984375, + "loss": 0.2055, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4805684089660645, + "rewards/margins": 3.18070650100708, + "rewards/rejected": -5.6612749099731445, + "step": 6045 + }, + { + "epoch": 0.94, + "learning_rate": 9.71295295399139e-06, + "logits/chosen": -1.7422559261322021, + "logits/rejected": -2.6921026706695557, + "logps/chosen": -130.70498657226562, + "logps/rejected": -395.9431457519531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9162826538085938, + "rewards/margins": 8.699649810791016, + "rewards/rejected": -9.61593246459961, + "step": 6046 + }, + { + "epoch": 0.94, + "learning_rate": 9.712219513460241e-06, + "logits/chosen": -3.021831512451172, + "logits/rejected": -3.152407646179199, + "logps/chosen": -46.87261962890625, + "logps/rejected": -108.40702819824219, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9275336265563965, + "rewards/margins": 3.99235200881958, + "rewards/rejected": -6.919885635375977, + "step": 6047 + }, + { + "epoch": 0.94, + "learning_rate": 9.711486072929093e-06, + "logits/chosen": -2.905850887298584, + "logits/rejected": -2.8335514068603516, + "logps/chosen": -185.8314971923828, + "logps/rejected": -248.031494140625, + "loss": 2.6343, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.142128944396973, + "rewards/margins": 0.5284645557403564, + "rewards/rejected": -4.67059326171875, + "step": 6048 + }, + { + "epoch": 0.94, + "learning_rate": 9.710752632397947e-06, + "logits/chosen": -3.112302303314209, + "logits/rejected": -2.788959264755249, + "logps/chosen": -668.2529296875, + "logps/rejected": -575.0064086914062, + "loss": 2.3375, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7109711170196533, + "rewards/margins": 0.2567291259765625, + "rewards/rejected": -3.967700481414795, + "step": 6049 + }, + { + "epoch": 0.94, + "learning_rate": 9.710019191866799e-06, + "logits/chosen": -2.5367729663848877, + "logits/rejected": -3.130533218383789, + "logps/chosen": -60.977989196777344, + "logps/rejected": -252.82769775390625, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8064706325531006, + "rewards/margins": 6.068480491638184, + "rewards/rejected": -8.874951362609863, + "step": 6050 + }, + { + "epoch": 0.94, + "learning_rate": 9.70928575133565e-06, + "logits/chosen": -2.9003069400787354, + "logits/rejected": -2.5206730365753174, + "logps/chosen": -173.87355041503906, + "logps/rejected": -250.56398010253906, + "loss": 1.5561, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.734127998352051, + "rewards/margins": 2.45919132232666, + "rewards/rejected": -7.193319797515869, + "step": 6051 + }, + { + "epoch": 0.94, + "learning_rate": 9.708552310804504e-06, + "logits/chosen": -2.059682607650757, + "logits/rejected": -2.4628989696502686, + "logps/chosen": -107.60450744628906, + "logps/rejected": -292.9848937988281, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3723124265670776, + "rewards/margins": 5.334897994995117, + "rewards/rejected": -6.707210540771484, + "step": 6052 + }, + { + "epoch": 0.94, + "learning_rate": 9.707818870273356e-06, + "logits/chosen": -1.6750268936157227, + "logits/rejected": -3.0523228645324707, + "logps/chosen": -178.91812133789062, + "logps/rejected": -391.95159912109375, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5896308422088623, + "rewards/margins": 4.8146467208862305, + "rewards/rejected": -6.404277324676514, + "step": 6053 + }, + { + "epoch": 0.94, + "learning_rate": 9.707085429742208e-06, + "logits/chosen": -2.7037339210510254, + "logits/rejected": -3.150181770324707, + "logps/chosen": -46.47942352294922, + "logps/rejected": -152.2764434814453, + "loss": 0.026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.495587110519409, + "rewards/margins": 4.0925116539001465, + "rewards/rejected": -7.588098526000977, + "step": 6054 + }, + { + "epoch": 0.94, + "learning_rate": 9.70635198921106e-06, + "logits/chosen": -2.94460129737854, + "logits/rejected": -2.335983991622925, + "logps/chosen": -185.26881408691406, + "logps/rejected": -244.96841430664062, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2564380168914795, + "rewards/margins": 4.824779510498047, + "rewards/rejected": -6.081217288970947, + "step": 6055 + }, + { + "epoch": 0.94, + "learning_rate": 9.705618548679911e-06, + "logits/chosen": -2.95161771774292, + "logits/rejected": -2.7999954223632812, + "logps/chosen": -555.3125, + "logps/rejected": -453.04193115234375, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4322445392608643, + "rewards/margins": 6.416392803192139, + "rewards/rejected": -8.848637580871582, + "step": 6056 + }, + { + "epoch": 0.94, + "learning_rate": 9.704885108148763e-06, + "logits/chosen": -0.6928911805152893, + "logits/rejected": -2.7672388553619385, + "logps/chosen": -86.21754455566406, + "logps/rejected": -297.64227294921875, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.434494733810425, + "rewards/margins": 5.036110877990723, + "rewards/rejected": -7.470605850219727, + "step": 6057 + }, + { + "epoch": 0.94, + "learning_rate": 9.704151667617615e-06, + "logits/chosen": -2.5823543071746826, + "logits/rejected": -3.128080129623413, + "logps/chosen": -403.24298095703125, + "logps/rejected": -462.50390625, + "loss": 2.9244, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.092207431793213, + "rewards/margins": 2.7713396549224854, + "rewards/rejected": -6.863546848297119, + "step": 6058 + }, + { + "epoch": 0.94, + "learning_rate": 9.703418227086467e-06, + "logits/chosen": -2.3567774295806885, + "logits/rejected": -3.150043487548828, + "logps/chosen": -55.0567512512207, + "logps/rejected": -263.2895202636719, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.083456516265869, + "rewards/margins": 3.5601563453674316, + "rewards/rejected": -5.643612861633301, + "step": 6059 + }, + { + "epoch": 0.94, + "learning_rate": 9.702684786555319e-06, + "logits/chosen": -3.079993963241577, + "logits/rejected": -2.88299822807312, + "logps/chosen": -212.38763427734375, + "logps/rejected": -269.0058288574219, + "loss": 2.5889, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.765043258666992, + "rewards/margins": -1.5351300239562988, + "rewards/rejected": -3.2299134731292725, + "step": 6060 + }, + { + "epoch": 0.94, + "learning_rate": 9.701951346024173e-06, + "logits/chosen": -2.0929596424102783, + "logits/rejected": -3.0776619911193848, + "logps/chosen": -185.75347900390625, + "logps/rejected": -324.8334655761719, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4119491577148438, + "rewards/margins": 4.843752861022949, + "rewards/rejected": -7.255702018737793, + "step": 6061 + }, + { + "epoch": 0.94, + "learning_rate": 9.701217905493024e-06, + "logits/chosen": -2.643308639526367, + "logits/rejected": -3.103574514389038, + "logps/chosen": -110.12773895263672, + "logps/rejected": -249.64486694335938, + "loss": 2.0193, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.195072650909424, + "rewards/margins": 0.015411615371704102, + "rewards/rejected": -5.210484504699707, + "step": 6062 + }, + { + "epoch": 0.94, + "learning_rate": 9.700484464961876e-06, + "logits/chosen": -2.4614474773406982, + "logits/rejected": -3.155245542526245, + "logps/chosen": -167.70809936523438, + "logps/rejected": -323.18536376953125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9672367572784424, + "rewards/margins": 6.617283821105957, + "rewards/rejected": -8.58452033996582, + "step": 6063 + }, + { + "epoch": 0.94, + "learning_rate": 9.699751024430728e-06, + "logits/chosen": -3.0959463119506836, + "logits/rejected": -1.824792504310608, + "logps/chosen": -269.796630859375, + "logps/rejected": -195.03857421875, + "loss": 1.6803, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.067027568817139, + "rewards/margins": 1.630672574043274, + "rewards/rejected": -5.697700023651123, + "step": 6064 + }, + { + "epoch": 0.94, + "learning_rate": 9.69901758389958e-06, + "logits/chosen": -2.5021448135375977, + "logits/rejected": -2.251542568206787, + "logps/chosen": -189.287109375, + "logps/rejected": -297.91357421875, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.599027633666992, + "rewards/margins": 6.072239875793457, + "rewards/rejected": -8.67126750946045, + "step": 6065 + }, + { + "epoch": 0.94, + "learning_rate": 9.698284143368432e-06, + "logits/chosen": -2.9195215702056885, + "logits/rejected": -3.0888140201568604, + "logps/chosen": -227.61410522460938, + "logps/rejected": -276.5132751464844, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6950104236602783, + "rewards/margins": 5.563897132873535, + "rewards/rejected": -7.258908271789551, + "step": 6066 + }, + { + "epoch": 0.94, + "learning_rate": 9.697550702837284e-06, + "logits/chosen": -2.9906444549560547, + "logits/rejected": -2.19240403175354, + "logps/chosen": -760.6600341796875, + "logps/rejected": -490.50152587890625, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006059274077415466, + "rewards/margins": 6.0178070068359375, + "rewards/rejected": -6.023866176605225, + "step": 6067 + }, + { + "epoch": 0.94, + "learning_rate": 9.696817262306136e-06, + "logits/chosen": -2.6669111251831055, + "logits/rejected": -2.943511962890625, + "logps/chosen": -209.64566040039062, + "logps/rejected": -280.4375915527344, + "loss": 2.9765, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4885263442993164, + "rewards/margins": 0.1290597915649414, + "rewards/rejected": -3.617586135864258, + "step": 6068 + }, + { + "epoch": 0.94, + "learning_rate": 9.696083821774988e-06, + "logits/chosen": -2.8919403553009033, + "logits/rejected": -2.3021152019500732, + "logps/chosen": -332.5559387207031, + "logps/rejected": -163.83761596679688, + "loss": 3.4968, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.014031887054443, + "rewards/margins": -0.46334052085876465, + "rewards/rejected": -4.5506911277771, + "step": 6069 + }, + { + "epoch": 0.94, + "learning_rate": 9.695350381243841e-06, + "logits/chosen": -3.0990209579467773, + "logits/rejected": -2.978527545928955, + "logps/chosen": -122.47344207763672, + "logps/rejected": -306.0228271484375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8004512786865234, + "rewards/margins": 7.795434951782227, + "rewards/rejected": -9.59588623046875, + "step": 6070 + }, + { + "epoch": 0.94, + "learning_rate": 9.694616940712693e-06, + "logits/chosen": -1.6546657085418701, + "logits/rejected": -2.8942911624908447, + "logps/chosen": -238.44996643066406, + "logps/rejected": -623.2047119140625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0338947772979736, + "rewards/margins": 7.881330490112305, + "rewards/rejected": -9.915225982666016, + "step": 6071 + }, + { + "epoch": 0.94, + "learning_rate": 9.693883500181545e-06, + "logits/chosen": -1.7962977886199951, + "logits/rejected": -3.017244577407837, + "logps/chosen": -84.80671691894531, + "logps/rejected": -156.1778106689453, + "loss": 0.8203, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.736884593963623, + "rewards/margins": 1.4153637886047363, + "rewards/rejected": -5.152248382568359, + "step": 6072 + }, + { + "epoch": 0.94, + "learning_rate": 9.693150059650397e-06, + "logits/chosen": -3.0340168476104736, + "logits/rejected": -2.490954875946045, + "logps/chosen": -191.53077697753906, + "logps/rejected": -185.55926513671875, + "loss": 2.6532, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.129637718200684, + "rewards/margins": 1.2814812660217285, + "rewards/rejected": -5.411118984222412, + "step": 6073 + }, + { + "epoch": 0.94, + "learning_rate": 9.692416619119249e-06, + "logits/chosen": -2.4937314987182617, + "logits/rejected": -0.5533095598220825, + "logps/chosen": -470.06182861328125, + "logps/rejected": -57.5595703125, + "loss": 6.4265, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.0231351852417, + "rewards/margins": -5.613948822021484, + "rewards/rejected": -2.409186601638794, + "step": 6074 + }, + { + "epoch": 0.94, + "learning_rate": 9.6916831785881e-06, + "logits/chosen": -2.725158214569092, + "logits/rejected": -2.632275342941284, + "logps/chosen": -278.1075134277344, + "logps/rejected": -362.9024658203125, + "loss": 2.3921, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.751737117767334, + "rewards/margins": -0.06648683547973633, + "rewards/rejected": -4.685250282287598, + "step": 6075 + }, + { + "epoch": 0.94, + "learning_rate": 9.690949738056952e-06, + "logits/chosen": -1.8191198110580444, + "logits/rejected": -2.8812527656555176, + "logps/chosen": -217.07275390625, + "logps/rejected": -404.3650207519531, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.562442421913147, + "rewards/margins": 6.445618629455566, + "rewards/rejected": -8.008061408996582, + "step": 6076 + }, + { + "epoch": 0.95, + "learning_rate": 9.690216297525804e-06, + "logits/chosen": -3.0407538414001465, + "logits/rejected": -2.2462518215179443, + "logps/chosen": -696.2239990234375, + "logps/rejected": -526.989501953125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0163452625274658, + "rewards/margins": 7.23941707611084, + "rewards/rejected": -8.255762100219727, + "step": 6077 + }, + { + "epoch": 0.95, + "learning_rate": 9.689482856994656e-06, + "logits/chosen": -2.3784337043762207, + "logits/rejected": -3.0539755821228027, + "logps/chosen": -84.72428894042969, + "logps/rejected": -98.22686004638672, + "loss": 2.2001, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.291011333465576, + "rewards/margins": 0.7666854858398438, + "rewards/rejected": -5.057696342468262, + "step": 6078 + }, + { + "epoch": 0.95, + "learning_rate": 9.68874941646351e-06, + "logits/chosen": -2.986459255218506, + "logits/rejected": -1.77812922000885, + "logps/chosen": -506.2364807128906, + "logps/rejected": -283.20623779296875, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.76181560754776, + "rewards/margins": 5.260528564453125, + "rewards/rejected": -6.02234411239624, + "step": 6079 + }, + { + "epoch": 0.95, + "learning_rate": 9.688015975932362e-06, + "logits/chosen": -1.5558544397354126, + "logits/rejected": -2.915661334991455, + "logps/chosen": -100.16657257080078, + "logps/rejected": -203.30368041992188, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9617782831192017, + "rewards/margins": 6.130979061126709, + "rewards/rejected": -7.092757225036621, + "step": 6080 + }, + { + "epoch": 0.95, + "learning_rate": 9.687282535401214e-06, + "logits/chosen": -3.1052465438842773, + "logits/rejected": -2.6329851150512695, + "logps/chosen": -490.1435241699219, + "logps/rejected": -398.5891418457031, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.112249732017517, + "rewards/margins": 5.497595310211182, + "rewards/rejected": -6.609845161437988, + "step": 6081 + }, + { + "epoch": 0.95, + "learning_rate": 9.686549094870065e-06, + "logits/chosen": -1.9918910264968872, + "logits/rejected": -2.9895012378692627, + "logps/chosen": -288.9138488769531, + "logps/rejected": -449.9346923828125, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2624130249023438, + "rewards/margins": 5.7830095291137695, + "rewards/rejected": -7.045422554016113, + "step": 6082 + }, + { + "epoch": 0.95, + "learning_rate": 9.685815654338917e-06, + "logits/chosen": -1.1337367296218872, + "logits/rejected": -2.8882527351379395, + "logps/chosen": -121.3104248046875, + "logps/rejected": -398.62945556640625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.028553009033203, + "rewards/margins": 5.80905818939209, + "rewards/rejected": -7.837611198425293, + "step": 6083 + }, + { + "epoch": 0.95, + "learning_rate": 9.68508221380777e-06, + "logits/chosen": -1.794847011566162, + "logits/rejected": -2.7102513313293457, + "logps/chosen": -51.74988555908203, + "logps/rejected": -212.49575805664062, + "loss": 0.1092, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.059872150421143, + "rewards/margins": 2.8597543239593506, + "rewards/rejected": -6.919626712799072, + "step": 6084 + }, + { + "epoch": 0.95, + "learning_rate": 9.684348773276623e-06, + "logits/chosen": -2.701709508895874, + "logits/rejected": -3.0081727504730225, + "logps/chosen": -230.93931579589844, + "logps/rejected": -225.96328735351562, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9523351788520813, + "rewards/margins": 6.242992877960205, + "rewards/rejected": -7.195328235626221, + "step": 6085 + }, + { + "epoch": 0.95, + "learning_rate": 9.683615332745475e-06, + "logits/chosen": -3.211963653564453, + "logits/rejected": -2.7975411415100098, + "logps/chosen": -165.19161987304688, + "logps/rejected": -87.41483306884766, + "loss": 2.0046, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5736916065216064, + "rewards/margins": 0.4825291633605957, + "rewards/rejected": -4.056221008300781, + "step": 6086 + }, + { + "epoch": 0.95, + "learning_rate": 9.682881892214326e-06, + "logits/chosen": -1.7509421110153198, + "logits/rejected": -2.5196292400360107, + "logps/chosen": -112.82915496826172, + "logps/rejected": -322.6864929199219, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7787636518478394, + "rewards/margins": 7.722663879394531, + "rewards/rejected": -9.50142765045166, + "step": 6087 + }, + { + "epoch": 0.95, + "learning_rate": 9.68214845168318e-06, + "logits/chosen": -2.2552483081817627, + "logits/rejected": -2.649174928665161, + "logps/chosen": -133.33815002441406, + "logps/rejected": -279.90618896484375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0263545513153076, + "rewards/margins": 5.262323379516602, + "rewards/rejected": -7.288678169250488, + "step": 6088 + }, + { + "epoch": 0.95, + "learning_rate": 9.681415011152032e-06, + "logits/chosen": -1.8415522575378418, + "logits/rejected": -2.90574312210083, + "logps/chosen": -115.78826904296875, + "logps/rejected": -415.9742431640625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5029281973838806, + "rewards/margins": 7.459770202636719, + "rewards/rejected": -7.962698459625244, + "step": 6089 + }, + { + "epoch": 0.95, + "learning_rate": 9.680681570620884e-06, + "logits/chosen": -2.9384853839874268, + "logits/rejected": -2.4364542961120605, + "logps/chosen": -360.8236389160156, + "logps/rejected": -295.5675354003906, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9657455682754517, + "rewards/margins": 3.854369640350342, + "rewards/rejected": -5.820115089416504, + "step": 6090 + }, + { + "epoch": 0.95, + "learning_rate": 9.679948130089736e-06, + "logits/chosen": -3.130052089691162, + "logits/rejected": -2.641775131225586, + "logps/chosen": -467.203857421875, + "logps/rejected": -310.3945007324219, + "loss": 1.9585, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.001728057861328, + "rewards/margins": 1.5681657791137695, + "rewards/rejected": -4.569893836975098, + "step": 6091 + }, + { + "epoch": 0.95, + "learning_rate": 9.679214689558588e-06, + "logits/chosen": -2.9160473346710205, + "logits/rejected": -3.0771148204803467, + "logps/chosen": -133.64334106445312, + "logps/rejected": -174.4709014892578, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2293556928634644, + "rewards/margins": 5.592956066131592, + "rewards/rejected": -6.8223114013671875, + "step": 6092 + }, + { + "epoch": 0.95, + "learning_rate": 9.67848124902744e-06, + "logits/chosen": -1.3604671955108643, + "logits/rejected": -2.6192445755004883, + "logps/chosen": -205.35394287109375, + "logps/rejected": -552.5729370117188, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6484405994415283, + "rewards/margins": 8.400344848632812, + "rewards/rejected": -10.048785209655762, + "step": 6093 + }, + { + "epoch": 0.95, + "learning_rate": 9.677747808496291e-06, + "logits/chosen": -2.998976230621338, + "logits/rejected": -3.0849812030792236, + "logps/chosen": -41.78614044189453, + "logps/rejected": -205.57481384277344, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4817824363708496, + "rewards/margins": 5.5201921463012695, + "rewards/rejected": -7.001974582672119, + "step": 6094 + }, + { + "epoch": 0.95, + "learning_rate": 9.677014367965143e-06, + "logits/chosen": -1.8151342868804932, + "logits/rejected": -2.785099744796753, + "logps/chosen": -108.91900634765625, + "logps/rejected": -149.9449462890625, + "loss": 0.0369, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.314296245574951, + "rewards/margins": 3.7769076824188232, + "rewards/rejected": -7.091203689575195, + "step": 6095 + }, + { + "epoch": 0.95, + "learning_rate": 9.676280927433995e-06, + "logits/chosen": -2.4816935062408447, + "logits/rejected": -3.046659231185913, + "logps/chosen": -185.26210021972656, + "logps/rejected": -451.8323059082031, + "loss": 0.0441, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1902647018432617, + "rewards/margins": 6.0693206787109375, + "rewards/rejected": -7.259585380554199, + "step": 6096 + }, + { + "epoch": 0.95, + "learning_rate": 9.675547486902849e-06, + "logits/chosen": -3.0810611248016357, + "logits/rejected": -3.2892308235168457, + "logps/chosen": -104.84605407714844, + "logps/rejected": -84.0247573852539, + "loss": 2.1546, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.842381477355957, + "rewards/margins": -0.17785882949829102, + "rewards/rejected": -3.664522647857666, + "step": 6097 + }, + { + "epoch": 0.95, + "learning_rate": 9.6748140463717e-06, + "logits/chosen": -2.0889177322387695, + "logits/rejected": -2.854886531829834, + "logps/chosen": -134.4716796875, + "logps/rejected": -432.9064636230469, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7003002166748047, + "rewards/margins": 5.39378547668457, + "rewards/rejected": -8.094085693359375, + "step": 6098 + }, + { + "epoch": 0.95, + "learning_rate": 9.674080605840552e-06, + "logits/chosen": -2.333749771118164, + "logits/rejected": -3.012664794921875, + "logps/chosen": -585.7655639648438, + "logps/rejected": -773.8406982421875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5024070739746094, + "rewards/margins": 6.150167465209961, + "rewards/rejected": -8.65257453918457, + "step": 6099 + }, + { + "epoch": 0.95, + "learning_rate": 9.673347165309404e-06, + "logits/chosen": -1.305031657218933, + "logits/rejected": -2.8557636737823486, + "logps/chosen": -66.04326629638672, + "logps/rejected": -391.26019287109375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.644451141357422, + "rewards/margins": 6.491114616394043, + "rewards/rejected": -9.135565757751465, + "step": 6100 + }, + { + "epoch": 0.95, + "learning_rate": 9.672613724778256e-06, + "logits/chosen": -2.3110833168029785, + "logits/rejected": -2.943737268447876, + "logps/chosen": -251.77040100097656, + "logps/rejected": -294.7198486328125, + "loss": 5.9247, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.180021286010742, + "rewards/margins": -5.921945095062256, + "rewards/rejected": -2.2580764293670654, + "step": 6101 + }, + { + "epoch": 0.95, + "learning_rate": 9.671880284247108e-06, + "logits/chosen": -2.418776035308838, + "logits/rejected": -3.0934324264526367, + "logps/chosen": -275.5504150390625, + "logps/rejected": -302.419189453125, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0534067153930664, + "rewards/margins": 4.6815338134765625, + "rewards/rejected": -6.734940528869629, + "step": 6102 + }, + { + "epoch": 0.95, + "learning_rate": 9.67114684371596e-06, + "logits/chosen": -2.921794891357422, + "logits/rejected": -3.0796680450439453, + "logps/chosen": -78.5534896850586, + "logps/rejected": -112.3919906616211, + "loss": 0.1449, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2217020988464355, + "rewards/margins": 2.9262094497680664, + "rewards/rejected": -5.147911548614502, + "step": 6103 + }, + { + "epoch": 0.95, + "learning_rate": 9.670413403184812e-06, + "logits/chosen": -2.5744736194610596, + "logits/rejected": -2.6848437786102295, + "logps/chosen": -230.33822631835938, + "logps/rejected": -333.3006591796875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.043557643890381, + "rewards/margins": 6.765932083129883, + "rewards/rejected": -8.809490203857422, + "step": 6104 + }, + { + "epoch": 0.95, + "learning_rate": 9.669679962653664e-06, + "logits/chosen": -1.4491864442825317, + "logits/rejected": -2.5219380855560303, + "logps/chosen": -137.7273712158203, + "logps/rejected": -224.4358367919922, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8989641666412354, + "rewards/margins": 5.74418306350708, + "rewards/rejected": -7.6431474685668945, + "step": 6105 + }, + { + "epoch": 0.95, + "learning_rate": 9.668946522122517e-06, + "logits/chosen": -1.861982822418213, + "logits/rejected": -3.037374258041382, + "logps/chosen": -96.01172637939453, + "logps/rejected": -281.1307373046875, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3952722549438477, + "rewards/margins": 4.2993292808532715, + "rewards/rejected": -6.694601535797119, + "step": 6106 + }, + { + "epoch": 0.95, + "learning_rate": 9.668213081591369e-06, + "logits/chosen": -2.6422979831695557, + "logits/rejected": -1.8539915084838867, + "logps/chosen": -387.245361328125, + "logps/rejected": -500.9459533691406, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9729454517364502, + "rewards/margins": 5.884098052978516, + "rewards/rejected": -7.857043743133545, + "step": 6107 + }, + { + "epoch": 0.95, + "learning_rate": 9.667479641060221e-06, + "logits/chosen": -0.924641489982605, + "logits/rejected": -2.769740343093872, + "logps/chosen": -71.80620574951172, + "logps/rejected": -614.3363647460938, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0746889114379883, + "rewards/margins": 5.759939670562744, + "rewards/rejected": -7.834628582000732, + "step": 6108 + }, + { + "epoch": 0.95, + "learning_rate": 9.666746200529073e-06, + "logits/chosen": -2.852266550064087, + "logits/rejected": -2.6331188678741455, + "logps/chosen": -220.6608123779297, + "logps/rejected": -275.8588562011719, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6156094074249268, + "rewards/margins": 6.222166061401367, + "rewards/rejected": -7.837775230407715, + "step": 6109 + }, + { + "epoch": 0.95, + "learning_rate": 9.666012759997925e-06, + "logits/chosen": -2.861989736557007, + "logits/rejected": -1.6164251565933228, + "logps/chosen": -372.00714111328125, + "logps/rejected": -344.06231689453125, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4325332641601562, + "rewards/margins": 7.124258041381836, + "rewards/rejected": -9.556791305541992, + "step": 6110 + }, + { + "epoch": 0.95, + "learning_rate": 9.665279319466777e-06, + "logits/chosen": -2.8306374549865723, + "logits/rejected": -2.861449956893921, + "logps/chosen": -428.374755859375, + "logps/rejected": -381.33154296875, + "loss": 3.2497, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.695497035980225, + "rewards/margins": -0.9622862339019775, + "rewards/rejected": -3.733210802078247, + "step": 6111 + }, + { + "epoch": 0.95, + "learning_rate": 9.664545878935629e-06, + "logits/chosen": -1.8528870344161987, + "logits/rejected": -3.0407397747039795, + "logps/chosen": -254.07891845703125, + "logps/rejected": -646.6141357421875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3404738903045654, + "rewards/margins": 6.6913838386535645, + "rewards/rejected": -8.03185749053955, + "step": 6112 + }, + { + "epoch": 0.95, + "learning_rate": 9.66381243840448e-06, + "logits/chosen": -2.776139736175537, + "logits/rejected": -1.851237416267395, + "logps/chosen": -191.27581787109375, + "logps/rejected": -178.30113220214844, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9619835019111633, + "rewards/margins": 5.375386714935303, + "rewards/rejected": -6.337369918823242, + "step": 6113 + }, + { + "epoch": 0.95, + "learning_rate": 9.663078997873332e-06, + "logits/chosen": -1.178699254989624, + "logits/rejected": -3.00826358795166, + "logps/chosen": -204.99307250976562, + "logps/rejected": -777.5430908203125, + "loss": 2.3929, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1628212928771973, + "rewards/margins": 0.11077213287353516, + "rewards/rejected": -3.2735931873321533, + "step": 6114 + }, + { + "epoch": 0.95, + "learning_rate": 9.662345557342186e-06, + "logits/chosen": -2.559748888015747, + "logits/rejected": -2.482229471206665, + "logps/chosen": -291.5899963378906, + "logps/rejected": -416.0772705078125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.173543930053711, + "rewards/margins": 7.7985053062438965, + "rewards/rejected": -9.972049713134766, + "step": 6115 + }, + { + "epoch": 0.95, + "learning_rate": 9.661612116811038e-06, + "logits/chosen": -2.660010814666748, + "logits/rejected": -2.947322130203247, + "logps/chosen": -272.7469787597656, + "logps/rejected": -392.337158203125, + "loss": 0.2519, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.230996012687683, + "rewards/margins": 5.342827320098877, + "rewards/rejected": -6.57382345199585, + "step": 6116 + }, + { + "epoch": 0.95, + "learning_rate": 9.66087867627989e-06, + "logits/chosen": -1.4831632375717163, + "logits/rejected": -2.6508262157440186, + "logps/chosen": -164.0732879638672, + "logps/rejected": -412.66558837890625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5746629238128662, + "rewards/margins": 6.738650798797607, + "rewards/rejected": -8.313313484191895, + "step": 6117 + }, + { + "epoch": 0.95, + "learning_rate": 9.660145235748743e-06, + "logits/chosen": -1.9308775663375854, + "logits/rejected": -2.909423828125, + "logps/chosen": -262.06524658203125, + "logps/rejected": -281.0958557128906, + "loss": 2.4057, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9658401012420654, + "rewards/margins": 0.48248767852783203, + "rewards/rejected": -4.448327541351318, + "step": 6118 + }, + { + "epoch": 0.95, + "learning_rate": 9.659411795217595e-06, + "logits/chosen": -2.222609281539917, + "logits/rejected": -2.634122848510742, + "logps/chosen": -254.2593536376953, + "logps/rejected": -300.5938415527344, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.401824951171875, + "rewards/margins": 5.826763153076172, + "rewards/rejected": -8.228588104248047, + "step": 6119 + }, + { + "epoch": 0.95, + "learning_rate": 9.658678354686447e-06, + "logits/chosen": -2.1838932037353516, + "logits/rejected": -2.9763314723968506, + "logps/chosen": -164.91249084472656, + "logps/rejected": -189.80496215820312, + "loss": 1.851, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.105863571166992, + "rewards/margins": 0.6970489025115967, + "rewards/rejected": -4.80291223526001, + "step": 6120 + }, + { + "epoch": 0.95, + "learning_rate": 9.657944914155299e-06, + "logits/chosen": -2.902643918991089, + "logits/rejected": -3.1483519077301025, + "logps/chosen": -32.69693374633789, + "logps/rejected": -143.16116333007812, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9824341535568237, + "rewards/margins": 5.317343711853027, + "rewards/rejected": -6.299777984619141, + "step": 6121 + }, + { + "epoch": 0.95, + "learning_rate": 9.65721147362415e-06, + "logits/chosen": -2.839318037033081, + "logits/rejected": -2.563976764678955, + "logps/chosen": -98.3392333984375, + "logps/rejected": -138.02842712402344, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8411059379577637, + "rewards/margins": 4.807511329650879, + "rewards/rejected": -6.648617744445801, + "step": 6122 + }, + { + "epoch": 0.95, + "learning_rate": 9.656478033093003e-06, + "logits/chosen": -2.089200496673584, + "logits/rejected": -3.1782398223876953, + "logps/chosen": -114.32077026367188, + "logps/rejected": -178.05886840820312, + "loss": 0.0549, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9544185400009155, + "rewards/margins": 2.967947006225586, + "rewards/rejected": -4.922365665435791, + "step": 6123 + }, + { + "epoch": 0.95, + "learning_rate": 9.655744592561856e-06, + "logits/chosen": -1.3200302124023438, + "logits/rejected": -3.136260747909546, + "logps/chosen": -132.03846740722656, + "logps/rejected": -605.2957763671875, + "loss": 0.0231, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.010575771331787, + "rewards/margins": 5.756276607513428, + "rewards/rejected": -7.766852378845215, + "step": 6124 + }, + { + "epoch": 0.95, + "learning_rate": 9.655011152030708e-06, + "logits/chosen": -2.1780214309692383, + "logits/rejected": -3.173851728439331, + "logps/chosen": -329.28912353515625, + "logps/rejected": -433.0614013671875, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8449978828430176, + "rewards/margins": 4.953571319580078, + "rewards/rejected": -6.798569202423096, + "step": 6125 + }, + { + "epoch": 0.95, + "learning_rate": 9.65427771149956e-06, + "logits/chosen": -2.5307774543762207, + "logits/rejected": -2.9207093715667725, + "logps/chosen": -269.47064208984375, + "logps/rejected": -383.6767578125, + "loss": 0.0933, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3015426397323608, + "rewards/margins": 5.305067539215088, + "rewards/rejected": -6.606610298156738, + "step": 6126 + }, + { + "epoch": 0.95, + "learning_rate": 9.653544270968412e-06, + "logits/chosen": -2.8671467304229736, + "logits/rejected": -2.940577507019043, + "logps/chosen": -33.44963073730469, + "logps/rejected": -206.11297607421875, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.024696707725525, + "rewards/margins": 4.968311309814453, + "rewards/rejected": -5.993007659912109, + "step": 6127 + }, + { + "epoch": 0.95, + "learning_rate": 9.652810830437264e-06, + "logits/chosen": -2.172133684158325, + "logits/rejected": -2.8265585899353027, + "logps/chosen": -100.59220886230469, + "logps/rejected": -296.2201232910156, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2552475929260254, + "rewards/margins": 4.419904708862305, + "rewards/rejected": -6.675152778625488, + "step": 6128 + }, + { + "epoch": 0.95, + "learning_rate": 9.652077389906116e-06, + "logits/chosen": -2.759495735168457, + "logits/rejected": -0.9430920481681824, + "logps/chosen": -509.487060546875, + "logps/rejected": -287.43536376953125, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3328415155410767, + "rewards/margins": 4.337262153625488, + "rewards/rejected": -5.670103549957275, + "step": 6129 + }, + { + "epoch": 0.95, + "learning_rate": 9.651343949374967e-06, + "logits/chosen": -2.7135236263275146, + "logits/rejected": -2.7178196907043457, + "logps/chosen": -275.6700134277344, + "logps/rejected": -272.29150390625, + "loss": 3.1551, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.680904388427734, + "rewards/margins": -0.022127389907836914, + "rewards/rejected": -4.658776760101318, + "step": 6130 + }, + { + "epoch": 0.95, + "learning_rate": 9.65061050884382e-06, + "logits/chosen": -2.413961410522461, + "logits/rejected": -2.562493085861206, + "logps/chosen": -133.7902069091797, + "logps/rejected": -150.68251037597656, + "loss": 0.2143, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5146234035491943, + "rewards/margins": 3.53279185295105, + "rewards/rejected": -7.047415256500244, + "step": 6131 + }, + { + "epoch": 0.95, + "learning_rate": 9.649877068312671e-06, + "logits/chosen": -2.3611576557159424, + "logits/rejected": -2.647451400756836, + "logps/chosen": -132.97189331054688, + "logps/rejected": -260.8036804199219, + "loss": 0.0221, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8892810940742493, + "rewards/margins": 6.871980667114258, + "rewards/rejected": -7.761261940002441, + "step": 6132 + }, + { + "epoch": 0.95, + "learning_rate": 9.649143627781525e-06, + "logits/chosen": -2.661032199859619, + "logits/rejected": -2.93638277053833, + "logps/chosen": -94.39933776855469, + "logps/rejected": -256.2851867675781, + "loss": 0.0395, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.06654691696167, + "rewards/margins": 5.827530384063721, + "rewards/rejected": -7.894077301025391, + "step": 6133 + }, + { + "epoch": 0.95, + "learning_rate": 9.648410187250377e-06, + "logits/chosen": -3.0232021808624268, + "logits/rejected": -2.953965663909912, + "logps/chosen": -102.73120880126953, + "logps/rejected": -152.95584106445312, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.164395809173584, + "rewards/margins": 4.008195400238037, + "rewards/rejected": -6.172591209411621, + "step": 6134 + }, + { + "epoch": 0.95, + "learning_rate": 9.647676746719228e-06, + "logits/chosen": -2.1672914028167725, + "logits/rejected": -3.0911355018615723, + "logps/chosen": -158.36959838867188, + "logps/rejected": -354.3966064453125, + "loss": 0.8432, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9076976776123047, + "rewards/margins": 2.600621223449707, + "rewards/rejected": -5.508318901062012, + "step": 6135 + }, + { + "epoch": 0.95, + "learning_rate": 9.64694330618808e-06, + "logits/chosen": -2.4889259338378906, + "logits/rejected": -1.3090664148330688, + "logps/chosen": -194.08514404296875, + "logps/rejected": -82.06368255615234, + "loss": 2.8583, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.6381425857543945, + "rewards/margins": -1.7455124855041504, + "rewards/rejected": -3.892630100250244, + "step": 6136 + }, + { + "epoch": 0.95, + "learning_rate": 9.646209865656932e-06, + "logits/chosen": -2.7747180461883545, + "logits/rejected": -3.1731672286987305, + "logps/chosen": -92.98355102539062, + "logps/rejected": -196.5059051513672, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2474392652511597, + "rewards/margins": 5.704562664031982, + "rewards/rejected": -6.952002048492432, + "step": 6137 + }, + { + "epoch": 0.95, + "learning_rate": 9.645476425125784e-06, + "logits/chosen": -2.9652490615844727, + "logits/rejected": -2.75805401802063, + "logps/chosen": -209.2146453857422, + "logps/rejected": -230.2118682861328, + "loss": 0.3673, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3774538040161133, + "rewards/margins": 4.723083019256592, + "rewards/rejected": -7.100536823272705, + "step": 6138 + }, + { + "epoch": 0.95, + "learning_rate": 9.644742984594636e-06, + "logits/chosen": -2.4956085681915283, + "logits/rejected": -3.1094093322753906, + "logps/chosen": -51.696556091308594, + "logps/rejected": -167.7811737060547, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6428890228271484, + "rewards/margins": 3.304008960723877, + "rewards/rejected": -3.9468979835510254, + "step": 6139 + }, + { + "epoch": 0.95, + "learning_rate": 9.644009544063488e-06, + "logits/chosen": -3.0129354000091553, + "logits/rejected": -3.092501401901245, + "logps/chosen": -258.0963134765625, + "logps/rejected": -311.35430908203125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.641231894493103, + "rewards/margins": 5.9132585525512695, + "rewards/rejected": -7.554490089416504, + "step": 6140 + }, + { + "epoch": 0.96, + "learning_rate": 9.643276103532341e-06, + "logits/chosen": -2.4503257274627686, + "logits/rejected": -3.0519840717315674, + "logps/chosen": -378.9366760253906, + "logps/rejected": -381.17999267578125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.706755518913269, + "rewards/margins": 7.671603202819824, + "rewards/rejected": -8.378358840942383, + "step": 6141 + }, + { + "epoch": 0.96, + "learning_rate": 9.642542663001193e-06, + "logits/chosen": -1.5726665258407593, + "logits/rejected": -2.882213830947876, + "logps/chosen": -142.20455932617188, + "logps/rejected": -358.94586181640625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7753077745437622, + "rewards/margins": 6.841790199279785, + "rewards/rejected": -8.617097854614258, + "step": 6142 + }, + { + "epoch": 0.96, + "learning_rate": 9.641809222470045e-06, + "logits/chosen": -2.9850339889526367, + "logits/rejected": -2.0016651153564453, + "logps/chosen": -467.3163146972656, + "logps/rejected": -280.45367431640625, + "loss": 0.0231, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.150026321411133, + "rewards/margins": 4.792916774749756, + "rewards/rejected": -6.942943572998047, + "step": 6143 + }, + { + "epoch": 0.96, + "learning_rate": 9.641075781938897e-06, + "logits/chosen": -1.459782600402832, + "logits/rejected": -3.0347063541412354, + "logps/chosen": -402.6517333984375, + "logps/rejected": -506.75494384765625, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4642868041992188, + "rewards/margins": 4.141641139984131, + "rewards/rejected": -5.605928421020508, + "step": 6144 + }, + { + "epoch": 0.96, + "learning_rate": 9.640342341407749e-06, + "logits/chosen": -2.966360330581665, + "logits/rejected": -2.683769702911377, + "logps/chosen": -901.1363525390625, + "logps/rejected": -555.0479736328125, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3483595848083496, + "rewards/margins": 4.593394756317139, + "rewards/rejected": -6.941754341125488, + "step": 6145 + }, + { + "epoch": 0.96, + "learning_rate": 9.6396089008766e-06, + "logits/chosen": -1.6450563669204712, + "logits/rejected": -2.9852406978607178, + "logps/chosen": -79.07657623291016, + "logps/rejected": -418.7861328125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1135289669036865, + "rewards/margins": 7.183693885803223, + "rewards/rejected": -8.297223091125488, + "step": 6146 + }, + { + "epoch": 0.96, + "learning_rate": 9.638875460345453e-06, + "logits/chosen": -3.0570552349090576, + "logits/rejected": -1.807125449180603, + "logps/chosen": -241.55841064453125, + "logps/rejected": -70.01933288574219, + "loss": 0.9749, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5474376678466797, + "rewards/margins": -0.3724706172943115, + "rewards/rejected": -3.174967050552368, + "step": 6147 + }, + { + "epoch": 0.96, + "learning_rate": 9.638142019814305e-06, + "logits/chosen": -1.765908122062683, + "logits/rejected": -2.602034330368042, + "logps/chosen": -114.32704162597656, + "logps/rejected": -364.61328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1077442169189453, + "rewards/margins": 7.683778762817383, + "rewards/rejected": -9.791522979736328, + "step": 6148 + }, + { + "epoch": 0.96, + "learning_rate": 9.637408579283156e-06, + "logits/chosen": -1.1576951742172241, + "logits/rejected": -2.983705997467041, + "logps/chosen": -59.24314880371094, + "logps/rejected": -249.65414428710938, + "loss": 0.2146, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9639556407928467, + "rewards/margins": 4.160143852233887, + "rewards/rejected": -7.124099254608154, + "step": 6149 + }, + { + "epoch": 0.96, + "learning_rate": 9.63667513875201e-06, + "logits/chosen": -2.880960702896118, + "logits/rejected": -3.0000510215759277, + "logps/chosen": -399.8735656738281, + "logps/rejected": -505.87939453125, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6674184799194336, + "rewards/margins": 6.175175666809082, + "rewards/rejected": -7.842594146728516, + "step": 6150 + }, + { + "epoch": 0.96, + "learning_rate": 9.635941698220862e-06, + "logits/chosen": -2.089956521987915, + "logits/rejected": -2.5949909687042236, + "logps/chosen": -62.57236862182617, + "logps/rejected": -150.4368133544922, + "loss": 0.0293, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.703345775604248, + "rewards/margins": 4.321949005126953, + "rewards/rejected": -6.025294303894043, + "step": 6151 + }, + { + "epoch": 0.96, + "learning_rate": 9.635208257689716e-06, + "logits/chosen": -1.7982295751571655, + "logits/rejected": -2.9850497245788574, + "logps/chosen": -225.29830932617188, + "logps/rejected": -325.21270751953125, + "loss": 3.0145, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.906053066253662, + "rewards/margins": -1.018548607826233, + "rewards/rejected": -3.8875045776367188, + "step": 6152 + }, + { + "epoch": 0.96, + "learning_rate": 9.634474817158567e-06, + "logits/chosen": -1.9228259325027466, + "logits/rejected": -2.804354429244995, + "logps/chosen": -175.63339233398438, + "logps/rejected": -412.003173828125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1150658130645752, + "rewards/margins": 7.833366394042969, + "rewards/rejected": -8.948431968688965, + "step": 6153 + }, + { + "epoch": 0.96, + "learning_rate": 9.63374137662742e-06, + "logits/chosen": -2.6910879611968994, + "logits/rejected": -2.8306126594543457, + "logps/chosen": -171.60716247558594, + "logps/rejected": -286.6028747558594, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9568802118301392, + "rewards/margins": 5.941608428955078, + "rewards/rejected": -6.898488998413086, + "step": 6154 + }, + { + "epoch": 0.96, + "learning_rate": 9.633007936096271e-06, + "logits/chosen": -2.8988492488861084, + "logits/rejected": -2.3206112384796143, + "logps/chosen": -82.29763793945312, + "logps/rejected": -258.4773254394531, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8752875328063965, + "rewards/margins": 7.552858829498291, + "rewards/rejected": -9.428146362304688, + "step": 6155 + }, + { + "epoch": 0.96, + "learning_rate": 9.632274495565123e-06, + "logits/chosen": -1.9065686464309692, + "logits/rejected": -3.0303561687469482, + "logps/chosen": -191.7012481689453, + "logps/rejected": -521.8795166015625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2271339893341064, + "rewards/margins": 9.11558723449707, + "rewards/rejected": -11.342721939086914, + "step": 6156 + }, + { + "epoch": 0.96, + "learning_rate": 9.631541055033975e-06, + "logits/chosen": -2.3856234550476074, + "logits/rejected": -2.7675023078918457, + "logps/chosen": -254.63665771484375, + "logps/rejected": -456.51202392578125, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9215713739395142, + "rewards/margins": 6.657922267913818, + "rewards/rejected": -8.579493522644043, + "step": 6157 + }, + { + "epoch": 0.96, + "learning_rate": 9.630807614502827e-06, + "logits/chosen": -2.026320219039917, + "logits/rejected": -3.023017168045044, + "logps/chosen": -367.1993408203125, + "logps/rejected": -615.7793579101562, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4289391040802, + "rewards/margins": 5.235454559326172, + "rewards/rejected": -7.664393424987793, + "step": 6158 + }, + { + "epoch": 0.96, + "learning_rate": 9.63007417397168e-06, + "logits/chosen": -3.174482583999634, + "logits/rejected": -2.7063608169555664, + "logps/chosen": -197.88951110839844, + "logps/rejected": -52.421852111816406, + "loss": 1.138, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.60103702545166, + "rewards/margins": -0.6855498552322388, + "rewards/rejected": -2.915487051010132, + "step": 6159 + }, + { + "epoch": 0.96, + "learning_rate": 9.629340733440532e-06, + "logits/chosen": -2.160142183303833, + "logits/rejected": -2.944636583328247, + "logps/chosen": -108.46633911132812, + "logps/rejected": -448.19903564453125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9552603960037231, + "rewards/margins": 7.957674503326416, + "rewards/rejected": -8.912935256958008, + "step": 6160 + }, + { + "epoch": 0.96, + "learning_rate": 9.628607292909384e-06, + "logits/chosen": -1.2283176183700562, + "logits/rejected": -2.54023814201355, + "logps/chosen": -168.4925537109375, + "logps/rejected": -604.0833129882812, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.023493528366089, + "rewards/margins": 8.779997825622559, + "rewards/rejected": -10.803491592407227, + "step": 6161 + }, + { + "epoch": 0.96, + "learning_rate": 9.627873852378236e-06, + "logits/chosen": -2.5467143058776855, + "logits/rejected": -2.931148052215576, + "logps/chosen": -112.15274047851562, + "logps/rejected": -289.43865966796875, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.065828800201416, + "rewards/margins": 5.813392639160156, + "rewards/rejected": -7.8792219161987305, + "step": 6162 + }, + { + "epoch": 0.96, + "learning_rate": 9.627140411847088e-06, + "logits/chosen": -2.6344711780548096, + "logits/rejected": -2.968629837036133, + "logps/chosen": -81.4957046508789, + "logps/rejected": -117.4971694946289, + "loss": 0.3886, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.773380994796753, + "rewards/margins": 2.939901351928711, + "rewards/rejected": -6.713282585144043, + "step": 6163 + }, + { + "epoch": 0.96, + "learning_rate": 9.62640697131594e-06, + "logits/chosen": -3.069178819656372, + "logits/rejected": -1.1108397245407104, + "logps/chosen": -407.68548583984375, + "logps/rejected": -197.52386474609375, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6323413848876953, + "rewards/margins": 4.272860527038574, + "rewards/rejected": -5.9052019119262695, + "step": 6164 + }, + { + "epoch": 0.96, + "learning_rate": 9.625673530784792e-06, + "logits/chosen": -1.0930413007736206, + "logits/rejected": -2.45147442817688, + "logps/chosen": -243.7183837890625, + "logps/rejected": -541.9910278320312, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.758148431777954, + "rewards/margins": 7.832700729370117, + "rewards/rejected": -10.590848922729492, + "step": 6165 + }, + { + "epoch": 0.96, + "learning_rate": 9.624940090253643e-06, + "logits/chosen": -2.518780469894409, + "logits/rejected": -2.841909885406494, + "logps/chosen": -141.36399841308594, + "logps/rejected": -213.8677520751953, + "loss": 0.2595, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1411869525909424, + "rewards/margins": 2.505190849304199, + "rewards/rejected": -4.646378040313721, + "step": 6166 + }, + { + "epoch": 0.96, + "learning_rate": 9.624206649722495e-06, + "logits/chosen": -2.160867214202881, + "logits/rejected": -2.7815520763397217, + "logps/chosen": -162.35293579101562, + "logps/rejected": -358.5960693359375, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5194623470306396, + "rewards/margins": 8.237598419189453, + "rewards/rejected": -9.757061004638672, + "step": 6167 + }, + { + "epoch": 0.96, + "learning_rate": 9.623473209191349e-06, + "logits/chosen": -2.8249950408935547, + "logits/rejected": -3.1192564964294434, + "logps/chosen": -58.39520263671875, + "logps/rejected": -162.682861328125, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5823169946670532, + "rewards/margins": 5.287440299987793, + "rewards/rejected": -6.869757175445557, + "step": 6168 + }, + { + "epoch": 0.96, + "learning_rate": 9.6227397686602e-06, + "logits/chosen": -2.4275827407836914, + "logits/rejected": -2.8174526691436768, + "logps/chosen": -290.961669921875, + "logps/rejected": -372.3614807128906, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2354576587677, + "rewards/margins": 6.047023773193359, + "rewards/rejected": -8.28248119354248, + "step": 6169 + }, + { + "epoch": 0.96, + "learning_rate": 9.622006328129053e-06, + "logits/chosen": -1.4116389751434326, + "logits/rejected": -3.0150160789489746, + "logps/chosen": -67.8001937866211, + "logps/rejected": -428.25323486328125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6599607467651367, + "rewards/margins": 7.437216758728027, + "rewards/rejected": -9.097177505493164, + "step": 6170 + }, + { + "epoch": 0.96, + "learning_rate": 9.621272887597905e-06, + "logits/chosen": -2.8717093467712402, + "logits/rejected": -3.002073049545288, + "logps/chosen": -144.2913055419922, + "logps/rejected": -240.64846801757812, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7801119089126587, + "rewards/margins": 6.381931304931641, + "rewards/rejected": -8.162042617797852, + "step": 6171 + }, + { + "epoch": 0.96, + "learning_rate": 9.620539447066756e-06, + "logits/chosen": -2.853485584259033, + "logits/rejected": -2.5559604167938232, + "logps/chosen": -86.98506927490234, + "logps/rejected": -415.193603515625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8205809593200684, + "rewards/margins": 8.446507453918457, + "rewards/rejected": -10.267087936401367, + "step": 6172 + }, + { + "epoch": 0.96, + "learning_rate": 9.619806006535608e-06, + "logits/chosen": -2.597045660018921, + "logits/rejected": -3.072155475616455, + "logps/chosen": -42.628047943115234, + "logps/rejected": -196.18014526367188, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.216578960418701, + "rewards/margins": 5.077913284301758, + "rewards/rejected": -7.294492244720459, + "step": 6173 + }, + { + "epoch": 0.96, + "learning_rate": 9.61907256600446e-06, + "logits/chosen": -2.7779603004455566, + "logits/rejected": -1.5651954412460327, + "logps/chosen": -235.69198608398438, + "logps/rejected": -238.02072143554688, + "loss": 3.3781, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.969381809234619, + "rewards/margins": -0.2797117233276367, + "rewards/rejected": -4.689670085906982, + "step": 6174 + }, + { + "epoch": 0.96, + "learning_rate": 9.618339125473312e-06, + "logits/chosen": -1.3526757955551147, + "logits/rejected": -2.8770227432250977, + "logps/chosen": -73.86769104003906, + "logps/rejected": -401.29156494140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9993395805358887, + "rewards/margins": 8.733747482299805, + "rewards/rejected": -10.733087539672852, + "step": 6175 + }, + { + "epoch": 0.96, + "learning_rate": 9.617605684942164e-06, + "logits/chosen": -0.680489182472229, + "logits/rejected": -2.638674736022949, + "logps/chosen": -53.70968246459961, + "logps/rejected": -371.3544006347656, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.983572006225586, + "rewards/margins": 7.29202127456665, + "rewards/rejected": -10.275592803955078, + "step": 6176 + }, + { + "epoch": 0.96, + "learning_rate": 9.616872244411018e-06, + "logits/chosen": -3.1062252521514893, + "logits/rejected": -0.7236684560775757, + "logps/chosen": -935.5992431640625, + "logps/rejected": -274.1778869628906, + "loss": 1.3792, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5247437953948975, + "rewards/margins": 2.0965919494628906, + "rewards/rejected": -5.621335506439209, + "step": 6177 + }, + { + "epoch": 0.96, + "learning_rate": 9.61613880387987e-06, + "logits/chosen": -1.4071317911148071, + "logits/rejected": -2.2829174995422363, + "logps/chosen": -424.9193115234375, + "logps/rejected": -620.2191162109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1224122047424316, + "rewards/margins": 8.950711250305176, + "rewards/rejected": -11.073123931884766, + "step": 6178 + }, + { + "epoch": 0.96, + "learning_rate": 9.615405363348721e-06, + "logits/chosen": -2.099299907684326, + "logits/rejected": -3.10774827003479, + "logps/chosen": -111.13162994384766, + "logps/rejected": -328.80511474609375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8161957859992981, + "rewards/margins": 7.393991470336914, + "rewards/rejected": -8.210186958312988, + "step": 6179 + }, + { + "epoch": 0.96, + "learning_rate": 9.614671922817573e-06, + "logits/chosen": -1.3512598276138306, + "logits/rejected": -2.4251363277435303, + "logps/chosen": -163.37777709960938, + "logps/rejected": -417.5376281738281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8138267993927002, + "rewards/margins": 12.221477508544922, + "rewards/rejected": -14.035304069519043, + "step": 6180 + }, + { + "epoch": 0.96, + "learning_rate": 9.613938482286425e-06, + "logits/chosen": -3.029634714126587, + "logits/rejected": -2.47450590133667, + "logps/chosen": -423.70001220703125, + "logps/rejected": -412.0711669921875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3709030151367188, + "rewards/margins": 8.452967643737793, + "rewards/rejected": -9.823871612548828, + "step": 6181 + }, + { + "epoch": 0.96, + "learning_rate": 9.613205041755277e-06, + "logits/chosen": -1.840638279914856, + "logits/rejected": -3.066446304321289, + "logps/chosen": -156.13238525390625, + "logps/rejected": -314.9204406738281, + "loss": 2.5432, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.097920894622803, + "rewards/margins": 1.6476078033447266, + "rewards/rejected": -5.7455291748046875, + "step": 6182 + }, + { + "epoch": 0.96, + "learning_rate": 9.612471601224129e-06, + "logits/chosen": -2.9360532760620117, + "logits/rejected": -2.191145658493042, + "logps/chosen": -647.657958984375, + "logps/rejected": -323.00048828125, + "loss": 2.0674, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.358285665512085, + "rewards/margins": 0.3810269832611084, + "rewards/rejected": -3.7393126487731934, + "step": 6183 + }, + { + "epoch": 0.96, + "learning_rate": 9.611738160692982e-06, + "logits/chosen": -3.0681166648864746, + "logits/rejected": -2.726395845413208, + "logps/chosen": -544.1148071289062, + "logps/rejected": -526.8464965820312, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7609710693359375, + "rewards/margins": 5.854403972625732, + "rewards/rejected": -7.615374565124512, + "step": 6184 + }, + { + "epoch": 0.96, + "learning_rate": 9.611004720161834e-06, + "logits/chosen": -2.307145595550537, + "logits/rejected": -2.6271703243255615, + "logps/chosen": -559.4761962890625, + "logps/rejected": -536.6285400390625, + "loss": 4.6132, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.244224548339844, + "rewards/margins": 0.4834442138671875, + "rewards/rejected": -5.727668762207031, + "step": 6185 + }, + { + "epoch": 0.96, + "learning_rate": 9.610271279630688e-06, + "logits/chosen": -2.3729300498962402, + "logits/rejected": -2.5809438228607178, + "logps/chosen": -115.40176391601562, + "logps/rejected": -259.3831481933594, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7412627935409546, + "rewards/margins": 8.38082504272461, + "rewards/rejected": -10.122088432312012, + "step": 6186 + }, + { + "epoch": 0.96, + "learning_rate": 9.60953783909954e-06, + "logits/chosen": -3.1364381313323975, + "logits/rejected": -3.1102428436279297, + "logps/chosen": -271.9788818359375, + "logps/rejected": -457.0851135253906, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8692383766174316, + "rewards/margins": 8.876910209655762, + "rewards/rejected": -10.746149063110352, + "step": 6187 + }, + { + "epoch": 0.96, + "learning_rate": 9.608804398568392e-06, + "logits/chosen": -2.6109251976013184, + "logits/rejected": -2.9237051010131836, + "logps/chosen": -176.24974060058594, + "logps/rejected": -114.09429931640625, + "loss": 1.3897, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.848676919937134, + "rewards/margins": 1.7370153665542603, + "rewards/rejected": -5.585692405700684, + "step": 6188 + }, + { + "epoch": 0.96, + "learning_rate": 9.608070958037243e-06, + "logits/chosen": -2.408586025238037, + "logits/rejected": -2.8737692832946777, + "logps/chosen": -150.0077362060547, + "logps/rejected": -247.70742797851562, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5988101363182068, + "rewards/margins": 6.863828182220459, + "rewards/rejected": -7.462637901306152, + "step": 6189 + }, + { + "epoch": 0.96, + "learning_rate": 9.607337517506095e-06, + "logits/chosen": -3.0567028522491455, + "logits/rejected": -1.9272054433822632, + "logps/chosen": -480.3094787597656, + "logps/rejected": -259.6429138183594, + "loss": 0.0931, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7419651746749878, + "rewards/margins": 2.748081684112549, + "rewards/rejected": -4.490046501159668, + "step": 6190 + }, + { + "epoch": 0.96, + "learning_rate": 9.606604076974947e-06, + "logits/chosen": -1.5798592567443848, + "logits/rejected": -3.0284714698791504, + "logps/chosen": -486.7316589355469, + "logps/rejected": -566.2915649414062, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8955414295196533, + "rewards/margins": 4.593530654907227, + "rewards/rejected": -5.489071846008301, + "step": 6191 + }, + { + "epoch": 0.96, + "learning_rate": 9.605870636443799e-06, + "logits/chosen": -2.977752923965454, + "logits/rejected": -3.158566474914551, + "logps/chosen": -101.20809173583984, + "logps/rejected": -342.4659118652344, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3310546875, + "rewards/margins": 8.483943939208984, + "rewards/rejected": -11.814998626708984, + "step": 6192 + }, + { + "epoch": 0.96, + "learning_rate": 9.605137195912651e-06, + "logits/chosen": -2.9410719871520996, + "logits/rejected": -2.7012922763824463, + "logps/chosen": -300.408447265625, + "logps/rejected": -102.20783996582031, + "loss": 5.713, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.23712158203125, + "rewards/margins": -5.699133396148682, + "rewards/rejected": -2.5379884243011475, + "step": 6193 + }, + { + "epoch": 0.96, + "learning_rate": 9.604403755381503e-06, + "logits/chosen": -2.0866336822509766, + "logits/rejected": -3.220838785171509, + "logps/chosen": -324.4602355957031, + "logps/rejected": -540.6583862304688, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8568389415740967, + "rewards/margins": 6.0318284034729, + "rewards/rejected": -7.888667106628418, + "step": 6194 + }, + { + "epoch": 0.96, + "learning_rate": 9.603670314850356e-06, + "logits/chosen": -3.021531820297241, + "logits/rejected": -1.9596843719482422, + "logps/chosen": -201.24942016601562, + "logps/rejected": -179.15560913085938, + "loss": 4.7651, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.634444236755371, + "rewards/margins": -1.5629205703735352, + "rewards/rejected": -5.071523666381836, + "step": 6195 + }, + { + "epoch": 0.96, + "learning_rate": 9.602936874319208e-06, + "logits/chosen": -1.2405874729156494, + "logits/rejected": -2.8286495208740234, + "logps/chosen": -33.861656188964844, + "logps/rejected": -270.63189697265625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8952172994613647, + "rewards/margins": 6.6168012619018555, + "rewards/rejected": -8.512019157409668, + "step": 6196 + }, + { + "epoch": 0.96, + "learning_rate": 9.60220343378806e-06, + "logits/chosen": -2.310844659805298, + "logits/rejected": -2.621088981628418, + "logps/chosen": -254.37364196777344, + "logps/rejected": -536.3712158203125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.466670274734497, + "rewards/margins": 8.364614486694336, + "rewards/rejected": -10.83128547668457, + "step": 6197 + }, + { + "epoch": 0.96, + "learning_rate": 9.601469993256912e-06, + "logits/chosen": -2.2038683891296387, + "logits/rejected": -3.0904135704040527, + "logps/chosen": -31.633285522460938, + "logps/rejected": -263.5126953125, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9254599809646606, + "rewards/margins": 5.000774383544922, + "rewards/rejected": -6.926234245300293, + "step": 6198 + }, + { + "epoch": 0.96, + "learning_rate": 9.600736552725764e-06, + "logits/chosen": -2.005143642425537, + "logits/rejected": -3.0975465774536133, + "logps/chosen": -69.55419921875, + "logps/rejected": -209.19805908203125, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.177823543548584, + "rewards/margins": 5.337826251983643, + "rewards/rejected": -7.515649795532227, + "step": 6199 + }, + { + "epoch": 0.96, + "learning_rate": 9.600003112194616e-06, + "logits/chosen": -1.4120339155197144, + "logits/rejected": -2.954373836517334, + "logps/chosen": -183.23255920410156, + "logps/rejected": -524.098876953125, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8099701404571533, + "rewards/margins": 5.92510986328125, + "rewards/rejected": -7.735079765319824, + "step": 6200 + }, + { + "epoch": 0.96, + "learning_rate": 9.599269671663468e-06, + "logits/chosen": -1.9333276748657227, + "logits/rejected": -2.9669106006622314, + "logps/chosen": -212.58782958984375, + "logps/rejected": -185.70352172851562, + "loss": 0.0277, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5993776321411133, + "rewards/margins": 3.8718788623809814, + "rewards/rejected": -7.471256256103516, + "step": 6201 + }, + { + "epoch": 0.96, + "learning_rate": 9.59853623113232e-06, + "logits/chosen": -2.5007026195526123, + "logits/rejected": -2.8485970497131348, + "logps/chosen": -94.27327728271484, + "logps/rejected": -235.91204833984375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.988511085510254, + "rewards/margins": 6.199846267700195, + "rewards/rejected": -8.18835735321045, + "step": 6202 + }, + { + "epoch": 0.96, + "learning_rate": 9.597802790601171e-06, + "logits/chosen": -3.1922800540924072, + "logits/rejected": -2.6219379901885986, + "logps/chosen": -297.9380187988281, + "logps/rejected": -173.1933135986328, + "loss": 2.0923, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9814071655273438, + "rewards/margins": 0.5724968910217285, + "rewards/rejected": -4.553904056549072, + "step": 6203 + }, + { + "epoch": 0.96, + "learning_rate": 9.597069350070025e-06, + "logits/chosen": -1.4446583986282349, + "logits/rejected": -2.911907911300659, + "logps/chosen": -128.78176879882812, + "logps/rejected": -304.954833984375, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.101879119873047, + "rewards/margins": 5.239908695220947, + "rewards/rejected": -8.341787338256836, + "step": 6204 + }, + { + "epoch": 0.97, + "learning_rate": 9.596335909538877e-06, + "logits/chosen": -2.9640920162200928, + "logits/rejected": -2.1751620769500732, + "logps/chosen": -470.5627746582031, + "logps/rejected": -321.46246337890625, + "loss": 2.8515, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.739144802093506, + "rewards/margins": -1.944403052330017, + "rewards/rejected": -2.7947418689727783, + "step": 6205 + }, + { + "epoch": 0.97, + "learning_rate": 9.595602469007729e-06, + "logits/chosen": -1.0767121315002441, + "logits/rejected": -2.8763234615325928, + "logps/chosen": -57.168212890625, + "logps/rejected": -228.7765655517578, + "loss": 0.6156, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.149774551391602, + "rewards/margins": 2.0184850692749023, + "rewards/rejected": -6.168259620666504, + "step": 6206 + }, + { + "epoch": 0.97, + "learning_rate": 9.59486902847658e-06, + "logits/chosen": -2.4481289386749268, + "logits/rejected": -3.0970988273620605, + "logps/chosen": -186.212890625, + "logps/rejected": -396.76959228515625, + "loss": 0.7693, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.969268798828125, + "rewards/margins": 3.045013427734375, + "rewards/rejected": -6.0142822265625, + "step": 6207 + }, + { + "epoch": 0.97, + "learning_rate": 9.594135587945433e-06, + "logits/chosen": -1.2472724914550781, + "logits/rejected": -2.6379969120025635, + "logps/chosen": -212.03378295898438, + "logps/rejected": -320.2608642578125, + "loss": 2.3776, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.3704962730407715, + "rewards/margins": 3.195134162902832, + "rewards/rejected": -7.5656304359436035, + "step": 6208 + }, + { + "epoch": 0.97, + "learning_rate": 9.593402147414284e-06, + "logits/chosen": -2.4362056255340576, + "logits/rejected": -3.277036190032959, + "logps/chosen": -148.09442138671875, + "logps/rejected": -316.335205078125, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.495244026184082, + "rewards/margins": 4.490956783294678, + "rewards/rejected": -6.986201286315918, + "step": 6209 + }, + { + "epoch": 0.97, + "learning_rate": 9.592668706883136e-06, + "logits/chosen": -1.2063785791397095, + "logits/rejected": -2.8863308429718018, + "logps/chosen": -83.90969848632812, + "logps/rejected": -270.09954833984375, + "loss": 1.7211, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8041982650756836, + "rewards/margins": 2.56813645362854, + "rewards/rejected": -5.372334957122803, + "step": 6210 + }, + { + "epoch": 0.97, + "learning_rate": 9.591935266351988e-06, + "logits/chosen": -3.0421621799468994, + "logits/rejected": -2.874563455581665, + "logps/chosen": -883.5501708984375, + "logps/rejected": -674.4193115234375, + "loss": 3.4988, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.858940124511719, + "rewards/margins": -1.8272072076797485, + "rewards/rejected": -4.03173303604126, + "step": 6211 + }, + { + "epoch": 0.97, + "learning_rate": 9.59120182582084e-06, + "logits/chosen": -2.451167106628418, + "logits/rejected": -2.881226062774658, + "logps/chosen": -247.09237670898438, + "logps/rejected": -238.1207275390625, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0759117603302002, + "rewards/margins": 5.443729400634766, + "rewards/rejected": -6.519640922546387, + "step": 6212 + }, + { + "epoch": 0.97, + "learning_rate": 9.590468385289694e-06, + "logits/chosen": -2.469194173812866, + "logits/rejected": -2.877345323562622, + "logps/chosen": -239.52574157714844, + "logps/rejected": -358.45904541015625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8055431842803955, + "rewards/margins": 6.749492645263672, + "rewards/rejected": -8.555035591125488, + "step": 6213 + }, + { + "epoch": 0.97, + "learning_rate": 9.589734944758546e-06, + "logits/chosen": -2.0611515045166016, + "logits/rejected": -3.066835403442383, + "logps/chosen": -228.89697265625, + "logps/rejected": -393.7294616699219, + "loss": 1.7599, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.294940233230591, + "rewards/margins": 4.708345413208008, + "rewards/rejected": -8.00328540802002, + "step": 6214 + }, + { + "epoch": 0.97, + "learning_rate": 9.589001504227397e-06, + "logits/chosen": -2.5085535049438477, + "logits/rejected": -2.8077800273895264, + "logps/chosen": -230.9890594482422, + "logps/rejected": -358.92401123046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8727523684501648, + "rewards/margins": 7.714871883392334, + "rewards/rejected": -8.587623596191406, + "step": 6215 + }, + { + "epoch": 0.97, + "learning_rate": 9.58826806369625e-06, + "logits/chosen": -3.0122129917144775, + "logits/rejected": -2.559055805206299, + "logps/chosen": -463.28228759765625, + "logps/rejected": -349.90350341796875, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2399321794509888, + "rewards/margins": 5.398462295532227, + "rewards/rejected": -6.638394355773926, + "step": 6216 + }, + { + "epoch": 0.97, + "learning_rate": 9.587534623165101e-06, + "logits/chosen": -2.7569305896759033, + "logits/rejected": -2.999453067779541, + "logps/chosen": -186.7577667236328, + "logps/rejected": -242.98593139648438, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.904256820678711, + "rewards/margins": 4.93548583984375, + "rewards/rejected": -6.839742660522461, + "step": 6217 + }, + { + "epoch": 0.97, + "learning_rate": 9.586801182633955e-06, + "logits/chosen": -3.0172622203826904, + "logits/rejected": -2.1613385677337646, + "logps/chosen": -304.99456787109375, + "logps/rejected": -146.15133666992188, + "loss": 3.0128, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5892837047576904, + "rewards/margins": -0.0885157585144043, + "rewards/rejected": -3.500767946243286, + "step": 6218 + }, + { + "epoch": 0.97, + "learning_rate": 9.586067742102807e-06, + "logits/chosen": -2.7305026054382324, + "logits/rejected": -1.752945899963379, + "logps/chosen": -147.68202209472656, + "logps/rejected": -63.05174255371094, + "loss": 2.2067, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.117233753204346, + "rewards/margins": -1.0535531044006348, + "rewards/rejected": -5.063680648803711, + "step": 6219 + }, + { + "epoch": 0.97, + "learning_rate": 9.585334301571658e-06, + "logits/chosen": -3.0630035400390625, + "logits/rejected": -2.8749918937683105, + "logps/chosen": -106.558349609375, + "logps/rejected": -107.81301879882812, + "loss": 1.6698, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8888144493103027, + "rewards/margins": 0.9770902395248413, + "rewards/rejected": -3.8659048080444336, + "step": 6220 + }, + { + "epoch": 0.97, + "learning_rate": 9.58460086104051e-06, + "logits/chosen": -3.13567852973938, + "logits/rejected": -2.6352505683898926, + "logps/chosen": -360.2340087890625, + "logps/rejected": -468.2052001953125, + "loss": 2.9335, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.666318893432617, + "rewards/margins": -1.5196280479431152, + "rewards/rejected": -3.14669132232666, + "step": 6221 + }, + { + "epoch": 0.97, + "learning_rate": 9.583867420509364e-06, + "logits/chosen": -2.9911205768585205, + "logits/rejected": -2.1514694690704346, + "logps/chosen": -452.1308898925781, + "logps/rejected": -509.123291015625, + "loss": 3.1744, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.973310947418213, + "rewards/margins": -0.0748291015625, + "rewards/rejected": -3.898481845855713, + "step": 6222 + }, + { + "epoch": 0.97, + "learning_rate": 9.583133979978216e-06, + "logits/chosen": -2.5030112266540527, + "logits/rejected": -2.250314474105835, + "logps/chosen": -83.39028930664062, + "logps/rejected": -191.19540405273438, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6625397205352783, + "rewards/margins": 5.750585556030273, + "rewards/rejected": -7.413125038146973, + "step": 6223 + }, + { + "epoch": 0.97, + "learning_rate": 9.582400539447068e-06, + "logits/chosen": -1.4201126098632812, + "logits/rejected": -2.8712165355682373, + "logps/chosen": -115.26785278320312, + "logps/rejected": -586.6712646484375, + "loss": 2.6885, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.252816200256348, + "rewards/margins": 4.372466087341309, + "rewards/rejected": -8.625282287597656, + "step": 6224 + }, + { + "epoch": 0.97, + "learning_rate": 9.58166709891592e-06, + "logits/chosen": -3.0165727138519287, + "logits/rejected": -2.249032974243164, + "logps/chosen": -390.5758056640625, + "logps/rejected": -242.04026794433594, + "loss": 2.4911, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.759574890136719, + "rewards/margins": -2.3267245292663574, + "rewards/rejected": -3.4328503608703613, + "step": 6225 + }, + { + "epoch": 0.97, + "learning_rate": 9.580933658384771e-06, + "logits/chosen": -2.381549119949341, + "logits/rejected": -2.6285979747772217, + "logps/chosen": -115.26429748535156, + "logps/rejected": -297.8584899902344, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7724336385726929, + "rewards/margins": 7.368346214294434, + "rewards/rejected": -9.140779495239258, + "step": 6226 + }, + { + "epoch": 0.97, + "learning_rate": 9.580200217853623e-06, + "logits/chosen": -2.5812971591949463, + "logits/rejected": -2.0492115020751953, + "logps/chosen": -113.19629669189453, + "logps/rejected": -149.28187561035156, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0184555053710938, + "rewards/margins": 6.748494625091553, + "rewards/rejected": -7.7669501304626465, + "step": 6227 + }, + { + "epoch": 0.97, + "learning_rate": 9.579466777322475e-06, + "logits/chosen": -2.6767075061798096, + "logits/rejected": -2.8440072536468506, + "logps/chosen": -201.3104248046875, + "logps/rejected": -243.76803588867188, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8376351594924927, + "rewards/margins": 5.521126747131348, + "rewards/rejected": -7.358761787414551, + "step": 6228 + }, + { + "epoch": 0.97, + "learning_rate": 9.578733336791327e-06, + "logits/chosen": -2.574875831604004, + "logits/rejected": -2.9854772090911865, + "logps/chosen": -148.05682373046875, + "logps/rejected": -398.2689208984375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7762370109558105, + "rewards/margins": 6.637275218963623, + "rewards/rejected": -9.413512229919434, + "step": 6229 + }, + { + "epoch": 0.97, + "learning_rate": 9.577999896260179e-06, + "logits/chosen": -2.9967761039733887, + "logits/rejected": -1.9456219673156738, + "logps/chosen": -318.8221130371094, + "logps/rejected": -252.83413696289062, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46593210101127625, + "rewards/margins": 6.131639003753662, + "rewards/rejected": -6.597570896148682, + "step": 6230 + }, + { + "epoch": 0.97, + "learning_rate": 9.577266455729033e-06, + "logits/chosen": -3.072244644165039, + "logits/rejected": -2.620286226272583, + "logps/chosen": -590.9949951171875, + "logps/rejected": -375.0239562988281, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5948638916015625, + "rewards/margins": 5.471053123474121, + "rewards/rejected": -7.065917015075684, + "step": 6231 + }, + { + "epoch": 0.97, + "learning_rate": 9.576533015197884e-06, + "logits/chosen": -2.701521158218384, + "logits/rejected": -3.128448724746704, + "logps/chosen": -278.6681823730469, + "logps/rejected": -282.7995910644531, + "loss": 1.6919, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.805100679397583, + "rewards/margins": 0.24322283267974854, + "rewards/rejected": -2.048323392868042, + "step": 6232 + }, + { + "epoch": 0.97, + "learning_rate": 9.575799574666736e-06, + "logits/chosen": -2.5179059505462646, + "logits/rejected": -2.7197649478912354, + "logps/chosen": -326.06402587890625, + "logps/rejected": -376.70343017578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4556846618652344, + "rewards/margins": 10.414397239685059, + "rewards/rejected": -11.870081901550293, + "step": 6233 + }, + { + "epoch": 0.97, + "learning_rate": 9.575066134135588e-06, + "logits/chosen": -3.112715244293213, + "logits/rejected": -2.0408077239990234, + "logps/chosen": -249.70501708984375, + "logps/rejected": -160.808837890625, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9251560568809509, + "rewards/margins": 4.956962585449219, + "rewards/rejected": -5.8821187019348145, + "step": 6234 + }, + { + "epoch": 0.97, + "learning_rate": 9.57433269360444e-06, + "logits/chosen": -2.6422171592712402, + "logits/rejected": -2.2056686878204346, + "logps/chosen": -249.97789001464844, + "logps/rejected": -364.3466796875, + "loss": 2.8693, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.45472526550293, + "rewards/margins": 0.40505266189575195, + "rewards/rejected": -5.859777927398682, + "step": 6235 + }, + { + "epoch": 0.97, + "learning_rate": 9.573599253073292e-06, + "logits/chosen": -2.431725025177002, + "logits/rejected": -3.028228521347046, + "logps/chosen": -470.2261962890625, + "logps/rejected": -448.39642333984375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8801651000976562, + "rewards/margins": 6.371344566345215, + "rewards/rejected": -7.251509666442871, + "step": 6236 + }, + { + "epoch": 0.97, + "learning_rate": 9.572865812542144e-06, + "logits/chosen": -2.545206069946289, + "logits/rejected": -3.0000598430633545, + "logps/chosen": -60.257774353027344, + "logps/rejected": -135.84268188476562, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5084699392318726, + "rewards/margins": 4.651023864746094, + "rewards/rejected": -6.159493923187256, + "step": 6237 + }, + { + "epoch": 0.97, + "learning_rate": 9.572132372010996e-06, + "logits/chosen": -1.7070691585540771, + "logits/rejected": -2.76069974899292, + "logps/chosen": -71.88526153564453, + "logps/rejected": -346.6875915527344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3577346801757812, + "rewards/margins": 10.702412605285645, + "rewards/rejected": -12.060147285461426, + "step": 6238 + }, + { + "epoch": 0.97, + "learning_rate": 9.57139893147985e-06, + "logits/chosen": -2.3439831733703613, + "logits/rejected": -2.9042744636535645, + "logps/chosen": -380.6119384765625, + "logps/rejected": -355.8758544921875, + "loss": 0.0329, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.568501353263855, + "rewards/margins": 3.9647717475891113, + "rewards/rejected": -5.533272743225098, + "step": 6239 + }, + { + "epoch": 0.97, + "learning_rate": 9.570665490948701e-06, + "logits/chosen": -2.4379684925079346, + "logits/rejected": -3.102536916732788, + "logps/chosen": -271.5477294921875, + "logps/rejected": -437.5965576171875, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.614018201828003, + "rewards/margins": 6.728791236877441, + "rewards/rejected": -9.342809677124023, + "step": 6240 + }, + { + "epoch": 0.97, + "learning_rate": 9.569932050417553e-06, + "logits/chosen": -2.967756748199463, + "logits/rejected": -2.5931129455566406, + "logps/chosen": -168.8428192138672, + "logps/rejected": -130.03244018554688, + "loss": 0.874, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.689345359802246, + "rewards/margins": 0.0606083869934082, + "rewards/rejected": -4.749953746795654, + "step": 6241 + }, + { + "epoch": 0.97, + "learning_rate": 9.569198609886405e-06, + "logits/chosen": -2.685328960418701, + "logits/rejected": -3.1640238761901855, + "logps/chosen": -140.02633666992188, + "logps/rejected": -212.32577514648438, + "loss": 0.0342, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2024284601211548, + "rewards/margins": 3.907898426055908, + "rewards/rejected": -5.110326766967773, + "step": 6242 + }, + { + "epoch": 0.97, + "learning_rate": 9.568465169355257e-06, + "logits/chosen": -2.831601142883301, + "logits/rejected": -2.558685064315796, + "logps/chosen": -295.3671875, + "logps/rejected": -285.09979248046875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4016708135604858, + "rewards/margins": 6.832222938537598, + "rewards/rejected": -8.233893394470215, + "step": 6243 + }, + { + "epoch": 0.97, + "learning_rate": 9.567731728824109e-06, + "logits/chosen": -1.8943599462509155, + "logits/rejected": -2.1656124591827393, + "logps/chosen": -323.0645751953125, + "logps/rejected": -316.223876953125, + "loss": 0.0428, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.666738510131836, + "rewards/margins": 3.552293062210083, + "rewards/rejected": -5.21903133392334, + "step": 6244 + }, + { + "epoch": 0.97, + "learning_rate": 9.56699828829296e-06, + "logits/chosen": -2.8424623012542725, + "logits/rejected": -3.1136863231658936, + "logps/chosen": -167.62879943847656, + "logps/rejected": -235.71533203125, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9168815612792969, + "rewards/margins": 6.6976470947265625, + "rewards/rejected": -7.614528179168701, + "step": 6245 + }, + { + "epoch": 0.97, + "learning_rate": 9.566264847761812e-06, + "logits/chosen": -3.006808280944824, + "logits/rejected": -3.102198839187622, + "logps/chosen": -58.047245025634766, + "logps/rejected": -176.231689453125, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.214990258216858, + "rewards/margins": 6.307174205780029, + "rewards/rejected": -7.522164344787598, + "step": 6246 + }, + { + "epoch": 0.97, + "learning_rate": 9.565531407230664e-06, + "logits/chosen": -3.0771985054016113, + "logits/rejected": -2.81729793548584, + "logps/chosen": -76.77264404296875, + "logps/rejected": -410.1075439453125, + "loss": 0.0235, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6831365823745728, + "rewards/margins": 4.137241363525391, + "rewards/rejected": -5.820378303527832, + "step": 6247 + }, + { + "epoch": 0.97, + "learning_rate": 9.564797966699518e-06, + "logits/chosen": -1.10737144947052, + "logits/rejected": -2.4742257595062256, + "logps/chosen": -51.15816116333008, + "logps/rejected": -155.4656219482422, + "loss": 1.7159, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.26127290725708, + "rewards/margins": 0.8625469207763672, + "rewards/rejected": -4.123819828033447, + "step": 6248 + }, + { + "epoch": 0.97, + "learning_rate": 9.56406452616837e-06, + "logits/chosen": -1.681016445159912, + "logits/rejected": -2.914605140686035, + "logps/chosen": -84.6217041015625, + "logps/rejected": -286.447021484375, + "loss": 1.37, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.777946949005127, + "rewards/margins": 1.9528100490570068, + "rewards/rejected": -6.730756759643555, + "step": 6249 + }, + { + "epoch": 0.97, + "learning_rate": 9.563331085637222e-06, + "logits/chosen": -2.331543445587158, + "logits/rejected": -2.9953150749206543, + "logps/chosen": -169.35833740234375, + "logps/rejected": -365.39593505859375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4684219360351562, + "rewards/margins": 9.034616470336914, + "rewards/rejected": -10.50303840637207, + "step": 6250 + }, + { + "epoch": 0.97, + "learning_rate": 9.562597645106073e-06, + "logits/chosen": -1.395228624343872, + "logits/rejected": -1.891352653503418, + "logps/chosen": -281.18756103515625, + "logps/rejected": -128.8430938720703, + "loss": 1.0132, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.331674575805664, + "rewards/margins": 2.013092279434204, + "rewards/rejected": -6.344766616821289, + "step": 6251 + }, + { + "epoch": 0.97, + "learning_rate": 9.561864204574927e-06, + "logits/chosen": -2.2421069145202637, + "logits/rejected": -2.7456836700439453, + "logps/chosen": -228.01852416992188, + "logps/rejected": -204.29083251953125, + "loss": 2.0789, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4883224964141846, + "rewards/margins": 2.7032418251037598, + "rewards/rejected": -6.191564083099365, + "step": 6252 + }, + { + "epoch": 0.97, + "learning_rate": 9.561130764043779e-06, + "logits/chosen": -2.3509674072265625, + "logits/rejected": -2.9123339653015137, + "logps/chosen": -156.7491912841797, + "logps/rejected": -184.3101348876953, + "loss": 1.0329, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.681408405303955, + "rewards/margins": 1.6190433502197266, + "rewards/rejected": -6.300451278686523, + "step": 6253 + }, + { + "epoch": 0.97, + "learning_rate": 9.56039732351263e-06, + "logits/chosen": -1.2241071462631226, + "logits/rejected": -2.4240000247955322, + "logps/chosen": -67.85906982421875, + "logps/rejected": -278.0789794921875, + "loss": 0.0182, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2116475105285645, + "rewards/margins": 4.064115524291992, + "rewards/rejected": -5.275763034820557, + "step": 6254 + }, + { + "epoch": 0.97, + "learning_rate": 9.559663882981483e-06, + "logits/chosen": -2.532745838165283, + "logits/rejected": -2.8436050415039062, + "logps/chosen": -133.65615844726562, + "logps/rejected": -171.55943298339844, + "loss": 2.476, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2985501289367676, + "rewards/margins": 2.4842610359191895, + "rewards/rejected": -5.782811641693115, + "step": 6255 + }, + { + "epoch": 0.97, + "learning_rate": 9.558930442450335e-06, + "logits/chosen": -2.209998369216919, + "logits/rejected": -2.939828634262085, + "logps/chosen": -447.64971923828125, + "logps/rejected": -708.9684448242188, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2013027667999268, + "rewards/margins": 7.709263801574707, + "rewards/rejected": -9.910566329956055, + "step": 6256 + }, + { + "epoch": 0.97, + "learning_rate": 9.558197001919188e-06, + "logits/chosen": -2.8809592723846436, + "logits/rejected": -2.3603243827819824, + "logps/chosen": -314.3543701171875, + "logps/rejected": -313.10662841796875, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1180834770202637, + "rewards/margins": 5.744121551513672, + "rewards/rejected": -7.862204551696777, + "step": 6257 + }, + { + "epoch": 0.97, + "learning_rate": 9.55746356138804e-06, + "logits/chosen": -3.010715961456299, + "logits/rejected": -2.5015246868133545, + "logps/chosen": -396.33831787109375, + "logps/rejected": -350.5722351074219, + "loss": 1.5796, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.256197452545166, + "rewards/margins": 0.5367134809494019, + "rewards/rejected": -3.7929110527038574, + "step": 6258 + }, + { + "epoch": 0.97, + "learning_rate": 9.556730120856892e-06, + "logits/chosen": -2.920743465423584, + "logits/rejected": -3.0676751136779785, + "logps/chosen": -70.7198257446289, + "logps/rejected": -186.8275604248047, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.86407470703125, + "rewards/margins": 4.3359479904174805, + "rewards/rejected": -6.2000226974487305, + "step": 6259 + }, + { + "epoch": 0.97, + "learning_rate": 9.555996680325744e-06, + "logits/chosen": -2.9797396659851074, + "logits/rejected": -3.0971007347106934, + "logps/chosen": -36.64836883544922, + "logps/rejected": -183.67315673828125, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7877615690231323, + "rewards/margins": 5.166398048400879, + "rewards/rejected": -6.954159259796143, + "step": 6260 + }, + { + "epoch": 0.97, + "learning_rate": 9.555263239794596e-06, + "logits/chosen": -2.977895975112915, + "logits/rejected": -1.8799028396606445, + "logps/chosen": -449.8037109375, + "logps/rejected": -432.6244812011719, + "loss": 2.5591, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.166683197021484, + "rewards/margins": 0.48493027687072754, + "rewards/rejected": -4.651613712310791, + "step": 6261 + }, + { + "epoch": 0.97, + "learning_rate": 9.554529799263448e-06, + "logits/chosen": -3.0424177646636963, + "logits/rejected": -2.8045482635498047, + "logps/chosen": -468.3880615234375, + "logps/rejected": -592.3558959960938, + "loss": 3.14, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8015854358673096, + "rewards/margins": -0.8339066505432129, + "rewards/rejected": -2.9676787853240967, + "step": 6262 + }, + { + "epoch": 0.97, + "learning_rate": 9.5537963587323e-06, + "logits/chosen": -2.137112617492676, + "logits/rejected": -2.904287099838257, + "logps/chosen": -296.5202331542969, + "logps/rejected": -482.4552307128906, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6267762184143066, + "rewards/margins": 8.743154525756836, + "rewards/rejected": -11.369930267333984, + "step": 6263 + }, + { + "epoch": 0.97, + "learning_rate": 9.553062918201151e-06, + "logits/chosen": -2.780917167663574, + "logits/rejected": -3.1768593788146973, + "logps/chosen": -430.0503845214844, + "logps/rejected": -488.4757080078125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5958070755004883, + "rewards/margins": 6.234316349029541, + "rewards/rejected": -6.830123424530029, + "step": 6264 + }, + { + "epoch": 0.97, + "learning_rate": 9.552329477670003e-06, + "logits/chosen": -0.9643553495407104, + "logits/rejected": -1.402828574180603, + "logps/chosen": -208.20481872558594, + "logps/rejected": -365.46575927734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19622573256492615, + "rewards/margins": 8.559633255004883, + "rewards/rejected": -8.755858421325684, + "step": 6265 + }, + { + "epoch": 0.97, + "learning_rate": 9.551596037138857e-06, + "logits/chosen": -1.9487982988357544, + "logits/rejected": -2.8665900230407715, + "logps/chosen": -107.36056518554688, + "logps/rejected": -271.0304870605469, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18723221123218536, + "rewards/margins": 7.949029445648193, + "rewards/rejected": -7.761796951293945, + "step": 6266 + }, + { + "epoch": 0.97, + "learning_rate": 9.550862596607709e-06, + "logits/chosen": -1.7536399364471436, + "logits/rejected": -2.795323371887207, + "logps/chosen": -133.273681640625, + "logps/rejected": -321.6808166503906, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3217787742614746, + "rewards/margins": 6.766413688659668, + "rewards/rejected": -9.0881929397583, + "step": 6267 + }, + { + "epoch": 0.97, + "learning_rate": 9.55012915607656e-06, + "logits/chosen": -2.5865988731384277, + "logits/rejected": -1.88978910446167, + "logps/chosen": -256.7286071777344, + "logps/rejected": -111.65884399414062, + "loss": 4.6385, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.396364212036133, + "rewards/margins": -4.625658988952637, + "rewards/rejected": -1.770704984664917, + "step": 6268 + }, + { + "epoch": 0.97, + "learning_rate": 9.549395715545412e-06, + "logits/chosen": -2.483372211456299, + "logits/rejected": -3.156402826309204, + "logps/chosen": -105.29315185546875, + "logps/rejected": -383.7220458984375, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4264121055603027, + "rewards/margins": 4.8169708251953125, + "rewards/rejected": -6.243383407592773, + "step": 6269 + }, + { + "epoch": 0.98, + "learning_rate": 9.548662275014264e-06, + "logits/chosen": -2.149587392807007, + "logits/rejected": -2.686582565307617, + "logps/chosen": -131.23297119140625, + "logps/rejected": -176.36380004882812, + "loss": 2.6898, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.458827972412109, + "rewards/margins": 0.13014459609985352, + "rewards/rejected": -4.588972568511963, + "step": 6270 + }, + { + "epoch": 0.98, + "learning_rate": 9.547928834483116e-06, + "logits/chosen": -2.8014562129974365, + "logits/rejected": -2.953098773956299, + "logps/chosen": -92.59061431884766, + "logps/rejected": -144.1011962890625, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.077298879623413, + "rewards/margins": 5.480295658111572, + "rewards/rejected": -7.557594299316406, + "step": 6271 + }, + { + "epoch": 0.98, + "learning_rate": 9.547195393951968e-06, + "logits/chosen": -2.1831607818603516, + "logits/rejected": -2.999687671661377, + "logps/chosen": -423.4737243652344, + "logps/rejected": -837.76416015625, + "loss": 1.5224, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4579102993011475, + "rewards/margins": 1.3037137985229492, + "rewards/rejected": -4.761624336242676, + "step": 6272 + }, + { + "epoch": 0.98, + "learning_rate": 9.54646195342082e-06, + "logits/chosen": -1.9738187789916992, + "logits/rejected": -3.149378776550293, + "logps/chosen": -370.5250244140625, + "logps/rejected": -425.1445007324219, + "loss": 0.0635, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7151856422424316, + "rewards/margins": 3.8877272605895996, + "rewards/rejected": -5.602912902832031, + "step": 6273 + }, + { + "epoch": 0.98, + "learning_rate": 9.545728512889672e-06, + "logits/chosen": -1.765992522239685, + "logits/rejected": -3.1493940353393555, + "logps/chosen": -215.39642333984375, + "logps/rejected": -503.8292236328125, + "loss": 2.8584, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.052698612213135, + "rewards/margins": -1.9816973209381104, + "rewards/rejected": -3.0710015296936035, + "step": 6274 + }, + { + "epoch": 0.98, + "learning_rate": 9.544995072358525e-06, + "logits/chosen": -2.8470661640167236, + "logits/rejected": -2.476713180541992, + "logps/chosen": -395.40411376953125, + "logps/rejected": -269.396240234375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5952087640762329, + "rewards/margins": 6.9191694259643555, + "rewards/rejected": -7.514377593994141, + "step": 6275 + }, + { + "epoch": 0.98, + "learning_rate": 9.544261631827377e-06, + "logits/chosen": -2.9381914138793945, + "logits/rejected": -2.9747884273529053, + "logps/chosen": -306.36798095703125, + "logps/rejected": -284.15582275390625, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.544219970703125, + "rewards/margins": 5.814659118652344, + "rewards/rejected": -8.358879089355469, + "step": 6276 + }, + { + "epoch": 0.98, + "learning_rate": 9.543528191296229e-06, + "logits/chosen": -2.1154181957244873, + "logits/rejected": -2.876675605773926, + "logps/chosen": -94.83332061767578, + "logps/rejected": -162.85513305664062, + "loss": 0.0416, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.449248194694519, + "rewards/margins": 3.893071413040161, + "rewards/rejected": -5.342319488525391, + "step": 6277 + }, + { + "epoch": 0.98, + "learning_rate": 9.542794750765081e-06, + "logits/chosen": -3.105097532272339, + "logits/rejected": -2.5891056060791016, + "logps/chosen": -361.340576171875, + "logps/rejected": -261.27752685546875, + "loss": 2.1921, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2189781665802, + "rewards/margins": 0.5033717155456543, + "rewards/rejected": -3.7223498821258545, + "step": 6278 + }, + { + "epoch": 0.98, + "learning_rate": 9.542061310233933e-06, + "logits/chosen": -2.3704328536987305, + "logits/rejected": -3.0581541061401367, + "logps/chosen": -275.2314453125, + "logps/rejected": -485.14263916015625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8104454278945923, + "rewards/margins": 6.169065475463867, + "rewards/rejected": -6.97951078414917, + "step": 6279 + }, + { + "epoch": 0.98, + "learning_rate": 9.541327869702785e-06, + "logits/chosen": -2.5284080505371094, + "logits/rejected": -2.442875385284424, + "logps/chosen": -132.10357666015625, + "logps/rejected": -125.28300476074219, + "loss": 0.5312, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.35740327835083, + "rewards/margins": 2.2015750408172607, + "rewards/rejected": -5.558978080749512, + "step": 6280 + }, + { + "epoch": 0.98, + "learning_rate": 9.540594429171637e-06, + "logits/chosen": -2.9783425331115723, + "logits/rejected": -2.6080026626586914, + "logps/chosen": -407.2268371582031, + "logps/rejected": -304.19061279296875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013021081686019897, + "rewards/margins": 7.371363639831543, + "rewards/rejected": -7.35834264755249, + "step": 6281 + }, + { + "epoch": 0.98, + "learning_rate": 9.539860988640488e-06, + "logits/chosen": -1.7402796745300293, + "logits/rejected": -2.6315040588378906, + "logps/chosen": -108.52448272705078, + "logps/rejected": -379.8229675292969, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.142714738845825, + "rewards/margins": 6.8936333656311035, + "rewards/rejected": -10.036348342895508, + "step": 6282 + }, + { + "epoch": 0.98, + "learning_rate": 9.53912754810934e-06, + "logits/chosen": -2.0241787433624268, + "logits/rejected": -2.783787250518799, + "logps/chosen": -180.09854125976562, + "logps/rejected": -225.22283935546875, + "loss": 2.2706, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5323219299316406, + "rewards/margins": 1.9072890281677246, + "rewards/rejected": -5.439610481262207, + "step": 6283 + }, + { + "epoch": 0.98, + "learning_rate": 9.538394107578194e-06, + "logits/chosen": -2.0271453857421875, + "logits/rejected": -3.0387423038482666, + "logps/chosen": -114.5009765625, + "logps/rejected": -346.5839538574219, + "loss": 0.188, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2005350589752197, + "rewards/margins": 2.2594001293182373, + "rewards/rejected": -5.459935188293457, + "step": 6284 + }, + { + "epoch": 0.98, + "learning_rate": 9.537660667047046e-06, + "logits/chosen": -3.151834011077881, + "logits/rejected": -2.663898229598999, + "logps/chosen": -285.30169677734375, + "logps/rejected": -131.13027954101562, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8604896664619446, + "rewards/margins": 5.212003707885742, + "rewards/rejected": -6.072493553161621, + "step": 6285 + }, + { + "epoch": 0.98, + "learning_rate": 9.5369272265159e-06, + "logits/chosen": -2.8494019508361816, + "logits/rejected": -2.5285093784332275, + "logps/chosen": -99.36764526367188, + "logps/rejected": -128.6268768310547, + "loss": 1.683, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.178734302520752, + "rewards/margins": 0.6789267063140869, + "rewards/rejected": -5.857661247253418, + "step": 6286 + }, + { + "epoch": 0.98, + "learning_rate": 9.536193785984751e-06, + "logits/chosen": -1.2550311088562012, + "logits/rejected": -2.935441493988037, + "logps/chosen": -94.88755798339844, + "logps/rejected": -423.07550048828125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.643304705619812, + "rewards/margins": 6.048855781555176, + "rewards/rejected": -6.692160129547119, + "step": 6287 + }, + { + "epoch": 0.98, + "learning_rate": 9.535460345453603e-06, + "logits/chosen": -2.5168044567108154, + "logits/rejected": -2.0278618335723877, + "logps/chosen": -536.084228515625, + "logps/rejected": -241.8829345703125, + "loss": 4.0793, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.930716514587402, + "rewards/margins": -1.3045213222503662, + "rewards/rejected": -5.626194953918457, + "step": 6288 + }, + { + "epoch": 0.98, + "learning_rate": 9.534726904922455e-06, + "logits/chosen": -1.7017134428024292, + "logits/rejected": -1.8872944116592407, + "logps/chosen": -426.2369079589844, + "logps/rejected": -411.095458984375, + "loss": 2.0882, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1355462074279785, + "rewards/margins": 2.0981192588806152, + "rewards/rejected": -5.233665466308594, + "step": 6289 + }, + { + "epoch": 0.98, + "learning_rate": 9.533993464391307e-06, + "logits/chosen": -2.925527572631836, + "logits/rejected": -2.525156259536743, + "logps/chosen": -480.1768493652344, + "logps/rejected": -732.31884765625, + "loss": 2.7855, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6062920093536377, + "rewards/margins": 0.23036885261535645, + "rewards/rejected": -3.836660861968994, + "step": 6290 + }, + { + "epoch": 0.98, + "learning_rate": 9.533260023860159e-06, + "logits/chosen": -2.299654006958008, + "logits/rejected": -3.1375622749328613, + "logps/chosen": -148.47537231445312, + "logps/rejected": -377.87017822265625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23201780021190643, + "rewards/margins": 6.168282508850098, + "rewards/rejected": -6.400300025939941, + "step": 6291 + }, + { + "epoch": 0.98, + "learning_rate": 9.53252658332901e-06, + "logits/chosen": -3.0650105476379395, + "logits/rejected": -2.451235055923462, + "logps/chosen": -463.2127380371094, + "logps/rejected": -326.011962890625, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.711620807647705, + "rewards/margins": 5.413589000701904, + "rewards/rejected": -7.125209808349609, + "step": 6292 + }, + { + "epoch": 0.98, + "learning_rate": 9.531793142797864e-06, + "logits/chosen": -2.736860990524292, + "logits/rejected": -3.0138044357299805, + "logps/chosen": -122.60274505615234, + "logps/rejected": -150.8092803955078, + "loss": 0.1038, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9247679710388184, + "rewards/margins": 3.320284605026245, + "rewards/rejected": -6.245052814483643, + "step": 6293 + }, + { + "epoch": 0.98, + "learning_rate": 9.531059702266716e-06, + "logits/chosen": -3.167555332183838, + "logits/rejected": -2.734844207763672, + "logps/chosen": -523.5243530273438, + "logps/rejected": -475.1851806640625, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3880741000175476, + "rewards/margins": 4.514848709106445, + "rewards/rejected": -4.902922630310059, + "step": 6294 + }, + { + "epoch": 0.98, + "learning_rate": 9.530326261735568e-06, + "logits/chosen": -2.698068857192993, + "logits/rejected": -3.125413417816162, + "logps/chosen": -159.58612060546875, + "logps/rejected": -160.926025390625, + "loss": 0.3384, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9407265186309814, + "rewards/margins": 1.6771719455718994, + "rewards/rejected": -4.617898464202881, + "step": 6295 + }, + { + "epoch": 0.98, + "learning_rate": 9.52959282120442e-06, + "logits/chosen": -3.0471131801605225, + "logits/rejected": -2.687972068786621, + "logps/chosen": -175.6871337890625, + "logps/rejected": -287.4138488769531, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31198427081108093, + "rewards/margins": 5.734957695007324, + "rewards/rejected": -6.046942234039307, + "step": 6296 + }, + { + "epoch": 0.98, + "learning_rate": 9.528859380673272e-06, + "logits/chosen": -0.8177963495254517, + "logits/rejected": -2.9146642684936523, + "logps/chosen": -108.30397033691406, + "logps/rejected": -472.5620422363281, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7552757263183594, + "rewards/margins": 6.996733665466309, + "rewards/rejected": -8.752009391784668, + "step": 6297 + }, + { + "epoch": 0.98, + "learning_rate": 9.528125940142124e-06, + "logits/chosen": -2.942744255065918, + "logits/rejected": -2.2999255657196045, + "logps/chosen": -326.4026794433594, + "logps/rejected": -322.29937744140625, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36645469069480896, + "rewards/margins": 5.938366889953613, + "rewards/rejected": -6.304821968078613, + "step": 6298 + }, + { + "epoch": 0.98, + "learning_rate": 9.527392499610975e-06, + "logits/chosen": -2.860029458999634, + "logits/rejected": -2.725044012069702, + "logps/chosen": -241.4685821533203, + "logps/rejected": -289.3209228515625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7532943487167358, + "rewards/margins": 6.49234676361084, + "rewards/rejected": -7.245641231536865, + "step": 6299 + }, + { + "epoch": 0.98, + "learning_rate": 9.526659059079827e-06, + "logits/chosen": -3.0090157985687256, + "logits/rejected": -1.3678632974624634, + "logps/chosen": -279.80120849609375, + "logps/rejected": -188.85128784179688, + "loss": 2.4258, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.07513427734375, + "rewards/margins": -1.9860694408416748, + "rewards/rejected": -2.089064836502075, + "step": 6300 + }, + { + "epoch": 0.98, + "learning_rate": 9.52592561854868e-06, + "logits/chosen": -2.406580686569214, + "logits/rejected": -3.184741497039795, + "logps/chosen": -379.1357421875, + "logps/rejected": -537.5054931640625, + "loss": 0.0903, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.380476474761963, + "rewards/margins": 2.6121444702148438, + "rewards/rejected": -3.9926209449768066, + "step": 6301 + }, + { + "epoch": 0.98, + "learning_rate": 9.525192178017533e-06, + "logits/chosen": -2.339823007583618, + "logits/rejected": -3.186472177505493, + "logps/chosen": -414.04058837890625, + "logps/rejected": -393.4759826660156, + "loss": 0.0856, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1323089599609375, + "rewards/margins": 2.760007381439209, + "rewards/rejected": -3.8923163414001465, + "step": 6302 + }, + { + "epoch": 0.98, + "learning_rate": 9.524458737486385e-06, + "logits/chosen": -2.533315420150757, + "logits/rejected": -3.250854015350342, + "logps/chosen": -80.1105728149414, + "logps/rejected": -411.0827941894531, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.501000165939331, + "rewards/margins": 7.132032871246338, + "rewards/rejected": -8.63303279876709, + "step": 6303 + }, + { + "epoch": 0.98, + "learning_rate": 9.523725296955237e-06, + "logits/chosen": -2.853959798812866, + "logits/rejected": -2.3998496532440186, + "logps/chosen": -609.014892578125, + "logps/rejected": -404.416015625, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.156306505203247, + "rewards/margins": 5.160702705383301, + "rewards/rejected": -7.317008972167969, + "step": 6304 + }, + { + "epoch": 0.98, + "learning_rate": 9.522991856424088e-06, + "logits/chosen": -3.041008949279785, + "logits/rejected": -2.8005268573760986, + "logps/chosen": -476.1737060546875, + "logps/rejected": -393.29632568359375, + "loss": 2.1632, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6872925758361816, + "rewards/margins": 1.5595088005065918, + "rewards/rejected": -5.246801376342773, + "step": 6305 + }, + { + "epoch": 0.98, + "learning_rate": 9.52225841589294e-06, + "logits/chosen": -2.8808352947235107, + "logits/rejected": -2.910748243331909, + "logps/chosen": -270.8564453125, + "logps/rejected": -315.66668701171875, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9933357238769531, + "rewards/margins": 7.029750823974609, + "rewards/rejected": -9.023086547851562, + "step": 6306 + }, + { + "epoch": 0.98, + "learning_rate": 9.521524975361792e-06, + "logits/chosen": -1.9443975687026978, + "logits/rejected": -3.035619020462036, + "logps/chosen": -73.27503967285156, + "logps/rejected": -320.30133056640625, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7505695819854736, + "rewards/margins": 4.517126083374023, + "rewards/rejected": -5.267695903778076, + "step": 6307 + }, + { + "epoch": 0.98, + "learning_rate": 9.520791534830644e-06, + "logits/chosen": -3.0084986686706543, + "logits/rejected": -1.3298133611679077, + "logps/chosen": -307.1539001464844, + "logps/rejected": -102.12457275390625, + "loss": 1.8126, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.896492958068848, + "rewards/margins": -1.324955940246582, + "rewards/rejected": -3.5715367794036865, + "step": 6308 + }, + { + "epoch": 0.98, + "learning_rate": 9.520058094299496e-06, + "logits/chosen": -2.111482858657837, + "logits/rejected": -3.1473302841186523, + "logps/chosen": -392.5611572265625, + "logps/rejected": -583.521240234375, + "loss": 5.2965, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.730644226074219, + "rewards/margins": -2.7388648986816406, + "rewards/rejected": -3.991779327392578, + "step": 6309 + }, + { + "epoch": 0.98, + "learning_rate": 9.519324653768348e-06, + "logits/chosen": -2.8169734477996826, + "logits/rejected": -2.8056235313415527, + "logps/chosen": -48.611328125, + "logps/rejected": -179.7664794921875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9513697028160095, + "rewards/margins": 5.990623474121094, + "rewards/rejected": -6.94199275970459, + "step": 6310 + }, + { + "epoch": 0.98, + "learning_rate": 9.518591213237201e-06, + "logits/chosen": -2.749444007873535, + "logits/rejected": -2.472382068634033, + "logps/chosen": -165.4625701904297, + "logps/rejected": -404.38470458984375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1737008094787598, + "rewards/margins": 9.490644454956055, + "rewards/rejected": -10.664344787597656, + "step": 6311 + }, + { + "epoch": 0.98, + "learning_rate": 9.517857772706053e-06, + "logits/chosen": -2.974703550338745, + "logits/rejected": -3.1776387691497803, + "logps/chosen": -234.91441345214844, + "logps/rejected": -199.48146057128906, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4384515285491943, + "rewards/margins": 5.061104774475098, + "rewards/rejected": -6.499556541442871, + "step": 6312 + }, + { + "epoch": 0.98, + "learning_rate": 9.517124332174905e-06, + "logits/chosen": -2.6285386085510254, + "logits/rejected": -2.776756763458252, + "logps/chosen": -213.78125, + "logps/rejected": -224.1145782470703, + "loss": 0.3686, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3531439304351807, + "rewards/margins": 2.3708972930908203, + "rewards/rejected": -5.724040985107422, + "step": 6313 + }, + { + "epoch": 0.98, + "learning_rate": 9.516390891643757e-06, + "logits/chosen": -2.6830506324768066, + "logits/rejected": -3.0634145736694336, + "logps/chosen": -191.69891357421875, + "logps/rejected": -270.7095642089844, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.953298807144165, + "rewards/margins": 4.6671905517578125, + "rewards/rejected": -5.620489597320557, + "step": 6314 + }, + { + "epoch": 0.98, + "learning_rate": 9.515657451112609e-06, + "logits/chosen": -2.719444513320923, + "logits/rejected": -2.3547215461730957, + "logps/chosen": -312.6716003417969, + "logps/rejected": -280.6664733886719, + "loss": 2.047, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.017836093902588, + "rewards/margins": 1.1007847785949707, + "rewards/rejected": -5.118620872497559, + "step": 6315 + }, + { + "epoch": 0.98, + "learning_rate": 9.51492401058146e-06, + "logits/chosen": -2.3261117935180664, + "logits/rejected": -3.055882453918457, + "logps/chosen": -81.42605590820312, + "logps/rejected": -295.0166015625, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0034780502319336, + "rewards/margins": 4.990140914916992, + "rewards/rejected": -5.993618965148926, + "step": 6316 + }, + { + "epoch": 0.98, + "learning_rate": 9.514190570050313e-06, + "logits/chosen": -2.690403461456299, + "logits/rejected": -2.856163501739502, + "logps/chosen": -227.3450469970703, + "logps/rejected": -205.89596557617188, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5480133295059204, + "rewards/margins": 4.778871536254883, + "rewards/rejected": -6.326885223388672, + "step": 6317 + }, + { + "epoch": 0.98, + "learning_rate": 9.513457129519166e-06, + "logits/chosen": -2.97953200340271, + "logits/rejected": -2.2903895378112793, + "logps/chosen": -221.8594970703125, + "logps/rejected": -148.90174865722656, + "loss": 0.0575, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9357409477233887, + "rewards/margins": 3.0036048889160156, + "rewards/rejected": -6.939345359802246, + "step": 6318 + }, + { + "epoch": 0.98, + "learning_rate": 9.512723688988018e-06, + "logits/chosen": -2.6347904205322266, + "logits/rejected": -2.6088058948516846, + "logps/chosen": -129.7606964111328, + "logps/rejected": -370.5498962402344, + "loss": 1.7406, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.853400945663452, + "rewards/margins": 1.5388226509094238, + "rewards/rejected": -5.392223358154297, + "step": 6319 + }, + { + "epoch": 0.98, + "learning_rate": 9.511990248456872e-06, + "logits/chosen": -3.153668165206909, + "logits/rejected": -3.1451809406280518, + "logps/chosen": -263.1009521484375, + "logps/rejected": -262.06414794921875, + "loss": 2.0694, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0962300300598145, + "rewards/margins": 0.6605584621429443, + "rewards/rejected": -3.756788492202759, + "step": 6320 + }, + { + "epoch": 0.98, + "learning_rate": 9.511256807925724e-06, + "logits/chosen": -3.1543400287628174, + "logits/rejected": -2.9818413257598877, + "logps/chosen": -129.03480529785156, + "logps/rejected": -92.98744201660156, + "loss": 0.5254, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.425971508026123, + "rewards/margins": 1.9749746322631836, + "rewards/rejected": -5.400946140289307, + "step": 6321 + }, + { + "epoch": 0.98, + "learning_rate": 9.510523367394575e-06, + "logits/chosen": -2.6850507259368896, + "logits/rejected": -2.988753080368042, + "logps/chosen": -51.30260467529297, + "logps/rejected": -151.88949584960938, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9558912515640259, + "rewards/margins": 6.122124671936035, + "rewards/rejected": -7.078015327453613, + "step": 6322 + }, + { + "epoch": 0.98, + "learning_rate": 9.509789926863427e-06, + "logits/chosen": -2.4096481800079346, + "logits/rejected": -2.9185829162597656, + "logps/chosen": -120.92625427246094, + "logps/rejected": -332.0872802734375, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9661839008331299, + "rewards/margins": 5.379778861999512, + "rewards/rejected": -7.3459625244140625, + "step": 6323 + }, + { + "epoch": 0.98, + "learning_rate": 9.50905648633228e-06, + "logits/chosen": -3.0306742191314697, + "logits/rejected": -3.035701036453247, + "logps/chosen": -220.74267578125, + "logps/rejected": -149.0623321533203, + "loss": 1.6558, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6396636962890625, + "rewards/margins": 0.2272425889968872, + "rewards/rejected": -3.86690616607666, + "step": 6324 + }, + { + "epoch": 0.98, + "learning_rate": 9.508323045801131e-06, + "logits/chosen": -2.409639835357666, + "logits/rejected": -1.629807949066162, + "logps/chosen": -433.06768798828125, + "logps/rejected": -429.74371337890625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07337722182273865, + "rewards/margins": 7.398820877075195, + "rewards/rejected": -7.472198486328125, + "step": 6325 + }, + { + "epoch": 0.98, + "learning_rate": 9.507589605269983e-06, + "logits/chosen": -2.224836587905884, + "logits/rejected": -2.578537940979004, + "logps/chosen": -63.90058517456055, + "logps/rejected": -215.70986938476562, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.512026786804199, + "rewards/margins": 5.8682451248168945, + "rewards/rejected": -9.380271911621094, + "step": 6326 + }, + { + "epoch": 0.98, + "learning_rate": 9.506856164738835e-06, + "logits/chosen": -2.3759348392486572, + "logits/rejected": -2.885784149169922, + "logps/chosen": -190.36605834960938, + "logps/rejected": -358.3629150390625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3180344104766846, + "rewards/margins": 5.705244541168213, + "rewards/rejected": -7.023279190063477, + "step": 6327 + }, + { + "epoch": 0.98, + "learning_rate": 9.506122724207688e-06, + "logits/chosen": -1.6840288639068604, + "logits/rejected": -2.2681994438171387, + "logps/chosen": -511.3381652832031, + "logps/rejected": -380.4231262207031, + "loss": 0.1117, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1853408813476562, + "rewards/margins": 4.422636032104492, + "rewards/rejected": -7.607976913452148, + "step": 6328 + }, + { + "epoch": 0.98, + "learning_rate": 9.50538928367654e-06, + "logits/chosen": -3.1275062561035156, + "logits/rejected": -2.494290590286255, + "logps/chosen": -476.6494445800781, + "logps/rejected": -370.0368957519531, + "loss": 2.8218, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.204710960388184, + "rewards/margins": -1.3445777893066406, + "rewards/rejected": -2.860133409500122, + "step": 6329 + }, + { + "epoch": 0.98, + "learning_rate": 9.504655843145392e-06, + "logits/chosen": -2.858539342880249, + "logits/rejected": -1.6087372303009033, + "logps/chosen": -191.0607147216797, + "logps/rejected": -97.69569396972656, + "loss": 3.5491, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.282566070556641, + "rewards/margins": -3.4642977714538574, + "rewards/rejected": -3.818268060684204, + "step": 6330 + }, + { + "epoch": 0.98, + "learning_rate": 9.503922402614244e-06, + "logits/chosen": -3.000969171524048, + "logits/rejected": -1.130611538887024, + "logps/chosen": -298.07958984375, + "logps/rejected": -223.28392028808594, + "loss": 1.2781, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4422037601470947, + "rewards/margins": 4.629164695739746, + "rewards/rejected": -8.071368217468262, + "step": 6331 + }, + { + "epoch": 0.98, + "learning_rate": 9.503188962083096e-06, + "logits/chosen": -3.0662472248077393, + "logits/rejected": -1.9757437705993652, + "logps/chosen": -315.74359130859375, + "logps/rejected": -361.2558898925781, + "loss": 0.0612, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8659518957138062, + "rewards/margins": 4.999720573425293, + "rewards/rejected": -6.8656721115112305, + "step": 6332 + }, + { + "epoch": 0.98, + "learning_rate": 9.502455521551948e-06, + "logits/chosen": -2.479506731033325, + "logits/rejected": -3.0144197940826416, + "logps/chosen": -142.17527770996094, + "logps/rejected": -235.22705078125, + "loss": 2.4285, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.27931022644043, + "rewards/margins": -0.380603551864624, + "rewards/rejected": -3.8987066745758057, + "step": 6333 + }, + { + "epoch": 0.99, + "learning_rate": 9.5017220810208e-06, + "logits/chosen": -2.643937349319458, + "logits/rejected": -3.0536587238311768, + "logps/chosen": -243.40481567382812, + "logps/rejected": -495.3265686035156, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0244903564453125, + "rewards/margins": 4.7148237228393555, + "rewards/rejected": -6.739314079284668, + "step": 6334 + }, + { + "epoch": 0.99, + "learning_rate": 9.500988640489652e-06, + "logits/chosen": -1.569887399673462, + "logits/rejected": -2.8593227863311768, + "logps/chosen": -29.921600341796875, + "logps/rejected": -605.3924560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.452673316001892, + "rewards/margins": 13.05522346496582, + "rewards/rejected": -14.507896423339844, + "step": 6335 + }, + { + "epoch": 0.99, + "learning_rate": 9.500255199958503e-06, + "logits/chosen": -3.0473520755767822, + "logits/rejected": -3.249485731124878, + "logps/chosen": -34.798892974853516, + "logps/rejected": -127.30162048339844, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.244088888168335, + "rewards/margins": 3.9480745792388916, + "rewards/rejected": -6.192163467407227, + "step": 6336 + }, + { + "epoch": 0.99, + "learning_rate": 9.499521759427357e-06, + "logits/chosen": -2.8295769691467285, + "logits/rejected": -2.759505033493042, + "logps/chosen": -583.8388061523438, + "logps/rejected": -507.1582946777344, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1314315795898438, + "rewards/margins": 7.834159851074219, + "rewards/rejected": -8.965591430664062, + "step": 6337 + }, + { + "epoch": 0.99, + "learning_rate": 9.498788318896209e-06, + "logits/chosen": -1.9499307870864868, + "logits/rejected": -2.551103115081787, + "logps/chosen": -189.7916717529297, + "logps/rejected": -294.7610778808594, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.372769355773926, + "rewards/margins": 6.010544300079346, + "rewards/rejected": -9.38331413269043, + "step": 6338 + }, + { + "epoch": 0.99, + "learning_rate": 9.49805487836506e-06, + "logits/chosen": -3.019648313522339, + "logits/rejected": -2.926142930984497, + "logps/chosen": -327.28509521484375, + "logps/rejected": -363.84539794921875, + "loss": 0.0671, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5544263124465942, + "rewards/margins": 5.159733772277832, + "rewards/rejected": -6.714159965515137, + "step": 6339 + }, + { + "epoch": 0.99, + "learning_rate": 9.497321437833913e-06, + "logits/chosen": -2.961515188217163, + "logits/rejected": -2.586639642715454, + "logps/chosen": -450.2028503417969, + "logps/rejected": -484.6520080566406, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7239331007003784, + "rewards/margins": 7.968518257141113, + "rewards/rejected": -9.692451477050781, + "step": 6340 + }, + { + "epoch": 0.99, + "learning_rate": 9.496587997302765e-06, + "logits/chosen": -1.9190313816070557, + "logits/rejected": -3.1108365058898926, + "logps/chosen": -154.90603637695312, + "logps/rejected": -433.2611083984375, + "loss": 0.0872, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8451550006866455, + "rewards/margins": 3.5334949493408203, + "rewards/rejected": -5.378649711608887, + "step": 6341 + }, + { + "epoch": 0.99, + "learning_rate": 9.495854556771616e-06, + "logits/chosen": -1.606711745262146, + "logits/rejected": -2.8010094165802, + "logps/chosen": -101.88411712646484, + "logps/rejected": -193.05740356445312, + "loss": 0.7264, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.504355430603027, + "rewards/margins": 0.48207128047943115, + "rewards/rejected": -5.986426830291748, + "step": 6342 + }, + { + "epoch": 0.99, + "learning_rate": 9.495121116240468e-06, + "logits/chosen": -3.0905985832214355, + "logits/rejected": -3.0955018997192383, + "logps/chosen": -695.5835571289062, + "logps/rejected": -514.6160888671875, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1613327264785767, + "rewards/margins": 5.018902778625488, + "rewards/rejected": -6.180235385894775, + "step": 6343 + }, + { + "epoch": 0.99, + "learning_rate": 9.49438767570932e-06, + "logits/chosen": -2.209514617919922, + "logits/rejected": -3.1309962272644043, + "logps/chosen": -51.35834503173828, + "logps/rejected": -397.16705322265625, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7604806423187256, + "rewards/margins": 3.821000099182129, + "rewards/rejected": -7.581480979919434, + "step": 6344 + }, + { + "epoch": 0.99, + "learning_rate": 9.493654235178172e-06, + "logits/chosen": -2.475137948989868, + "logits/rejected": -3.149953842163086, + "logps/chosen": -146.5004119873047, + "logps/rejected": -188.76895141601562, + "loss": 0.038, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8598098754882812, + "rewards/margins": 3.2621731758117676, + "rewards/rejected": -7.121982574462891, + "step": 6345 + }, + { + "epoch": 0.99, + "learning_rate": 9.492920794647026e-06, + "logits/chosen": -2.85143780708313, + "logits/rejected": -2.94970440864563, + "logps/chosen": -490.8096618652344, + "logps/rejected": -609.389892578125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.891824722290039, + "rewards/margins": 7.015666484832764, + "rewards/rejected": -8.907491683959961, + "step": 6346 + }, + { + "epoch": 0.99, + "learning_rate": 9.492187354115878e-06, + "logits/chosen": -2.3071813583374023, + "logits/rejected": -2.653644323348999, + "logps/chosen": -183.43978881835938, + "logps/rejected": -265.76776123046875, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4430129528045654, + "rewards/margins": 6.41908073425293, + "rewards/rejected": -7.862093448638916, + "step": 6347 + }, + { + "epoch": 0.99, + "learning_rate": 9.49145391358473e-06, + "logits/chosen": -3.1153557300567627, + "logits/rejected": -3.0880608558654785, + "logps/chosen": -179.71185302734375, + "logps/rejected": -254.57510375976562, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9020156860351562, + "rewards/margins": 5.982268333435059, + "rewards/rejected": -7.884284019470215, + "step": 6348 + }, + { + "epoch": 0.99, + "learning_rate": 9.490720473053581e-06, + "logits/chosen": -2.6199066638946533, + "logits/rejected": -3.223735809326172, + "logps/chosen": -47.80442810058594, + "logps/rejected": -205.55702209472656, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6309566497802734, + "rewards/margins": 4.087784290313721, + "rewards/rejected": -5.718740940093994, + "step": 6349 + }, + { + "epoch": 0.99, + "learning_rate": 9.489987032522433e-06, + "logits/chosen": -2.0814762115478516, + "logits/rejected": -2.944786548614502, + "logps/chosen": -113.29522705078125, + "logps/rejected": -402.066650390625, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6582751870155334, + "rewards/margins": 5.857536315917969, + "rewards/rejected": -6.515811443328857, + "step": 6350 + }, + { + "epoch": 0.99, + "learning_rate": 9.489253591991285e-06, + "logits/chosen": -2.9612882137298584, + "logits/rejected": -3.044400691986084, + "logps/chosen": -200.70310974121094, + "logps/rejected": -213.39755249023438, + "loss": 0.1373, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8736116886138916, + "rewards/margins": 3.679675817489624, + "rewards/rejected": -6.553287506103516, + "step": 6351 + }, + { + "epoch": 0.99, + "learning_rate": 9.488520151460139e-06, + "logits/chosen": -2.5403642654418945, + "logits/rejected": -3.046433210372925, + "logps/chosen": -296.3453674316406, + "logps/rejected": -301.1310119628906, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.44318774342536926, + "rewards/margins": 5.698952674865723, + "rewards/rejected": -6.1421403884887695, + "step": 6352 + }, + { + "epoch": 0.99, + "learning_rate": 9.48778671092899e-06, + "logits/chosen": -1.919936180114746, + "logits/rejected": -2.734008312225342, + "logps/chosen": -193.47349548339844, + "logps/rejected": -302.498046875, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8893203735351562, + "rewards/margins": 4.862601280212402, + "rewards/rejected": -6.751921653747559, + "step": 6353 + }, + { + "epoch": 0.99, + "learning_rate": 9.487053270397842e-06, + "logits/chosen": -2.785837173461914, + "logits/rejected": -3.1383068561553955, + "logps/chosen": -591.1490478515625, + "logps/rejected": -523.6068725585938, + "loss": 0.3816, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3399291038513184, + "rewards/margins": 1.176581621170044, + "rewards/rejected": -3.5165109634399414, + "step": 6354 + }, + { + "epoch": 0.99, + "learning_rate": 9.486319829866696e-06, + "logits/chosen": -1.246146321296692, + "logits/rejected": -2.907911777496338, + "logps/chosen": -64.26265716552734, + "logps/rejected": -238.75912475585938, + "loss": 0.7902, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.584866046905518, + "rewards/margins": 1.3928556442260742, + "rewards/rejected": -5.977721214294434, + "step": 6355 + }, + { + "epoch": 0.99, + "learning_rate": 9.485586389335548e-06, + "logits/chosen": -2.950493812561035, + "logits/rejected": -2.589534282684326, + "logps/chosen": -237.64718627929688, + "logps/rejected": -290.1290283203125, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5251754522323608, + "rewards/margins": 5.617137908935547, + "rewards/rejected": -7.142313480377197, + "step": 6356 + }, + { + "epoch": 0.99, + "learning_rate": 9.4848529488044e-06, + "logits/chosen": -3.0200228691101074, + "logits/rejected": -3.2179088592529297, + "logps/chosen": -230.30245971679688, + "logps/rejected": -95.30833435058594, + "loss": 2.1528, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6126389503479004, + "rewards/margins": 0.22130465507507324, + "rewards/rejected": -3.8339436054229736, + "step": 6357 + }, + { + "epoch": 0.99, + "learning_rate": 9.484119508273252e-06, + "logits/chosen": -2.097409248352051, + "logits/rejected": -3.0074398517608643, + "logps/chosen": -178.23361206054688, + "logps/rejected": -416.77252197265625, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.284785509109497, + "rewards/margins": 4.676678657531738, + "rewards/rejected": -5.961463928222656, + "step": 6358 + }, + { + "epoch": 0.99, + "learning_rate": 9.483386067742103e-06, + "logits/chosen": -2.3655502796173096, + "logits/rejected": -3.070396900177002, + "logps/chosen": -102.88423919677734, + "logps/rejected": -160.52972412109375, + "loss": 1.2668, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2698020935058594, + "rewards/margins": 1.8733900785446167, + "rewards/rejected": -5.143192291259766, + "step": 6359 + }, + { + "epoch": 0.99, + "learning_rate": 9.482652627210955e-06, + "logits/chosen": -1.5367605686187744, + "logits/rejected": -3.060858726501465, + "logps/chosen": -200.56942749023438, + "logps/rejected": -363.935791015625, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4606231451034546, + "rewards/margins": 4.934182167053223, + "rewards/rejected": -6.394805908203125, + "step": 6360 + }, + { + "epoch": 0.99, + "learning_rate": 9.481919186679807e-06, + "logits/chosen": -2.422767400741577, + "logits/rejected": -2.817361831665039, + "logps/chosen": -77.74043273925781, + "logps/rejected": -217.81214904785156, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2424702644348145, + "rewards/margins": 5.4025654792785645, + "rewards/rejected": -6.645035743713379, + "step": 6361 + }, + { + "epoch": 0.99, + "learning_rate": 9.481185746148659e-06, + "logits/chosen": -1.2279633283615112, + "logits/rejected": -3.0732600688934326, + "logps/chosen": -384.14349365234375, + "logps/rejected": -495.49676513671875, + "loss": 5.0363, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.201959609985352, + "rewards/margins": -1.4706330299377441, + "rewards/rejected": -5.731326580047607, + "step": 6362 + }, + { + "epoch": 0.99, + "learning_rate": 9.480452305617511e-06, + "logits/chosen": -1.330612063407898, + "logits/rejected": -2.5229876041412354, + "logps/chosen": -250.397216796875, + "logps/rejected": -382.33551025390625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.159102201461792, + "rewards/margins": 6.459720611572266, + "rewards/rejected": -8.61882209777832, + "step": 6363 + }, + { + "epoch": 0.99, + "learning_rate": 9.479718865086365e-06, + "logits/chosen": -2.909672975540161, + "logits/rejected": -3.056440591812134, + "logps/chosen": -151.24215698242188, + "logps/rejected": -244.30050659179688, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3771377801895142, + "rewards/margins": 4.752840995788574, + "rewards/rejected": -6.129978179931641, + "step": 6364 + }, + { + "epoch": 0.99, + "learning_rate": 9.478985424555216e-06, + "logits/chosen": -2.8566787242889404, + "logits/rejected": -3.078768491744995, + "logps/chosen": -55.830177307128906, + "logps/rejected": -544.5816650390625, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.657740354537964, + "rewards/margins": 3.837766170501709, + "rewards/rejected": -6.495506286621094, + "step": 6365 + }, + { + "epoch": 0.99, + "learning_rate": 9.478251984024068e-06, + "logits/chosen": -3.0204949378967285, + "logits/rejected": -3.0926849842071533, + "logps/chosen": -552.1082763671875, + "logps/rejected": -481.43359375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7550538778305054, + "rewards/margins": 5.612074375152588, + "rewards/rejected": -7.367128372192383, + "step": 6366 + }, + { + "epoch": 0.99, + "learning_rate": 9.47751854349292e-06, + "logits/chosen": -3.283492088317871, + "logits/rejected": -2.30501651763916, + "logps/chosen": -331.884521484375, + "logps/rejected": -85.52110290527344, + "loss": 1.7453, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.464405059814453, + "rewards/margins": 1.261454701423645, + "rewards/rejected": -3.7258596420288086, + "step": 6367 + }, + { + "epoch": 0.99, + "learning_rate": 9.476785102961772e-06, + "logits/chosen": -2.8036205768585205, + "logits/rejected": -2.760868787765503, + "logps/chosen": -371.07904052734375, + "logps/rejected": -258.6640319824219, + "loss": 3.6718, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.521193027496338, + "rewards/margins": -0.31219482421875, + "rewards/rejected": -5.208998203277588, + "step": 6368 + }, + { + "epoch": 0.99, + "learning_rate": 9.476051662430624e-06, + "logits/chosen": -2.130375623703003, + "logits/rejected": -2.786188840866089, + "logps/chosen": -175.00582885742188, + "logps/rejected": -373.183349609375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4747689962387085, + "rewards/margins": 8.239795684814453, + "rewards/rejected": -9.71456527709961, + "step": 6369 + }, + { + "epoch": 0.99, + "learning_rate": 9.475318221899476e-06, + "logits/chosen": -2.3880579471588135, + "logits/rejected": -2.9268062114715576, + "logps/chosen": -139.27902221679688, + "logps/rejected": -204.7096710205078, + "loss": 0.0235, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7152514457702637, + "rewards/margins": 4.830801010131836, + "rewards/rejected": -7.546052932739258, + "step": 6370 + }, + { + "epoch": 0.99, + "learning_rate": 9.474584781368328e-06, + "logits/chosen": -1.3224409818649292, + "logits/rejected": -2.8834848403930664, + "logps/chosen": -75.40292358398438, + "logps/rejected": -436.4786376953125, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7901926040649414, + "rewards/margins": 6.231494903564453, + "rewards/rejected": -8.021687507629395, + "step": 6371 + }, + { + "epoch": 0.99, + "learning_rate": 9.47385134083718e-06, + "logits/chosen": -3.078397750854492, + "logits/rejected": -2.605498790740967, + "logps/chosen": -430.0230407714844, + "logps/rejected": -333.12054443359375, + "loss": 0.5, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.214681386947632, + "rewards/margins": 2.3455541133880615, + "rewards/rejected": -4.560235500335693, + "step": 6372 + }, + { + "epoch": 0.99, + "learning_rate": 9.473117900306033e-06, + "logits/chosen": -2.8889825344085693, + "logits/rejected": -2.964895725250244, + "logps/chosen": -243.30751037597656, + "logps/rejected": -496.04296875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4649704694747925, + "rewards/margins": 6.778628349304199, + "rewards/rejected": -8.243598937988281, + "step": 6373 + }, + { + "epoch": 0.99, + "learning_rate": 9.472384459774885e-06, + "logits/chosen": -3.080010175704956, + "logits/rejected": -2.8144030570983887, + "logps/chosen": -488.34307861328125, + "logps/rejected": -496.8262023925781, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.206909656524658, + "rewards/margins": 5.392284393310547, + "rewards/rejected": -7.599194526672363, + "step": 6374 + }, + { + "epoch": 0.99, + "learning_rate": 9.471651019243737e-06, + "logits/chosen": -2.2333624362945557, + "logits/rejected": -2.839704751968384, + "logps/chosen": -127.47563171386719, + "logps/rejected": -373.52117919921875, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3329381942749023, + "rewards/margins": 7.056604862213135, + "rewards/rejected": -9.389543533325195, + "step": 6375 + }, + { + "epoch": 0.99, + "learning_rate": 9.470917578712589e-06, + "logits/chosen": -2.8941714763641357, + "logits/rejected": -3.216449499130249, + "logps/chosen": -273.6935729980469, + "logps/rejected": -489.81536865234375, + "loss": 2.2448, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.067573070526123, + "rewards/margins": -0.5373084545135498, + "rewards/rejected": -2.5302646160125732, + "step": 6376 + }, + { + "epoch": 0.99, + "learning_rate": 9.47018413818144e-06, + "logits/chosen": -2.847029685974121, + "logits/rejected": -1.517313003540039, + "logps/chosen": -140.97900390625, + "logps/rejected": -111.22372436523438, + "loss": 0.3878, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.615508556365967, + "rewards/margins": 1.214006781578064, + "rewards/rejected": -3.8295154571533203, + "step": 6377 + }, + { + "epoch": 0.99, + "learning_rate": 9.469450697650293e-06, + "logits/chosen": -2.910733938217163, + "logits/rejected": -2.615896224975586, + "logps/chosen": -200.09902954101562, + "logps/rejected": -289.7428894042969, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3660942316055298, + "rewards/margins": 4.633685111999512, + "rewards/rejected": -5.999779224395752, + "step": 6378 + }, + { + "epoch": 0.99, + "learning_rate": 9.468717257119144e-06, + "logits/chosen": -2.533628463745117, + "logits/rejected": -2.866513967514038, + "logps/chosen": -103.17671203613281, + "logps/rejected": -312.698486328125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6719985008239746, + "rewards/margins": 6.80152702331543, + "rewards/rejected": -9.473525047302246, + "step": 6379 + }, + { + "epoch": 0.99, + "learning_rate": 9.467983816587996e-06, + "logits/chosen": -3.1247944831848145, + "logits/rejected": -2.3656647205352783, + "logps/chosen": -955.5994262695312, + "logps/rejected": -618.3101196289062, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7911369800567627, + "rewards/margins": 6.5365118980407715, + "rewards/rejected": -7.327649116516113, + "step": 6380 + }, + { + "epoch": 0.99, + "learning_rate": 9.467250376056848e-06, + "logits/chosen": -2.371762275695801, + "logits/rejected": -2.5532217025756836, + "logps/chosen": -280.6007995605469, + "logps/rejected": -538.653076171875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3789345026016235, + "rewards/margins": 7.140578746795654, + "rewards/rejected": -8.519513130187988, + "step": 6381 + }, + { + "epoch": 0.99, + "learning_rate": 9.466516935525702e-06, + "logits/chosen": -1.772840142250061, + "logits/rejected": -3.1371333599090576, + "logps/chosen": -171.48562622070312, + "logps/rejected": -512.77294921875, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.127462387084961, + "rewards/margins": 4.725662708282471, + "rewards/rejected": -5.853125095367432, + "step": 6382 + }, + { + "epoch": 0.99, + "learning_rate": 9.465783494994554e-06, + "logits/chosen": -2.881103277206421, + "logits/rejected": -2.8524115085601807, + "logps/chosen": -101.28580474853516, + "logps/rejected": -229.76504516601562, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6037299633026123, + "rewards/margins": 7.479036808013916, + "rewards/rejected": -9.08276653289795, + "step": 6383 + }, + { + "epoch": 0.99, + "learning_rate": 9.465050054463405e-06, + "logits/chosen": -1.9627410173416138, + "logits/rejected": -2.931741237640381, + "logps/chosen": -135.77696228027344, + "logps/rejected": -445.9021911621094, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1964924335479736, + "rewards/margins": 8.410991668701172, + "rewards/rejected": -9.607484817504883, + "step": 6384 + }, + { + "epoch": 0.99, + "learning_rate": 9.464316613932257e-06, + "logits/chosen": -3.047384023666382, + "logits/rejected": -2.0474793910980225, + "logps/chosen": -596.7432861328125, + "logps/rejected": -666.0281982421875, + "loss": 1.6505, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4697158336639404, + "rewards/margins": 0.8236144781112671, + "rewards/rejected": -3.293330430984497, + "step": 6385 + }, + { + "epoch": 0.99, + "learning_rate": 9.463583173401111e-06, + "logits/chosen": -2.2934751510620117, + "logits/rejected": -2.945824384689331, + "logps/chosen": -383.8029479980469, + "logps/rejected": -384.1810302734375, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7462403774261475, + "rewards/margins": 4.959092617034912, + "rewards/rejected": -6.7053327560424805, + "step": 6386 + }, + { + "epoch": 0.99, + "learning_rate": 9.462849732869963e-06, + "logits/chosen": -2.110459566116333, + "logits/rejected": -3.099825859069824, + "logps/chosen": -72.1607666015625, + "logps/rejected": -399.51055908203125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8445547223091125, + "rewards/margins": 7.335260391235352, + "rewards/rejected": -8.179815292358398, + "step": 6387 + }, + { + "epoch": 0.99, + "learning_rate": 9.462116292338815e-06, + "logits/chosen": -1.0265021324157715, + "logits/rejected": -2.783613920211792, + "logps/chosen": -86.77883911132812, + "logps/rejected": -379.81951904296875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7740683555603027, + "rewards/margins": 5.441957473754883, + "rewards/rejected": -8.216026306152344, + "step": 6388 + }, + { + "epoch": 0.99, + "learning_rate": 9.461382851807667e-06, + "logits/chosen": -2.947705030441284, + "logits/rejected": -1.7302722930908203, + "logps/chosen": -510.54827880859375, + "logps/rejected": -419.40576171875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.189223527908325, + "rewards/margins": 6.855406761169434, + "rewards/rejected": -9.04463005065918, + "step": 6389 + }, + { + "epoch": 0.99, + "learning_rate": 9.460649411276518e-06, + "logits/chosen": -1.97743558883667, + "logits/rejected": -3.0679962635040283, + "logps/chosen": -153.62774658203125, + "logps/rejected": -421.67242431640625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9038753509521484, + "rewards/margins": 5.846622467041016, + "rewards/rejected": -7.750497817993164, + "step": 6390 + }, + { + "epoch": 0.99, + "learning_rate": 9.459915970745372e-06, + "logits/chosen": -1.982263445854187, + "logits/rejected": -3.097834587097168, + "logps/chosen": -247.62545776367188, + "logps/rejected": -394.7275390625, + "loss": 3.7209, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.546736717224121, + "rewards/margins": 0.11634373664855957, + "rewards/rejected": -5.66308069229126, + "step": 6391 + }, + { + "epoch": 0.99, + "learning_rate": 9.459182530214224e-06, + "logits/chosen": -1.2087454795837402, + "logits/rejected": -2.8374035358428955, + "logps/chosen": -196.234375, + "logps/rejected": -404.7807922363281, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18595734238624573, + "rewards/margins": 7.382783889770508, + "rewards/rejected": -7.196825981140137, + "step": 6392 + }, + { + "epoch": 0.99, + "learning_rate": 9.458449089683076e-06, + "logits/chosen": -2.661052703857422, + "logits/rejected": -3.117382764816284, + "logps/chosen": -465.11248779296875, + "logps/rejected": -548.8359375, + "loss": 0.0608, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1176350116729736, + "rewards/margins": 3.0666751861572266, + "rewards/rejected": -4.184309959411621, + "step": 6393 + }, + { + "epoch": 0.99, + "learning_rate": 9.457715649151928e-06, + "logits/chosen": -2.5281407833099365, + "logits/rejected": -3.0636823177337646, + "logps/chosen": -220.12274169921875, + "logps/rejected": -317.0426940917969, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8679991364479065, + "rewards/margins": 5.249415874481201, + "rewards/rejected": -6.117414951324463, + "step": 6394 + }, + { + "epoch": 0.99, + "learning_rate": 9.45698220862078e-06, + "logits/chosen": -2.0801010131835938, + "logits/rejected": -3.24247670173645, + "logps/chosen": -282.3612365722656, + "logps/rejected": -494.7205810546875, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9642502069473267, + "rewards/margins": 4.7117156982421875, + "rewards/rejected": -5.675965785980225, + "step": 6395 + }, + { + "epoch": 0.99, + "learning_rate": 9.456248768089631e-06, + "logits/chosen": -2.9901955127716064, + "logits/rejected": -2.196606159210205, + "logps/chosen": -743.7793579101562, + "logps/rejected": -458.36492919921875, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9731319546699524, + "rewards/margins": 5.116425037384033, + "rewards/rejected": -6.089556694030762, + "step": 6396 + }, + { + "epoch": 0.99, + "learning_rate": 9.455515327558483e-06, + "logits/chosen": -1.6283961534500122, + "logits/rejected": -2.8709511756896973, + "logps/chosen": -70.16464233398438, + "logps/rejected": -488.55914306640625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7823858261108398, + "rewards/margins": 7.76223611831665, + "rewards/rejected": -9.544622421264648, + "step": 6397 + }, + { + "epoch": 1.0, + "learning_rate": 9.454781887027335e-06, + "logits/chosen": -1.6933465003967285, + "logits/rejected": -3.008685827255249, + "logps/chosen": -230.3267822265625, + "logps/rejected": -304.0933837890625, + "loss": 2.8705, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.077270984649658, + "rewards/margins": -1.0777111053466797, + "rewards/rejected": -2.9995598793029785, + "step": 6398 + }, + { + "epoch": 1.0, + "learning_rate": 9.454048446496187e-06, + "logits/chosen": -2.5312187671661377, + "logits/rejected": -2.9061341285705566, + "logps/chosen": -207.22262573242188, + "logps/rejected": -506.8125915527344, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8620246648788452, + "rewards/margins": 6.355381011962891, + "rewards/rejected": -8.217405319213867, + "step": 6399 + }, + { + "epoch": 1.0, + "learning_rate": 9.45331500596504e-06, + "logits/chosen": -2.611300230026245, + "logits/rejected": -2.8758556842803955, + "logps/chosen": -179.00894165039062, + "logps/rejected": -201.89239501953125, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2218341827392578, + "rewards/margins": 4.975636005401611, + "rewards/rejected": -6.197470188140869, + "step": 6400 + }, + { + "epoch": 1.0, + "learning_rate": 9.452581565433892e-06, + "logits/chosen": -2.8717639446258545, + "logits/rejected": -2.7457685470581055, + "logps/chosen": -128.5055389404297, + "logps/rejected": -211.85316467285156, + "loss": 0.0339, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1895272731781006, + "rewards/margins": 3.6212148666381836, + "rewards/rejected": -5.810742378234863, + "step": 6401 + }, + { + "epoch": 1.0, + "learning_rate": 9.451848124902744e-06, + "logits/chosen": -3.0658695697784424, + "logits/rejected": -3.1465134620666504, + "logps/chosen": -143.17718505859375, + "logps/rejected": -293.1206359863281, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0194942951202393, + "rewards/margins": 6.563529014587402, + "rewards/rejected": -7.583024024963379, + "step": 6402 + }, + { + "epoch": 1.0, + "learning_rate": 9.451114684371596e-06, + "logits/chosen": -3.0928845405578613, + "logits/rejected": -2.3192782402038574, + "logps/chosen": -178.9847412109375, + "logps/rejected": -115.87442016601562, + "loss": 0.1226, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7334275245666504, + "rewards/margins": 2.705650806427002, + "rewards/rejected": -5.439078330993652, + "step": 6403 + }, + { + "epoch": 1.0, + "learning_rate": 9.450381243840448e-06, + "logits/chosen": -2.658332109451294, + "logits/rejected": -2.887573003768921, + "logps/chosen": -84.92176818847656, + "logps/rejected": -178.24249267578125, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7631508111953735, + "rewards/margins": 5.144905090332031, + "rewards/rejected": -5.908055305480957, + "step": 6404 + }, + { + "epoch": 1.0, + "learning_rate": 9.4496478033093e-06, + "logits/chosen": -2.909548044204712, + "logits/rejected": -2.40360689163208, + "logps/chosen": -167.3360595703125, + "logps/rejected": -205.9615478515625, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.110461473464966, + "rewards/margins": 5.168453216552734, + "rewards/rejected": -7.278914928436279, + "step": 6405 + }, + { + "epoch": 1.0, + "learning_rate": 9.448914362778152e-06, + "logits/chosen": -3.1166582107543945, + "logits/rejected": -2.3253798484802246, + "logps/chosen": -429.3183288574219, + "logps/rejected": -331.7352294921875, + "loss": 0.0553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21494722366333008, + "rewards/margins": 4.4167022705078125, + "rewards/rejected": -4.631649971008301, + "step": 6406 + }, + { + "epoch": 1.0, + "learning_rate": 9.448180922247004e-06, + "logits/chosen": -2.5735812187194824, + "logits/rejected": -2.990727663040161, + "logps/chosen": -69.7055892944336, + "logps/rejected": -273.2742004394531, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5612567067146301, + "rewards/margins": 6.166739463806152, + "rewards/rejected": -6.727995872497559, + "step": 6407 + }, + { + "epoch": 1.0, + "learning_rate": 9.447447481715856e-06, + "logits/chosen": -1.5194733142852783, + "logits/rejected": -2.885479211807251, + "logps/chosen": -95.25411987304688, + "logps/rejected": -168.72518920898438, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7167299389839172, + "rewards/margins": 4.711302280426025, + "rewards/rejected": -5.428031921386719, + "step": 6408 + }, + { + "epoch": 1.0, + "learning_rate": 9.44671404118471e-06, + "logits/chosen": -2.776838541030884, + "logits/rejected": -3.1692981719970703, + "logps/chosen": -222.25363159179688, + "logps/rejected": -393.6810607910156, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7449047565460205, + "rewards/margins": 5.536629676818848, + "rewards/rejected": -7.281534194946289, + "step": 6409 + }, + { + "epoch": 1.0, + "learning_rate": 9.445980600653561e-06, + "logits/chosen": -2.933486223220825, + "logits/rejected": -2.8967559337615967, + "logps/chosen": -414.790771484375, + "logps/rejected": -531.8858642578125, + "loss": 3.0205, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.632265567779541, + "rewards/margins": 1.2598152160644531, + "rewards/rejected": -5.892080783843994, + "step": 6410 + }, + { + "epoch": 1.0, + "learning_rate": 9.445247160122413e-06, + "logits/chosen": -2.735024929046631, + "logits/rejected": -3.0732927322387695, + "logps/chosen": -99.06690216064453, + "logps/rejected": -303.70367431640625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2766873836517334, + "rewards/margins": 7.3947858810424805, + "rewards/rejected": -8.671473503112793, + "step": 6411 + }, + { + "epoch": 1.0, + "learning_rate": 9.444513719591265e-06, + "logits/chosen": -2.9259164333343506, + "logits/rejected": -2.3146183490753174, + "logps/chosen": -235.24839782714844, + "logps/rejected": -311.27093505859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07711944729089737, + "rewards/margins": 8.244937896728516, + "rewards/rejected": -8.322057723999023, + "step": 6412 + }, + { + "epoch": 1.0, + "learning_rate": 9.443780279060117e-06, + "logits/chosen": -3.2166216373443604, + "logits/rejected": -1.9920610189437866, + "logps/chosen": -515.29638671875, + "logps/rejected": -343.07928466796875, + "loss": 0.9975, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.213268995285034, + "rewards/margins": 4.55964469909668, + "rewards/rejected": -6.772913932800293, + "step": 6413 + }, + { + "epoch": 1.0, + "learning_rate": 9.443046838528969e-06, + "logits/chosen": -1.2751438617706299, + "logits/rejected": -3.037292957305908, + "logps/chosen": -220.65408325195312, + "logps/rejected": -608.923095703125, + "loss": 2.4452, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9601926803588867, + "rewards/margins": -0.43907809257507324, + "rewards/rejected": -3.5211143493652344, + "step": 6414 + }, + { + "epoch": 1.0, + "learning_rate": 9.44231339799782e-06, + "logits/chosen": -2.0329439640045166, + "logits/rejected": -2.660959482192993, + "logps/chosen": -220.75233459472656, + "logps/rejected": -375.53582763671875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1053040027618408, + "rewards/margins": 8.00291919708252, + "rewards/rejected": -9.108222961425781, + "step": 6415 + }, + { + "epoch": 1.0, + "learning_rate": 9.441579957466672e-06, + "logits/chosen": -2.864837169647217, + "logits/rejected": -3.036529064178467, + "logps/chosen": -308.8245849609375, + "logps/rejected": -364.33184814453125, + "loss": 5.3009, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.318079948425293, + "rewards/margins": -5.295875549316406, + "rewards/rejected": -0.022204220294952393, + "step": 6416 + }, + { + "epoch": 1.0, + "learning_rate": 9.440846516935524e-06, + "logits/chosen": -3.1637039184570312, + "logits/rejected": -2.9045629501342773, + "logps/chosen": -145.05320739746094, + "logps/rejected": -195.65274047851562, + "loss": 0.4722, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.149149179458618, + "rewards/margins": 1.6728547811508179, + "rewards/rejected": -3.8220038414001465, + "step": 6417 + }, + { + "epoch": 1.0, + "learning_rate": 9.440113076404378e-06, + "logits/chosen": -3.050767660140991, + "logits/rejected": -3.0820112228393555, + "logps/chosen": -106.83613586425781, + "logps/rejected": -249.01614379882812, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.378908395767212, + "rewards/margins": 5.030496597290039, + "rewards/rejected": -6.409404754638672, + "step": 6418 + }, + { + "epoch": 1.0, + "learning_rate": 9.43937963587323e-06, + "logits/chosen": -2.8180453777313232, + "logits/rejected": -2.4450645446777344, + "logps/chosen": -262.197509765625, + "logps/rejected": -567.8596801757812, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1939780712127686, + "rewards/margins": 8.165899276733398, + "rewards/rejected": -10.35987663269043, + "step": 6419 + }, + { + "epoch": 1.0, + "learning_rate": 9.438646195342083e-06, + "logits/chosen": -3.062378168106079, + "logits/rejected": -2.988694906234741, + "logps/chosen": -148.8890380859375, + "logps/rejected": -166.8584442138672, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1934151649475098, + "rewards/margins": 3.941648006439209, + "rewards/rejected": -6.135063171386719, + "step": 6420 + }, + { + "epoch": 1.0, + "learning_rate": 9.437912754810935e-06, + "logits/chosen": -2.776782751083374, + "logits/rejected": -3.1542904376983643, + "logps/chosen": -99.46110534667969, + "logps/rejected": -450.0007019042969, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0801670551300049, + "rewards/margins": 6.231449127197266, + "rewards/rejected": -7.31161642074585, + "step": 6421 + }, + { + "epoch": 1.0, + "learning_rate": 9.437179314279787e-06, + "logits/chosen": -3.172449827194214, + "logits/rejected": -3.27042818069458, + "logps/chosen": -15.404319763183594, + "logps/rejected": -86.6043701171875, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.49892258644104004, + "rewards/margins": 4.536954879760742, + "rewards/rejected": -5.035877704620361, + "step": 6422 + }, + { + "epoch": 1.0, + "learning_rate": 9.436445873748639e-06, + "logits/chosen": -2.314847469329834, + "logits/rejected": -3.2993276119232178, + "logps/chosen": -171.55154418945312, + "logps/rejected": -501.7010498046875, + "loss": 2.3075, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.4081621170043945, + "rewards/margins": 0.8162686824798584, + "rewards/rejected": -5.224431037902832, + "step": 6423 + }, + { + "epoch": 1.0, + "learning_rate": 9.43571243321749e-06, + "logits/chosen": -2.506309747695923, + "logits/rejected": -3.027618169784546, + "logps/chosen": -459.7822265625, + "logps/rejected": -428.83319091796875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0036796629428863525, + "rewards/margins": 6.526309967041016, + "rewards/rejected": -6.522630214691162, + "step": 6424 + }, + { + "epoch": 1.0, + "learning_rate": 9.434978992686343e-06, + "logits/chosen": -1.9253959655761719, + "logits/rejected": -2.8997838497161865, + "logps/chosen": -94.92422485351562, + "logps/rejected": -205.58395385742188, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2519803047180176, + "rewards/margins": 5.019776821136475, + "rewards/rejected": -6.271757125854492, + "step": 6425 + }, + { + "epoch": 1.0, + "learning_rate": 9.434245552155196e-06, + "logits/chosen": -2.8772242069244385, + "logits/rejected": -2.909634590148926, + "logps/chosen": -97.80813598632812, + "logps/rejected": -276.7656555175781, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8005746006965637, + "rewards/margins": 7.989311695098877, + "rewards/rejected": -8.789886474609375, + "step": 6426 + }, + { + "epoch": 1.0, + "learning_rate": 9.433512111624048e-06, + "logits/chosen": -2.928046703338623, + "logits/rejected": -2.923729419708252, + "logps/chosen": -197.11776733398438, + "logps/rejected": -180.58135986328125, + "loss": 4.0684, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.7574052810668945, + "rewards/margins": -2.131040096282959, + "rewards/rejected": -2.6263649463653564, + "step": 6427 + }, + { + "epoch": 1.0, + "learning_rate": 9.4327786710929e-06, + "logits/chosen": -2.7665185928344727, + "logits/rejected": -3.0532779693603516, + "logps/chosen": -140.42901611328125, + "logps/rejected": -279.28082275390625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1732052564620972, + "rewards/margins": 6.099740982055664, + "rewards/rejected": -7.272946357727051, + "step": 6428 + }, + { + "epoch": 1.0, + "learning_rate": 9.432045230561752e-06, + "logits/chosen": -2.0562503337860107, + "logits/rejected": -2.870471477508545, + "logps/chosen": -150.71177673339844, + "logps/rejected": -298.49664306640625, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6887092590332031, + "rewards/margins": 5.839355945587158, + "rewards/rejected": -7.528065204620361, + "step": 6429 + }, + { + "epoch": 1.0, + "learning_rate": 9.431311790030604e-06, + "logits/chosen": -1.4720690250396729, + "logits/rejected": -2.2795791625976562, + "logps/chosen": -265.8201904296875, + "logps/rejected": -496.8008117675781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.77716064453125, + "rewards/margins": 10.906119346618652, + "rewards/rejected": -12.683279991149902, + "step": 6430 + }, + { + "epoch": 1.0, + "learning_rate": 9.430578349499456e-06, + "logits/chosen": -2.855703353881836, + "logits/rejected": -2.6729965209960938, + "logps/chosen": -184.36793518066406, + "logps/rejected": -313.81640625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5581417083740234, + "rewards/margins": 6.594213008880615, + "rewards/rejected": -8.152355194091797, + "step": 6431 + }, + { + "epoch": 1.0, + "learning_rate": 9.429844908968307e-06, + "logits/chosen": -2.5033016204833984, + "logits/rejected": -1.248089075088501, + "logps/chosen": -205.88592529296875, + "logps/rejected": -122.7938461303711, + "loss": 1.0356, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7091891765594482, + "rewards/margins": 1.4003806114196777, + "rewards/rejected": -5.109569549560547, + "step": 6432 + }, + { + "epoch": 1.0, + "learning_rate": 9.42911146843716e-06, + "logits/chosen": -2.1028668880462646, + "logits/rejected": -3.064894676208496, + "logps/chosen": -133.68838500976562, + "logps/rejected": -405.97882080078125, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2963581085205078, + "rewards/margins": 5.676438331604004, + "rewards/rejected": -6.972796440124512, + "step": 6433 + }, + { + "epoch": 1.0, + "learning_rate": 9.428378027906011e-06, + "logits/chosen": -1.933091640472412, + "logits/rejected": -2.9619734287261963, + "logps/chosen": -123.091552734375, + "logps/rejected": -286.0731201171875, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8531337976455688, + "rewards/margins": 6.477269649505615, + "rewards/rejected": -7.3304033279418945, + "step": 6434 + }, + { + "epoch": 1.0, + "learning_rate": 9.427644587374865e-06, + "logits/chosen": -1.8697482347488403, + "logits/rejected": -2.2624595165252686, + "logps/chosen": -362.5534362792969, + "logps/rejected": -380.7596435546875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.242016553878784, + "rewards/margins": 8.236994743347168, + "rewards/rejected": -10.479011535644531, + "step": 6435 + }, + { + "epoch": 1.0, + "learning_rate": 9.426911146843717e-06, + "logits/chosen": -2.5640172958374023, + "logits/rejected": -3.0327534675598145, + "logps/chosen": -101.99266815185547, + "logps/rejected": -230.08599853515625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1863462924957275, + "rewards/margins": 5.958914756774902, + "rewards/rejected": -7.145261287689209, + "step": 6436 + }, + { + "epoch": 1.0, + "learning_rate": 9.426177706312569e-06, + "logits/chosen": -2.5167391300201416, + "logits/rejected": -2.905303716659546, + "logps/chosen": -220.21041870117188, + "logps/rejected": -355.809814453125, + "loss": 0.7622, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.530251979827881, + "rewards/margins": 2.7640321254730225, + "rewards/rejected": -5.294283866882324, + "step": 6437 + }, + { + "epoch": 1.0, + "learning_rate": 9.42544426578142e-06, + "logits/chosen": -2.7479567527770996, + "logits/rejected": -1.8447152376174927, + "logps/chosen": -188.75411987304688, + "logps/rejected": -237.79832458496094, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6472747921943665, + "rewards/margins": 6.286394119262695, + "rewards/rejected": -6.933669090270996, + "step": 6438 + }, + { + "epoch": 1.0, + "learning_rate": 9.424710825250272e-06, + "logits/chosen": -2.0680630207061768, + "logits/rejected": -3.016763210296631, + "logps/chosen": -319.611572265625, + "logps/rejected": -418.8427429199219, + "loss": 0.1057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9837784171104431, + "rewards/margins": 6.104439735412598, + "rewards/rejected": -7.088217735290527, + "step": 6439 + }, + { + "epoch": 1.0, + "learning_rate": 9.423977384719124e-06, + "logits/chosen": -1.7164835929870605, + "logits/rejected": -2.8177506923675537, + "logps/chosen": -246.69830322265625, + "logps/rejected": -280.9791564941406, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3764152526855469, + "rewards/margins": 5.173174858093262, + "rewards/rejected": -5.549590110778809, + "step": 6440 + }, + { + "epoch": 1.0, + "learning_rate": 9.423243944187976e-06, + "logits/chosen": -3.2136757373809814, + "logits/rejected": -2.9699556827545166, + "logps/chosen": -316.1868896484375, + "logps/rejected": -217.79554748535156, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9169209003448486, + "rewards/margins": 5.492935657501221, + "rewards/rejected": -6.409856796264648, + "step": 6441 + }, + { + "epoch": 1.0, + "learning_rate": 9.422510503656828e-06, + "logits/chosen": -2.4931211471557617, + "logits/rejected": -2.599745035171509, + "logps/chosen": -58.88410949707031, + "logps/rejected": -189.24131774902344, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4555853009223938, + "rewards/margins": 6.77703857421875, + "rewards/rejected": -7.232624053955078, + "step": 6442 + }, + { + "epoch": 1.0, + "learning_rate": 9.42177706312568e-06, + "logits/chosen": -2.76566743850708, + "logits/rejected": -2.5894012451171875, + "logps/chosen": -241.2933349609375, + "logps/rejected": -204.79449462890625, + "loss": 1.6777, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8137218952178955, + "rewards/margins": 2.9197840690612793, + "rewards/rejected": -5.733506202697754, + "step": 6443 + }, + { + "epoch": 1.0, + "learning_rate": 9.421043622594533e-06, + "logits/chosen": -3.0757830142974854, + "logits/rejected": -0.8338223099708557, + "logps/chosen": -305.0421447753906, + "logps/rejected": -115.13813781738281, + "loss": 0.5929, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7981605529785156, + "rewards/margins": 2.7172560691833496, + "rewards/rejected": -4.515416622161865, + "step": 6444 + }, + { + "epoch": 1.0, + "learning_rate": 9.420310182063385e-06, + "logits/chosen": -1.5815722942352295, + "logits/rejected": -2.945416212081909, + "logps/chosen": -139.91111755371094, + "logps/rejected": -388.1049499511719, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.539698839187622, + "rewards/margins": 7.75538969039917, + "rewards/rejected": -9.295088768005371, + "step": 6445 + }, + { + "epoch": 1.0, + "learning_rate": 9.419576741532237e-06, + "logits/chosen": -3.2021915912628174, + "logits/rejected": -2.9184460639953613, + "logps/chosen": -118.88926696777344, + "logps/rejected": -197.26986694335938, + "loss": 0.0281, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6923267245292664, + "rewards/margins": 3.8688182830810547, + "rewards/rejected": -4.561145305633545, + "step": 6446 + }, + { + "epoch": 1.0, + "learning_rate": 9.418843301001089e-06, + "logits/chosen": -2.3083887100219727, + "logits/rejected": -3.1347930431365967, + "logps/chosen": -145.00833129882812, + "logps/rejected": -244.06735229492188, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9763046503067017, + "rewards/margins": 5.897892951965332, + "rewards/rejected": -6.874197959899902, + "step": 6447 + }, + { + "epoch": 1.0, + "learning_rate": 9.418109860469941e-06, + "logits/chosen": -2.1511080265045166, + "logits/rejected": -2.6473500728607178, + "logps/chosen": -117.38983917236328, + "logps/rejected": -199.95082092285156, + "loss": 1.2589, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7248544692993164, + "rewards/margins": 2.9563565254211426, + "rewards/rejected": -6.681210994720459, + "step": 6448 + }, + { + "epoch": 1.0, + "learning_rate": 9.417376419938793e-06, + "logits/chosen": -2.5038340091705322, + "logits/rejected": -2.5780515670776367, + "logps/chosen": -232.9844970703125, + "logps/rejected": -143.76153564453125, + "loss": 0.384, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2537758350372314, + "rewards/margins": 3.869858980178833, + "rewards/rejected": -6.1236348152160645, + "step": 6449 + }, + { + "epoch": 1.0, + "learning_rate": 9.416642979407645e-06, + "logits/chosen": -1.9759079217910767, + "logits/rejected": -3.021634101867676, + "logps/chosen": -61.74269485473633, + "logps/rejected": -204.45095825195312, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4173274040222168, + "rewards/margins": 4.946061611175537, + "rewards/rejected": -6.363389015197754, + "step": 6450 + }, + { + "epoch": 1.0, + "learning_rate": 9.415909538876497e-06, + "logits/chosen": -2.353708505630493, + "logits/rejected": -2.7477529048919678, + "logps/chosen": -107.03549194335938, + "logps/rejected": -347.4691162109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6413108110427856, + "rewards/margins": 8.991117477416992, + "rewards/rejected": -9.632428169250488, + "step": 6451 + }, + { + "epoch": 1.0, + "learning_rate": 9.415176098345348e-06, + "logits/chosen": -2.0282649993896484, + "logits/rejected": -2.7580461502075195, + "logps/chosen": -127.00152587890625, + "logps/rejected": -265.8690490722656, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.706524670124054, + "rewards/margins": 6.5883026123046875, + "rewards/rejected": -7.294827461242676, + "step": 6452 + }, + { + "epoch": 1.0, + "learning_rate": 9.414442657814202e-06, + "logits/chosen": -2.922114133834839, + "logits/rejected": -1.8725682497024536, + "logps/chosen": -560.22119140625, + "logps/rejected": -342.53033447265625, + "loss": 0.0677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06606444716453552, + "rewards/margins": 4.202416896820068, + "rewards/rejected": -4.1363525390625, + "step": 6453 + }, + { + "epoch": 1.0, + "learning_rate": 9.413709217283054e-06, + "logits/chosen": -2.936959743499756, + "logits/rejected": -2.2469863891601562, + "logps/chosen": -689.0238037109375, + "logps/rejected": -492.17938232421875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1738784313201904, + "rewards/margins": 6.482515335083008, + "rewards/rejected": -7.656394004821777, + "step": 6454 + }, + { + "epoch": 1.0, + "learning_rate": 9.412975776751907e-06, + "logits/chosen": -3.1041347980499268, + "logits/rejected": -2.6936700344085693, + "logps/chosen": -355.2508850097656, + "logps/rejected": -230.04910278320312, + "loss": 0.9989, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.32290494441986084, + "rewards/margins": 3.751255989074707, + "rewards/rejected": -4.074161052703857, + "step": 6455 + }, + { + "epoch": 1.0, + "learning_rate": 9.41224233622076e-06, + "logits/chosen": -2.1058239936828613, + "logits/rejected": -2.9646055698394775, + "logps/chosen": -125.12095642089844, + "logps/rejected": -177.61146545410156, + "loss": 1.5025, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.356701135635376, + "rewards/margins": 1.4302968978881836, + "rewards/rejected": -4.7869977951049805, + "step": 6456 + }, + { + "epoch": 1.0, + "learning_rate": 9.411508895689611e-06, + "logits/chosen": -1.9375876188278198, + "logits/rejected": -2.841597318649292, + "logps/chosen": -90.11822509765625, + "logps/rejected": -311.33660888671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3059329986572266, + "rewards/margins": 7.995494842529297, + "rewards/rejected": -9.301427841186523, + "step": 6457 + }, + { + "epoch": 1.0, + "learning_rate": 9.410775455158463e-06, + "logits/chosen": -2.9310503005981445, + "logits/rejected": -1.8544130325317383, + "logps/chosen": -571.5857543945312, + "logps/rejected": -330.5654296875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2808700501918793, + "rewards/margins": 7.953841209411621, + "rewards/rejected": -8.234710693359375, + "step": 6458 + }, + { + "epoch": 1.0, + "learning_rate": 9.410042014627315e-06, + "logits/chosen": -1.7253305912017822, + "logits/rejected": -2.957223892211914, + "logps/chosen": -116.94570922851562, + "logps/rejected": -231.59695434570312, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6567490100860596, + "rewards/margins": 4.314279556274414, + "rewards/rejected": -5.9710283279418945, + "step": 6459 + }, + { + "epoch": 1.0, + "learning_rate": 9.409308574096167e-06, + "logits/chosen": -1.917067527770996, + "logits/rejected": -3.028033494949341, + "logps/chosen": -86.71775817871094, + "logps/rejected": -301.56976318359375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5725997686386108, + "rewards/margins": 6.756804466247559, + "rewards/rejected": -7.329403877258301, + "step": 6460 + }, + { + "epoch": 1.0, + "learning_rate": 9.408575133565019e-06, + "logits/chosen": -3.242112636566162, + "logits/rejected": -3.001235246658325, + "logps/chosen": -411.09954833984375, + "logps/rejected": -325.255126953125, + "loss": 0.028, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7290176153182983, + "rewards/margins": 6.857331275939941, + "rewards/rejected": -5.1283135414123535, + "step": 6461 + }, + { + "epoch": 1.0, + "learning_rate": 9.407841693033872e-06, + "logits/chosen": -1.8120449781417847, + "logits/rejected": -3.026381731033325, + "logps/chosen": -100.28565979003906, + "logps/rejected": -486.2162780761719, + "loss": 0.1383, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.972238302230835, + "rewards/margins": 5.516462802886963, + "rewards/rejected": -8.488700866699219, + "step": 6462 + }, + { + "epoch": 1.01, + "learning_rate": 9.407108252502724e-06, + "logits/chosen": -3.0224244594573975, + "logits/rejected": -2.672733783721924, + "logps/chosen": -119.73564910888672, + "logps/rejected": -133.10043334960938, + "loss": 2.9109, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.780041456222534, + "rewards/margins": -0.15415000915527344, + "rewards/rejected": -3.6258914470672607, + "step": 6463 + }, + { + "epoch": 1.01, + "learning_rate": 9.406374811971576e-06, + "logits/chosen": -1.8921042680740356, + "logits/rejected": -2.8009421825408936, + "logps/chosen": -131.45806884765625, + "logps/rejected": -229.4015350341797, + "loss": 0.181, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6015677452087402, + "rewards/margins": 2.709571361541748, + "rewards/rejected": -5.311139106750488, + "step": 6464 + }, + { + "epoch": 1.01, + "learning_rate": 9.405641371440428e-06, + "logits/chosen": -2.94553542137146, + "logits/rejected": -2.6781108379364014, + "logps/chosen": -285.02886962890625, + "logps/rejected": -191.14891052246094, + "loss": 0.7661, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.0580612421035767, + "rewards/margins": 2.8001437187194824, + "rewards/rejected": -3.8582050800323486, + "step": 6465 + }, + { + "epoch": 1.01, + "learning_rate": 9.40490793090928e-06, + "logits/chosen": -2.5696959495544434, + "logits/rejected": -2.296560764312744, + "logps/chosen": -319.05413818359375, + "logps/rejected": -296.63018798828125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029566951096057892, + "rewards/margins": 7.010492324829102, + "rewards/rejected": -6.9809250831604, + "step": 6466 + }, + { + "epoch": 1.01, + "learning_rate": 9.404174490378132e-06, + "logits/chosen": -2.9729626178741455, + "logits/rejected": -3.1650798320770264, + "logps/chosen": -33.6431999206543, + "logps/rejected": -154.8265380859375, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8393966555595398, + "rewards/margins": 5.150697231292725, + "rewards/rejected": -5.990094184875488, + "step": 6467 + }, + { + "epoch": 1.01, + "learning_rate": 9.403441049846984e-06, + "logits/chosen": -1.9659847021102905, + "logits/rejected": -2.7381751537323, + "logps/chosen": -118.8638916015625, + "logps/rejected": -198.5543212890625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4600021839141846, + "rewards/margins": 5.462432384490967, + "rewards/rejected": -6.9224348068237305, + "step": 6468 + }, + { + "epoch": 1.01, + "learning_rate": 9.402707609315835e-06, + "logits/chosen": -2.422224998474121, + "logits/rejected": -3.2453725337982178, + "logps/chosen": -227.63070678710938, + "logps/rejected": -558.0271606445312, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30897751450538635, + "rewards/margins": 6.6377458572387695, + "rewards/rejected": -6.946723461151123, + "step": 6469 + }, + { + "epoch": 1.01, + "learning_rate": 9.401974168784687e-06, + "logits/chosen": -2.953651189804077, + "logits/rejected": -2.8707029819488525, + "logps/chosen": -148.95956420898438, + "logps/rejected": -74.9451675415039, + "loss": 0.2905, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6847741603851318, + "rewards/margins": 3.4806461334228516, + "rewards/rejected": -5.165420055389404, + "step": 6470 + }, + { + "epoch": 1.01, + "learning_rate": 9.401240728253541e-06, + "logits/chosen": -2.5753045082092285, + "logits/rejected": -3.034909248352051, + "logps/chosen": -25.58556365966797, + "logps/rejected": -313.2694396972656, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8698399066925049, + "rewards/margins": 8.415304183959961, + "rewards/rejected": -9.28514289855957, + "step": 6471 + }, + { + "epoch": 1.01, + "learning_rate": 9.400507287722393e-06, + "logits/chosen": -1.80540931224823, + "logits/rejected": -2.885969400405884, + "logps/chosen": -291.55242919921875, + "logps/rejected": -354.1851806640625, + "loss": 0.0798, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0008453130722046, + "rewards/margins": 3.680018424987793, + "rewards/rejected": -4.680863857269287, + "step": 6472 + }, + { + "epoch": 1.01, + "learning_rate": 9.399773847191245e-06, + "logits/chosen": -2.9172990322113037, + "logits/rejected": -2.9013495445251465, + "logps/chosen": -494.85693359375, + "logps/rejected": -764.8935546875, + "loss": 0.0653, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9005424976348877, + "rewards/margins": 4.004862308502197, + "rewards/rejected": -6.905405044555664, + "step": 6473 + }, + { + "epoch": 1.01, + "learning_rate": 9.399040406660097e-06, + "logits/chosen": -2.8417768478393555, + "logits/rejected": -2.1854124069213867, + "logps/chosen": -443.7767333984375, + "logps/rejected": -317.7007751464844, + "loss": 0.5908, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8707706928253174, + "rewards/margins": 1.847090482711792, + "rewards/rejected": -3.7178611755371094, + "step": 6474 + }, + { + "epoch": 1.01, + "learning_rate": 9.398306966128948e-06, + "logits/chosen": -2.8756799697875977, + "logits/rejected": -3.0990514755249023, + "logps/chosen": -48.74555969238281, + "logps/rejected": -147.27713012695312, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.113510012626648, + "rewards/margins": 5.401823997497559, + "rewards/rejected": -6.515333652496338, + "step": 6475 + }, + { + "epoch": 1.01, + "learning_rate": 9.3975735255978e-06, + "logits/chosen": -3.095848321914673, + "logits/rejected": -2.5642917156219482, + "logps/chosen": -178.1845703125, + "logps/rejected": -169.1006622314453, + "loss": 1.8831, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.841860771179199, + "rewards/margins": 1.1417667865753174, + "rewards/rejected": -4.983627796173096, + "step": 6476 + }, + { + "epoch": 1.01, + "learning_rate": 9.396840085066652e-06, + "logits/chosen": -1.4795019626617432, + "logits/rejected": -3.0379865169525146, + "logps/chosen": -188.89430236816406, + "logps/rejected": -613.0264892578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8673878312110901, + "rewards/margins": 7.958940029144287, + "rewards/rejected": -8.82632827758789, + "step": 6477 + }, + { + "epoch": 1.01, + "learning_rate": 9.396106644535504e-06, + "logits/chosen": -2.3248775005340576, + "logits/rejected": -3.095980405807495, + "logps/chosen": -442.00146484375, + "logps/rejected": -545.06396484375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.810707926750183, + "rewards/margins": 7.422260284423828, + "rewards/rejected": -9.2329683303833, + "step": 6478 + }, + { + "epoch": 1.01, + "learning_rate": 9.395373204004356e-06, + "logits/chosen": -2.938251495361328, + "logits/rejected": -3.084383487701416, + "logps/chosen": -77.02641296386719, + "logps/rejected": -304.3032531738281, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9291880130767822, + "rewards/margins": 4.904651165008545, + "rewards/rejected": -6.833839416503906, + "step": 6479 + }, + { + "epoch": 1.01, + "learning_rate": 9.39463976347321e-06, + "logits/chosen": -3.2450625896453857, + "logits/rejected": -2.5715227127075195, + "logps/chosen": -784.8056030273438, + "logps/rejected": -482.17047119140625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1165224313735962, + "rewards/margins": 7.899385929107666, + "rewards/rejected": -6.782863616943359, + "step": 6480 + }, + { + "epoch": 1.01, + "learning_rate": 9.393906322942061e-06, + "logits/chosen": -3.046322822570801, + "logits/rejected": -1.561355710029602, + "logps/chosen": -447.88409423828125, + "logps/rejected": -262.0584411621094, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9589332938194275, + "rewards/margins": 5.62612247467041, + "rewards/rejected": -6.585055351257324, + "step": 6481 + }, + { + "epoch": 1.01, + "learning_rate": 9.393172882410913e-06, + "logits/chosen": -2.8197176456451416, + "logits/rejected": -3.0729315280914307, + "logps/chosen": -104.75553894042969, + "logps/rejected": -279.3174743652344, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7421629428863525, + "rewards/margins": 5.087829113006592, + "rewards/rejected": -5.829992294311523, + "step": 6482 + }, + { + "epoch": 1.01, + "learning_rate": 9.392439441879765e-06, + "logits/chosen": -2.1198413372039795, + "logits/rejected": -2.812952995300293, + "logps/chosen": -144.76263427734375, + "logps/rejected": -300.15863037109375, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.98097562789917, + "rewards/margins": 5.218141555786133, + "rewards/rejected": -8.199117660522461, + "step": 6483 + }, + { + "epoch": 1.01, + "learning_rate": 9.391706001348617e-06, + "logits/chosen": -2.8201255798339844, + "logits/rejected": -2.633875608444214, + "logps/chosen": -93.11317443847656, + "logps/rejected": -154.02392578125, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5411276817321777, + "rewards/margins": 4.269639015197754, + "rewards/rejected": -5.810766220092773, + "step": 6484 + }, + { + "epoch": 1.01, + "learning_rate": 9.390972560817469e-06, + "logits/chosen": -2.49836802482605, + "logits/rejected": -1.1870156526565552, + "logps/chosen": -279.4806823730469, + "logps/rejected": -121.89070129394531, + "loss": 0.488, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.644592046737671, + "rewards/margins": 2.9601895809173584, + "rewards/rejected": -5.604781627655029, + "step": 6485 + }, + { + "epoch": 1.01, + "learning_rate": 9.39023912028632e-06, + "logits/chosen": -2.7315642833709717, + "logits/rejected": -3.2450222969055176, + "logps/chosen": -371.7166748046875, + "logps/rejected": -397.8489990234375, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9967758059501648, + "rewards/margins": 5.762331008911133, + "rewards/rejected": -6.759106636047363, + "step": 6486 + }, + { + "epoch": 1.01, + "learning_rate": 9.389505679755174e-06, + "logits/chosen": -3.283409357070923, + "logits/rejected": -3.311552047729492, + "logps/chosen": -76.3998794555664, + "logps/rejected": -124.9170150756836, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0644456148147583, + "rewards/margins": 5.540316581726074, + "rewards/rejected": -6.604762077331543, + "step": 6487 + }, + { + "epoch": 1.01, + "learning_rate": 9.388772239224026e-06, + "logits/chosen": -2.73830246925354, + "logits/rejected": -3.0648975372314453, + "logps/chosen": -99.70794677734375, + "logps/rejected": -201.4022674560547, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7334462404251099, + "rewards/margins": 6.435462474822998, + "rewards/rejected": -8.168909072875977, + "step": 6488 + }, + { + "epoch": 1.01, + "learning_rate": 9.38803879869288e-06, + "logits/chosen": -2.0619239807128906, + "logits/rejected": -2.7819886207580566, + "logps/chosen": -63.82278823852539, + "logps/rejected": -404.841796875, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.999086380004883, + "rewards/margins": 8.147807121276855, + "rewards/rejected": -11.146893501281738, + "step": 6489 + }, + { + "epoch": 1.01, + "learning_rate": 9.387305358161732e-06, + "logits/chosen": -2.4691708087921143, + "logits/rejected": -3.0971856117248535, + "logps/chosen": -371.535400390625, + "logps/rejected": -439.6014099121094, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4071109890937805, + "rewards/margins": 6.5546770095825195, + "rewards/rejected": -6.961787700653076, + "step": 6490 + }, + { + "epoch": 1.01, + "learning_rate": 9.386571917630584e-06, + "logits/chosen": -3.0147550106048584, + "logits/rejected": -1.8762301206588745, + "logps/chosen": -430.1658020019531, + "logps/rejected": -360.0809326171875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016661453992128372, + "rewards/margins": 6.404088020324707, + "rewards/rejected": -6.420749187469482, + "step": 6491 + }, + { + "epoch": 1.01, + "learning_rate": 9.385838477099435e-06, + "logits/chosen": -3.0493414402008057, + "logits/rejected": -3.0101311206817627, + "logps/chosen": -98.27833557128906, + "logps/rejected": -85.78709411621094, + "loss": 0.0734, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.488245725631714, + "rewards/margins": 3.6121468544006348, + "rewards/rejected": -6.1003923416137695, + "step": 6492 + }, + { + "epoch": 1.01, + "learning_rate": 9.385105036568287e-06, + "logits/chosen": -0.6282303929328918, + "logits/rejected": -3.017690896987915, + "logps/chosen": -123.5777587890625, + "logps/rejected": -227.56790161132812, + "loss": 1.3958, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.808847427368164, + "rewards/margins": 0.4384719133377075, + "rewards/rejected": -4.247319221496582, + "step": 6493 + }, + { + "epoch": 1.01, + "learning_rate": 9.38437159603714e-06, + "logits/chosen": -3.097156524658203, + "logits/rejected": -3.1948564052581787, + "logps/chosen": -459.3401184082031, + "logps/rejected": -416.9037780761719, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4836136102676392, + "rewards/margins": 4.808546543121338, + "rewards/rejected": -6.2921600341796875, + "step": 6494 + }, + { + "epoch": 1.01, + "learning_rate": 9.383638155505991e-06, + "logits/chosen": -2.6619205474853516, + "logits/rejected": -2.3078486919403076, + "logps/chosen": -240.6157684326172, + "logps/rejected": -286.2912292480469, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9652374982833862, + "rewards/margins": 5.624886512756348, + "rewards/rejected": -6.590124130249023, + "step": 6495 + }, + { + "epoch": 1.01, + "learning_rate": 9.382904714974843e-06, + "logits/chosen": -3.0016539096832275, + "logits/rejected": -1.8547450304031372, + "logps/chosen": -474.97711181640625, + "logps/rejected": -248.6956024169922, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3083301782608032, + "rewards/margins": 6.35048770904541, + "rewards/rejected": -7.658818244934082, + "step": 6496 + }, + { + "epoch": 1.01, + "learning_rate": 9.382171274443695e-06, + "logits/chosen": -1.9936907291412354, + "logits/rejected": -2.9766623973846436, + "logps/chosen": -73.02200317382812, + "logps/rejected": -269.5260925292969, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.023465633392334, + "rewards/margins": 4.808860778808594, + "rewards/rejected": -7.832326889038086, + "step": 6497 + }, + { + "epoch": 1.01, + "learning_rate": 9.381437833912548e-06, + "logits/chosen": -2.8374204635620117, + "logits/rejected": -3.2449769973754883, + "logps/chosen": -169.95928955078125, + "logps/rejected": -262.43426513671875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1577820777893066, + "rewards/margins": 5.966811656951904, + "rewards/rejected": -7.124593734741211, + "step": 6498 + }, + { + "epoch": 1.01, + "learning_rate": 9.3807043933814e-06, + "logits/chosen": -2.781553030014038, + "logits/rejected": -2.398763656616211, + "logps/chosen": -90.80830383300781, + "logps/rejected": -190.62350463867188, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4567736387252808, + "rewards/margins": 5.2462615966796875, + "rewards/rejected": -6.703035354614258, + "step": 6499 + }, + { + "epoch": 1.01, + "learning_rate": 9.379970952850252e-06, + "logits/chosen": -2.433004140853882, + "logits/rejected": -2.912663698196411, + "logps/chosen": -122.49021911621094, + "logps/rejected": -138.65524291992188, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8100461959838867, + "rewards/margins": 4.173593997955322, + "rewards/rejected": -6.983640670776367, + "step": 6500 + }, + { + "epoch": 1.01, + "learning_rate": 9.379237512319104e-06, + "logits/chosen": -2.8266310691833496, + "logits/rejected": -2.8309743404388428, + "logps/chosen": -115.51094818115234, + "logps/rejected": -205.1836395263672, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.052745819091797, + "rewards/margins": 6.157750129699707, + "rewards/rejected": -8.210495948791504, + "step": 6501 + }, + { + "epoch": 1.01, + "learning_rate": 9.378504071787956e-06, + "logits/chosen": -3.0324625968933105, + "logits/rejected": -2.942169427871704, + "logps/chosen": -130.7449951171875, + "logps/rejected": -277.22381591796875, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6636757850646973, + "rewards/margins": 4.775074005126953, + "rewards/rejected": -6.43874979019165, + "step": 6502 + }, + { + "epoch": 1.01, + "learning_rate": 9.377770631256808e-06, + "logits/chosen": -3.1357614994049072, + "logits/rejected": -1.6460802555084229, + "logps/chosen": -300.7294616699219, + "logps/rejected": -158.99966430664062, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7508987188339233, + "rewards/margins": 4.862287998199463, + "rewards/rejected": -6.613186836242676, + "step": 6503 + }, + { + "epoch": 1.01, + "learning_rate": 9.37703719072566e-06, + "logits/chosen": -3.130399703979492, + "logits/rejected": -2.811275005340576, + "logps/chosen": -270.20892333984375, + "logps/rejected": -186.55093383789062, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.209516167640686, + "rewards/margins": 6.954984188079834, + "rewards/rejected": -8.16450023651123, + "step": 6504 + }, + { + "epoch": 1.01, + "learning_rate": 9.376303750194512e-06, + "logits/chosen": -2.6743805408477783, + "logits/rejected": -3.158328056335449, + "logps/chosen": -100.49281311035156, + "logps/rejected": -319.1980285644531, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.08051335811615, + "rewards/margins": 4.92570686340332, + "rewards/rejected": -6.00622034072876, + "step": 6505 + }, + { + "epoch": 1.01, + "learning_rate": 9.375570309663363e-06, + "logits/chosen": -0.5969315767288208, + "logits/rejected": -3.066645860671997, + "logps/chosen": -93.49984741210938, + "logps/rejected": -442.21075439453125, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.005563497543335, + "rewards/margins": 6.060815334320068, + "rewards/rejected": -9.06637954711914, + "step": 6506 + }, + { + "epoch": 1.01, + "learning_rate": 9.374836869132217e-06, + "logits/chosen": -3.1300241947174072, + "logits/rejected": -2.2285687923431396, + "logps/chosen": -279.16119384765625, + "logps/rejected": -149.95033264160156, + "loss": 1.5553, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.192309617996216, + "rewards/margins": 0.938313364982605, + "rewards/rejected": -4.130622863769531, + "step": 6507 + }, + { + "epoch": 1.01, + "learning_rate": 9.374103428601069e-06, + "logits/chosen": -1.4325754642486572, + "logits/rejected": -2.7769582271575928, + "logps/chosen": -260.5143737792969, + "logps/rejected": -294.8643798828125, + "loss": 0.2448, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7038383483886719, + "rewards/margins": 3.5268144607543945, + "rewards/rejected": -5.230652809143066, + "step": 6508 + }, + { + "epoch": 1.01, + "learning_rate": 9.37336998806992e-06, + "logits/chosen": -2.7267770767211914, + "logits/rejected": -2.951533794403076, + "logps/chosen": -254.33041381835938, + "logps/rejected": -361.4361572265625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5524444580078125, + "rewards/margins": 6.63460636138916, + "rewards/rejected": -8.187050819396973, + "step": 6509 + }, + { + "epoch": 1.01, + "learning_rate": 9.372636547538773e-06, + "logits/chosen": -2.997344493865967, + "logits/rejected": -2.1422626972198486, + "logps/chosen": -251.41427612304688, + "logps/rejected": -179.18728637695312, + "loss": 0.068, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8792030811309814, + "rewards/margins": 4.022280693054199, + "rewards/rejected": -5.901483535766602, + "step": 6510 + }, + { + "epoch": 1.01, + "learning_rate": 9.371903107007625e-06, + "logits/chosen": -3.2148172855377197, + "logits/rejected": -3.210602045059204, + "logps/chosen": -262.1007385253906, + "logps/rejected": -400.37542724609375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.49868422746658325, + "rewards/margins": 6.294179439544678, + "rewards/rejected": -6.792863845825195, + "step": 6511 + }, + { + "epoch": 1.01, + "learning_rate": 9.371169666476476e-06, + "logits/chosen": -2.2159671783447266, + "logits/rejected": -3.0266599655151367, + "logps/chosen": -79.67568969726562, + "logps/rejected": -355.6741027832031, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0784223079681396, + "rewards/margins": 6.465778350830078, + "rewards/rejected": -8.544200897216797, + "step": 6512 + }, + { + "epoch": 1.01, + "learning_rate": 9.370436225945328e-06, + "logits/chosen": -2.7538399696350098, + "logits/rejected": -1.5640233755111694, + "logps/chosen": -401.12652587890625, + "logps/rejected": -248.37088012695312, + "loss": 1.905, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1313934326171875, + "rewards/margins": 1.2485275268554688, + "rewards/rejected": -4.379920959472656, + "step": 6513 + }, + { + "epoch": 1.01, + "learning_rate": 9.36970278541418e-06, + "logits/chosen": -1.3846877813339233, + "logits/rejected": -3.080822467803955, + "logps/chosen": -261.68695068359375, + "logps/rejected": -747.2848510742188, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2879146337509155, + "rewards/margins": 7.591447830200195, + "rewards/rejected": -8.879362106323242, + "step": 6514 + }, + { + "epoch": 1.01, + "learning_rate": 9.368969344883034e-06, + "logits/chosen": -3.1238279342651367, + "logits/rejected": -2.6307218074798584, + "logps/chosen": -266.03277587890625, + "logps/rejected": -205.5369415283203, + "loss": 1.4369, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.960890293121338, + "rewards/margins": 1.4245764017105103, + "rewards/rejected": -5.385466575622559, + "step": 6515 + }, + { + "epoch": 1.01, + "learning_rate": 9.368235904351886e-06, + "logits/chosen": -2.9735429286956787, + "logits/rejected": -1.4661376476287842, + "logps/chosen": -287.71820068359375, + "logps/rejected": -200.8189697265625, + "loss": 0.0848, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3964520692825317, + "rewards/margins": 2.820744037628174, + "rewards/rejected": -4.217195987701416, + "step": 6516 + }, + { + "epoch": 1.01, + "learning_rate": 9.367502463820737e-06, + "logits/chosen": -3.105095386505127, + "logits/rejected": -2.492032527923584, + "logps/chosen": -284.115234375, + "logps/rejected": -66.5220947265625, + "loss": 1.9783, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.482394218444824, + "rewards/margins": -1.8153438568115234, + "rewards/rejected": -2.66705060005188, + "step": 6517 + }, + { + "epoch": 1.01, + "learning_rate": 9.36676902328959e-06, + "logits/chosen": -1.776910662651062, + "logits/rejected": -2.8818459510803223, + "logps/chosen": -67.64315795898438, + "logps/rejected": -289.17840576171875, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9075039625167847, + "rewards/margins": 3.828535556793213, + "rewards/rejected": -5.736039638519287, + "step": 6518 + }, + { + "epoch": 1.01, + "learning_rate": 9.366035582758441e-06, + "logits/chosen": -2.605213165283203, + "logits/rejected": -2.838191509246826, + "logps/chosen": -336.99273681640625, + "logps/rejected": -256.388671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6450127363204956, + "rewards/margins": 7.243679046630859, + "rewards/rejected": -8.888690948486328, + "step": 6519 + }, + { + "epoch": 1.01, + "learning_rate": 9.365302142227293e-06, + "logits/chosen": -3.0571670532226562, + "logits/rejected": -2.7395012378692627, + "logps/chosen": -246.6778564453125, + "logps/rejected": -309.57696533203125, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6338157653808594, + "rewards/margins": 6.307578086853027, + "rewards/rejected": -7.941393852233887, + "step": 6520 + }, + { + "epoch": 1.01, + "learning_rate": 9.364568701696147e-06, + "logits/chosen": -3.182154893875122, + "logits/rejected": -2.535651445388794, + "logps/chosen": -507.92352294921875, + "logps/rejected": -374.7762756347656, + "loss": 0.2268, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.410806179046631, + "rewards/margins": 2.5117030143737793, + "rewards/rejected": -4.92250919342041, + "step": 6521 + }, + { + "epoch": 1.01, + "learning_rate": 9.363835261164999e-06, + "logits/chosen": -2.074507713317871, + "logits/rejected": -3.094451904296875, + "logps/chosen": -388.85455322265625, + "logps/rejected": -452.7828369140625, + "loss": 0.1096, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8032256364822388, + "rewards/margins": 3.3988564014434814, + "rewards/rejected": -5.20208215713501, + "step": 6522 + }, + { + "epoch": 1.01, + "learning_rate": 9.36310182063385e-06, + "logits/chosen": -2.4004452228546143, + "logits/rejected": -2.856778383255005, + "logps/chosen": -482.7469482421875, + "logps/rejected": -598.7510986328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5612953305244446, + "rewards/margins": 7.742815971374512, + "rewards/rejected": -8.30411148071289, + "step": 6523 + }, + { + "epoch": 1.01, + "learning_rate": 9.362368380102704e-06, + "logits/chosen": -2.5978426933288574, + "logits/rejected": -3.081498622894287, + "logps/chosen": -83.69024658203125, + "logps/rejected": -253.97634887695312, + "loss": 0.7026, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.881500244140625, + "rewards/margins": 1.8878085613250732, + "rewards/rejected": -5.769309043884277, + "step": 6524 + }, + { + "epoch": 1.01, + "learning_rate": 9.361634939571556e-06, + "logits/chosen": -2.3465723991394043, + "logits/rejected": -3.0348074436187744, + "logps/chosen": -389.7762451171875, + "logps/rejected": -509.1829833984375, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7023966312408447, + "rewards/margins": 6.3491058349609375, + "rewards/rejected": -8.051502227783203, + "step": 6525 + }, + { + "epoch": 1.01, + "learning_rate": 9.360901499040408e-06, + "logits/chosen": -2.8906195163726807, + "logits/rejected": -2.847968339920044, + "logps/chosen": -125.12205505371094, + "logps/rejected": -155.30467224121094, + "loss": 0.7086, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.052083492279053, + "rewards/margins": 0.6528928279876709, + "rewards/rejected": -5.7049760818481445, + "step": 6526 + }, + { + "epoch": 1.02, + "learning_rate": 9.36016805850926e-06, + "logits/chosen": -1.436404824256897, + "logits/rejected": -2.5941245555877686, + "logps/chosen": -193.35665893554688, + "logps/rejected": -351.8099670410156, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9412285089492798, + "rewards/margins": 7.542934417724609, + "rewards/rejected": -8.484163284301758, + "step": 6527 + }, + { + "epoch": 1.02, + "learning_rate": 9.359434617978112e-06, + "logits/chosen": -2.791170835494995, + "logits/rejected": -3.0684244632720947, + "logps/chosen": -686.0691528320312, + "logps/rejected": -618.1649169921875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8919311761856079, + "rewards/margins": 6.2285990715026855, + "rewards/rejected": -5.336668014526367, + "step": 6528 + }, + { + "epoch": 1.02, + "learning_rate": 9.358701177446963e-06, + "logits/chosen": -2.9237220287323, + "logits/rejected": -2.6014413833618164, + "logps/chosen": -160.63897705078125, + "logps/rejected": -223.4855194091797, + "loss": 0.1131, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1035234928131104, + "rewards/margins": 3.042487621307373, + "rewards/rejected": -6.1460113525390625, + "step": 6529 + }, + { + "epoch": 1.02, + "learning_rate": 9.357967736915815e-06, + "logits/chosen": -2.050943374633789, + "logits/rejected": -3.067608594894409, + "logps/chosen": -63.04120635986328, + "logps/rejected": -208.60894775390625, + "loss": 0.3369, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.557270050048828, + "rewards/margins": 2.74131178855896, + "rewards/rejected": -7.298582077026367, + "step": 6530 + }, + { + "epoch": 1.02, + "learning_rate": 9.357234296384667e-06, + "logits/chosen": -1.8304107189178467, + "logits/rejected": -2.5566983222961426, + "logps/chosen": -106.09214782714844, + "logps/rejected": -261.30218505859375, + "loss": 0.0897, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.380830764770508, + "rewards/margins": 5.7138776779174805, + "rewards/rejected": -9.094708442687988, + "step": 6531 + }, + { + "epoch": 1.02, + "learning_rate": 9.356500855853519e-06, + "logits/chosen": -2.4956562519073486, + "logits/rejected": -2.8920822143554688, + "logps/chosen": -158.309326171875, + "logps/rejected": -259.9871826171875, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5896377563476562, + "rewards/margins": 6.393649578094482, + "rewards/rejected": -8.98328685760498, + "step": 6532 + }, + { + "epoch": 1.02, + "learning_rate": 9.355767415322373e-06, + "logits/chosen": -1.5808812379837036, + "logits/rejected": -3.0470497608184814, + "logps/chosen": -49.96037292480469, + "logps/rejected": -301.63250732421875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6453068256378174, + "rewards/margins": 6.194581985473633, + "rewards/rejected": -8.839889526367188, + "step": 6533 + }, + { + "epoch": 1.02, + "learning_rate": 9.355033974791224e-06, + "logits/chosen": -3.200161933898926, + "logits/rejected": -2.305730104446411, + "logps/chosen": -313.26373291015625, + "logps/rejected": -171.027587890625, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4708520174026489, + "rewards/margins": 3.8068923950195312, + "rewards/rejected": -4.277744293212891, + "step": 6534 + }, + { + "epoch": 1.02, + "learning_rate": 9.354300534260076e-06, + "logits/chosen": -1.8930755853652954, + "logits/rejected": -2.862165927886963, + "logps/chosen": -120.05332946777344, + "logps/rejected": -356.8844299316406, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1205618381500244, + "rewards/margins": 6.7333664894104, + "rewards/rejected": -8.853928565979004, + "step": 6535 + }, + { + "epoch": 1.02, + "learning_rate": 9.353567093728928e-06, + "logits/chosen": -1.788398265838623, + "logits/rejected": -2.6465346813201904, + "logps/chosen": -169.67283630371094, + "logps/rejected": -414.9405212402344, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0352089405059814, + "rewards/margins": 6.307441234588623, + "rewards/rejected": -9.342649459838867, + "step": 6536 + }, + { + "epoch": 1.02, + "learning_rate": 9.35283365319778e-06, + "logits/chosen": -3.205333709716797, + "logits/rejected": -2.5090644359588623, + "logps/chosen": -535.1802368164062, + "logps/rejected": -311.63421630859375, + "loss": 0.3509, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.9960637092590332, + "rewards/margins": 3.3555917739868164, + "rewards/rejected": -5.35165548324585, + "step": 6537 + }, + { + "epoch": 1.02, + "learning_rate": 9.352100212666632e-06, + "logits/chosen": -3.1442227363586426, + "logits/rejected": -2.476318120956421, + "logps/chosen": -211.79833984375, + "logps/rejected": -199.69520568847656, + "loss": 0.3939, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.540566444396973, + "rewards/margins": 1.3093593120574951, + "rewards/rejected": -5.849925518035889, + "step": 6538 + }, + { + "epoch": 1.02, + "learning_rate": 9.351366772135484e-06, + "logits/chosen": -3.2841062545776367, + "logits/rejected": -3.0874874591827393, + "logps/chosen": -536.860595703125, + "logps/rejected": -431.51544189453125, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.192785382270813, + "rewards/margins": 4.342831611633301, + "rewards/rejected": -5.535616874694824, + "step": 6539 + }, + { + "epoch": 1.02, + "learning_rate": 9.350633331604336e-06, + "logits/chosen": -2.8501369953155518, + "logits/rejected": -2.9094841480255127, + "logps/chosen": -192.33139038085938, + "logps/rejected": -280.2981872558594, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9766891598701477, + "rewards/margins": 4.351016521453857, + "rewards/rejected": -5.327705383300781, + "step": 6540 + }, + { + "epoch": 1.02, + "learning_rate": 9.349899891073188e-06, + "logits/chosen": -1.9709943532943726, + "logits/rejected": -3.3055574893951416, + "logps/chosen": -80.17390441894531, + "logps/rejected": -352.9354248046875, + "loss": 0.1316, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8238275051116943, + "rewards/margins": 2.0150413513183594, + "rewards/rejected": -5.838869094848633, + "step": 6541 + }, + { + "epoch": 1.02, + "learning_rate": 9.349166450542041e-06, + "logits/chosen": -3.2288551330566406, + "logits/rejected": -3.1532764434814453, + "logps/chosen": -356.5921630859375, + "logps/rejected": -488.6516418457031, + "loss": 0.0832, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.011244773864746, + "rewards/margins": 2.6008682250976562, + "rewards/rejected": -5.612112998962402, + "step": 6542 + }, + { + "epoch": 1.02, + "learning_rate": 9.348433010010893e-06, + "logits/chosen": -2.8657727241516113, + "logits/rejected": -3.003300428390503, + "logps/chosen": -46.11761474609375, + "logps/rejected": -174.2233123779297, + "loss": 0.1121, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8163399696350098, + "rewards/margins": 4.494363307952881, + "rewards/rejected": -8.31070327758789, + "step": 6543 + }, + { + "epoch": 1.02, + "learning_rate": 9.347699569479745e-06, + "logits/chosen": -2.9210824966430664, + "logits/rejected": -2.7499449253082275, + "logps/chosen": -163.82376098632812, + "logps/rejected": -347.0396728515625, + "loss": 0.1127, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2689545154571533, + "rewards/margins": 4.573237895965576, + "rewards/rejected": -7.842192649841309, + "step": 6544 + }, + { + "epoch": 1.02, + "learning_rate": 9.346966128948597e-06, + "logits/chosen": -3.291144371032715, + "logits/rejected": -3.2607572078704834, + "logps/chosen": -72.29707336425781, + "logps/rejected": -129.53334045410156, + "loss": 3.3168, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.631402492523193, + "rewards/margins": -0.97916579246521, + "rewards/rejected": -3.6522369384765625, + "step": 6545 + }, + { + "epoch": 1.02, + "learning_rate": 9.346232688417449e-06, + "logits/chosen": -2.7815399169921875, + "logits/rejected": -2.8757805824279785, + "logps/chosen": -91.62934112548828, + "logps/rejected": -150.88619995117188, + "loss": 2.0545, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.333352088928223, + "rewards/margins": 1.4416985511779785, + "rewards/rejected": -8.77505111694336, + "step": 6546 + }, + { + "epoch": 1.02, + "learning_rate": 9.3454992478863e-06, + "logits/chosen": -3.1143736839294434, + "logits/rejected": -1.8710371255874634, + "logps/chosen": -387.95849609375, + "logps/rejected": -283.94647216796875, + "loss": 0.1186, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.194889783859253, + "rewards/margins": 6.592953681945801, + "rewards/rejected": -8.787843704223633, + "step": 6547 + }, + { + "epoch": 1.02, + "learning_rate": 9.344765807355152e-06, + "logits/chosen": -2.933953046798706, + "logits/rejected": -2.1978328227996826, + "logps/chosen": -122.68428039550781, + "logps/rejected": -170.66879272460938, + "loss": 0.0254, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2892725467681885, + "rewards/margins": 5.883368492126465, + "rewards/rejected": -8.172640800476074, + "step": 6548 + }, + { + "epoch": 1.02, + "learning_rate": 9.344032366824004e-06, + "logits/chosen": -3.024600028991699, + "logits/rejected": -3.0851309299468994, + "logps/chosen": -94.15306091308594, + "logps/rejected": -215.3385772705078, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1320643424987793, + "rewards/margins": 3.938725709915161, + "rewards/rejected": -7.0707902908325195, + "step": 6549 + }, + { + "epoch": 1.02, + "learning_rate": 9.343298926292856e-06, + "logits/chosen": -2.8965904712677, + "logits/rejected": -2.147209882736206, + "logps/chosen": -322.4168701171875, + "logps/rejected": -262.6751708984375, + "loss": 1.6748, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9464735984802246, + "rewards/margins": 1.9833850860595703, + "rewards/rejected": -5.929858684539795, + "step": 6550 + }, + { + "epoch": 1.02, + "learning_rate": 9.34256548576171e-06, + "logits/chosen": -2.701465368270874, + "logits/rejected": -2.8024611473083496, + "logps/chosen": -402.9687805175781, + "logps/rejected": -381.0283203125, + "loss": 2.3444, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6996872425079346, + "rewards/margins": 1.979567050933838, + "rewards/rejected": -5.679254055023193, + "step": 6551 + }, + { + "epoch": 1.02, + "learning_rate": 9.341832045230562e-06, + "logits/chosen": -2.548410415649414, + "logits/rejected": -2.9229323863983154, + "logps/chosen": -79.10159301757812, + "logps/rejected": -236.94491577148438, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8475499153137207, + "rewards/margins": 6.368451118469238, + "rewards/rejected": -9.216001510620117, + "step": 6552 + }, + { + "epoch": 1.02, + "learning_rate": 9.341098604699414e-06, + "logits/chosen": -2.0785186290740967, + "logits/rejected": -2.815786600112915, + "logps/chosen": -143.002685546875, + "logps/rejected": -218.27194213867188, + "loss": 0.3073, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7404632568359375, + "rewards/margins": 2.9984817504882812, + "rewards/rejected": -4.738945007324219, + "step": 6553 + }, + { + "epoch": 1.02, + "learning_rate": 9.340365164168265e-06, + "logits/chosen": -2.3525166511535645, + "logits/rejected": -2.9405245780944824, + "logps/chosen": -112.27252960205078, + "logps/rejected": -330.00274658203125, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5784804821014404, + "rewards/margins": 4.042041301727295, + "rewards/rejected": -6.620521545410156, + "step": 6554 + }, + { + "epoch": 1.02, + "learning_rate": 9.339631723637119e-06, + "logits/chosen": -3.0984232425689697, + "logits/rejected": -3.100118637084961, + "logps/chosen": -158.60757446289062, + "logps/rejected": -254.3324432373047, + "loss": 0.5166, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6344234943389893, + "rewards/margins": 3.9677658081054688, + "rewards/rejected": -6.602189540863037, + "step": 6555 + }, + { + "epoch": 1.02, + "learning_rate": 9.338898283105971e-06, + "logits/chosen": -2.680856943130493, + "logits/rejected": -2.222642660140991, + "logps/chosen": -252.19735717773438, + "logps/rejected": -336.0857849121094, + "loss": 0.1848, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.554370164871216, + "rewards/margins": 4.403522968292236, + "rewards/rejected": -6.957893371582031, + "step": 6556 + }, + { + "epoch": 1.02, + "learning_rate": 9.338164842574823e-06, + "logits/chosen": -2.6725218296051025, + "logits/rejected": -1.0532348155975342, + "logps/chosen": -388.11376953125, + "logps/rejected": -285.3446350097656, + "loss": 0.1762, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9167542457580566, + "rewards/margins": 4.621652126312256, + "rewards/rejected": -7.5384063720703125, + "step": 6557 + }, + { + "epoch": 1.02, + "learning_rate": 9.337431402043675e-06, + "logits/chosen": -2.360818862915039, + "logits/rejected": -3.004518747329712, + "logps/chosen": -621.8323974609375, + "logps/rejected": -458.9056396484375, + "loss": 0.9749, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.421339511871338, + "rewards/margins": 2.1028969287872314, + "rewards/rejected": -5.524236679077148, + "step": 6558 + }, + { + "epoch": 1.02, + "learning_rate": 9.336697961512527e-06, + "logits/chosen": -2.4226484298706055, + "logits/rejected": -2.717426300048828, + "logps/chosen": -140.79266357421875, + "logps/rejected": -150.05307006835938, + "loss": 2.2072, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5923473834991455, + "rewards/margins": 0.8726317882537842, + "rewards/rejected": -4.46497917175293, + "step": 6559 + }, + { + "epoch": 1.02, + "learning_rate": 9.33596452098138e-06, + "logits/chosen": -2.9755001068115234, + "logits/rejected": -1.681967854499817, + "logps/chosen": -294.8371887207031, + "logps/rejected": -262.03094482421875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2341556549072266, + "rewards/margins": 6.1427202224731445, + "rewards/rejected": -7.376875877380371, + "step": 6560 + }, + { + "epoch": 1.02, + "learning_rate": 9.335231080450232e-06, + "logits/chosen": -1.9087456464767456, + "logits/rejected": -2.700481653213501, + "logps/chosen": -98.48341369628906, + "logps/rejected": -341.62109375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0038902759552002, + "rewards/margins": 7.536673069000244, + "rewards/rejected": -8.540563583374023, + "step": 6561 + }, + { + "epoch": 1.02, + "learning_rate": 9.334497639919084e-06, + "logits/chosen": -2.9633195400238037, + "logits/rejected": -2.315776824951172, + "logps/chosen": -181.02420043945312, + "logps/rejected": -247.27426147460938, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5734093189239502, + "rewards/margins": 5.911656856536865, + "rewards/rejected": -7.4850664138793945, + "step": 6562 + }, + { + "epoch": 1.02, + "learning_rate": 9.333764199387936e-06, + "logits/chosen": -2.972506284713745, + "logits/rejected": -2.4068446159362793, + "logps/chosen": -141.15225219726562, + "logps/rejected": -139.46766662597656, + "loss": 1.0814, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.827338218688965, + "rewards/margins": 1.9408537149429321, + "rewards/rejected": -4.768191814422607, + "step": 6563 + }, + { + "epoch": 1.02, + "learning_rate": 9.333030758856788e-06, + "logits/chosen": -2.295642852783203, + "logits/rejected": -2.842125415802002, + "logps/chosen": -107.3184585571289, + "logps/rejected": -342.575927734375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6053019762039185, + "rewards/margins": 6.489298343658447, + "rewards/rejected": -8.094600677490234, + "step": 6564 + }, + { + "epoch": 1.02, + "learning_rate": 9.33229731832564e-06, + "logits/chosen": -2.1380839347839355, + "logits/rejected": -2.9916625022888184, + "logps/chosen": -188.48504638671875, + "logps/rejected": -391.55792236328125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2010674476623535, + "rewards/margins": 7.0667877197265625, + "rewards/rejected": -9.267855644226074, + "step": 6565 + }, + { + "epoch": 1.02, + "learning_rate": 9.331563877794491e-06, + "logits/chosen": -2.5427143573760986, + "logits/rejected": -2.9114816188812256, + "logps/chosen": -109.07899475097656, + "logps/rejected": -264.58697509765625, + "loss": 0.0337, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.248420238494873, + "rewards/margins": 3.894254207611084, + "rewards/rejected": -7.142674446105957, + "step": 6566 + }, + { + "epoch": 1.02, + "learning_rate": 9.330830437263343e-06, + "logits/chosen": -2.8379483222961426, + "logits/rejected": -2.1767051219940186, + "logps/chosen": -117.75421905517578, + "logps/rejected": -238.59866333007812, + "loss": 0.2159, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.661616086959839, + "rewards/margins": 5.5014519691467285, + "rewards/rejected": -9.163068771362305, + "step": 6567 + }, + { + "epoch": 1.02, + "learning_rate": 9.330096996732195e-06, + "logits/chosen": -2.3618595600128174, + "logits/rejected": -2.7700722217559814, + "logps/chosen": -319.3789367675781, + "logps/rejected": -335.8224182128906, + "loss": 0.4301, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.921889543533325, + "rewards/margins": 4.9111714363098145, + "rewards/rejected": -7.8330607414245605, + "step": 6568 + }, + { + "epoch": 1.02, + "learning_rate": 9.329363556201049e-06, + "logits/chosen": -2.4741904735565186, + "logits/rejected": -3.0176618099212646, + "logps/chosen": -115.72806549072266, + "logps/rejected": -311.96044921875, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5965111255645752, + "rewards/margins": 6.411821365356445, + "rewards/rejected": -8.008332252502441, + "step": 6569 + }, + { + "epoch": 1.02, + "learning_rate": 9.3286301156699e-06, + "logits/chosen": -1.8588963747024536, + "logits/rejected": -3.113436222076416, + "logps/chosen": -284.328125, + "logps/rejected": -507.19317626953125, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8178001642227173, + "rewards/margins": 5.410280227661133, + "rewards/rejected": -7.228080749511719, + "step": 6570 + }, + { + "epoch": 1.02, + "learning_rate": 9.327896675138752e-06, + "logits/chosen": -3.0196926593780518, + "logits/rejected": -3.027820348739624, + "logps/chosen": -43.10812759399414, + "logps/rejected": -139.62875366210938, + "loss": 0.031, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3337011337280273, + "rewards/margins": 3.7639689445495605, + "rewards/rejected": -7.097670078277588, + "step": 6571 + }, + { + "epoch": 1.02, + "learning_rate": 9.327163234607604e-06, + "logits/chosen": -3.2104127407073975, + "logits/rejected": -1.9099994897842407, + "logps/chosen": -350.18316650390625, + "logps/rejected": -140.2403564453125, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4155372381210327, + "rewards/margins": 6.309348106384277, + "rewards/rejected": -6.724885940551758, + "step": 6572 + }, + { + "epoch": 1.02, + "learning_rate": 9.326429794076456e-06, + "logits/chosen": -2.966352939605713, + "logits/rejected": -2.890260934829712, + "logps/chosen": -107.70911407470703, + "logps/rejected": -187.88214111328125, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9504802227020264, + "rewards/margins": 5.598856449127197, + "rewards/rejected": -8.549337387084961, + "step": 6573 + }, + { + "epoch": 1.02, + "learning_rate": 9.325696353545308e-06, + "logits/chosen": -3.0967025756835938, + "logits/rejected": -2.954333782196045, + "logps/chosen": -135.2734375, + "logps/rejected": -49.14060974121094, + "loss": 3.2943, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.3360371589660645, + "rewards/margins": -2.4847824573516846, + "rewards/rejected": -3.85125470161438, + "step": 6574 + }, + { + "epoch": 1.02, + "learning_rate": 9.32496291301416e-06, + "logits/chosen": -1.4319061040878296, + "logits/rejected": -2.8637466430664062, + "logps/chosen": -179.60385131835938, + "logps/rejected": -494.5675048828125, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.676607608795166, + "rewards/margins": 4.696291446685791, + "rewards/rejected": -7.372899055480957, + "step": 6575 + }, + { + "epoch": 1.02, + "learning_rate": 9.324229472483012e-06, + "logits/chosen": -2.6656007766723633, + "logits/rejected": -3.123617172241211, + "logps/chosen": -478.5966796875, + "logps/rejected": -543.6611938476562, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.974452257156372, + "rewards/margins": 4.700564861297607, + "rewards/rejected": -7.675017356872559, + "step": 6576 + }, + { + "epoch": 1.02, + "learning_rate": 9.323496031951864e-06, + "logits/chosen": -2.3337020874023438, + "logits/rejected": -2.914696216583252, + "logps/chosen": -281.1531066894531, + "logps/rejected": -380.6228942871094, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.889844536781311, + "rewards/margins": 4.829767227172852, + "rewards/rejected": -6.719611167907715, + "step": 6577 + }, + { + "epoch": 1.02, + "learning_rate": 9.322762591420717e-06, + "logits/chosen": -3.0605363845825195, + "logits/rejected": -3.181525230407715, + "logps/chosen": -104.99917602539062, + "logps/rejected": -250.41143798828125, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5032709836959839, + "rewards/margins": 6.751530647277832, + "rewards/rejected": -8.254801750183105, + "step": 6578 + }, + { + "epoch": 1.02, + "learning_rate": 9.32202915088957e-06, + "logits/chosen": -3.2325844764709473, + "logits/rejected": -3.1098220348358154, + "logps/chosen": -247.1800994873047, + "logps/rejected": -238.1407012939453, + "loss": 0.0546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6970190405845642, + "rewards/margins": 4.8474907875061035, + "rewards/rejected": -5.5445098876953125, + "step": 6579 + }, + { + "epoch": 1.02, + "learning_rate": 9.321295710358421e-06, + "logits/chosen": -2.7224228382110596, + "logits/rejected": -3.038619041442871, + "logps/chosen": -410.0400695800781, + "logps/rejected": -665.10498046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8443055152893066, + "rewards/margins": 8.523465156555176, + "rewards/rejected": -11.36777114868164, + "step": 6580 + }, + { + "epoch": 1.02, + "learning_rate": 9.320562269827273e-06, + "logits/chosen": -2.8675968647003174, + "logits/rejected": -2.9034712314605713, + "logps/chosen": -129.45108032226562, + "logps/rejected": -226.8731231689453, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.140439987182617, + "rewards/margins": 5.648548603057861, + "rewards/rejected": -7.7889885902404785, + "step": 6581 + }, + { + "epoch": 1.02, + "learning_rate": 9.319828829296125e-06, + "logits/chosen": -1.2243475914001465, + "logits/rejected": -2.9422664642333984, + "logps/chosen": -109.6605453491211, + "logps/rejected": -391.58489990234375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8538551330566406, + "rewards/margins": 6.749844551086426, + "rewards/rejected": -8.603699684143066, + "step": 6582 + }, + { + "epoch": 1.02, + "learning_rate": 9.319095388764977e-06, + "logits/chosen": -2.6935696601867676, + "logits/rejected": -3.0364952087402344, + "logps/chosen": -102.1611099243164, + "logps/rejected": -218.40162658691406, + "loss": 0.0355, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.637727737426758, + "rewards/margins": 4.1242475509643555, + "rewards/rejected": -6.761975288391113, + "step": 6583 + }, + { + "epoch": 1.02, + "learning_rate": 9.318361948233829e-06, + "logits/chosen": -2.04140305519104, + "logits/rejected": -3.0738883018493652, + "logps/chosen": -271.5624084472656, + "logps/rejected": -372.4546203613281, + "loss": 0.0785, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1698410511016846, + "rewards/margins": 5.435821056365967, + "rewards/rejected": -7.6056623458862305, + "step": 6584 + }, + { + "epoch": 1.02, + "learning_rate": 9.31762850770268e-06, + "logits/chosen": -2.8482558727264404, + "logits/rejected": -1.7187484502792358, + "logps/chosen": -144.33567810058594, + "logps/rejected": -148.63119506835938, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.192518472671509, + "rewards/margins": 3.5883712768554688, + "rewards/rejected": -5.780889987945557, + "step": 6585 + }, + { + "epoch": 1.02, + "learning_rate": 9.316895067171532e-06, + "logits/chosen": -2.9966163635253906, + "logits/rejected": -3.1321070194244385, + "logps/chosen": -416.87469482421875, + "logps/rejected": -473.8980712890625, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6750888824462891, + "rewards/margins": 7.896439552307129, + "rewards/rejected": -8.571528434753418, + "step": 6586 + }, + { + "epoch": 1.02, + "learning_rate": 9.316161626640386e-06, + "logits/chosen": -1.7204859256744385, + "logits/rejected": -2.9718775749206543, + "logps/chosen": -122.79736328125, + "logps/rejected": -198.57713317871094, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3531643152236938, + "rewards/margins": 5.571624755859375, + "rewards/rejected": -6.9247894287109375, + "step": 6587 + }, + { + "epoch": 1.02, + "learning_rate": 9.315428186109238e-06, + "logits/chosen": -3.215207815170288, + "logits/rejected": -2.644974946975708, + "logps/chosen": -335.90423583984375, + "logps/rejected": -90.29119873046875, + "loss": 0.1196, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.726996898651123, + "rewards/margins": 3.1155622005462646, + "rewards/rejected": -5.842559337615967, + "step": 6588 + }, + { + "epoch": 1.02, + "learning_rate": 9.314694745578091e-06, + "logits/chosen": -1.646547794342041, + "logits/rejected": -2.7341983318328857, + "logps/chosen": -182.80886840820312, + "logps/rejected": -450.941650390625, + "loss": 0.2701, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.26236629486084, + "rewards/margins": 3.20162034034729, + "rewards/rejected": -7.463986396789551, + "step": 6589 + }, + { + "epoch": 1.02, + "learning_rate": 9.313961305046943e-06, + "logits/chosen": -1.4747360944747925, + "logits/rejected": -2.9159598350524902, + "logps/chosen": -74.69784545898438, + "logps/rejected": -335.71600341796875, + "loss": 0.0994, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4332352876663208, + "rewards/margins": 3.3168487548828125, + "rewards/rejected": -4.750083923339844, + "step": 6590 + }, + { + "epoch": 1.03, + "learning_rate": 9.313227864515795e-06, + "logits/chosen": -2.950321674346924, + "logits/rejected": -2.52593994140625, + "logps/chosen": -441.41448974609375, + "logps/rejected": -171.21432495117188, + "loss": 1.4353, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.5791428089141846, + "rewards/margins": 1.3636713027954102, + "rewards/rejected": -3.942814350128174, + "step": 6591 + }, + { + "epoch": 1.03, + "learning_rate": 9.312494423984647e-06, + "logits/chosen": -2.8214223384857178, + "logits/rejected": -3.0799083709716797, + "logps/chosen": -104.82504272460938, + "logps/rejected": -144.6966552734375, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.607508659362793, + "rewards/margins": 5.044743537902832, + "rewards/rejected": -8.652252197265625, + "step": 6592 + }, + { + "epoch": 1.03, + "learning_rate": 9.311760983453499e-06, + "logits/chosen": -2.8396081924438477, + "logits/rejected": -2.8039040565490723, + "logps/chosen": -324.7421569824219, + "logps/rejected": -182.3791046142578, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4536727964878082, + "rewards/margins": 5.5868353843688965, + "rewards/rejected": -5.133162498474121, + "step": 6593 + }, + { + "epoch": 1.03, + "learning_rate": 9.31102754292235e-06, + "logits/chosen": -2.593993663787842, + "logits/rejected": -2.78884220123291, + "logps/chosen": -189.40139770507812, + "logps/rejected": -345.0008544921875, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3821563720703125, + "rewards/margins": 5.898580551147461, + "rewards/rejected": -8.280736923217773, + "step": 6594 + }, + { + "epoch": 1.03, + "learning_rate": 9.310294102391203e-06, + "logits/chosen": -3.1503148078918457, + "logits/rejected": -2.882739305496216, + "logps/chosen": -487.8366394042969, + "logps/rejected": -422.1435241699219, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19874420762062073, + "rewards/margins": 6.8326921463012695, + "rewards/rejected": -7.031435966491699, + "step": 6595 + }, + { + "epoch": 1.03, + "learning_rate": 9.309560661860056e-06, + "logits/chosen": -2.9377920627593994, + "logits/rejected": -2.136829137802124, + "logps/chosen": -476.4455261230469, + "logps/rejected": -452.30615234375, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.074530839920044, + "rewards/margins": 6.379077911376953, + "rewards/rejected": -7.453608512878418, + "step": 6596 + }, + { + "epoch": 1.03, + "learning_rate": 9.308827221328908e-06, + "logits/chosen": -2.377516508102417, + "logits/rejected": -2.4532699584960938, + "logps/chosen": -808.4424438476562, + "logps/rejected": -819.0701293945312, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.213531494140625, + "rewards/margins": 7.187616348266602, + "rewards/rejected": -9.401147842407227, + "step": 6597 + }, + { + "epoch": 1.03, + "learning_rate": 9.30809378079776e-06, + "logits/chosen": -2.408874273300171, + "logits/rejected": -2.9780051708221436, + "logps/chosen": -154.99810791015625, + "logps/rejected": -296.35137939453125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.525653600692749, + "rewards/margins": 6.377979278564453, + "rewards/rejected": -6.903633117675781, + "step": 6598 + }, + { + "epoch": 1.03, + "learning_rate": 9.307360340266612e-06, + "logits/chosen": -3.068117380142212, + "logits/rejected": -2.9514708518981934, + "logps/chosen": -514.7188720703125, + "logps/rejected": -332.7095642089844, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.472948431968689, + "rewards/margins": 5.375095367431641, + "rewards/rejected": -6.848043918609619, + "step": 6599 + }, + { + "epoch": 1.03, + "learning_rate": 9.306626899735464e-06, + "logits/chosen": -0.8758653998374939, + "logits/rejected": -2.770474672317505, + "logps/chosen": -38.389671325683594, + "logps/rejected": -239.51824951171875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7356369495391846, + "rewards/margins": 5.8753662109375, + "rewards/rejected": -7.6110029220581055, + "step": 6600 + }, + { + "epoch": 1.03, + "learning_rate": 9.305893459204316e-06, + "logits/chosen": -2.9357073307037354, + "logits/rejected": -2.327090263366699, + "logps/chosen": -729.2440185546875, + "logps/rejected": -469.2121887207031, + "loss": 0.0302, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.652207851409912, + "rewards/margins": 4.41194486618042, + "rewards/rejected": -7.064152717590332, + "step": 6601 + }, + { + "epoch": 1.03, + "learning_rate": 9.305160018673167e-06, + "logits/chosen": -2.717078924179077, + "logits/rejected": -2.014191150665283, + "logps/chosen": -210.35427856445312, + "logps/rejected": -320.68035888671875, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3622232675552368, + "rewards/margins": 5.046537399291992, + "rewards/rejected": -6.408761024475098, + "step": 6602 + }, + { + "epoch": 1.03, + "learning_rate": 9.30442657814202e-06, + "logits/chosen": -2.9562699794769287, + "logits/rejected": -2.595318555831909, + "logps/chosen": -790.11279296875, + "logps/rejected": -576.3497924804688, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7641220092773438, + "rewards/margins": 4.857884407043457, + "rewards/rejected": -5.622006416320801, + "step": 6603 + }, + { + "epoch": 1.03, + "learning_rate": 9.303693137610871e-06, + "logits/chosen": -2.8046040534973145, + "logits/rejected": -2.3695147037506104, + "logps/chosen": -187.85194396972656, + "logps/rejected": -222.5325927734375, + "loss": 0.7257, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.548542022705078, + "rewards/margins": 3.665180206298828, + "rewards/rejected": -7.213722229003906, + "step": 6604 + }, + { + "epoch": 1.03, + "learning_rate": 9.302959697079725e-06, + "logits/chosen": -2.444286584854126, + "logits/rejected": -2.6307384967803955, + "logps/chosen": -225.47779846191406, + "logps/rejected": -361.2099609375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3336213827133179, + "rewards/margins": 7.16393518447876, + "rewards/rejected": -8.497556686401367, + "step": 6605 + }, + { + "epoch": 1.03, + "learning_rate": 9.302226256548577e-06, + "logits/chosen": -1.1214346885681152, + "logits/rejected": -2.886639356613159, + "logps/chosen": -51.45853805541992, + "logps/rejected": -245.4111785888672, + "loss": 0.0494, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.874902367591858, + "rewards/margins": 4.131236553192139, + "rewards/rejected": -6.006138801574707, + "step": 6606 + }, + { + "epoch": 1.03, + "learning_rate": 9.301492816017429e-06, + "logits/chosen": -2.776837110519409, + "logits/rejected": -3.052135467529297, + "logps/chosen": -653.497314453125, + "logps/rejected": -532.6582641601562, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2686340808868408, + "rewards/margins": 4.77195930480957, + "rewards/rejected": -6.04059362411499, + "step": 6607 + }, + { + "epoch": 1.03, + "learning_rate": 9.30075937548628e-06, + "logits/chosen": -2.8583438396453857, + "logits/rejected": -2.132230520248413, + "logps/chosen": -207.5349578857422, + "logps/rejected": -320.3345947265625, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9260638952255249, + "rewards/margins": 5.065392971038818, + "rewards/rejected": -5.991456985473633, + "step": 6608 + }, + { + "epoch": 1.03, + "learning_rate": 9.300025934955132e-06, + "logits/chosen": -1.8521798849105835, + "logits/rejected": -3.104348659515381, + "logps/chosen": -259.2403564453125, + "logps/rejected": -278.56182861328125, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2787185907363892, + "rewards/margins": 5.058439254760742, + "rewards/rejected": -6.337157249450684, + "step": 6609 + }, + { + "epoch": 1.03, + "learning_rate": 9.299292494423984e-06, + "logits/chosen": -2.835273027420044, + "logits/rejected": -3.0259580612182617, + "logps/chosen": -418.0987243652344, + "logps/rejected": -482.42047119140625, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.474419355392456, + "rewards/margins": 5.474171161651611, + "rewards/rejected": -6.948590278625488, + "step": 6610 + }, + { + "epoch": 1.03, + "learning_rate": 9.298559053892836e-06, + "logits/chosen": -1.8217343091964722, + "logits/rejected": -2.9183850288391113, + "logps/chosen": -123.32415771484375, + "logps/rejected": -355.1697998046875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9338231086730957, + "rewards/margins": 8.098381996154785, + "rewards/rejected": -10.032204627990723, + "step": 6611 + }, + { + "epoch": 1.03, + "learning_rate": 9.297825613361688e-06, + "logits/chosen": -2.8949759006500244, + "logits/rejected": -3.048321008682251, + "logps/chosen": -124.35072326660156, + "logps/rejected": -326.32611083984375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.329017162322998, + "rewards/margins": 6.3063249588012695, + "rewards/rejected": -8.63534164428711, + "step": 6612 + }, + { + "epoch": 1.03, + "learning_rate": 9.297092172830542e-06, + "logits/chosen": -3.1325736045837402, + "logits/rejected": -2.688814401626587, + "logps/chosen": -183.9446563720703, + "logps/rejected": -174.93162536621094, + "loss": 0.0375, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.189504861831665, + "rewards/margins": 3.819289207458496, + "rewards/rejected": -5.008793830871582, + "step": 6613 + }, + { + "epoch": 1.03, + "learning_rate": 9.296358732299393e-06, + "logits/chosen": -2.936818838119507, + "logits/rejected": -3.1339287757873535, + "logps/chosen": -236.42149353027344, + "logps/rejected": -188.61837768554688, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3296021223068237, + "rewards/margins": 5.674588203430176, + "rewards/rejected": -7.004190444946289, + "step": 6614 + }, + { + "epoch": 1.03, + "learning_rate": 9.295625291768245e-06, + "logits/chosen": -3.0211455821990967, + "logits/rejected": -3.0854101181030273, + "logps/chosen": -69.71601104736328, + "logps/rejected": -278.2493896484375, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.304976224899292, + "rewards/margins": 5.177175521850586, + "rewards/rejected": -8.482151985168457, + "step": 6615 + }, + { + "epoch": 1.03, + "learning_rate": 9.294891851237097e-06, + "logits/chosen": -2.147698402404785, + "logits/rejected": -3.078195095062256, + "logps/chosen": -154.40219116210938, + "logps/rejected": -390.14093017578125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3901615142822266, + "rewards/margins": 6.666592121124268, + "rewards/rejected": -8.056753158569336, + "step": 6616 + }, + { + "epoch": 1.03, + "learning_rate": 9.294158410705949e-06, + "logits/chosen": -2.953622817993164, + "logits/rejected": -2.9478607177734375, + "logps/chosen": -236.97222900390625, + "logps/rejected": -410.6820068359375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5465917587280273, + "rewards/margins": 7.347426414489746, + "rewards/rejected": -10.894018173217773, + "step": 6617 + }, + { + "epoch": 1.03, + "learning_rate": 9.293424970174801e-06, + "logits/chosen": -2.908512830734253, + "logits/rejected": -2.9865071773529053, + "logps/chosen": -54.28024673461914, + "logps/rejected": -212.85205078125, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8798574209213257, + "rewards/margins": 5.835419654846191, + "rewards/rejected": -7.715276718139648, + "step": 6618 + }, + { + "epoch": 1.03, + "learning_rate": 9.292691529643653e-06, + "logits/chosen": -2.3890256881713867, + "logits/rejected": -2.928637742996216, + "logps/chosen": -108.3765869140625, + "logps/rejected": -168.60281372070312, + "loss": 0.577, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.01254940032959, + "rewards/margins": 1.8237149715423584, + "rewards/rejected": -6.836264133453369, + "step": 6619 + }, + { + "epoch": 1.03, + "learning_rate": 9.291958089112505e-06, + "logits/chosen": -2.152332305908203, + "logits/rejected": -3.085390090942383, + "logps/chosen": -156.86221313476562, + "logps/rejected": -464.1451416015625, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5257632732391357, + "rewards/margins": 5.8444743156433105, + "rewards/rejected": -7.370237350463867, + "step": 6620 + }, + { + "epoch": 1.03, + "learning_rate": 9.291224648581358e-06, + "logits/chosen": -2.911983013153076, + "logits/rejected": -2.905906915664673, + "logps/chosen": -46.78715515136719, + "logps/rejected": -221.08824157714844, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2029271125793457, + "rewards/margins": 6.8880486488342285, + "rewards/rejected": -9.090975761413574, + "step": 6621 + }, + { + "epoch": 1.03, + "learning_rate": 9.29049120805021e-06, + "logits/chosen": -2.4370527267456055, + "logits/rejected": -2.977086305618286, + "logps/chosen": -61.06297302246094, + "logps/rejected": -167.60366821289062, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6971330642700195, + "rewards/margins": 6.195275783538818, + "rewards/rejected": -7.892409324645996, + "step": 6622 + }, + { + "epoch": 1.03, + "learning_rate": 9.289757767519064e-06, + "logits/chosen": -2.041388988494873, + "logits/rejected": -2.8700928688049316, + "logps/chosen": -238.3725128173828, + "logps/rejected": -318.38427734375, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.711693286895752, + "rewards/margins": 3.7313430309295654, + "rewards/rejected": -5.443036079406738, + "step": 6623 + }, + { + "epoch": 1.03, + "learning_rate": 9.289024326987916e-06, + "logits/chosen": -1.4316898584365845, + "logits/rejected": -3.0286169052124023, + "logps/chosen": -212.29986572265625, + "logps/rejected": -404.5263671875, + "loss": 1.0878, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3066673278808594, + "rewards/margins": 1.0841240882873535, + "rewards/rejected": -4.390791416168213, + "step": 6624 + }, + { + "epoch": 1.03, + "learning_rate": 9.288290886456767e-06, + "logits/chosen": -3.0024948120117188, + "logits/rejected": -2.5619988441467285, + "logps/chosen": -199.8824462890625, + "logps/rejected": -309.1644287109375, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.247854471206665, + "rewards/margins": 6.089165687561035, + "rewards/rejected": -8.337019920349121, + "step": 6625 + }, + { + "epoch": 1.03, + "learning_rate": 9.28755744592562e-06, + "logits/chosen": -1.0956015586853027, + "logits/rejected": -2.8355963230133057, + "logps/chosen": -49.028873443603516, + "logps/rejected": -347.6172180175781, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5716816186904907, + "rewards/margins": 7.731889724731445, + "rewards/rejected": -9.303571701049805, + "step": 6626 + }, + { + "epoch": 1.03, + "learning_rate": 9.286824005394471e-06, + "logits/chosen": -2.5998990535736084, + "logits/rejected": -2.53300142288208, + "logps/chosen": -584.2669067382812, + "logps/rejected": -735.3333740234375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4568020105361938, + "rewards/margins": 9.383218765258789, + "rewards/rejected": -10.840021133422852, + "step": 6627 + }, + { + "epoch": 1.03, + "learning_rate": 9.286090564863323e-06, + "logits/chosen": -2.5815203189849854, + "logits/rejected": -2.773118734359741, + "logps/chosen": -125.83351135253906, + "logps/rejected": -312.07470703125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.284181594848633, + "rewards/margins": 6.759505271911621, + "rewards/rejected": -9.043686866760254, + "step": 6628 + }, + { + "epoch": 1.03, + "learning_rate": 9.285357124332175e-06, + "logits/chosen": -1.726884126663208, + "logits/rejected": -2.964251756668091, + "logps/chosen": -64.44215393066406, + "logps/rejected": -202.01788330078125, + "loss": 0.0504, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6240625381469727, + "rewards/margins": 3.678011178970337, + "rewards/rejected": -5.3020734786987305, + "step": 6629 + }, + { + "epoch": 1.03, + "learning_rate": 9.284623683801027e-06, + "logits/chosen": -2.503091335296631, + "logits/rejected": -2.987321138381958, + "logps/chosen": -70.25395202636719, + "logps/rejected": -307.7337951660156, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.195878744125366, + "rewards/margins": 7.8188371658325195, + "rewards/rejected": -10.014715194702148, + "step": 6630 + }, + { + "epoch": 1.03, + "learning_rate": 9.28389024326988e-06, + "logits/chosen": -2.2889835834503174, + "logits/rejected": -2.940595865249634, + "logps/chosen": -212.03456115722656, + "logps/rejected": -349.94720458984375, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1574589014053345, + "rewards/margins": 6.28843879699707, + "rewards/rejected": -7.445898056030273, + "step": 6631 + }, + { + "epoch": 1.03, + "learning_rate": 9.283156802738732e-06, + "logits/chosen": -1.8563302755355835, + "logits/rejected": -2.613455295562744, + "logps/chosen": -214.1060333251953, + "logps/rejected": -310.39508056640625, + "loss": 0.0319, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.570369243621826, + "rewards/margins": 5.496257781982422, + "rewards/rejected": -8.066627502441406, + "step": 6632 + }, + { + "epoch": 1.03, + "learning_rate": 9.282423362207584e-06, + "logits/chosen": -1.6822624206542969, + "logits/rejected": -2.4572927951812744, + "logps/chosen": -154.08473205566406, + "logps/rejected": -169.97308349609375, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.348857045173645, + "rewards/margins": 5.680835723876953, + "rewards/rejected": -7.029692649841309, + "step": 6633 + }, + { + "epoch": 1.03, + "learning_rate": 9.281689921676436e-06, + "logits/chosen": -2.825183868408203, + "logits/rejected": -3.0598604679107666, + "logps/chosen": -293.7607727050781, + "logps/rejected": -410.9503173828125, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4013664722442627, + "rewards/margins": 7.11341667175293, + "rewards/rejected": -8.514782905578613, + "step": 6634 + }, + { + "epoch": 1.03, + "learning_rate": 9.280956481145288e-06, + "logits/chosen": -3.0859365463256836, + "logits/rejected": -3.109773635864258, + "logps/chosen": -557.048828125, + "logps/rejected": -534.6112060546875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9353058338165283, + "rewards/margins": 6.409916877746582, + "rewards/rejected": -7.345222473144531, + "step": 6635 + }, + { + "epoch": 1.03, + "learning_rate": 9.28022304061414e-06, + "logits/chosen": -1.7836050987243652, + "logits/rejected": -2.971522331237793, + "logps/chosen": -147.2137908935547, + "logps/rejected": -392.1634216308594, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7641992568969727, + "rewards/margins": 7.360283851623535, + "rewards/rejected": -10.124483108520508, + "step": 6636 + }, + { + "epoch": 1.03, + "learning_rate": 9.279489600082992e-06, + "logits/chosen": -2.887732982635498, + "logits/rejected": -2.9616692066192627, + "logps/chosen": -481.95281982421875, + "logps/rejected": -598.6343383789062, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5494483709335327, + "rewards/margins": 7.825490951538086, + "rewards/rejected": -8.37493896484375, + "step": 6637 + }, + { + "epoch": 1.03, + "learning_rate": 9.278756159551844e-06, + "logits/chosen": -2.8730087280273438, + "logits/rejected": -3.1639766693115234, + "logps/chosen": -167.61476135253906, + "logps/rejected": -233.77426147460938, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3644258379936218, + "rewards/margins": 7.960603713989258, + "rewards/rejected": -7.59617805480957, + "step": 6638 + }, + { + "epoch": 1.03, + "learning_rate": 9.278022719020695e-06, + "logits/chosen": -3.0706193447113037, + "logits/rejected": -2.7188737392425537, + "logps/chosen": -318.3710632324219, + "logps/rejected": -281.21453857421875, + "loss": 2.3057, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.790757179260254, + "rewards/margins": 1.789945125579834, + "rewards/rejected": -4.580702304840088, + "step": 6639 + }, + { + "epoch": 1.03, + "learning_rate": 9.277289278489549e-06, + "logits/chosen": -2.784580945968628, + "logits/rejected": -2.866457223892212, + "logps/chosen": -81.09674072265625, + "logps/rejected": -194.4781494140625, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.505916118621826, + "rewards/margins": 6.1474928855896, + "rewards/rejected": -8.653409004211426, + "step": 6640 + }, + { + "epoch": 1.03, + "learning_rate": 9.276555837958401e-06, + "logits/chosen": -2.3794045448303223, + "logits/rejected": -3.084162473678589, + "logps/chosen": -462.80145263671875, + "logps/rejected": -678.1262817382812, + "loss": 0.2023, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1347336769104004, + "rewards/margins": 3.8488969802856445, + "rewards/rejected": -6.983630657196045, + "step": 6641 + }, + { + "epoch": 1.03, + "learning_rate": 9.275822397427253e-06, + "logits/chosen": -2.5062296390533447, + "logits/rejected": -2.8272109031677246, + "logps/chosen": -174.01739501953125, + "logps/rejected": -249.65902709960938, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.768798828125, + "rewards/margins": 8.394902229309082, + "rewards/rejected": -9.163701057434082, + "step": 6642 + }, + { + "epoch": 1.03, + "learning_rate": 9.275088956896105e-06, + "logits/chosen": -2.9996163845062256, + "logits/rejected": -3.1134817600250244, + "logps/chosen": -420.08197021484375, + "logps/rejected": -415.3863525390625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4940654933452606, + "rewards/margins": 6.400157928466797, + "rewards/rejected": -6.894223213195801, + "step": 6643 + }, + { + "epoch": 1.03, + "learning_rate": 9.274355516364957e-06, + "logits/chosen": -2.8344523906707764, + "logits/rejected": -2.5125794410705566, + "logps/chosen": -373.90081787109375, + "logps/rejected": -290.63897705078125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.833446741104126, + "rewards/margins": 6.843178749084473, + "rewards/rejected": -8.67662525177002, + "step": 6644 + }, + { + "epoch": 1.03, + "learning_rate": 9.273622075833808e-06, + "logits/chosen": -2.6984853744506836, + "logits/rejected": -2.9278533458709717, + "logps/chosen": -101.74024963378906, + "logps/rejected": -219.36326599121094, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3193244934082031, + "rewards/margins": 6.17936897277832, + "rewards/rejected": -7.498693466186523, + "step": 6645 + }, + { + "epoch": 1.03, + "learning_rate": 9.27288863530266e-06, + "logits/chosen": -1.8638168573379517, + "logits/rejected": -2.8075568675994873, + "logps/chosen": -166.88442993164062, + "logps/rejected": -328.60369873046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5673816800117493, + "rewards/margins": 8.586014747619629, + "rewards/rejected": -9.153396606445312, + "step": 6646 + }, + { + "epoch": 1.03, + "learning_rate": 9.272155194771512e-06, + "logits/chosen": -2.349754571914673, + "logits/rejected": -2.9075207710266113, + "logps/chosen": -196.29330444335938, + "logps/rejected": -289.88079833984375, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5911728143692017, + "rewards/margins": 6.5317606925964355, + "rewards/rejected": -8.122933387756348, + "step": 6647 + }, + { + "epoch": 1.03, + "learning_rate": 9.271421754240364e-06, + "logits/chosen": -2.7920284271240234, + "logits/rejected": -2.985261917114258, + "logps/chosen": -83.89005279541016, + "logps/rejected": -173.4571533203125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5046648979187012, + "rewards/margins": 7.591925621032715, + "rewards/rejected": -9.096590042114258, + "step": 6648 + }, + { + "epoch": 1.03, + "learning_rate": 9.270688313709218e-06, + "logits/chosen": -2.0910451412200928, + "logits/rejected": -2.8634629249572754, + "logps/chosen": -178.5977783203125, + "logps/rejected": -452.9720764160156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2424182891845703, + "rewards/margins": 9.79094123840332, + "rewards/rejected": -13.03335952758789, + "step": 6649 + }, + { + "epoch": 1.03, + "learning_rate": 9.26995487317807e-06, + "logits/chosen": -2.5652496814727783, + "logits/rejected": -1.4792839288711548, + "logps/chosen": -201.96136474609375, + "logps/rejected": -88.9246597290039, + "loss": 2.5232, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.165485382080078, + "rewards/margins": -0.1725778579711914, + "rewards/rejected": -4.992907524108887, + "step": 6650 + }, + { + "epoch": 1.03, + "learning_rate": 9.269221432646921e-06, + "logits/chosen": -2.868706464767456, + "logits/rejected": -2.0157108306884766, + "logps/chosen": -207.24740600585938, + "logps/rejected": -222.340087890625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9793099164962769, + "rewards/margins": 6.978446960449219, + "rewards/rejected": -7.957756996154785, + "step": 6651 + }, + { + "epoch": 1.03, + "learning_rate": 9.268487992115773e-06, + "logits/chosen": -2.0367376804351807, + "logits/rejected": -2.8351540565490723, + "logps/chosen": -127.99644470214844, + "logps/rejected": -272.7388000488281, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5299850702285767, + "rewards/margins": 6.684222221374512, + "rewards/rejected": -8.214207649230957, + "step": 6652 + }, + { + "epoch": 1.03, + "learning_rate": 9.267754551584625e-06, + "logits/chosen": -1.950735092163086, + "logits/rejected": -2.9295766353607178, + "logps/chosen": -94.54151916503906, + "logps/rejected": -157.0067138671875, + "loss": 0.7855, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.033756256103516, + "rewards/margins": 2.7724459171295166, + "rewards/rejected": -6.806201934814453, + "step": 6653 + }, + { + "epoch": 1.03, + "learning_rate": 9.267021111053477e-06, + "logits/chosen": -1.2708938121795654, + "logits/rejected": -2.697133779525757, + "logps/chosen": -113.6558837890625, + "logps/rejected": -222.18316650390625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4405055046081543, + "rewards/margins": 5.299684524536133, + "rewards/rejected": -8.740190505981445, + "step": 6654 + }, + { + "epoch": 1.03, + "learning_rate": 9.26628767052233e-06, + "logits/chosen": -3.09938645362854, + "logits/rejected": -3.09614634513855, + "logps/chosen": -505.884521484375, + "logps/rejected": -330.57940673828125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4314590692520142, + "rewards/margins": 7.343784809112549, + "rewards/rejected": -8.775243759155273, + "step": 6655 + }, + { + "epoch": 1.04, + "learning_rate": 9.265554229991182e-06, + "logits/chosen": -2.713597536087036, + "logits/rejected": -3.1534173488616943, + "logps/chosen": -99.54148864746094, + "logps/rejected": -241.77452087402344, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.383970022201538, + "rewards/margins": 7.45387077331543, + "rewards/rejected": -8.837841033935547, + "step": 6656 + }, + { + "epoch": 1.04, + "learning_rate": 9.264820789460034e-06, + "logits/chosen": -2.3444793224334717, + "logits/rejected": -2.289506435394287, + "logps/chosen": -374.11041259765625, + "logps/rejected": -329.2860412597656, + "loss": 0.8037, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6207902431488037, + "rewards/margins": 4.631314277648926, + "rewards/rejected": -8.252104759216309, + "step": 6657 + }, + { + "epoch": 1.04, + "learning_rate": 9.264087348928888e-06, + "logits/chosen": -3.1573328971862793, + "logits/rejected": -2.0153043270111084, + "logps/chosen": -260.5406494140625, + "logps/rejected": -127.74922943115234, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5288967490196228, + "rewards/margins": 7.85412073135376, + "rewards/rejected": -8.383017539978027, + "step": 6658 + }, + { + "epoch": 1.04, + "learning_rate": 9.26335390839774e-06, + "logits/chosen": -1.5849974155426025, + "logits/rejected": -2.236443042755127, + "logps/chosen": -146.10101318359375, + "logps/rejected": -366.75555419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5615818500518799, + "rewards/margins": 10.799659729003906, + "rewards/rejected": -12.361242294311523, + "step": 6659 + }, + { + "epoch": 1.04, + "learning_rate": 9.262620467866592e-06, + "logits/chosen": -2.4184770584106445, + "logits/rejected": -2.7491142749786377, + "logps/chosen": -239.520751953125, + "logps/rejected": -272.847412109375, + "loss": 2.5465, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.044445991516113, + "rewards/margins": 0.8048074245452881, + "rewards/rejected": -5.8492536544799805, + "step": 6660 + }, + { + "epoch": 1.04, + "learning_rate": 9.261887027335444e-06, + "logits/chosen": -2.8865551948547363, + "logits/rejected": -1.8975225687026978, + "logps/chosen": -347.5950012207031, + "logps/rejected": -203.19091796875, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5003342628479004, + "rewards/margins": 6.113211154937744, + "rewards/rejected": -8.613545417785645, + "step": 6661 + }, + { + "epoch": 1.04, + "learning_rate": 9.261153586804295e-06, + "logits/chosen": -2.8757920265197754, + "logits/rejected": -3.104166030883789, + "logps/chosen": -310.062744140625, + "logps/rejected": -413.58209228515625, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2169007062911987, + "rewards/margins": 4.676721096038818, + "rewards/rejected": -5.893621921539307, + "step": 6662 + }, + { + "epoch": 1.04, + "learning_rate": 9.260420146273147e-06, + "logits/chosen": -1.9794429540634155, + "logits/rejected": -3.258368968963623, + "logps/chosen": -196.24459838867188, + "logps/rejected": -487.51800537109375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9761247634887695, + "rewards/margins": 8.863876342773438, + "rewards/rejected": -10.840002059936523, + "step": 6663 + }, + { + "epoch": 1.04, + "learning_rate": 9.259686705742e-06, + "logits/chosen": -2.645742416381836, + "logits/rejected": -2.839714527130127, + "logps/chosen": -57.24848556518555, + "logps/rejected": -151.57583618164062, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.295776605606079, + "rewards/margins": 5.333476543426514, + "rewards/rejected": -7.629253387451172, + "step": 6664 + }, + { + "epoch": 1.04, + "learning_rate": 9.258953265210851e-06, + "logits/chosen": -3.0714268684387207, + "logits/rejected": -0.7737785577774048, + "logps/chosen": -333.63970947265625, + "logps/rejected": -211.34835815429688, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6447136402130127, + "rewards/margins": 6.0458502769470215, + "rewards/rejected": -6.690564155578613, + "step": 6665 + }, + { + "epoch": 1.04, + "learning_rate": 9.258219824679703e-06, + "logits/chosen": -2.9690263271331787, + "logits/rejected": -2.8353710174560547, + "logps/chosen": -292.4644775390625, + "logps/rejected": -421.46966552734375, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5250114798545837, + "rewards/margins": 6.056251525878906, + "rewards/rejected": -6.581263065338135, + "step": 6666 + }, + { + "epoch": 1.04, + "learning_rate": 9.257486384148556e-06, + "logits/chosen": -3.0519423484802246, + "logits/rejected": -2.6546432971954346, + "logps/chosen": -366.12872314453125, + "logps/rejected": -524.4764404296875, + "loss": 0.6695, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.823696136474609, + "rewards/margins": 4.2311930656433105, + "rewards/rejected": -9.054889678955078, + "step": 6667 + }, + { + "epoch": 1.04, + "learning_rate": 9.256752943617408e-06, + "logits/chosen": -2.432046413421631, + "logits/rejected": -3.0293896198272705, + "logps/chosen": -55.88264846801758, + "logps/rejected": -207.92510986328125, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5122023820877075, + "rewards/margins": 5.585910320281982, + "rewards/rejected": -7.0981125831604, + "step": 6668 + }, + { + "epoch": 1.04, + "learning_rate": 9.25601950308626e-06, + "logits/chosen": -1.590922474861145, + "logits/rejected": -2.6437017917633057, + "logps/chosen": -127.27090454101562, + "logps/rejected": -270.0609130859375, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6524730920791626, + "rewards/margins": 6.5792317390441895, + "rewards/rejected": -8.231704711914062, + "step": 6669 + }, + { + "epoch": 1.04, + "learning_rate": 9.255286062555112e-06, + "logits/chosen": -3.070335865020752, + "logits/rejected": -2.4487903118133545, + "logps/chosen": -372.2063293457031, + "logps/rejected": -211.7774200439453, + "loss": 0.0621, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2536773681640625, + "rewards/margins": 4.468347549438477, + "rewards/rejected": -5.722025394439697, + "step": 6670 + }, + { + "epoch": 1.04, + "learning_rate": 9.254552622023964e-06, + "logits/chosen": -2.9158387184143066, + "logits/rejected": -3.102618932723999, + "logps/chosen": -610.13818359375, + "logps/rejected": -630.7363891601562, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1981858015060425, + "rewards/margins": 7.328598499298096, + "rewards/rejected": -8.52678394317627, + "step": 6671 + }, + { + "epoch": 1.04, + "learning_rate": 9.253819181492816e-06, + "logits/chosen": -3.0589020252227783, + "logits/rejected": -1.7400565147399902, + "logps/chosen": -436.5498962402344, + "logps/rejected": -251.7225341796875, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9563522934913635, + "rewards/margins": 5.399381637573242, + "rewards/rejected": -6.355733871459961, + "step": 6672 + }, + { + "epoch": 1.04, + "learning_rate": 9.253085740961668e-06, + "logits/chosen": -2.923200845718384, + "logits/rejected": -2.984657049179077, + "logps/chosen": -398.7679748535156, + "logps/rejected": -503.9455871582031, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7475506067276001, + "rewards/margins": 7.6474151611328125, + "rewards/rejected": -8.394965171813965, + "step": 6673 + }, + { + "epoch": 1.04, + "learning_rate": 9.25235230043052e-06, + "logits/chosen": -2.979065418243408, + "logits/rejected": -1.3731486797332764, + "logps/chosen": -754.2408447265625, + "logps/rejected": -416.451416015625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8353850841522217, + "rewards/margins": 8.140260696411133, + "rewards/rejected": -9.975645065307617, + "step": 6674 + }, + { + "epoch": 1.04, + "learning_rate": 9.251618859899372e-06, + "logits/chosen": -3.0327460765838623, + "logits/rejected": -2.902578115463257, + "logps/chosen": -163.6629180908203, + "logps/rejected": -242.628173828125, + "loss": 1.051, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2171947956085205, + "rewards/margins": 2.6689445972442627, + "rewards/rejected": -5.886139392852783, + "step": 6675 + }, + { + "epoch": 1.04, + "learning_rate": 9.250885419368225e-06, + "logits/chosen": -2.3140101432800293, + "logits/rejected": -2.8791797161102295, + "logps/chosen": -113.55500793457031, + "logps/rejected": -363.2518310546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.504423141479492, + "rewards/margins": 7.766391277313232, + "rewards/rejected": -10.270814895629883, + "step": 6676 + }, + { + "epoch": 1.04, + "learning_rate": 9.250151978837077e-06, + "logits/chosen": -2.9799208641052246, + "logits/rejected": -3.0775644779205322, + "logps/chosen": -130.40834045410156, + "logps/rejected": -272.79510498046875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.96706485748291, + "rewards/margins": 6.1134233474731445, + "rewards/rejected": -9.080488204956055, + "step": 6677 + }, + { + "epoch": 1.04, + "learning_rate": 9.249418538305929e-06, + "logits/chosen": -1.1684399843215942, + "logits/rejected": -2.857985019683838, + "logps/chosen": -176.20001220703125, + "logps/rejected": -613.856689453125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1478004455566406, + "rewards/margins": 7.118801116943359, + "rewards/rejected": -10.2666015625, + "step": 6678 + }, + { + "epoch": 1.04, + "learning_rate": 9.24868509777478e-06, + "logits/chosen": -2.952976942062378, + "logits/rejected": -2.9845316410064697, + "logps/chosen": -385.3420715332031, + "logps/rejected": -421.0870666503906, + "loss": 0.2092, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.297890663146973, + "rewards/margins": 5.5635175704956055, + "rewards/rejected": -9.861408233642578, + "step": 6679 + }, + { + "epoch": 1.04, + "learning_rate": 9.247951657243633e-06, + "logits/chosen": -2.922457218170166, + "logits/rejected": -3.039457082748413, + "logps/chosen": -156.4934844970703, + "logps/rejected": -186.04183959960938, + "loss": 0.5636, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.499194622039795, + "rewards/margins": 2.585381269454956, + "rewards/rejected": -6.084575653076172, + "step": 6680 + }, + { + "epoch": 1.04, + "learning_rate": 9.247218216712484e-06, + "logits/chosen": -2.569807529449463, + "logits/rejected": -2.9458048343658447, + "logps/chosen": -85.27084350585938, + "logps/rejected": -284.1024169921875, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3280465602874756, + "rewards/margins": 5.853357315063477, + "rewards/rejected": -8.181404113769531, + "step": 6681 + }, + { + "epoch": 1.04, + "learning_rate": 9.246484776181336e-06, + "logits/chosen": -2.453839063644409, + "logits/rejected": -2.675490617752075, + "logps/chosen": -119.41313171386719, + "logps/rejected": -429.9970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6778042316436768, + "rewards/margins": 10.264028549194336, + "rewards/rejected": -11.94183349609375, + "step": 6682 + }, + { + "epoch": 1.04, + "learning_rate": 9.245751335650188e-06, + "logits/chosen": -1.9761630296707153, + "logits/rejected": -2.9114155769348145, + "logps/chosen": -119.51402282714844, + "logps/rejected": -430.0771179199219, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.090364456176758, + "rewards/margins": 8.271647453308105, + "rewards/rejected": -10.362011909484863, + "step": 6683 + }, + { + "epoch": 1.04, + "learning_rate": 9.24501789511904e-06, + "logits/chosen": -3.0550191402435303, + "logits/rejected": -3.158437967300415, + "logps/chosen": -154.28448486328125, + "logps/rejected": -154.96420288085938, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.198070526123047, + "rewards/margins": 6.185238361358643, + "rewards/rejected": -9.383308410644531, + "step": 6684 + }, + { + "epoch": 1.04, + "learning_rate": 9.244284454587894e-06, + "logits/chosen": -2.8392021656036377, + "logits/rejected": -2.9413368701934814, + "logps/chosen": -131.37185668945312, + "logps/rejected": -255.37417602539062, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.150968074798584, + "rewards/margins": 4.907834053039551, + "rewards/rejected": -9.058801651000977, + "step": 6685 + }, + { + "epoch": 1.04, + "learning_rate": 9.243551014056746e-06, + "logits/chosen": -2.3520216941833496, + "logits/rejected": -3.0299577713012695, + "logps/chosen": -433.59954833984375, + "logps/rejected": -653.2017211914062, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4284164905548096, + "rewards/margins": 6.780998229980469, + "rewards/rejected": -9.2094144821167, + "step": 6686 + }, + { + "epoch": 1.04, + "learning_rate": 9.242817573525597e-06, + "logits/chosen": -1.2290719747543335, + "logits/rejected": -2.894334554672241, + "logps/chosen": -163.36129760742188, + "logps/rejected": -417.43865966796875, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.153367757797241, + "rewards/margins": 4.655619144439697, + "rewards/rejected": -7.808986663818359, + "step": 6687 + }, + { + "epoch": 1.04, + "learning_rate": 9.24208413299445e-06, + "logits/chosen": -2.8536078929901123, + "logits/rejected": -2.1602118015289307, + "logps/chosen": -110.09552764892578, + "logps/rejected": -137.38734436035156, + "loss": 0.8126, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4556643962860107, + "rewards/margins": 1.259418249130249, + "rewards/rejected": -4.71508264541626, + "step": 6688 + }, + { + "epoch": 1.04, + "learning_rate": 9.241350692463303e-06, + "logits/chosen": -1.1437567472457886, + "logits/rejected": -3.0392138957977295, + "logps/chosen": -75.30451202392578, + "logps/rejected": -393.58099365234375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.795008897781372, + "rewards/margins": 7.60758113861084, + "rewards/rejected": -9.402589797973633, + "step": 6689 + }, + { + "epoch": 1.04, + "learning_rate": 9.240617251932155e-06, + "logits/chosen": -3.1047821044921875, + "logits/rejected": -3.0437803268432617, + "logps/chosen": -225.6767578125, + "logps/rejected": -218.84811401367188, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6962188482284546, + "rewards/margins": 6.452794075012207, + "rewards/rejected": -7.149013042449951, + "step": 6690 + }, + { + "epoch": 1.04, + "learning_rate": 9.239883811401007e-06, + "logits/chosen": -3.047254800796509, + "logits/rejected": -3.1264357566833496, + "logps/chosen": -138.30552673339844, + "logps/rejected": -273.61083984375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8040974140167236, + "rewards/margins": 6.9283599853515625, + "rewards/rejected": -8.732457160949707, + "step": 6691 + }, + { + "epoch": 1.04, + "learning_rate": 9.239150370869859e-06, + "logits/chosen": -2.068824529647827, + "logits/rejected": -3.06463885307312, + "logps/chosen": -417.4994201660156, + "logps/rejected": -542.1092529296875, + "loss": 1.299, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7841126918792725, + "rewards/margins": 4.318212032318115, + "rewards/rejected": -6.102324962615967, + "step": 6692 + }, + { + "epoch": 1.04, + "learning_rate": 9.23841693033871e-06, + "logits/chosen": -2.9837968349456787, + "logits/rejected": -0.9447559714317322, + "logps/chosen": -301.0760803222656, + "logps/rejected": -180.66136169433594, + "loss": 1.2933, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9471161365509033, + "rewards/margins": 3.079923152923584, + "rewards/rejected": -7.027039051055908, + "step": 6693 + }, + { + "epoch": 1.04, + "learning_rate": 9.237683489807564e-06, + "logits/chosen": -2.477689504623413, + "logits/rejected": -3.122447967529297, + "logps/chosen": -113.5949478149414, + "logps/rejected": -374.64068603515625, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.495168685913086, + "rewards/margins": 5.947548866271973, + "rewards/rejected": -8.442717552185059, + "step": 6694 + }, + { + "epoch": 1.04, + "learning_rate": 9.236950049276416e-06, + "logits/chosen": -2.8416688442230225, + "logits/rejected": -2.9794039726257324, + "logps/chosen": -313.79730224609375, + "logps/rejected": -375.597900390625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3760207891464233, + "rewards/margins": 6.947798728942871, + "rewards/rejected": -8.323819160461426, + "step": 6695 + }, + { + "epoch": 1.04, + "learning_rate": 9.236216608745268e-06, + "logits/chosen": -2.596329689025879, + "logits/rejected": -3.1482038497924805, + "logps/chosen": -118.0099105834961, + "logps/rejected": -275.7831115722656, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6227118968963623, + "rewards/margins": 7.002472877502441, + "rewards/rejected": -9.625185012817383, + "step": 6696 + }, + { + "epoch": 1.04, + "learning_rate": 9.23548316821412e-06, + "logits/chosen": -2.497154712677002, + "logits/rejected": -3.0950348377227783, + "logps/chosen": -65.47663879394531, + "logps/rejected": -263.4646911621094, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.420879364013672, + "rewards/margins": 7.891144752502441, + "rewards/rejected": -10.312024116516113, + "step": 6697 + }, + { + "epoch": 1.04, + "learning_rate": 9.234749727682971e-06, + "logits/chosen": -1.754901647567749, + "logits/rejected": -2.9732506275177, + "logps/chosen": -91.29471588134766, + "logps/rejected": -249.07034301757812, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.00512957572937, + "rewards/margins": 4.865162372589111, + "rewards/rejected": -6.870291709899902, + "step": 6698 + }, + { + "epoch": 1.04, + "learning_rate": 9.234016287151823e-06, + "logits/chosen": -2.9455766677856445, + "logits/rejected": -2.9795572757720947, + "logps/chosen": -476.7978515625, + "logps/rejected": -366.327392578125, + "loss": 1.3109, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1249136924743652, + "rewards/margins": 3.4107065200805664, + "rewards/rejected": -6.535620212554932, + "step": 6699 + }, + { + "epoch": 1.04, + "learning_rate": 9.233282846620675e-06, + "logits/chosen": -2.92085862159729, + "logits/rejected": -1.8589571714401245, + "logps/chosen": -466.0860595703125, + "logps/rejected": -334.74163818359375, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8341140747070312, + "rewards/margins": 5.1816792488098145, + "rewards/rejected": -7.015793323516846, + "step": 6700 + }, + { + "epoch": 1.04, + "learning_rate": 9.232549406089527e-06, + "logits/chosen": -2.8167800903320312, + "logits/rejected": -3.356128692626953, + "logps/chosen": -313.2142028808594, + "logps/rejected": -401.1877746582031, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.569096565246582, + "rewards/margins": 6.85338020324707, + "rewards/rejected": -9.422475814819336, + "step": 6701 + }, + { + "epoch": 1.04, + "learning_rate": 9.23181596555838e-06, + "logits/chosen": -2.8165793418884277, + "logits/rejected": -2.3675215244293213, + "logps/chosen": -256.30987548828125, + "logps/rejected": -247.01577758789062, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9440600872039795, + "rewards/margins": 6.394326210021973, + "rewards/rejected": -8.338386535644531, + "step": 6702 + }, + { + "epoch": 1.04, + "learning_rate": 9.231082525027233e-06, + "logits/chosen": -3.074186325073242, + "logits/rejected": -1.9650487899780273, + "logps/chosen": -718.4907836914062, + "logps/rejected": -452.4923095703125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2137089967727661, + "rewards/margins": 8.599245071411133, + "rewards/rejected": -8.812954902648926, + "step": 6703 + }, + { + "epoch": 1.04, + "learning_rate": 9.230349084496084e-06, + "logits/chosen": -3.0286543369293213, + "logits/rejected": -2.7536511421203613, + "logps/chosen": -92.43318176269531, + "logps/rejected": -239.44358825683594, + "loss": 0.4765, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9788055419921875, + "rewards/margins": 2.8482227325439453, + "rewards/rejected": -6.827028274536133, + "step": 6704 + }, + { + "epoch": 1.04, + "learning_rate": 9.229615643964936e-06, + "logits/chosen": -2.84989333152771, + "logits/rejected": -2.7143921852111816, + "logps/chosen": -130.1668243408203, + "logps/rejected": -135.19361877441406, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3794541358947754, + "rewards/margins": 6.656914710998535, + "rewards/rejected": -9.036368370056152, + "step": 6705 + }, + { + "epoch": 1.04, + "learning_rate": 9.228882203433788e-06, + "logits/chosen": -2.5599584579467773, + "logits/rejected": -3.1381149291992188, + "logps/chosen": -292.6063232421875, + "logps/rejected": -352.66259765625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9026665687561035, + "rewards/margins": 6.201197624206543, + "rewards/rejected": -8.103863716125488, + "step": 6706 + }, + { + "epoch": 1.04, + "learning_rate": 9.22814876290264e-06, + "logits/chosen": -3.0792834758758545, + "logits/rejected": -2.496307373046875, + "logps/chosen": -446.119384765625, + "logps/rejected": -370.7695007324219, + "loss": 0.2016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.118105888366699, + "rewards/margins": 3.3973476886749268, + "rewards/rejected": -7.515453815460205, + "step": 6707 + }, + { + "epoch": 1.04, + "learning_rate": 9.227415322371492e-06, + "logits/chosen": -1.6302683353424072, + "logits/rejected": -1.4772558212280273, + "logps/chosen": -261.9117431640625, + "logps/rejected": -367.00164794921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0501304864883423, + "rewards/margins": 9.64860725402832, + "rewards/rejected": -8.598477363586426, + "step": 6708 + }, + { + "epoch": 1.04, + "learning_rate": 9.226681881840344e-06, + "logits/chosen": -2.8199713230133057, + "logits/rejected": -3.0728368759155273, + "logps/chosen": -242.4241485595703, + "logps/rejected": -579.6822509765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5155727863311768, + "rewards/margins": 8.629693984985352, + "rewards/rejected": -10.145265579223633, + "step": 6709 + }, + { + "epoch": 1.04, + "learning_rate": 9.225948441309196e-06, + "logits/chosen": -2.2271981239318848, + "logits/rejected": -3.1632487773895264, + "logps/chosen": -234.1510467529297, + "logps/rejected": -364.46014404296875, + "loss": 1.3064, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.532867431640625, + "rewards/margins": 1.228958010673523, + "rewards/rejected": -4.7618255615234375, + "step": 6710 + }, + { + "epoch": 1.04, + "learning_rate": 9.22521500077805e-06, + "logits/chosen": -3.065600633621216, + "logits/rejected": -3.074978828430176, + "logps/chosen": -256.547119140625, + "logps/rejected": -393.4046630859375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5567703247070312, + "rewards/margins": 7.514476776123047, + "rewards/rejected": -8.071247100830078, + "step": 6711 + }, + { + "epoch": 1.04, + "learning_rate": 9.224481560246901e-06, + "logits/chosen": -2.8711283206939697, + "logits/rejected": -3.0318679809570312, + "logps/chosen": -126.69341278076172, + "logps/rejected": -346.8788757324219, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7709038257598877, + "rewards/margins": 8.784923553466797, + "rewards/rejected": -10.555828094482422, + "step": 6712 + }, + { + "epoch": 1.04, + "learning_rate": 9.223748119715753e-06, + "logits/chosen": -2.1185123920440674, + "logits/rejected": -2.8434383869171143, + "logps/chosen": -159.016357421875, + "logps/rejected": -230.92738342285156, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6538147926330566, + "rewards/margins": 4.786405563354492, + "rewards/rejected": -6.440220832824707, + "step": 6713 + }, + { + "epoch": 1.04, + "learning_rate": 9.223014679184605e-06, + "logits/chosen": -1.801963210105896, + "logits/rejected": -2.904567241668701, + "logps/chosen": -53.221988677978516, + "logps/rejected": -304.7108459472656, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.253979682922363, + "rewards/margins": 6.289855480194092, + "rewards/rejected": -10.543834686279297, + "step": 6714 + }, + { + "epoch": 1.04, + "learning_rate": 9.222281238653457e-06, + "logits/chosen": -3.115020275115967, + "logits/rejected": -2.4511067867279053, + "logps/chosen": -530.5157470703125, + "logps/rejected": -430.60198974609375, + "loss": 0.4219, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0681703090667725, + "rewards/margins": 1.1953445672988892, + "rewards/rejected": -4.263514995574951, + "step": 6715 + }, + { + "epoch": 1.04, + "learning_rate": 9.221547798122309e-06, + "logits/chosen": -1.5291730165481567, + "logits/rejected": -3.1761534214019775, + "logps/chosen": -160.84483337402344, + "logps/rejected": -363.857421875, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.435920476913452, + "rewards/margins": 5.01772928237915, + "rewards/rejected": -7.453649520874023, + "step": 6716 + }, + { + "epoch": 1.04, + "learning_rate": 9.22081435759116e-06, + "logits/chosen": -2.9534335136413574, + "logits/rejected": -1.1346901655197144, + "logps/chosen": -321.1614990234375, + "logps/rejected": -242.89698791503906, + "loss": 0.557, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.0528085231781006, + "rewards/margins": 3.572169303894043, + "rewards/rejected": -5.624978065490723, + "step": 6717 + }, + { + "epoch": 1.04, + "learning_rate": 9.220080917060012e-06, + "logits/chosen": -2.8470115661621094, + "logits/rejected": -2.4722626209259033, + "logps/chosen": -685.4134521484375, + "logps/rejected": -603.1386108398438, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8313992023468018, + "rewards/margins": 8.793102264404297, + "rewards/rejected": -9.62450122833252, + "step": 6718 + }, + { + "epoch": 1.04, + "learning_rate": 9.219347476528864e-06, + "logits/chosen": -3.234153985977173, + "logits/rejected": -2.9489431381225586, + "logps/chosen": -631.6217041015625, + "logps/rejected": -338.89923095703125, + "loss": 0.7196, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.438739061355591, + "rewards/margins": 1.3648052215576172, + "rewards/rejected": -3.803544044494629, + "step": 6719 + }, + { + "epoch": 1.05, + "learning_rate": 9.218614035997718e-06, + "logits/chosen": -2.96175479888916, + "logits/rejected": -2.8548150062561035, + "logps/chosen": -294.606689453125, + "logps/rejected": -190.24191284179688, + "loss": 1.0866, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4504599571228027, + "rewards/margins": 3.181241273880005, + "rewards/rejected": -6.631701469421387, + "step": 6720 + }, + { + "epoch": 1.05, + "learning_rate": 9.21788059546657e-06, + "logits/chosen": -2.625410556793213, + "logits/rejected": -2.7486331462860107, + "logps/chosen": -197.88832092285156, + "logps/rejected": -351.74127197265625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7098233699798584, + "rewards/margins": 6.141010284423828, + "rewards/rejected": -8.850833892822266, + "step": 6721 + }, + { + "epoch": 1.05, + "learning_rate": 9.217147154935422e-06, + "logits/chosen": -3.019225597381592, + "logits/rejected": -2.8415050506591797, + "logps/chosen": -412.4367370605469, + "logps/rejected": -292.243408203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21502763032913208, + "rewards/margins": 7.616829872131348, + "rewards/rejected": -7.831856727600098, + "step": 6722 + }, + { + "epoch": 1.05, + "learning_rate": 9.216413714404275e-06, + "logits/chosen": -3.051995038986206, + "logits/rejected": -2.1729238033294678, + "logps/chosen": -178.9333953857422, + "logps/rejected": -155.36997985839844, + "loss": 0.437, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.68391227722168, + "rewards/margins": 1.0910890102386475, + "rewards/rejected": -5.775001525878906, + "step": 6723 + }, + { + "epoch": 1.05, + "learning_rate": 9.215680273873127e-06, + "logits/chosen": -3.0787248611450195, + "logits/rejected": -2.771838903427124, + "logps/chosen": -730.2269287109375, + "logps/rejected": -548.635498046875, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7016434073448181, + "rewards/margins": 4.9422607421875, + "rewards/rejected": -5.643904209136963, + "step": 6724 + }, + { + "epoch": 1.05, + "learning_rate": 9.214946833341979e-06, + "logits/chosen": -2.0710158348083496, + "logits/rejected": -2.846374273300171, + "logps/chosen": -374.8289794921875, + "logps/rejected": -586.520751953125, + "loss": 0.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.663487195968628, + "rewards/margins": 3.972062587738037, + "rewards/rejected": -6.635550022125244, + "step": 6725 + }, + { + "epoch": 1.05, + "learning_rate": 9.214213392810831e-06, + "logits/chosen": -1.93238365650177, + "logits/rejected": -2.2427308559417725, + "logps/chosen": -92.49078369140625, + "logps/rejected": -231.2771759033203, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2385430335998535, + "rewards/margins": 6.650448322296143, + "rewards/rejected": -9.888991355895996, + "step": 6726 + }, + { + "epoch": 1.05, + "learning_rate": 9.213479952279683e-06, + "logits/chosen": -3.0563182830810547, + "logits/rejected": -2.9286491870880127, + "logps/chosen": -108.86062622070312, + "logps/rejected": -186.28042602539062, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4352211952209473, + "rewards/margins": 5.8163018226623535, + "rewards/rejected": -7.251523017883301, + "step": 6727 + }, + { + "epoch": 1.05, + "learning_rate": 9.212746511748535e-06, + "logits/chosen": -1.6788389682769775, + "logits/rejected": -2.9918041229248047, + "logps/chosen": -78.63284301757812, + "logps/rejected": -314.5372314453125, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4627208709716797, + "rewards/margins": 3.5276601314544678, + "rewards/rejected": -5.990381240844727, + "step": 6728 + }, + { + "epoch": 1.05, + "learning_rate": 9.212013071217388e-06, + "logits/chosen": -3.0213565826416016, + "logits/rejected": -2.141350746154785, + "logps/chosen": -1184.1343994140625, + "logps/rejected": -643.381591796875, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8204102516174316, + "rewards/margins": 5.608136177062988, + "rewards/rejected": -7.428545951843262, + "step": 6729 + }, + { + "epoch": 1.05, + "learning_rate": 9.21127963068624e-06, + "logits/chosen": -1.4197851419448853, + "logits/rejected": -3.074352502822876, + "logps/chosen": -89.41256713867188, + "logps/rejected": -317.7859802246094, + "loss": 0.0447, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.819913864135742, + "rewards/margins": 3.190436363220215, + "rewards/rejected": -7.010350227355957, + "step": 6730 + }, + { + "epoch": 1.05, + "learning_rate": 9.210546190155092e-06, + "logits/chosen": -2.6796748638153076, + "logits/rejected": -2.2198245525360107, + "logps/chosen": -150.1830291748047, + "logps/rejected": -175.48233032226562, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0869228839874268, + "rewards/margins": 6.808870792388916, + "rewards/rejected": -8.895793914794922, + "step": 6731 + }, + { + "epoch": 1.05, + "learning_rate": 9.209812749623944e-06, + "logits/chosen": -2.3540284633636475, + "logits/rejected": -2.7512292861938477, + "logps/chosen": -273.184814453125, + "logps/rejected": -384.8851318359375, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7071969509124756, + "rewards/margins": 5.2813720703125, + "rewards/rejected": -8.988569259643555, + "step": 6732 + }, + { + "epoch": 1.05, + "learning_rate": 9.209079309092796e-06, + "logits/chosen": -2.1544010639190674, + "logits/rejected": -2.43284273147583, + "logps/chosen": -94.55472564697266, + "logps/rejected": -219.89808654785156, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6431236267089844, + "rewards/margins": 5.823359489440918, + "rewards/rejected": -8.466483116149902, + "step": 6733 + }, + { + "epoch": 1.05, + "learning_rate": 9.208345868561648e-06, + "logits/chosen": -2.4178202152252197, + "logits/rejected": -3.0755808353424072, + "logps/chosen": -248.37342834472656, + "logps/rejected": -473.59783935546875, + "loss": 0.3429, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.533885955810547, + "rewards/margins": 2.7208895683288574, + "rewards/rejected": -6.254775524139404, + "step": 6734 + }, + { + "epoch": 1.05, + "learning_rate": 9.2076124280305e-06, + "logits/chosen": -2.750704288482666, + "logits/rejected": -3.017599582672119, + "logps/chosen": -124.02865600585938, + "logps/rejected": -144.7266845703125, + "loss": 0.2321, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1120693683624268, + "rewards/margins": 3.668978214263916, + "rewards/rejected": -6.781047821044922, + "step": 6735 + }, + { + "epoch": 1.05, + "learning_rate": 9.206878987499351e-06, + "logits/chosen": -1.1179566383361816, + "logits/rejected": -2.8966686725616455, + "logps/chosen": -202.58865356445312, + "logps/rejected": -714.7398681640625, + "loss": 0.3364, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.90138053894043, + "rewards/margins": 2.424062490463257, + "rewards/rejected": -8.325443267822266, + "step": 6736 + }, + { + "epoch": 1.05, + "learning_rate": 9.206145546968203e-06, + "logits/chosen": -2.3934743404388428, + "logits/rejected": -2.9341862201690674, + "logps/chosen": -237.83810424804688, + "logps/rejected": -326.0042724609375, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8360172510147095, + "rewards/margins": 5.292534828186035, + "rewards/rejected": -6.128552436828613, + "step": 6737 + }, + { + "epoch": 1.05, + "learning_rate": 9.205412106437057e-06, + "logits/chosen": -2.4197351932525635, + "logits/rejected": -2.6295809745788574, + "logps/chosen": -194.87164306640625, + "logps/rejected": -344.82208251953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.126976728439331, + "rewards/margins": 7.569277763366699, + "rewards/rejected": -9.69625473022461, + "step": 6738 + }, + { + "epoch": 1.05, + "learning_rate": 9.204678665905909e-06, + "logits/chosen": -2.4437880516052246, + "logits/rejected": -2.8207764625549316, + "logps/chosen": -591.7989501953125, + "logps/rejected": -742.5068359375, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5649155378341675, + "rewards/margins": 8.093520164489746, + "rewards/rejected": -8.658435821533203, + "step": 6739 + }, + { + "epoch": 1.05, + "learning_rate": 9.20394522537476e-06, + "logits/chosen": -2.99137806892395, + "logits/rejected": -3.0643320083618164, + "logps/chosen": -166.62472534179688, + "logps/rejected": -271.6679992675781, + "loss": 0.8597, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.911123275756836, + "rewards/margins": 3.279386281967163, + "rewards/rejected": -7.19050931930542, + "step": 6740 + }, + { + "epoch": 1.05, + "learning_rate": 9.203211784843612e-06, + "logits/chosen": -3.2057669162750244, + "logits/rejected": -3.154438018798828, + "logps/chosen": -322.3726501464844, + "logps/rejected": -392.66339111328125, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.336381673812866, + "rewards/margins": 5.295696258544922, + "rewards/rejected": -8.632078170776367, + "step": 6741 + }, + { + "epoch": 1.05, + "learning_rate": 9.202478344312464e-06, + "logits/chosen": -2.4410808086395264, + "logits/rejected": -3.2509243488311768, + "logps/chosen": -506.29473876953125, + "logps/rejected": -744.1171875, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7022829055786133, + "rewards/margins": 4.5212883949279785, + "rewards/rejected": -7.223570823669434, + "step": 6742 + }, + { + "epoch": 1.05, + "learning_rate": 9.201744903781316e-06, + "logits/chosen": -2.5581462383270264, + "logits/rejected": -2.9372239112854004, + "logps/chosen": -61.12030029296875, + "logps/rejected": -207.21095275878906, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2290942668914795, + "rewards/margins": 6.287783622741699, + "rewards/rejected": -8.516878128051758, + "step": 6743 + }, + { + "epoch": 1.05, + "learning_rate": 9.201011463250168e-06, + "logits/chosen": -3.187647581100464, + "logits/rejected": -2.9614317417144775, + "logps/chosen": -378.6884765625, + "logps/rejected": -260.1839599609375, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.381880760192871, + "rewards/margins": 5.425327301025391, + "rewards/rejected": -6.807208061218262, + "step": 6744 + }, + { + "epoch": 1.05, + "learning_rate": 9.20027802271902e-06, + "logits/chosen": -3.2330939769744873, + "logits/rejected": -2.353362798690796, + "logps/chosen": -441.5964050292969, + "logps/rejected": -263.9300537109375, + "loss": 0.913, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4285786151885986, + "rewards/margins": 2.1196749210357666, + "rewards/rejected": -4.548253536224365, + "step": 6745 + }, + { + "epoch": 1.05, + "learning_rate": 9.199544582187872e-06, + "logits/chosen": -2.1837987899780273, + "logits/rejected": -3.0228800773620605, + "logps/chosen": -361.12884521484375, + "logps/rejected": -534.7160034179688, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8325676918029785, + "rewards/margins": 6.312060356140137, + "rewards/rejected": -8.144627571105957, + "step": 6746 + }, + { + "epoch": 1.05, + "learning_rate": 9.198811141656725e-06, + "logits/chosen": -2.2979369163513184, + "logits/rejected": -2.883885383605957, + "logps/chosen": -112.02964782714844, + "logps/rejected": -183.98098754882812, + "loss": 1.3002, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.596712112426758, + "rewards/margins": 2.339529275894165, + "rewards/rejected": -6.936241149902344, + "step": 6747 + }, + { + "epoch": 1.05, + "learning_rate": 9.198077701125577e-06, + "logits/chosen": -2.6143715381622314, + "logits/rejected": -3.1706743240356445, + "logps/chosen": -140.38140869140625, + "logps/rejected": -371.66278076171875, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9947943687438965, + "rewards/margins": 5.1710662841796875, + "rewards/rejected": -8.165861129760742, + "step": 6748 + }, + { + "epoch": 1.05, + "learning_rate": 9.197344260594429e-06, + "logits/chosen": -2.1376161575317383, + "logits/rejected": -2.122706174850464, + "logps/chosen": -166.4172821044922, + "logps/rejected": -346.97021484375, + "loss": 0.7201, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.963199615478516, + "rewards/margins": 3.2452290058135986, + "rewards/rejected": -8.208429336547852, + "step": 6749 + }, + { + "epoch": 1.05, + "learning_rate": 9.196610820063281e-06, + "logits/chosen": -2.975200653076172, + "logits/rejected": -2.8642737865448, + "logps/chosen": -196.89654541015625, + "logps/rejected": -220.89849853515625, + "loss": 0.7484, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.382489204406738, + "rewards/margins": 3.4907402992248535, + "rewards/rejected": -8.87322998046875, + "step": 6750 + }, + { + "epoch": 1.05, + "learning_rate": 9.195877379532133e-06, + "logits/chosen": -2.458467960357666, + "logits/rejected": -2.9945361614227295, + "logps/chosen": -159.6855926513672, + "logps/rejected": -403.29010009765625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.201361894607544, + "rewards/margins": 6.4201531410217285, + "rewards/rejected": -9.621515274047852, + "step": 6751 + }, + { + "epoch": 1.05, + "learning_rate": 9.195143939000985e-06, + "logits/chosen": -2.675060749053955, + "logits/rejected": -2.277017593383789, + "logps/chosen": -72.87554931640625, + "logps/rejected": -202.89340209960938, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2340633869171143, + "rewards/margins": 7.291860580444336, + "rewards/rejected": -9.525923728942871, + "step": 6752 + }, + { + "epoch": 1.05, + "learning_rate": 9.194410498469837e-06, + "logits/chosen": -2.0522751808166504, + "logits/rejected": -2.850287914276123, + "logps/chosen": -46.534934997558594, + "logps/rejected": -212.39190673828125, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.944791078567505, + "rewards/margins": 6.369740962982178, + "rewards/rejected": -9.314532279968262, + "step": 6753 + }, + { + "epoch": 1.05, + "learning_rate": 9.193677057938689e-06, + "logits/chosen": -3.0265908241271973, + "logits/rejected": -2.7756686210632324, + "logps/chosen": -54.847572326660156, + "logps/rejected": -255.3782958984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1764118671417236, + "rewards/margins": 8.476982116699219, + "rewards/rejected": -10.653393745422363, + "step": 6754 + }, + { + "epoch": 1.05, + "learning_rate": 9.192943617407542e-06, + "logits/chosen": -1.526090145111084, + "logits/rejected": -2.7891194820404053, + "logps/chosen": -232.64898681640625, + "logps/rejected": -487.859375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7787787914276123, + "rewards/margins": 7.424497604370117, + "rewards/rejected": -10.203276634216309, + "step": 6755 + }, + { + "epoch": 1.05, + "learning_rate": 9.192210176876394e-06, + "logits/chosen": -3.1274828910827637, + "logits/rejected": -3.086904525756836, + "logps/chosen": -95.63627624511719, + "logps/rejected": -112.78904724121094, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.159214973449707, + "rewards/margins": 4.9887375831604, + "rewards/rejected": -9.14795207977295, + "step": 6756 + }, + { + "epoch": 1.05, + "learning_rate": 9.191476736345248e-06, + "logits/chosen": -2.763200283050537, + "logits/rejected": -3.1269705295562744, + "logps/chosen": -110.16619873046875, + "logps/rejected": -327.8561706542969, + "loss": 0.1137, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7229087352752686, + "rewards/margins": 2.225022315979004, + "rewards/rejected": -5.947931289672852, + "step": 6757 + }, + { + "epoch": 1.05, + "learning_rate": 9.1907432958141e-06, + "logits/chosen": -2.1772758960723877, + "logits/rejected": -2.8487393856048584, + "logps/chosen": -206.6593017578125, + "logps/rejected": -324.1839599609375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7526352405548096, + "rewards/margins": 7.465811252593994, + "rewards/rejected": -10.218446731567383, + "step": 6758 + }, + { + "epoch": 1.05, + "learning_rate": 9.190009855282951e-06, + "logits/chosen": -2.4534966945648193, + "logits/rejected": -2.980891227722168, + "logps/chosen": -312.55322265625, + "logps/rejected": -421.11041259765625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.819953441619873, + "rewards/margins": 6.92504358291626, + "rewards/rejected": -9.744997024536133, + "step": 6759 + }, + { + "epoch": 1.05, + "learning_rate": 9.189276414751803e-06, + "logits/chosen": -2.0982792377471924, + "logits/rejected": -2.3930346965789795, + "logps/chosen": -118.96687316894531, + "logps/rejected": -422.8992004394531, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.43798828125, + "rewards/margins": 7.5147600173950195, + "rewards/rejected": -10.95274829864502, + "step": 6760 + }, + { + "epoch": 1.05, + "learning_rate": 9.188542974220655e-06, + "logits/chosen": -2.9959824085235596, + "logits/rejected": -2.948978900909424, + "logps/chosen": -711.9072875976562, + "logps/rejected": -883.9964599609375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1394424438476562, + "rewards/margins": 7.271768093109131, + "rewards/rejected": -10.411211013793945, + "step": 6761 + }, + { + "epoch": 1.05, + "learning_rate": 9.187809533689507e-06, + "logits/chosen": -2.1422505378723145, + "logits/rejected": -2.623483657836914, + "logps/chosen": -366.895263671875, + "logps/rejected": -385.835205078125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1111481189727783, + "rewards/margins": 10.636468887329102, + "rewards/rejected": -13.7476167678833, + "step": 6762 + }, + { + "epoch": 1.05, + "learning_rate": 9.187076093158359e-06, + "logits/chosen": -2.219853639602661, + "logits/rejected": -2.7685036659240723, + "logps/chosen": -241.81201171875, + "logps/rejected": -360.1797790527344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.317072868347168, + "rewards/margins": 9.697188377380371, + "rewards/rejected": -14.014261245727539, + "step": 6763 + }, + { + "epoch": 1.05, + "learning_rate": 9.18634265262721e-06, + "logits/chosen": -2.8550543785095215, + "logits/rejected": -2.4285731315612793, + "logps/chosen": -176.16441345214844, + "logps/rejected": -506.3008728027344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.375685453414917, + "rewards/margins": 11.468267440795898, + "rewards/rejected": -13.843954086303711, + "step": 6764 + }, + { + "epoch": 1.05, + "learning_rate": 9.185609212096064e-06, + "logits/chosen": -2.0523557662963867, + "logits/rejected": -2.7091915607452393, + "logps/chosen": -112.27899169921875, + "logps/rejected": -264.95379638671875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7620044946670532, + "rewards/margins": 6.856638431549072, + "rewards/rejected": -8.618642807006836, + "step": 6765 + }, + { + "epoch": 1.05, + "learning_rate": 9.184875771564916e-06, + "logits/chosen": -3.0779521465301514, + "logits/rejected": -2.5169808864593506, + "logps/chosen": -162.6109619140625, + "logps/rejected": -176.23326110839844, + "loss": 0.2873, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.923306703567505, + "rewards/margins": 3.3664543628692627, + "rewards/rejected": -6.289761066436768, + "step": 6766 + }, + { + "epoch": 1.05, + "learning_rate": 9.184142331033768e-06, + "logits/chosen": -2.257237195968628, + "logits/rejected": -2.998396873474121, + "logps/chosen": -94.40707397460938, + "logps/rejected": -224.06399536132812, + "loss": 0.0308, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.289402484893799, + "rewards/margins": 4.204056262969971, + "rewards/rejected": -6.4934587478637695, + "step": 6767 + }, + { + "epoch": 1.05, + "learning_rate": 9.18340889050262e-06, + "logits/chosen": -2.9255728721618652, + "logits/rejected": -2.7746968269348145, + "logps/chosen": -373.6536560058594, + "logps/rejected": -503.0736999511719, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8965927362442017, + "rewards/margins": 8.627212524414062, + "rewards/rejected": -10.523805618286133, + "step": 6768 + }, + { + "epoch": 1.05, + "learning_rate": 9.182675449971472e-06, + "logits/chosen": -2.9867801666259766, + "logits/rejected": -2.763031244277954, + "logps/chosen": -364.7969970703125, + "logps/rejected": -416.5952453613281, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0830771923065186, + "rewards/margins": 8.18099594116211, + "rewards/rejected": -9.264073371887207, + "step": 6769 + }, + { + "epoch": 1.05, + "learning_rate": 9.181942009440324e-06, + "logits/chosen": -2.970144748687744, + "logits/rejected": -1.7369307279586792, + "logps/chosen": -483.7337646484375, + "logps/rejected": -299.04669189453125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9762260913848877, + "rewards/margins": 8.797660827636719, + "rewards/rejected": -9.773887634277344, + "step": 6770 + }, + { + "epoch": 1.05, + "learning_rate": 9.181208568909176e-06, + "logits/chosen": -3.241090774536133, + "logits/rejected": -3.167163133621216, + "logps/chosen": -150.19479370117188, + "logps/rejected": -224.87319946289062, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.118051052093506, + "rewards/margins": 4.9177045822143555, + "rewards/rejected": -8.03575611114502, + "step": 6771 + }, + { + "epoch": 1.05, + "learning_rate": 9.180475128378027e-06, + "logits/chosen": -2.1223535537719727, + "logits/rejected": -3.0908892154693604, + "logps/chosen": -157.50572204589844, + "logps/rejected": -416.096435546875, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.901236057281494, + "rewards/margins": 6.2026896476745605, + "rewards/rejected": -10.103925704956055, + "step": 6772 + }, + { + "epoch": 1.05, + "learning_rate": 9.17974168784688e-06, + "logits/chosen": -3.0362319946289062, + "logits/rejected": -1.5361409187316895, + "logps/chosen": -381.0615539550781, + "logps/rejected": -129.58094787597656, + "loss": 0.0356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3321685791015625, + "rewards/margins": 4.430188179016113, + "rewards/rejected": -4.762356758117676, + "step": 6773 + }, + { + "epoch": 1.05, + "learning_rate": 9.179008247315733e-06, + "logits/chosen": -2.4288170337677, + "logits/rejected": -2.8982186317443848, + "logps/chosen": -87.92695617675781, + "logps/rejected": -318.7952575683594, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.348501205444336, + "rewards/margins": 6.747032165527344, + "rewards/rejected": -10.09553337097168, + "step": 6774 + }, + { + "epoch": 1.05, + "learning_rate": 9.178274806784585e-06, + "logits/chosen": -2.6103124618530273, + "logits/rejected": -2.803102970123291, + "logps/chosen": -247.97427368164062, + "logps/rejected": -233.4471435546875, + "loss": 0.6457, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.408054351806641, + "rewards/margins": 2.8821558952331543, + "rewards/rejected": -7.290210247039795, + "step": 6775 + }, + { + "epoch": 1.05, + "learning_rate": 9.177541366253437e-06, + "logits/chosen": -2.8317325115203857, + "logits/rejected": -0.9392560124397278, + "logps/chosen": -341.80902099609375, + "logps/rejected": -166.60848999023438, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3037922382354736, + "rewards/margins": 7.1242265701293945, + "rewards/rejected": -8.428018569946289, + "step": 6776 + }, + { + "epoch": 1.05, + "learning_rate": 9.176807925722289e-06, + "logits/chosen": -0.8630320429801941, + "logits/rejected": -2.1364049911499023, + "logps/chosen": -226.94093322753906, + "logps/rejected": -629.413818359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.282329559326172, + "rewards/margins": 9.667346954345703, + "rewards/rejected": -12.949676513671875, + "step": 6777 + }, + { + "epoch": 1.05, + "learning_rate": 9.17607448519114e-06, + "logits/chosen": -3.0208466053009033, + "logits/rejected": -1.9457566738128662, + "logps/chosen": -166.92665100097656, + "logps/rejected": -273.47991943359375, + "loss": 0.2073, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.06713342666626, + "rewards/margins": 2.88154935836792, + "rewards/rejected": -6.94868278503418, + "step": 6778 + }, + { + "epoch": 1.05, + "learning_rate": 9.175341044659992e-06, + "logits/chosen": -0.76230788230896, + "logits/rejected": -2.8100430965423584, + "logps/chosen": -145.61947631835938, + "logps/rejected": -540.45751953125, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9051191806793213, + "rewards/margins": 4.157341480255127, + "rewards/rejected": -7.062460422515869, + "step": 6779 + }, + { + "epoch": 1.05, + "learning_rate": 9.174607604128844e-06, + "logits/chosen": -3.2887749671936035, + "logits/rejected": -3.179111957550049, + "logps/chosen": -205.1509552001953, + "logps/rejected": -223.38963317871094, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.348439931869507, + "rewards/margins": 5.068134784698486, + "rewards/rejected": -7.416574954986572, + "step": 6780 + }, + { + "epoch": 1.05, + "learning_rate": 9.173874163597696e-06, + "logits/chosen": -1.4656985998153687, + "logits/rejected": -2.163832902908325, + "logps/chosen": -214.80453491210938, + "logps/rejected": -311.9585876464844, + "loss": 1.0266, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.265443325042725, + "rewards/margins": 2.624095916748047, + "rewards/rejected": -7.88953971862793, + "step": 6781 + }, + { + "epoch": 1.05, + "learning_rate": 9.173140723066548e-06, + "logits/chosen": -3.2425734996795654, + "logits/rejected": -3.233633279800415, + "logps/chosen": -268.1334228515625, + "logps/rejected": -258.7322998046875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1254258155822754, + "rewards/margins": 7.221370697021484, + "rewards/rejected": -9.346796989440918, + "step": 6782 + }, + { + "epoch": 1.05, + "learning_rate": 9.172407282535401e-06, + "logits/chosen": -2.525733232498169, + "logits/rejected": -3.001373529434204, + "logps/chosen": -166.8929443359375, + "logps/rejected": -198.07998657226562, + "loss": 0.0432, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0029220581054688, + "rewards/margins": 4.908829689025879, + "rewards/rejected": -7.911751747131348, + "step": 6783 + }, + { + "epoch": 1.06, + "learning_rate": 9.171673842004253e-06, + "logits/chosen": -3.0261738300323486, + "logits/rejected": -1.810131549835205, + "logps/chosen": -384.0201416015625, + "logps/rejected": -265.5106506347656, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7094589471817017, + "rewards/margins": 6.537749767303467, + "rewards/rejected": -8.247208595275879, + "step": 6784 + }, + { + "epoch": 1.06, + "learning_rate": 9.170940401473105e-06, + "logits/chosen": -2.478858470916748, + "logits/rejected": -2.984290361404419, + "logps/chosen": -496.18194580078125, + "logps/rejected": -612.0090942382812, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.199429512023926, + "rewards/margins": 8.041919708251953, + "rewards/rejected": -10.241348266601562, + "step": 6785 + }, + { + "epoch": 1.06, + "learning_rate": 9.170206960941957e-06, + "logits/chosen": -2.9987680912017822, + "logits/rejected": -2.2350170612335205, + "logps/chosen": -234.364990234375, + "logps/rejected": -173.6617431640625, + "loss": 0.6453, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1429038047790527, + "rewards/margins": 3.0037126541137695, + "rewards/rejected": -6.146616458892822, + "step": 6786 + }, + { + "epoch": 1.06, + "learning_rate": 9.169473520410809e-06, + "logits/chosen": -1.9369627237319946, + "logits/rejected": -2.7921302318573, + "logps/chosen": -155.2340545654297, + "logps/rejected": -422.39544677734375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.561662197113037, + "rewards/margins": 7.347871780395508, + "rewards/rejected": -10.909534454345703, + "step": 6787 + }, + { + "epoch": 1.06, + "learning_rate": 9.168740079879661e-06, + "logits/chosen": -2.9257330894470215, + "logits/rejected": -3.132530450820923, + "logps/chosen": -56.775787353515625, + "logps/rejected": -416.6245422363281, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3457040786743164, + "rewards/margins": 7.395732879638672, + "rewards/rejected": -9.741436958312988, + "step": 6788 + }, + { + "epoch": 1.06, + "learning_rate": 9.168006639348514e-06, + "logits/chosen": -1.2248765230178833, + "logits/rejected": -1.8185839653015137, + "logps/chosen": -116.25639343261719, + "logps/rejected": -360.4111328125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.085268497467041, + "rewards/margins": 7.540258407592773, + "rewards/rejected": -8.625526428222656, + "step": 6789 + }, + { + "epoch": 1.06, + "learning_rate": 9.167273198817366e-06, + "logits/chosen": -1.7202653884887695, + "logits/rejected": -2.8896026611328125, + "logps/chosen": -91.23381042480469, + "logps/rejected": -418.16741943359375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.603508949279785, + "rewards/margins": 5.953258514404297, + "rewards/rejected": -8.556767463684082, + "step": 6790 + }, + { + "epoch": 1.06, + "learning_rate": 9.166539758286218e-06, + "logits/chosen": -3.0767228603363037, + "logits/rejected": -3.19140362739563, + "logps/chosen": -334.4802551269531, + "logps/rejected": -216.04754638671875, + "loss": 2.9981, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.305964469909668, + "rewards/margins": 0.060923099517822266, + "rewards/rejected": -5.366888046264648, + "step": 6791 + }, + { + "epoch": 1.06, + "learning_rate": 9.165806317755072e-06, + "logits/chosen": -2.822566270828247, + "logits/rejected": -3.0169737339019775, + "logps/chosen": -351.0382995605469, + "logps/rejected": -480.680908203125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.998461127281189, + "rewards/margins": 7.304013252258301, + "rewards/rejected": -9.302474975585938, + "step": 6792 + }, + { + "epoch": 1.06, + "learning_rate": 9.165072877223924e-06, + "logits/chosen": -3.0062272548675537, + "logits/rejected": -2.544135570526123, + "logps/chosen": -539.0159912109375, + "logps/rejected": -330.8834533691406, + "loss": 0.0598, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1149563789367676, + "rewards/margins": 4.784170150756836, + "rewards/rejected": -7.899127006530762, + "step": 6793 + }, + { + "epoch": 1.06, + "learning_rate": 9.164339436692776e-06, + "logits/chosen": -2.9103732109069824, + "logits/rejected": -2.221402883529663, + "logps/chosen": -159.30209350585938, + "logps/rejected": -126.088623046875, + "loss": 1.0762, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.997248649597168, + "rewards/margins": -0.637669563293457, + "rewards/rejected": -5.359579086303711, + "step": 6794 + }, + { + "epoch": 1.06, + "learning_rate": 9.163605996161627e-06, + "logits/chosen": -1.5068233013153076, + "logits/rejected": -3.0966122150421143, + "logps/chosen": -147.05593872070312, + "logps/rejected": -320.0391845703125, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4156851768493652, + "rewards/margins": 5.068595886230469, + "rewards/rejected": -7.484281539916992, + "step": 6795 + }, + { + "epoch": 1.06, + "learning_rate": 9.16287255563048e-06, + "logits/chosen": -1.4365992546081543, + "logits/rejected": -2.873100996017456, + "logps/chosen": -148.37925720214844, + "logps/rejected": -354.30609130859375, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6965770721435547, + "rewards/margins": 4.497470855712891, + "rewards/rejected": -7.194047927856445, + "step": 6796 + }, + { + "epoch": 1.06, + "learning_rate": 9.162139115099331e-06, + "logits/chosen": -3.0783777236938477, + "logits/rejected": -2.2312166690826416, + "logps/chosen": -279.1379699707031, + "logps/rejected": -116.25164794921875, + "loss": 0.4325, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.240944862365723, + "rewards/margins": 1.3618794679641724, + "rewards/rejected": -5.6028242111206055, + "step": 6797 + }, + { + "epoch": 1.06, + "learning_rate": 9.161405674568183e-06, + "logits/chosen": -2.7720398902893066, + "logits/rejected": -3.1859257221221924, + "logps/chosen": -45.028228759765625, + "logps/rejected": -146.09780883789062, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5851497650146484, + "rewards/margins": 5.3493547439575195, + "rewards/rejected": -7.934504508972168, + "step": 6798 + }, + { + "epoch": 1.06, + "learning_rate": 9.160672234037035e-06, + "logits/chosen": -2.9175217151641846, + "logits/rejected": -2.0371930599212646, + "logps/chosen": -581.0311279296875, + "logps/rejected": -348.66058349609375, + "loss": 0.0779, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8452224731445312, + "rewards/margins": 3.8121750354766846, + "rewards/rejected": -7.657397270202637, + "step": 6799 + }, + { + "epoch": 1.06, + "learning_rate": 9.159938793505888e-06, + "logits/chosen": -1.6421685218811035, + "logits/rejected": -3.0204882621765137, + "logps/chosen": -153.75033569335938, + "logps/rejected": -348.1168212890625, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.187961101531982, + "rewards/margins": 4.471518516540527, + "rewards/rejected": -8.659480094909668, + "step": 6800 + }, + { + "epoch": 1.06, + "learning_rate": 9.15920535297474e-06, + "logits/chosen": -3.032329797744751, + "logits/rejected": -1.6654725074768066, + "logps/chosen": -538.4638671875, + "logps/rejected": -264.6224060058594, + "loss": 0.2358, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18724679946899414, + "rewards/margins": 5.374567031860352, + "rewards/rejected": -5.561813831329346, + "step": 6801 + }, + { + "epoch": 1.06, + "learning_rate": 9.158471912443592e-06, + "logits/chosen": -3.0695571899414062, + "logits/rejected": -0.8700563311576843, + "logps/chosen": -200.07278442382812, + "logps/rejected": -119.14965057373047, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5560967922210693, + "rewards/margins": 6.188199043273926, + "rewards/rejected": -8.744296073913574, + "step": 6802 + }, + { + "epoch": 1.06, + "learning_rate": 9.157738471912444e-06, + "logits/chosen": -2.8678934574127197, + "logits/rejected": -1.3914666175842285, + "logps/chosen": -462.9654846191406, + "logps/rejected": -344.1185607910156, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3612208366394043, + "rewards/margins": 6.658155918121338, + "rewards/rejected": -10.019376754760742, + "step": 6803 + }, + { + "epoch": 1.06, + "learning_rate": 9.157005031381296e-06, + "logits/chosen": -3.12732195854187, + "logits/rejected": -2.4402859210968018, + "logps/chosen": -162.67431640625, + "logps/rejected": -255.72918701171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.110497236251831, + "rewards/margins": 8.709136962890625, + "rewards/rejected": -9.819633483886719, + "step": 6804 + }, + { + "epoch": 1.06, + "learning_rate": 9.156271590850148e-06, + "logits/chosen": -2.9151437282562256, + "logits/rejected": -2.3550643920898438, + "logps/chosen": -677.789794921875, + "logps/rejected": -440.94879150390625, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9357941150665283, + "rewards/margins": 5.875476360321045, + "rewards/rejected": -8.811269760131836, + "step": 6805 + }, + { + "epoch": 1.06, + "learning_rate": 9.155538150319e-06, + "logits/chosen": -0.7567313313484192, + "logits/rejected": -2.945267915725708, + "logps/chosen": -117.61076354980469, + "logps/rejected": -560.8701171875, + "loss": 0.0386, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.008466720581055, + "rewards/margins": 4.799921035766602, + "rewards/rejected": -8.808387756347656, + "step": 6806 + }, + { + "epoch": 1.06, + "learning_rate": 9.154804709787852e-06, + "logits/chosen": -1.0493650436401367, + "logits/rejected": -2.891005277633667, + "logps/chosen": -64.56581115722656, + "logps/rejected": -250.7037353515625, + "loss": 0.0486, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8095850944519043, + "rewards/margins": 3.026761054992676, + "rewards/rejected": -6.83634614944458, + "step": 6807 + }, + { + "epoch": 1.06, + "learning_rate": 9.154071269256704e-06, + "logits/chosen": -1.4323036670684814, + "logits/rejected": -2.8510665893554688, + "logps/chosen": -275.2581481933594, + "logps/rejected": -591.0656127929688, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.354459285736084, + "rewards/margins": 5.567464828491211, + "rewards/rejected": -8.921924591064453, + "step": 6808 + }, + { + "epoch": 1.06, + "learning_rate": 9.153337828725557e-06, + "logits/chosen": -2.9106898307800293, + "logits/rejected": -1.8016937971115112, + "logps/chosen": -460.21026611328125, + "logps/rejected": -326.0626220703125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1353775262832642, + "rewards/margins": 8.794074058532715, + "rewards/rejected": -9.929451942443848, + "step": 6809 + }, + { + "epoch": 1.06, + "learning_rate": 9.152604388194409e-06, + "logits/chosen": -2.2390291690826416, + "logits/rejected": -3.105828046798706, + "logps/chosen": -49.19826889038086, + "logps/rejected": -292.53619384765625, + "loss": 0.037, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.22672438621521, + "rewards/margins": 6.118906021118164, + "rewards/rejected": -9.345630645751953, + "step": 6810 + }, + { + "epoch": 1.06, + "learning_rate": 9.15187094766326e-06, + "logits/chosen": -2.3644039630889893, + "logits/rejected": -3.051938772201538, + "logps/chosen": -120.75316619873047, + "logps/rejected": -324.1671142578125, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9332823753356934, + "rewards/margins": 6.099895000457764, + "rewards/rejected": -9.033177375793457, + "step": 6811 + }, + { + "epoch": 1.06, + "learning_rate": 9.151137507132113e-06, + "logits/chosen": -2.2926063537597656, + "logits/rejected": -2.565037965774536, + "logps/chosen": -98.0243148803711, + "logps/rejected": -294.692138671875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.414736270904541, + "rewards/margins": 8.193146705627441, + "rewards/rejected": -11.60788345336914, + "step": 6812 + }, + { + "epoch": 1.06, + "learning_rate": 9.150404066600965e-06, + "logits/chosen": -2.190826892852783, + "logits/rejected": -2.60532546043396, + "logps/chosen": -71.93844604492188, + "logps/rejected": -169.51954650878906, + "loss": 0.0865, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7326900959014893, + "rewards/margins": 3.4867889881134033, + "rewards/rejected": -7.219479084014893, + "step": 6813 + }, + { + "epoch": 1.06, + "learning_rate": 9.149670626069816e-06, + "logits/chosen": -3.0640158653259277, + "logits/rejected": -1.6931328773498535, + "logps/chosen": -427.18756103515625, + "logps/rejected": -274.19244384765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2105988264083862, + "rewards/margins": 9.388463973999023, + "rewards/rejected": -10.599061965942383, + "step": 6814 + }, + { + "epoch": 1.06, + "learning_rate": 9.148937185538668e-06, + "logits/chosen": -2.759718894958496, + "logits/rejected": -1.6269859075546265, + "logps/chosen": -187.27340698242188, + "logps/rejected": -216.752197265625, + "loss": 0.2817, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.778224945068359, + "rewards/margins": 4.773382186889648, + "rewards/rejected": -9.551607131958008, + "step": 6815 + }, + { + "epoch": 1.06, + "learning_rate": 9.14820374500752e-06, + "logits/chosen": -1.736215353012085, + "logits/rejected": -2.904496192932129, + "logps/chosen": -129.21673583984375, + "logps/rejected": -194.66714477539062, + "loss": 0.0574, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0601043701171875, + "rewards/margins": 2.855210781097412, + "rewards/rejected": -8.915315628051758, + "step": 6816 + }, + { + "epoch": 1.06, + "learning_rate": 9.147470304476372e-06, + "logits/chosen": -2.416760206222534, + "logits/rejected": -2.04807186126709, + "logps/chosen": -408.5675964355469, + "logps/rejected": -431.181640625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0891876220703125, + "rewards/margins": 8.263209342956543, + "rewards/rejected": -11.352396965026855, + "step": 6817 + }, + { + "epoch": 1.06, + "learning_rate": 9.146736863945226e-06, + "logits/chosen": -2.7583181858062744, + "logits/rejected": -2.6316988468170166, + "logps/chosen": -175.62222290039062, + "logps/rejected": -206.82388305664062, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3539838790893555, + "rewards/margins": 4.989495277404785, + "rewards/rejected": -8.34347915649414, + "step": 6818 + }, + { + "epoch": 1.06, + "learning_rate": 9.146003423414078e-06, + "logits/chosen": -3.100656032562256, + "logits/rejected": -2.839869260787964, + "logps/chosen": -176.08047485351562, + "logps/rejected": -282.5252990722656, + "loss": 0.2351, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0179145336151123, + "rewards/margins": 4.747584342956543, + "rewards/rejected": -7.765498638153076, + "step": 6819 + }, + { + "epoch": 1.06, + "learning_rate": 9.14526998288293e-06, + "logits/chosen": -2.3355660438537598, + "logits/rejected": -3.0108256340026855, + "logps/chosen": -302.3869934082031, + "logps/rejected": -444.0763854980469, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0119765996932983, + "rewards/margins": 7.570618629455566, + "rewards/rejected": -8.582594871520996, + "step": 6820 + }, + { + "epoch": 1.06, + "learning_rate": 9.144536542351781e-06, + "logits/chosen": -2.515455961227417, + "logits/rejected": -2.8051435947418213, + "logps/chosen": -165.48663330078125, + "logps/rejected": -294.44366455078125, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5278072357177734, + "rewards/margins": 6.915964603424072, + "rewards/rejected": -9.443771362304688, + "step": 6821 + }, + { + "epoch": 1.06, + "learning_rate": 9.143803101820633e-06, + "logits/chosen": -2.155893564224243, + "logits/rejected": -2.997333288192749, + "logps/chosen": -128.01844787597656, + "logps/rejected": -238.85305786132812, + "loss": 0.5631, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.61159086227417, + "rewards/margins": 3.9141900539398193, + "rewards/rejected": -8.52578067779541, + "step": 6822 + }, + { + "epoch": 1.06, + "learning_rate": 9.143069661289485e-06, + "logits/chosen": -2.487791061401367, + "logits/rejected": -2.5512640476226807, + "logps/chosen": -128.46575927734375, + "logps/rejected": -322.88470458984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.73160457611084, + "rewards/margins": 9.080718040466309, + "rewards/rejected": -11.812322616577148, + "step": 6823 + }, + { + "epoch": 1.06, + "learning_rate": 9.142336220758339e-06, + "logits/chosen": -1.7504074573516846, + "logits/rejected": -2.668874502182007, + "logps/chosen": -179.74337768554688, + "logps/rejected": -626.1238403320312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8499376773834229, + "rewards/margins": 9.662626266479492, + "rewards/rejected": -10.512563705444336, + "step": 6824 + }, + { + "epoch": 1.06, + "learning_rate": 9.14160278022719e-06, + "logits/chosen": -3.124396800994873, + "logits/rejected": -2.9116992950439453, + "logps/chosen": -183.06906127929688, + "logps/rejected": -190.23504638671875, + "loss": 1.8621, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.783867359161377, + "rewards/margins": 1.5340712070465088, + "rewards/rejected": -7.317938804626465, + "step": 6825 + }, + { + "epoch": 1.06, + "learning_rate": 9.140869339696042e-06, + "logits/chosen": -2.9703900814056396, + "logits/rejected": -3.091254949569702, + "logps/chosen": -160.50738525390625, + "logps/rejected": -180.67764282226562, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.013545274734497, + "rewards/margins": 6.596234321594238, + "rewards/rejected": -8.609779357910156, + "step": 6826 + }, + { + "epoch": 1.06, + "learning_rate": 9.140135899164896e-06, + "logits/chosen": -2.784404754638672, + "logits/rejected": -3.108036518096924, + "logps/chosen": -69.97789001464844, + "logps/rejected": -248.10704040527344, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4749486446380615, + "rewards/margins": 6.005791187286377, + "rewards/rejected": -9.48073959350586, + "step": 6827 + }, + { + "epoch": 1.06, + "learning_rate": 9.139402458633748e-06, + "logits/chosen": -1.4324500560760498, + "logits/rejected": -2.935131072998047, + "logps/chosen": -85.74822998046875, + "logps/rejected": -340.0775146484375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.015422821044922, + "rewards/margins": 6.221761703491211, + "rewards/rejected": -8.237184524536133, + "step": 6828 + }, + { + "epoch": 1.06, + "learning_rate": 9.1386690181026e-06, + "logits/chosen": -3.064277172088623, + "logits/rejected": -2.7165169715881348, + "logps/chosen": -547.0738525390625, + "logps/rejected": -606.4649047851562, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2987430095672607, + "rewards/margins": 7.427238464355469, + "rewards/rejected": -8.725980758666992, + "step": 6829 + }, + { + "epoch": 1.06, + "learning_rate": 9.137935577571452e-06, + "logits/chosen": -2.3584134578704834, + "logits/rejected": -3.0021555423736572, + "logps/chosen": -100.38063049316406, + "logps/rejected": -354.6230163574219, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.423792839050293, + "rewards/margins": 7.384748935699463, + "rewards/rejected": -9.808542251586914, + "step": 6830 + }, + { + "epoch": 1.06, + "learning_rate": 9.137202137040303e-06, + "logits/chosen": -2.7667336463928223, + "logits/rejected": -2.1046552658081055, + "logps/chosen": -310.62237548828125, + "logps/rejected": -281.83056640625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.639955997467041, + "rewards/margins": 6.771851539611816, + "rewards/rejected": -10.411808013916016, + "step": 6831 + }, + { + "epoch": 1.06, + "learning_rate": 9.136468696509155e-06, + "logits/chosen": -2.8740463256835938, + "logits/rejected": -2.166701316833496, + "logps/chosen": -219.88064575195312, + "logps/rejected": -236.3221435546875, + "loss": 4.029, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.976627349853516, + "rewards/margins": -1.4738788604736328, + "rewards/rejected": -5.502748489379883, + "step": 6832 + }, + { + "epoch": 1.06, + "learning_rate": 9.135735255978007e-06, + "logits/chosen": -2.448718309402466, + "logits/rejected": -2.905698776245117, + "logps/chosen": -156.08200073242188, + "logps/rejected": -445.8045654296875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8539609909057617, + "rewards/margins": 10.122491836547852, + "rewards/rejected": -12.976452827453613, + "step": 6833 + }, + { + "epoch": 1.06, + "learning_rate": 9.135001815446859e-06, + "logits/chosen": -3.141721248626709, + "logits/rejected": -1.8375709056854248, + "logps/chosen": -461.50262451171875, + "logps/rejected": -235.75088500976562, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6562118530273438, + "rewards/margins": 9.520393371582031, + "rewards/rejected": -7.8641815185546875, + "step": 6834 + }, + { + "epoch": 1.06, + "learning_rate": 9.134268374915711e-06, + "logits/chosen": -2.894551992416382, + "logits/rejected": -2.5615036487579346, + "logps/chosen": -278.3948669433594, + "logps/rejected": -584.02294921875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.408783435821533, + "rewards/margins": 9.15119743347168, + "rewards/rejected": -12.559980392456055, + "step": 6835 + }, + { + "epoch": 1.06, + "learning_rate": 9.133534934384565e-06, + "logits/chosen": -3.0080175399780273, + "logits/rejected": -3.1050288677215576, + "logps/chosen": -352.43231201171875, + "logps/rejected": -401.2526550292969, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2803215980529785, + "rewards/margins": 5.657088756561279, + "rewards/rejected": -6.937410354614258, + "step": 6836 + }, + { + "epoch": 1.06, + "learning_rate": 9.132801493853416e-06, + "logits/chosen": -1.8418673276901245, + "logits/rejected": -3.0102458000183105, + "logps/chosen": -84.16239929199219, + "logps/rejected": -365.961669921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0024237632751465, + "rewards/margins": 9.703512191772461, + "rewards/rejected": -12.705936431884766, + "step": 6837 + }, + { + "epoch": 1.06, + "learning_rate": 9.132068053322268e-06, + "logits/chosen": -1.8548756837844849, + "logits/rejected": -2.7178332805633545, + "logps/chosen": -105.99089050292969, + "logps/rejected": -225.0596466064453, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.210946559906006, + "rewards/margins": 6.987067222595215, + "rewards/rejected": -10.198013305664062, + "step": 6838 + }, + { + "epoch": 1.06, + "learning_rate": 9.13133461279112e-06, + "logits/chosen": -2.8213000297546387, + "logits/rejected": -3.135854959487915, + "logps/chosen": -1010.8065185546875, + "logps/rejected": -556.738525390625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6218445301055908, + "rewards/margins": 6.34872579574585, + "rewards/rejected": -7.9705705642700195, + "step": 6839 + }, + { + "epoch": 1.06, + "learning_rate": 9.130601172259972e-06, + "logits/chosen": -2.2056877613067627, + "logits/rejected": -2.7112414836883545, + "logps/chosen": -170.87710571289062, + "logps/rejected": -307.70123291015625, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5430030822753906, + "rewards/margins": 6.444533348083496, + "rewards/rejected": -9.987536430358887, + "step": 6840 + }, + { + "epoch": 1.06, + "learning_rate": 9.129867731728824e-06, + "logits/chosen": -2.767024040222168, + "logits/rejected": -2.5538129806518555, + "logps/chosen": -368.8935852050781, + "logps/rejected": -479.7788391113281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19535136222839355, + "rewards/margins": 11.12034797668457, + "rewards/rejected": -11.315698623657227, + "step": 6841 + }, + { + "epoch": 1.06, + "learning_rate": 9.129134291197676e-06, + "logits/chosen": -2.8335440158843994, + "logits/rejected": -2.938981533050537, + "logps/chosen": -600.5777587890625, + "logps/rejected": -568.19091796875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7614593505859375, + "rewards/margins": 8.31537914276123, + "rewards/rejected": -12.076838493347168, + "step": 6842 + }, + { + "epoch": 1.06, + "learning_rate": 9.128400850666528e-06, + "logits/chosen": -0.9102210998535156, + "logits/rejected": -2.405076026916504, + "logps/chosen": -138.7130889892578, + "logps/rejected": -551.3994140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.598832607269287, + "rewards/margins": 8.601778030395508, + "rewards/rejected": -12.200611114501953, + "step": 6843 + }, + { + "epoch": 1.06, + "learning_rate": 9.12766741013538e-06, + "logits/chosen": -2.8531386852264404, + "logits/rejected": -3.087195873260498, + "logps/chosen": -198.67498779296875, + "logps/rejected": -351.9840393066406, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6912691593170166, + "rewards/margins": 5.538454055786133, + "rewards/rejected": -8.22972297668457, + "step": 6844 + }, + { + "epoch": 1.06, + "learning_rate": 9.126933969604233e-06, + "logits/chosen": -2.0054800510406494, + "logits/rejected": -3.032697916030884, + "logps/chosen": -262.4191589355469, + "logps/rejected": -537.9627075195312, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4150948524475098, + "rewards/margins": 7.83410120010376, + "rewards/rejected": -10.24919605255127, + "step": 6845 + }, + { + "epoch": 1.06, + "learning_rate": 9.126200529073085e-06, + "logits/chosen": -1.4926832914352417, + "logits/rejected": -2.5165765285491943, + "logps/chosen": -106.320556640625, + "logps/rejected": -305.13311767578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4855704307556152, + "rewards/margins": 8.431720733642578, + "rewards/rejected": -11.917290687561035, + "step": 6846 + }, + { + "epoch": 1.06, + "learning_rate": 9.125467088541937e-06, + "logits/chosen": -2.1965417861938477, + "logits/rejected": -2.712071418762207, + "logps/chosen": -109.95841979980469, + "logps/rejected": -242.9066925048828, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.136991500854492, + "rewards/margins": 8.099336624145508, + "rewards/rejected": -10.236327171325684, + "step": 6847 + }, + { + "epoch": 1.07, + "learning_rate": 9.124733648010789e-06, + "logits/chosen": -1.4827438592910767, + "logits/rejected": -2.4716684818267822, + "logps/chosen": -181.46710205078125, + "logps/rejected": -441.893798828125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4748167991638184, + "rewards/margins": 6.2568254470825195, + "rewards/rejected": -9.73164176940918, + "step": 6848 + }, + { + "epoch": 1.07, + "learning_rate": 9.12400020747964e-06, + "logits/chosen": -2.648853302001953, + "logits/rejected": -2.727665424346924, + "logps/chosen": -458.677734375, + "logps/rejected": -595.056884765625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4327592849731445, + "rewards/margins": 7.636407375335693, + "rewards/rejected": -12.06916618347168, + "step": 6849 + }, + { + "epoch": 1.07, + "learning_rate": 9.123266766948493e-06, + "logits/chosen": -2.926783561706543, + "logits/rejected": -2.5442562103271484, + "logps/chosen": -577.7557983398438, + "logps/rejected": -489.36236572265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4736924171447754, + "rewards/margins": 8.595520973205566, + "rewards/rejected": -11.0692138671875, + "step": 6850 + }, + { + "epoch": 1.07, + "learning_rate": 9.122533326417344e-06, + "logits/chosen": -3.123981475830078, + "logits/rejected": -2.830472469329834, + "logps/chosen": -599.2965087890625, + "logps/rejected": -282.6353759765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1088058948516846, + "rewards/margins": 8.041975975036621, + "rewards/rejected": -10.150781631469727, + "step": 6851 + }, + { + "epoch": 1.07, + "learning_rate": 9.121799885886196e-06, + "logits/chosen": -1.8679941892623901, + "logits/rejected": -2.926774501800537, + "logps/chosen": -287.3333435058594, + "logps/rejected": -400.40380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7427154779434204, + "rewards/margins": 13.690458297729492, + "rewards/rejected": -12.94774341583252, + "step": 6852 + }, + { + "epoch": 1.07, + "learning_rate": 9.121066445355048e-06, + "logits/chosen": -1.765875220298767, + "logits/rejected": -2.827939987182617, + "logps/chosen": -106.80355834960938, + "logps/rejected": -111.62173461914062, + "loss": 0.9206, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.735579490661621, + "rewards/margins": 1.1277016401290894, + "rewards/rejected": -6.86328125, + "step": 6853 + }, + { + "epoch": 1.07, + "learning_rate": 9.120333004823902e-06, + "logits/chosen": -2.7613372802734375, + "logits/rejected": -3.1387147903442383, + "logps/chosen": -100.15411376953125, + "logps/rejected": -233.4661865234375, + "loss": 0.0467, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.760657787322998, + "rewards/margins": 4.088333606719971, + "rewards/rejected": -6.848991394042969, + "step": 6854 + }, + { + "epoch": 1.07, + "learning_rate": 9.119599564292754e-06, + "logits/chosen": -1.1346615552902222, + "logits/rejected": -2.965477466583252, + "logps/chosen": -254.56488037109375, + "logps/rejected": -529.3017578125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.911611557006836, + "rewards/margins": 7.677524089813232, + "rewards/rejected": -11.589136123657227, + "step": 6855 + }, + { + "epoch": 1.07, + "learning_rate": 9.118866123761606e-06, + "logits/chosen": -2.4222965240478516, + "logits/rejected": -2.930652141571045, + "logps/chosen": -385.03216552734375, + "logps/rejected": -399.4149169921875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.206064224243164, + "rewards/margins": 6.434722900390625, + "rewards/rejected": -10.640787124633789, + "step": 6856 + }, + { + "epoch": 1.07, + "learning_rate": 9.118132683230457e-06, + "logits/chosen": -2.6965105533599854, + "logits/rejected": -2.348595142364502, + "logps/chosen": -165.0435028076172, + "logps/rejected": -279.2232666015625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.792050838470459, + "rewards/margins": 7.222209930419922, + "rewards/rejected": -10.014261245727539, + "step": 6857 + }, + { + "epoch": 1.07, + "learning_rate": 9.117399242699311e-06, + "logits/chosen": -1.9135514497756958, + "logits/rejected": -2.8190066814422607, + "logps/chosen": -363.6854248046875, + "logps/rejected": -631.258544921875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.053518772125244, + "rewards/margins": 9.449703216552734, + "rewards/rejected": -12.50322151184082, + "step": 6858 + }, + { + "epoch": 1.07, + "learning_rate": 9.116665802168163e-06, + "logits/chosen": -2.858304977416992, + "logits/rejected": -2.2510058879852295, + "logps/chosen": -233.1415252685547, + "logps/rejected": -136.30813598632812, + "loss": 2.0898, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.92790150642395, + "rewards/margins": 1.7373998165130615, + "rewards/rejected": -5.665301322937012, + "step": 6859 + }, + { + "epoch": 1.07, + "learning_rate": 9.115932361637015e-06, + "logits/chosen": -2.735933303833008, + "logits/rejected": -1.4418219327926636, + "logps/chosen": -220.08966064453125, + "logps/rejected": -185.89817810058594, + "loss": 1.6994, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.7297749519348145, + "rewards/margins": 2.566502571105957, + "rewards/rejected": -8.29627799987793, + "step": 6860 + }, + { + "epoch": 1.07, + "learning_rate": 9.115198921105867e-06, + "logits/chosen": -1.3796027898788452, + "logits/rejected": -2.703537702560425, + "logps/chosen": -88.454833984375, + "logps/rejected": -329.7862243652344, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.693828582763672, + "rewards/margins": 8.007665634155273, + "rewards/rejected": -10.701493263244629, + "step": 6861 + }, + { + "epoch": 1.07, + "learning_rate": 9.114465480574718e-06, + "logits/chosen": -3.1874794960021973, + "logits/rejected": -1.2817350625991821, + "logps/chosen": -330.6856689453125, + "logps/rejected": -175.98252868652344, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7200638055801392, + "rewards/margins": 7.723386764526367, + "rewards/rejected": -8.443450927734375, + "step": 6862 + }, + { + "epoch": 1.07, + "learning_rate": 9.113732040043572e-06, + "logits/chosen": -2.108398675918579, + "logits/rejected": -2.9220468997955322, + "logps/chosen": -116.25405883789062, + "logps/rejected": -178.337890625, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8721911907196045, + "rewards/margins": 4.965279579162598, + "rewards/rejected": -8.837471008300781, + "step": 6863 + }, + { + "epoch": 1.07, + "learning_rate": 9.112998599512424e-06, + "logits/chosen": -2.5824012756347656, + "logits/rejected": -2.646019697189331, + "logps/chosen": -189.5977020263672, + "logps/rejected": -223.8475341796875, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.432807207107544, + "rewards/margins": 5.4399309158325195, + "rewards/rejected": -8.872737884521484, + "step": 6864 + }, + { + "epoch": 1.07, + "learning_rate": 9.112265158981276e-06, + "logits/chosen": -1.7271511554718018, + "logits/rejected": -2.783393383026123, + "logps/chosen": -147.04208374023438, + "logps/rejected": -332.0059814453125, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.051219940185547, + "rewards/margins": 5.454468727111816, + "rewards/rejected": -9.505688667297363, + "step": 6865 + }, + { + "epoch": 1.07, + "learning_rate": 9.111531718450128e-06, + "logits/chosen": -2.7392256259918213, + "logits/rejected": -2.946307420730591, + "logps/chosen": -329.3594970703125, + "logps/rejected": -421.48992919921875, + "loss": 0.5807, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7736213207244873, + "rewards/margins": 3.916729211807251, + "rewards/rejected": -7.690350532531738, + "step": 6866 + }, + { + "epoch": 1.07, + "learning_rate": 9.11079827791898e-06, + "logits/chosen": -0.822198748588562, + "logits/rejected": -2.89778995513916, + "logps/chosen": -84.93627166748047, + "logps/rejected": -502.837646484375, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2764124870300293, + "rewards/margins": 6.82246208190918, + "rewards/rejected": -10.09887409210205, + "step": 6867 + }, + { + "epoch": 1.07, + "learning_rate": 9.110064837387831e-06, + "logits/chosen": -2.73768949508667, + "logits/rejected": -3.0996391773223877, + "logps/chosen": -257.8064270019531, + "logps/rejected": -332.89044189453125, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.257291078567505, + "rewards/margins": 6.084748268127441, + "rewards/rejected": -9.342039108276367, + "step": 6868 + }, + { + "epoch": 1.07, + "learning_rate": 9.109331396856683e-06, + "logits/chosen": -2.8222999572753906, + "logits/rejected": -2.8397061824798584, + "logps/chosen": -211.24940490722656, + "logps/rejected": -248.97047424316406, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2384140491485596, + "rewards/margins": 7.111806869506836, + "rewards/rejected": -8.350220680236816, + "step": 6869 + }, + { + "epoch": 1.07, + "learning_rate": 9.108597956325535e-06, + "logits/chosen": -1.1197631359100342, + "logits/rejected": -3.058488607406616, + "logps/chosen": -269.3195495605469, + "logps/rejected": -491.9903564453125, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3786020278930664, + "rewards/margins": 6.3275556564331055, + "rewards/rejected": -9.706157684326172, + "step": 6870 + }, + { + "epoch": 1.07, + "learning_rate": 9.107864515794387e-06, + "logits/chosen": -1.6592034101486206, + "logits/rejected": -2.890357494354248, + "logps/chosen": -151.30508422851562, + "logps/rejected": -308.8789978027344, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.227484703063965, + "rewards/margins": 5.703197479248047, + "rewards/rejected": -9.930682182312012, + "step": 6871 + }, + { + "epoch": 1.07, + "learning_rate": 9.10713107526324e-06, + "logits/chosen": -2.3238978385925293, + "logits/rejected": -2.364950656890869, + "logps/chosen": -80.37738037109375, + "logps/rejected": -330.7001647949219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4459266662597656, + "rewards/margins": 10.34805679321289, + "rewards/rejected": -12.793983459472656, + "step": 6872 + }, + { + "epoch": 1.07, + "learning_rate": 9.106397634732093e-06, + "logits/chosen": -1.7456670999526978, + "logits/rejected": -2.6264398097991943, + "logps/chosen": -211.82235717773438, + "logps/rejected": -306.0031433105469, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1669163703918457, + "rewards/margins": 6.319269180297852, + "rewards/rejected": -8.486185073852539, + "step": 6873 + }, + { + "epoch": 1.07, + "learning_rate": 9.105664194200944e-06, + "logits/chosen": -2.8492743968963623, + "logits/rejected": -3.0265982151031494, + "logps/chosen": -44.582481384277344, + "logps/rejected": -138.7051544189453, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1295652389526367, + "rewards/margins": 3.9545235633850098, + "rewards/rejected": -7.0840888023376465, + "step": 6874 + }, + { + "epoch": 1.07, + "learning_rate": 9.104930753669796e-06, + "logits/chosen": -1.0742329359054565, + "logits/rejected": -3.000290632247925, + "logps/chosen": -161.48330688476562, + "logps/rejected": -571.8525390625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1021695137023926, + "rewards/margins": 8.055339813232422, + "rewards/rejected": -10.157508850097656, + "step": 6875 + }, + { + "epoch": 1.07, + "learning_rate": 9.104197313138648e-06, + "logits/chosen": -2.5841314792633057, + "logits/rejected": -2.927650213241577, + "logps/chosen": -312.2226867675781, + "logps/rejected": -455.7711486816406, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.623945713043213, + "rewards/margins": 8.110149383544922, + "rewards/rejected": -10.734094619750977, + "step": 6876 + }, + { + "epoch": 1.07, + "learning_rate": 9.1034638726075e-06, + "logits/chosen": -2.2074191570281982, + "logits/rejected": -2.979257106781006, + "logps/chosen": -135.7446746826172, + "logps/rejected": -410.15277099609375, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7276346683502197, + "rewards/margins": 7.7472357749938965, + "rewards/rejected": -10.474870681762695, + "step": 6877 + }, + { + "epoch": 1.07, + "learning_rate": 9.102730432076352e-06, + "logits/chosen": -3.0968306064605713, + "logits/rejected": -2.6386165618896484, + "logps/chosen": -270.0394592285156, + "logps/rejected": -227.54579162597656, + "loss": 0.4783, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0139975547790527, + "rewards/margins": 3.491224765777588, + "rewards/rejected": -6.505222320556641, + "step": 6878 + }, + { + "epoch": 1.07, + "learning_rate": 9.101996991545204e-06, + "logits/chosen": -2.9425086975097656, + "logits/rejected": -2.62861704826355, + "logps/chosen": -181.26690673828125, + "logps/rejected": -429.7705993652344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29525840282440186, + "rewards/margins": 9.814048767089844, + "rewards/rejected": -9.518791198730469, + "step": 6879 + }, + { + "epoch": 1.07, + "learning_rate": 9.101263551014056e-06, + "logits/chosen": -2.6272904872894287, + "logits/rejected": -2.82301926612854, + "logps/chosen": -119.22073364257812, + "logps/rejected": -287.4997253417969, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.163578510284424, + "rewards/margins": 6.339366436004639, + "rewards/rejected": -8.502944946289062, + "step": 6880 + }, + { + "epoch": 1.07, + "learning_rate": 9.10053011048291e-06, + "logits/chosen": -3.0778636932373047, + "logits/rejected": -3.0085911750793457, + "logps/chosen": -666.4175415039062, + "logps/rejected": -616.9153442382812, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9899704456329346, + "rewards/margins": 7.488306999206543, + "rewards/rejected": -9.478277206420898, + "step": 6881 + }, + { + "epoch": 1.07, + "learning_rate": 9.099796669951761e-06, + "logits/chosen": -2.9990639686584473, + "logits/rejected": -2.7131423950195312, + "logps/chosen": -319.1308898925781, + "logps/rejected": -236.26724243164062, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8221008777618408, + "rewards/margins": 7.4655232429504395, + "rewards/rejected": -8.28762435913086, + "step": 6882 + }, + { + "epoch": 1.07, + "learning_rate": 9.099063229420613e-06, + "logits/chosen": -1.580452799797058, + "logits/rejected": -2.0586740970611572, + "logps/chosen": -191.4217529296875, + "logps/rejected": -298.09759521484375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6768124103546143, + "rewards/margins": 7.473421096801758, + "rewards/rejected": -10.15023422241211, + "step": 6883 + }, + { + "epoch": 1.07, + "learning_rate": 9.098329788889465e-06, + "logits/chosen": -1.6452752351760864, + "logits/rejected": -2.959960460662842, + "logps/chosen": -103.25442504882812, + "logps/rejected": -431.7643127441406, + "loss": 0.6577, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.58760929107666, + "rewards/margins": 3.344188690185547, + "rewards/rejected": -7.931797981262207, + "step": 6884 + }, + { + "epoch": 1.07, + "learning_rate": 9.097596348358317e-06, + "logits/chosen": -2.8092873096466064, + "logits/rejected": -3.0881075859069824, + "logps/chosen": -151.6480255126953, + "logps/rejected": -222.16323852539062, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.681165337562561, + "rewards/margins": 6.693121910095215, + "rewards/rejected": -7.374286651611328, + "step": 6885 + }, + { + "epoch": 1.07, + "learning_rate": 9.096862907827169e-06, + "logits/chosen": -2.8755059242248535, + "logits/rejected": -2.011906147003174, + "logps/chosen": -322.42584228515625, + "logps/rejected": -339.56787109375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6213070154190063, + "rewards/margins": 6.87586784362793, + "rewards/rejected": -8.497175216674805, + "step": 6886 + }, + { + "epoch": 1.07, + "learning_rate": 9.09612946729602e-06, + "logits/chosen": -2.14758038520813, + "logits/rejected": -2.7451908588409424, + "logps/chosen": -64.07051086425781, + "logps/rejected": -311.37835693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0484938621520996, + "rewards/margins": 10.015013694763184, + "rewards/rejected": -12.063507080078125, + "step": 6887 + }, + { + "epoch": 1.07, + "learning_rate": 9.095396026764872e-06, + "logits/chosen": -2.8603708744049072, + "logits/rejected": -3.1381943225860596, + "logps/chosen": -756.214599609375, + "logps/rejected": -705.5534057617188, + "loss": 2.4993, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.898150444030762, + "rewards/margins": -0.4046814441680908, + "rewards/rejected": -5.49346923828125, + "step": 6888 + }, + { + "epoch": 1.07, + "learning_rate": 9.094662586233726e-06, + "logits/chosen": -2.8283145427703857, + "logits/rejected": -2.07665753364563, + "logps/chosen": -261.63580322265625, + "logps/rejected": -238.07705688476562, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2578370571136475, + "rewards/margins": 6.573276519775391, + "rewards/rejected": -8.831113815307617, + "step": 6889 + }, + { + "epoch": 1.07, + "learning_rate": 9.093929145702578e-06, + "logits/chosen": -3.094853162765503, + "logits/rejected": -2.4924750328063965, + "logps/chosen": -204.30978393554688, + "logps/rejected": -38.07954025268555, + "loss": 2.5816, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.334537506103516, + "rewards/margins": -2.463390350341797, + "rewards/rejected": -2.8711469173431396, + "step": 6890 + }, + { + "epoch": 1.07, + "learning_rate": 9.09319570517143e-06, + "logits/chosen": -2.9206433296203613, + "logits/rejected": -2.636009454727173, + "logps/chosen": -465.680419921875, + "logps/rejected": -395.05413818359375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3024094104766846, + "rewards/margins": 7.282723903656006, + "rewards/rejected": -9.58513355255127, + "step": 6891 + }, + { + "epoch": 1.07, + "learning_rate": 9.092462264640283e-06, + "logits/chosen": -1.9535107612609863, + "logits/rejected": -2.855140209197998, + "logps/chosen": -146.35528564453125, + "logps/rejected": -213.94747924804688, + "loss": 0.4431, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7956371307373047, + "rewards/margins": 4.558498382568359, + "rewards/rejected": -7.354135513305664, + "step": 6892 + }, + { + "epoch": 1.07, + "learning_rate": 9.091728824109135e-06, + "logits/chosen": -2.728811740875244, + "logits/rejected": -3.1755762100219727, + "logps/chosen": -153.06373596191406, + "logps/rejected": -449.6890563964844, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.659259796142578, + "rewards/margins": 4.558483123779297, + "rewards/rejected": -9.217742919921875, + "step": 6893 + }, + { + "epoch": 1.07, + "learning_rate": 9.090995383577987e-06, + "logits/chosen": -2.9621479511260986, + "logits/rejected": -3.082304000854492, + "logps/chosen": -575.3142700195312, + "logps/rejected": -139.17745971679688, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5966992378234863, + "rewards/margins": 4.400900840759277, + "rewards/rejected": -6.9975996017456055, + "step": 6894 + }, + { + "epoch": 1.07, + "learning_rate": 9.090261943046839e-06, + "logits/chosen": -2.957899808883667, + "logits/rejected": -2.9512481689453125, + "logps/chosen": -210.76275634765625, + "logps/rejected": -283.6822509765625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4425811767578125, + "rewards/margins": 6.755003929138184, + "rewards/rejected": -9.197586059570312, + "step": 6895 + }, + { + "epoch": 1.07, + "learning_rate": 9.08952850251569e-06, + "logits/chosen": -2.554384708404541, + "logits/rejected": -2.7615551948547363, + "logps/chosen": -167.90286254882812, + "logps/rejected": -122.4052734375, + "loss": 0.1856, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.973397731781006, + "rewards/margins": 3.084000825881958, + "rewards/rejected": -7.057398796081543, + "step": 6896 + }, + { + "epoch": 1.07, + "learning_rate": 9.088795061984543e-06, + "logits/chosen": -1.8989557027816772, + "logits/rejected": -2.828227996826172, + "logps/chosen": -136.55638122558594, + "logps/rejected": -374.304931640625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2314372062683105, + "rewards/margins": 8.160602569580078, + "rewards/rejected": -10.39203929901123, + "step": 6897 + }, + { + "epoch": 1.07, + "learning_rate": 9.088061621453396e-06, + "logits/chosen": -2.8766238689422607, + "logits/rejected": -2.505972385406494, + "logps/chosen": -353.72613525390625, + "logps/rejected": -275.59814453125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.135591983795166, + "rewards/margins": 7.750673770904541, + "rewards/rejected": -9.886265754699707, + "step": 6898 + }, + { + "epoch": 1.07, + "learning_rate": 9.087328180922248e-06, + "logits/chosen": -2.258469820022583, + "logits/rejected": -2.4961941242218018, + "logps/chosen": -276.3663635253906, + "logps/rejected": -388.1636047363281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.735417127609253, + "rewards/margins": 9.249637603759766, + "rewards/rejected": -10.985054016113281, + "step": 6899 + }, + { + "epoch": 1.07, + "learning_rate": 9.0865947403911e-06, + "logits/chosen": -2.87994384765625, + "logits/rejected": -3.009906530380249, + "logps/chosen": -140.87454223632812, + "logps/rejected": -257.788818359375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4778270721435547, + "rewards/margins": 6.326638698577881, + "rewards/rejected": -9.804466247558594, + "step": 6900 + }, + { + "epoch": 1.07, + "learning_rate": 9.085861299859952e-06, + "logits/chosen": -2.9921820163726807, + "logits/rejected": -1.552281141281128, + "logps/chosen": -374.39593505859375, + "logps/rejected": -198.53314208984375, + "loss": 0.4744, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.0467653274536133, + "rewards/margins": 3.42205810546875, + "rewards/rejected": -6.468823432922363, + "step": 6901 + }, + { + "epoch": 1.07, + "learning_rate": 9.085127859328804e-06, + "logits/chosen": -2.354682445526123, + "logits/rejected": -3.184995412826538, + "logps/chosen": -124.54840087890625, + "logps/rejected": -371.73785400390625, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.066956520080566, + "rewards/margins": 6.124401092529297, + "rewards/rejected": -10.191357612609863, + "step": 6902 + }, + { + "epoch": 1.07, + "learning_rate": 9.084394418797656e-06, + "logits/chosen": -2.1393656730651855, + "logits/rejected": -2.888723373413086, + "logps/chosen": -183.6368408203125, + "logps/rejected": -283.24407958984375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1581716537475586, + "rewards/margins": 6.394448757171631, + "rewards/rejected": -8.552619934082031, + "step": 6903 + }, + { + "epoch": 1.07, + "learning_rate": 9.083660978266508e-06, + "logits/chosen": -2.51773738861084, + "logits/rejected": -2.339181900024414, + "logps/chosen": -317.487060546875, + "logps/rejected": -342.3935546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5902900695800781, + "rewards/margins": 8.340801239013672, + "rewards/rejected": -9.93109130859375, + "step": 6904 + }, + { + "epoch": 1.07, + "learning_rate": 9.08292753773536e-06, + "logits/chosen": -3.1343119144439697, + "logits/rejected": -2.9242703914642334, + "logps/chosen": -440.095947265625, + "logps/rejected": -201.28189086914062, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.838890790939331, + "rewards/margins": 7.50266170501709, + "rewards/rejected": -9.341552734375, + "step": 6905 + }, + { + "epoch": 1.07, + "learning_rate": 9.082194097204211e-06, + "logits/chosen": -2.8317136764526367, + "logits/rejected": -1.926086664199829, + "logps/chosen": -173.85179138183594, + "logps/rejected": -240.71864318847656, + "loss": 0.0503, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2444872856140137, + "rewards/margins": 4.539206504821777, + "rewards/rejected": -7.783694267272949, + "step": 6906 + }, + { + "epoch": 1.07, + "learning_rate": 9.081460656673065e-06, + "logits/chosen": -1.355346441268921, + "logits/rejected": -2.536954164505005, + "logps/chosen": -146.4397735595703, + "logps/rejected": -427.2475891113281, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.098005294799805, + "rewards/margins": 7.198092460632324, + "rewards/rejected": -11.296097755432129, + "step": 6907 + }, + { + "epoch": 1.07, + "learning_rate": 9.080727216141917e-06, + "logits/chosen": -1.8007502555847168, + "logits/rejected": -3.0449118614196777, + "logps/chosen": -166.9221954345703, + "logps/rejected": -539.00048828125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1001007556915283, + "rewards/margins": 6.126528739929199, + "rewards/rejected": -9.226629257202148, + "step": 6908 + }, + { + "epoch": 1.07, + "learning_rate": 9.079993775610769e-06, + "logits/chosen": -3.0459675788879395, + "logits/rejected": -3.045332193374634, + "logps/chosen": -63.19782638549805, + "logps/rejected": -222.15411376953125, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.64113187789917, + "rewards/margins": 6.207056999206543, + "rewards/rejected": -8.848189353942871, + "step": 6909 + }, + { + "epoch": 1.07, + "learning_rate": 9.07926033507962e-06, + "logits/chosen": -2.7888283729553223, + "logits/rejected": -2.085843086242676, + "logps/chosen": -164.6505584716797, + "logps/rejected": -256.1473083496094, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9845713376998901, + "rewards/margins": 8.589797973632812, + "rewards/rejected": -9.574369430541992, + "step": 6910 + }, + { + "epoch": 1.07, + "learning_rate": 9.078526894548472e-06, + "logits/chosen": -2.185603141784668, + "logits/rejected": -2.972398281097412, + "logps/chosen": -128.43081665039062, + "logps/rejected": -307.4857177734375, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.457692861557007, + "rewards/margins": 5.634528160095215, + "rewards/rejected": -8.0922212600708, + "step": 6911 + }, + { + "epoch": 1.07, + "learning_rate": 9.077793454017324e-06, + "logits/chosen": -2.5528082847595215, + "logits/rejected": -3.064323663711548, + "logps/chosen": -319.7332763671875, + "logps/rejected": -343.02728271484375, + "loss": 0.2539, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7147659063339233, + "rewards/margins": 2.92451810836792, + "rewards/rejected": -4.639284133911133, + "step": 6912 + }, + { + "epoch": 1.08, + "learning_rate": 9.077060013486176e-06, + "logits/chosen": -2.3047029972076416, + "logits/rejected": -2.927513599395752, + "logps/chosen": -78.98696899414062, + "logps/rejected": -310.394775390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.531362295150757, + "rewards/margins": 8.104397773742676, + "rewards/rejected": -10.635760307312012, + "step": 6913 + }, + { + "epoch": 1.08, + "learning_rate": 9.076326572955028e-06, + "logits/chosen": -1.4546977281570435, + "logits/rejected": -2.8169846534729004, + "logps/chosen": -107.84962463378906, + "logps/rejected": -339.85205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5302982330322266, + "rewards/margins": 11.074285507202148, + "rewards/rejected": -13.604583740234375, + "step": 6914 + }, + { + "epoch": 1.08, + "learning_rate": 9.07559313242388e-06, + "logits/chosen": -2.1461453437805176, + "logits/rejected": -2.858041286468506, + "logps/chosen": -246.40122985839844, + "logps/rejected": -355.7720947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9113516807556152, + "rewards/margins": 9.388402938842773, + "rewards/rejected": -11.299755096435547, + "step": 6915 + }, + { + "epoch": 1.08, + "learning_rate": 9.074859691892733e-06, + "logits/chosen": -2.6754720211029053, + "logits/rejected": -2.662271738052368, + "logps/chosen": -139.7667236328125, + "logps/rejected": -284.5702209472656, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1057701110839844, + "rewards/margins": 6.6806230545043945, + "rewards/rejected": -9.786393165588379, + "step": 6916 + }, + { + "epoch": 1.08, + "learning_rate": 9.074126251361585e-06, + "logits/chosen": -2.5824332237243652, + "logits/rejected": -2.82006573677063, + "logps/chosen": -69.13494873046875, + "logps/rejected": -145.54820251464844, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3490898609161377, + "rewards/margins": 4.456465244293213, + "rewards/rejected": -6.8055548667907715, + "step": 6917 + }, + { + "epoch": 1.08, + "learning_rate": 9.073392810830437e-06, + "logits/chosen": -2.9152560234069824, + "logits/rejected": -3.012885332107544, + "logps/chosen": -70.80857849121094, + "logps/rejected": -182.33055114746094, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.791109085083008, + "rewards/margins": 5.256010055541992, + "rewards/rejected": -8.047119140625, + "step": 6918 + }, + { + "epoch": 1.08, + "learning_rate": 9.072659370299289e-06, + "logits/chosen": -2.9207680225372314, + "logits/rejected": -2.293921947479248, + "logps/chosen": -558.15869140625, + "logps/rejected": -399.544677734375, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.417738437652588, + "rewards/margins": 5.449028968811035, + "rewards/rejected": -7.866766929626465, + "step": 6919 + }, + { + "epoch": 1.08, + "learning_rate": 9.071925929768141e-06, + "logits/chosen": -2.077871561050415, + "logits/rejected": -3.0418500900268555, + "logps/chosen": -72.04859924316406, + "logps/rejected": -421.77093505859375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3643593788146973, + "rewards/margins": 6.938068866729736, + "rewards/rejected": -10.302428245544434, + "step": 6920 + }, + { + "epoch": 1.08, + "learning_rate": 9.071192489236993e-06, + "logits/chosen": -3.056204080581665, + "logits/rejected": -2.972745180130005, + "logps/chosen": -118.09759521484375, + "logps/rejected": -156.32313537597656, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7290427684783936, + "rewards/margins": 5.335387229919434, + "rewards/rejected": -7.064430236816406, + "step": 6921 + }, + { + "epoch": 1.08, + "learning_rate": 9.070459048705845e-06, + "logits/chosen": -2.2283711433410645, + "logits/rejected": -2.936954975128174, + "logps/chosen": -172.80006408691406, + "logps/rejected": -235.73414611816406, + "loss": 0.0261, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.091794013977051, + "rewards/margins": 5.390498161315918, + "rewards/rejected": -8.482292175292969, + "step": 6922 + }, + { + "epoch": 1.08, + "learning_rate": 9.069725608174697e-06, + "logits/chosen": -2.764056921005249, + "logits/rejected": -1.7687170505523682, + "logps/chosen": -375.8681945800781, + "logps/rejected": -230.97012329101562, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3807835578918457, + "rewards/margins": 7.083122253417969, + "rewards/rejected": -8.463905334472656, + "step": 6923 + }, + { + "epoch": 1.08, + "learning_rate": 9.06899216764355e-06, + "logits/chosen": -2.2558505535125732, + "logits/rejected": -3.0979037284851074, + "logps/chosen": -200.6930389404297, + "logps/rejected": -185.46444702148438, + "loss": 0.6053, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7012360095977783, + "rewards/margins": 3.1457719802856445, + "rewards/rejected": -6.847007751464844, + "step": 6924 + }, + { + "epoch": 1.08, + "learning_rate": 9.068258727112402e-06, + "logits/chosen": -0.7242793440818787, + "logits/rejected": -2.190035104751587, + "logps/chosen": -159.2113800048828, + "logps/rejected": -477.2412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.749560594558716, + "rewards/margins": 10.408358573913574, + "rewards/rejected": -13.157918930053711, + "step": 6925 + }, + { + "epoch": 1.08, + "learning_rate": 9.067525286581256e-06, + "logits/chosen": -1.2974265813827515, + "logits/rejected": -2.815375328063965, + "logps/chosen": -118.06889343261719, + "logps/rejected": -365.80975341796875, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7190701961517334, + "rewards/margins": 6.041686058044434, + "rewards/rejected": -8.760756492614746, + "step": 6926 + }, + { + "epoch": 1.08, + "learning_rate": 9.066791846050108e-06, + "logits/chosen": -2.729637861251831, + "logits/rejected": -3.0442869663238525, + "logps/chosen": -94.87826538085938, + "logps/rejected": -273.64801025390625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3470025062561035, + "rewards/margins": 6.510743618011475, + "rewards/rejected": -9.857746124267578, + "step": 6927 + }, + { + "epoch": 1.08, + "learning_rate": 9.06605840551896e-06, + "logits/chosen": -3.094271659851074, + "logits/rejected": -2.5235612392425537, + "logps/chosen": -164.4636688232422, + "logps/rejected": -230.17059326171875, + "loss": 0.6949, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.487173080444336, + "rewards/margins": 4.775568962097168, + "rewards/rejected": -7.262742519378662, + "step": 6928 + }, + { + "epoch": 1.08, + "learning_rate": 9.065324964987811e-06, + "logits/chosen": -2.3604884147644043, + "logits/rejected": -2.059282064437866, + "logps/chosen": -690.8001708984375, + "logps/rejected": -362.65283203125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2861327528953552, + "rewards/margins": 8.111845970153809, + "rewards/rejected": -8.397978782653809, + "step": 6929 + }, + { + "epoch": 1.08, + "learning_rate": 9.064591524456663e-06, + "logits/chosen": -1.7894909381866455, + "logits/rejected": -2.863565683364868, + "logps/chosen": -344.9819641113281, + "logps/rejected": -540.200439453125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4457062482833862, + "rewards/margins": 7.62405252456665, + "rewards/rejected": -9.069758415222168, + "step": 6930 + }, + { + "epoch": 1.08, + "learning_rate": 9.063858083925515e-06, + "logits/chosen": -2.0832762718200684, + "logits/rejected": -2.875239372253418, + "logps/chosen": -76.4103775024414, + "logps/rejected": -318.38299560546875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.870861530303955, + "rewards/margins": 6.263648986816406, + "rewards/rejected": -9.134510040283203, + "step": 6931 + }, + { + "epoch": 1.08, + "learning_rate": 9.063124643394367e-06, + "logits/chosen": -2.237727403640747, + "logits/rejected": -3.026474952697754, + "logps/chosen": -96.43400573730469, + "logps/rejected": -281.4963684082031, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.525953531265259, + "rewards/margins": 7.483450412750244, + "rewards/rejected": -10.009404182434082, + "step": 6932 + }, + { + "epoch": 1.08, + "learning_rate": 9.062391202863219e-06, + "logits/chosen": -2.927642583847046, + "logits/rejected": -2.478695869445801, + "logps/chosen": -366.4757080078125, + "logps/rejected": -348.27911376953125, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1289916038513184, + "rewards/margins": 6.808781623840332, + "rewards/rejected": -9.937772750854492, + "step": 6933 + }, + { + "epoch": 1.08, + "learning_rate": 9.061657762332072e-06, + "logits/chosen": -2.07603120803833, + "logits/rejected": -3.109753131866455, + "logps/chosen": -134.29739379882812, + "logps/rejected": -351.41705322265625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6155338287353516, + "rewards/margins": 7.485654354095459, + "rewards/rejected": -10.101188659667969, + "step": 6934 + }, + { + "epoch": 1.08, + "learning_rate": 9.060924321800924e-06, + "logits/chosen": -2.5456132888793945, + "logits/rejected": -3.1493375301361084, + "logps/chosen": -135.20899963378906, + "logps/rejected": -269.08306884765625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5138399600982666, + "rewards/margins": 7.172608852386475, + "rewards/rejected": -9.68644905090332, + "step": 6935 + }, + { + "epoch": 1.08, + "learning_rate": 9.060190881269776e-06, + "logits/chosen": -2.8932113647460938, + "logits/rejected": -2.806600570678711, + "logps/chosen": -102.6792221069336, + "logps/rejected": -192.3681182861328, + "loss": 0.4284, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.433197975158691, + "rewards/margins": 3.108187437057495, + "rewards/rejected": -7.541385650634766, + "step": 6936 + }, + { + "epoch": 1.08, + "learning_rate": 9.059457440738628e-06, + "logits/chosen": -2.379281759262085, + "logits/rejected": -2.8999154567718506, + "logps/chosen": -173.81398010253906, + "logps/rejected": -359.71051025390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2576091289520264, + "rewards/margins": 8.688594818115234, + "rewards/rejected": -9.946203231811523, + "step": 6937 + }, + { + "epoch": 1.08, + "learning_rate": 9.05872400020748e-06, + "logits/chosen": -3.0185582637786865, + "logits/rejected": -2.42862868309021, + "logps/chosen": -536.5829467773438, + "logps/rejected": -402.48870849609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3593568801879883, + "rewards/margins": 8.333477020263672, + "rewards/rejected": -11.692832946777344, + "step": 6938 + }, + { + "epoch": 1.08, + "learning_rate": 9.057990559676332e-06, + "logits/chosen": -3.021580219268799, + "logits/rejected": -2.885188102722168, + "logps/chosen": -708.389404296875, + "logps/rejected": -416.5993957519531, + "loss": 1.1894, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4803085327148438, + "rewards/margins": 3.6323935985565186, + "rewards/rejected": -7.112702369689941, + "step": 6939 + }, + { + "epoch": 1.08, + "learning_rate": 9.057257119145184e-06, + "logits/chosen": -2.7167751789093018, + "logits/rejected": -2.993093490600586, + "logps/chosen": -365.60821533203125, + "logps/rejected": -449.78338623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9659637808799744, + "rewards/margins": 9.531301498413086, + "rewards/rejected": -10.497265815734863, + "step": 6940 + }, + { + "epoch": 1.08, + "learning_rate": 9.056523678614036e-06, + "logits/chosen": -3.14813232421875, + "logits/rejected": -3.059701919555664, + "logps/chosen": -180.26382446289062, + "logps/rejected": -89.9237060546875, + "loss": 3.5712, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.540768623352051, + "rewards/margins": -3.5212063789367676, + "rewards/rejected": -4.019561767578125, + "step": 6941 + }, + { + "epoch": 1.08, + "learning_rate": 9.055790238082887e-06, + "logits/chosen": -1.6448230743408203, + "logits/rejected": -2.789677619934082, + "logps/chosen": -110.54564666748047, + "logps/rejected": -494.9072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1199012994766235, + "rewards/margins": 12.679803848266602, + "rewards/rejected": -13.799704551696777, + "step": 6942 + }, + { + "epoch": 1.08, + "learning_rate": 9.055056797551741e-06, + "logits/chosen": -1.6655539274215698, + "logits/rejected": -3.0248448848724365, + "logps/chosen": -112.48426818847656, + "logps/rejected": -303.5530700683594, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.208540916442871, + "rewards/margins": 7.025720596313477, + "rewards/rejected": -9.234261512756348, + "step": 6943 + }, + { + "epoch": 1.08, + "learning_rate": 9.054323357020593e-06, + "logits/chosen": -2.6417863368988037, + "logits/rejected": -3.0032031536102295, + "logps/chosen": -137.10067749023438, + "logps/rejected": -135.3663330078125, + "loss": 1.2171, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.408125877380371, + "rewards/margins": 1.1480351686477661, + "rewards/rejected": -4.556160926818848, + "step": 6944 + }, + { + "epoch": 1.08, + "learning_rate": 9.053589916489445e-06, + "logits/chosen": -2.7576513290405273, + "logits/rejected": -2.043278932571411, + "logps/chosen": -456.99383544921875, + "logps/rejected": -445.6650085449219, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.507429599761963, + "rewards/margins": 6.593867301940918, + "rewards/rejected": -11.101297378540039, + "step": 6945 + }, + { + "epoch": 1.08, + "learning_rate": 9.052856475958297e-06, + "logits/chosen": -2.680539846420288, + "logits/rejected": -2.0307297706604004, + "logps/chosen": -482.1033020019531, + "logps/rejected": -591.5314331054688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.605349063873291, + "rewards/margins": 11.39560317993164, + "rewards/rejected": -15.00095272064209, + "step": 6946 + }, + { + "epoch": 1.08, + "learning_rate": 9.052123035427148e-06, + "logits/chosen": -2.8961849212646484, + "logits/rejected": -3.0309417247772217, + "logps/chosen": -174.99786376953125, + "logps/rejected": -177.10464477539062, + "loss": 1.2281, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.484465599060059, + "rewards/margins": 1.4683384895324707, + "rewards/rejected": -5.952804088592529, + "step": 6947 + }, + { + "epoch": 1.08, + "learning_rate": 9.051389594896e-06, + "logits/chosen": -1.5166270732879639, + "logits/rejected": -3.0965170860290527, + "logps/chosen": -190.83444213867188, + "logps/rejected": -280.2991943359375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2418675422668457, + "rewards/margins": 8.462858200073242, + "rewards/rejected": -10.704726219177246, + "step": 6948 + }, + { + "epoch": 1.08, + "learning_rate": 9.050656154364852e-06, + "logits/chosen": -3.1710312366485596, + "logits/rejected": -2.668705463409424, + "logps/chosen": -197.34095764160156, + "logps/rejected": -377.92681884765625, + "loss": 0.2383, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6269471645355225, + "rewards/margins": 5.308981895446777, + "rewards/rejected": -7.935929298400879, + "step": 6949 + }, + { + "epoch": 1.08, + "learning_rate": 9.049922713833704e-06, + "logits/chosen": -1.202019453048706, + "logits/rejected": -2.663926362991333, + "logps/chosen": -131.0850067138672, + "logps/rejected": -421.13812255859375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2629899978637695, + "rewards/margins": 8.116060256958008, + "rewards/rejected": -11.379050254821777, + "step": 6950 + }, + { + "epoch": 1.08, + "learning_rate": 9.049189273302556e-06, + "logits/chosen": -2.5155084133148193, + "logits/rejected": -2.7438814640045166, + "logps/chosen": -95.42797088623047, + "logps/rejected": -235.9034423828125, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5622646808624268, + "rewards/margins": 6.971973419189453, + "rewards/rejected": -10.5342378616333, + "step": 6951 + }, + { + "epoch": 1.08, + "learning_rate": 9.04845583277141e-06, + "logits/chosen": -2.025352716445923, + "logits/rejected": -2.4149527549743652, + "logps/chosen": -217.91696166992188, + "logps/rejected": -464.87860107421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.88818359375, + "rewards/margins": 9.356792449951172, + "rewards/rejected": -13.244976997375488, + "step": 6952 + }, + { + "epoch": 1.08, + "learning_rate": 9.047722392240261e-06, + "logits/chosen": -1.757631540298462, + "logits/rejected": -2.478994607925415, + "logps/chosen": -241.1395263671875, + "logps/rejected": -363.02874755859375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5568084716796875, + "rewards/margins": 9.024565696716309, + "rewards/rejected": -15.581374168395996, + "step": 6953 + }, + { + "epoch": 1.08, + "learning_rate": 9.046988951709113e-06, + "logits/chosen": -3.131324291229248, + "logits/rejected": -3.1988613605499268, + "logps/chosen": -300.94097900390625, + "logps/rejected": -442.23431396484375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6573216915130615, + "rewards/margins": 6.562077045440674, + "rewards/rejected": -9.219398498535156, + "step": 6954 + }, + { + "epoch": 1.08, + "learning_rate": 9.046255511177965e-06, + "logits/chosen": -2.7580339908599854, + "logits/rejected": -1.8714113235473633, + "logps/chosen": -159.08627319335938, + "logps/rejected": -140.0175018310547, + "loss": 0.6276, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.97765588760376, + "rewards/margins": 0.6339190006256104, + "rewards/rejected": -6.611575126647949, + "step": 6955 + }, + { + "epoch": 1.08, + "learning_rate": 9.045522070646817e-06, + "logits/chosen": -2.1362881660461426, + "logits/rejected": -1.2221094369888306, + "logps/chosen": -439.67205810546875, + "logps/rejected": -500.1505126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.903738498687744, + "rewards/margins": 13.67912769317627, + "rewards/rejected": -17.582866668701172, + "step": 6956 + }, + { + "epoch": 1.08, + "learning_rate": 9.044788630115669e-06, + "logits/chosen": -2.7683470249176025, + "logits/rejected": -1.9063713550567627, + "logps/chosen": -366.6524963378906, + "logps/rejected": -196.31466674804688, + "loss": 0.0326, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.317948341369629, + "rewards/margins": 5.436423301696777, + "rewards/rejected": -9.754371643066406, + "step": 6957 + }, + { + "epoch": 1.08, + "learning_rate": 9.044055189584523e-06, + "logits/chosen": -1.6108741760253906, + "logits/rejected": -2.4990532398223877, + "logps/chosen": -219.21994018554688, + "logps/rejected": -430.79266357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7702178955078125, + "rewards/margins": 12.221126556396484, + "rewards/rejected": -13.991344451904297, + "step": 6958 + }, + { + "epoch": 1.08, + "learning_rate": 9.043321749053374e-06, + "logits/chosen": -2.6774346828460693, + "logits/rejected": -3.0787577629089355, + "logps/chosen": -105.14984130859375, + "logps/rejected": -165.98118591308594, + "loss": 0.3974, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.944870471954346, + "rewards/margins": 2.3487157821655273, + "rewards/rejected": -7.293586254119873, + "step": 6959 + }, + { + "epoch": 1.08, + "learning_rate": 9.042588308522226e-06, + "logits/chosen": -2.921851873397827, + "logits/rejected": -3.036640167236328, + "logps/chosen": -531.774658203125, + "logps/rejected": -314.35791015625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.512928009033203, + "rewards/margins": 7.220674991607666, + "rewards/rejected": -10.733602523803711, + "step": 6960 + }, + { + "epoch": 1.08, + "learning_rate": 9.04185486799108e-06, + "logits/chosen": -1.8703837394714355, + "logits/rejected": -2.8528223037719727, + "logps/chosen": -212.74453735351562, + "logps/rejected": -303.6642150878906, + "loss": 0.0317, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.300607204437256, + "rewards/margins": 4.010588645935059, + "rewards/rejected": -7.311195373535156, + "step": 6961 + }, + { + "epoch": 1.08, + "learning_rate": 9.041121427459932e-06, + "logits/chosen": -2.80096697807312, + "logits/rejected": -2.3231658935546875, + "logps/chosen": -228.36514282226562, + "logps/rejected": -498.0947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8490982055664062, + "rewards/margins": 11.104050636291504, + "rewards/rejected": -12.95314884185791, + "step": 6962 + }, + { + "epoch": 1.08, + "learning_rate": 9.040387986928784e-06, + "logits/chosen": -2.6903865337371826, + "logits/rejected": -2.170032262802124, + "logps/chosen": -188.92877197265625, + "logps/rejected": -198.88780212402344, + "loss": 0.0932, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.678562641143799, + "rewards/margins": 4.250636100769043, + "rewards/rejected": -7.929198265075684, + "step": 6963 + }, + { + "epoch": 1.08, + "learning_rate": 9.039654546397635e-06, + "logits/chosen": -2.994523525238037, + "logits/rejected": -2.8248655796051025, + "logps/chosen": -131.0648193359375, + "logps/rejected": -251.48385620117188, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6152069568634033, + "rewards/margins": 5.496021270751953, + "rewards/rejected": -9.111227989196777, + "step": 6964 + }, + { + "epoch": 1.08, + "learning_rate": 9.038921105866487e-06, + "logits/chosen": -2.5681650638580322, + "logits/rejected": -2.78009295463562, + "logps/chosen": -172.1997528076172, + "logps/rejected": -277.0082702636719, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6340765953063965, + "rewards/margins": 6.782492160797119, + "rewards/rejected": -9.416568756103516, + "step": 6965 + }, + { + "epoch": 1.08, + "learning_rate": 9.03818766533534e-06, + "logits/chosen": -2.5395660400390625, + "logits/rejected": -2.8768157958984375, + "logps/chosen": -98.90386199951172, + "logps/rejected": -288.97625732421875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5326638221740723, + "rewards/margins": 6.728507041931152, + "rewards/rejected": -9.261171340942383, + "step": 6966 + }, + { + "epoch": 1.08, + "learning_rate": 9.037454224804191e-06, + "logits/chosen": -2.662522077560425, + "logits/rejected": -2.685948610305786, + "logps/chosen": -187.9051513671875, + "logps/rejected": -346.755126953125, + "loss": 0.0601, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.646503448486328, + "rewards/margins": 6.550457954406738, + "rewards/rejected": -12.196962356567383, + "step": 6967 + }, + { + "epoch": 1.08, + "learning_rate": 9.036720784273043e-06, + "logits/chosen": -1.567294716835022, + "logits/rejected": -2.6761958599090576, + "logps/chosen": -159.1808319091797, + "logps/rejected": -433.8924560546875, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0033183097839355, + "rewards/margins": 6.511788368225098, + "rewards/rejected": -12.515107154846191, + "step": 6968 + }, + { + "epoch": 1.08, + "learning_rate": 9.035987343741895e-06, + "logits/chosen": -2.8422586917877197, + "logits/rejected": -1.889424204826355, + "logps/chosen": -417.16455078125, + "logps/rejected": -254.64178466796875, + "loss": 0.2631, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.099325656890869, + "rewards/margins": 3.9818313121795654, + "rewards/rejected": -7.0811567306518555, + "step": 6969 + }, + { + "epoch": 1.08, + "learning_rate": 9.035253903210748e-06, + "logits/chosen": -2.1197681427001953, + "logits/rejected": -2.8449575901031494, + "logps/chosen": -280.0748291015625, + "logps/rejected": -292.1954650878906, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2189011573791504, + "rewards/margins": 5.2968878746032715, + "rewards/rejected": -6.515789031982422, + "step": 6970 + }, + { + "epoch": 1.08, + "learning_rate": 9.0345204626796e-06, + "logits/chosen": -2.6110570430755615, + "logits/rejected": -3.125702142715454, + "logps/chosen": -186.52073669433594, + "logps/rejected": -426.3913269042969, + "loss": 1.764, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.37665319442749, + "rewards/margins": -0.7933669090270996, + "rewards/rejected": -5.583286285400391, + "step": 6971 + }, + { + "epoch": 1.08, + "learning_rate": 9.033787022148452e-06, + "logits/chosen": -2.7605433464050293, + "logits/rejected": -3.056562900543213, + "logps/chosen": -137.52557373046875, + "logps/rejected": -231.00283813476562, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.685023307800293, + "rewards/margins": 6.516195297241211, + "rewards/rejected": -9.201218605041504, + "step": 6972 + }, + { + "epoch": 1.08, + "learning_rate": 9.033053581617304e-06, + "logits/chosen": -2.7304441928863525, + "logits/rejected": -3.026541233062744, + "logps/chosen": -113.9437255859375, + "logps/rejected": -238.4783477783203, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5324251651763916, + "rewards/margins": 8.362459182739258, + "rewards/rejected": -11.89488410949707, + "step": 6973 + }, + { + "epoch": 1.08, + "learning_rate": 9.032320141086156e-06, + "logits/chosen": -1.7636125087738037, + "logits/rejected": -2.723443031311035, + "logps/chosen": -94.36715698242188, + "logps/rejected": -366.5033264160156, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.799541473388672, + "rewards/margins": 7.493813514709473, + "rewards/rejected": -11.293354988098145, + "step": 6974 + }, + { + "epoch": 1.08, + "learning_rate": 9.031586700555008e-06, + "logits/chosen": -2.8050644397735596, + "logits/rejected": -2.2791812419891357, + "logps/chosen": -253.65228271484375, + "logps/rejected": -240.8431396484375, + "loss": 0.0426, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.737433910369873, + "rewards/margins": 4.3279619216918945, + "rewards/rejected": -8.065396308898926, + "step": 6975 + }, + { + "epoch": 1.08, + "learning_rate": 9.03085326002386e-06, + "logits/chosen": -3.0062906742095947, + "logits/rejected": -2.983858346939087, + "logps/chosen": -140.5595703125, + "logps/rejected": -233.85226440429688, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.532515048980713, + "rewards/margins": 7.997250556945801, + "rewards/rejected": -11.529766082763672, + "step": 6976 + }, + { + "epoch": 1.09, + "learning_rate": 9.030119819492712e-06, + "logits/chosen": -2.9767563343048096, + "logits/rejected": -2.2579686641693115, + "logps/chosen": -778.762939453125, + "logps/rejected": -406.93109130859375, + "loss": 0.7133, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.3558952808380127, + "rewards/margins": 2.7177796363830566, + "rewards/rejected": -5.073675155639648, + "step": 6977 + }, + { + "epoch": 1.09, + "learning_rate": 9.029386378961563e-06, + "logits/chosen": -2.8842248916625977, + "logits/rejected": -2.460935115814209, + "logps/chosen": -917.9967041015625, + "logps/rejected": -556.21533203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5350801944732666, + "rewards/margins": 7.664319038391113, + "rewards/rejected": -10.1993989944458, + "step": 6978 + }, + { + "epoch": 1.09, + "learning_rate": 9.028652938430417e-06, + "logits/chosen": -1.5959312915802002, + "logits/rejected": -2.7209930419921875, + "logps/chosen": -104.83021545410156, + "logps/rejected": -446.89532470703125, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6076011657714844, + "rewards/margins": 8.6539888381958, + "rewards/rejected": -12.261589050292969, + "step": 6979 + }, + { + "epoch": 1.09, + "learning_rate": 9.027919497899269e-06, + "logits/chosen": -3.011760950088501, + "logits/rejected": -2.1582915782928467, + "logps/chosen": -213.25018310546875, + "logps/rejected": -191.65243530273438, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.094702959060669, + "rewards/margins": 6.4362616539001465, + "rewards/rejected": -9.530964851379395, + "step": 6980 + }, + { + "epoch": 1.09, + "learning_rate": 9.02718605736812e-06, + "logits/chosen": -2.239274263381958, + "logits/rejected": -2.8788702487945557, + "logps/chosen": -120.29048156738281, + "logps/rejected": -352.4672546386719, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5960783958435059, + "rewards/margins": 6.5219831466674805, + "rewards/rejected": -8.118061065673828, + "step": 6981 + }, + { + "epoch": 1.09, + "learning_rate": 9.026452616836973e-06, + "logits/chosen": -1.925079345703125, + "logits/rejected": -2.5047805309295654, + "logps/chosen": -130.26571655273438, + "logps/rejected": -234.02760314941406, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6620686054229736, + "rewards/margins": 8.060730934143066, + "rewards/rejected": -10.722799301147461, + "step": 6982 + }, + { + "epoch": 1.09, + "learning_rate": 9.025719176305825e-06, + "logits/chosen": -2.29103946685791, + "logits/rejected": -2.8725836277008057, + "logps/chosen": -167.9941864013672, + "logps/rejected": -163.84674072265625, + "loss": 0.3274, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.94269061088562, + "rewards/margins": 2.3410162925720215, + "rewards/rejected": -6.2837066650390625, + "step": 6983 + }, + { + "epoch": 1.09, + "learning_rate": 9.024985735774676e-06, + "logits/chosen": -2.9732391834259033, + "logits/rejected": -2.164736032485962, + "logps/chosen": -538.5401000976562, + "logps/rejected": -355.7719421386719, + "loss": 2.8551, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.985772609710693, + "rewards/margins": 0.10939455032348633, + "rewards/rejected": -5.095167636871338, + "step": 6984 + }, + { + "epoch": 1.09, + "learning_rate": 9.024252295243528e-06, + "logits/chosen": -1.4205430746078491, + "logits/rejected": -2.7988109588623047, + "logps/chosen": -175.72103881835938, + "logps/rejected": -512.6175537109375, + "loss": 0.4139, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5257821083068848, + "rewards/margins": 4.463078498840332, + "rewards/rejected": -7.988861083984375, + "step": 6985 + }, + { + "epoch": 1.09, + "learning_rate": 9.02351885471238e-06, + "logits/chosen": -2.6799087524414062, + "logits/rejected": -3.032221555709839, + "logps/chosen": -48.656158447265625, + "logps/rejected": -252.44871520996094, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3839221000671387, + "rewards/margins": 6.903402328491211, + "rewards/rejected": -10.287323951721191, + "step": 6986 + }, + { + "epoch": 1.09, + "learning_rate": 9.022785414181234e-06, + "logits/chosen": -2.019465208053589, + "logits/rejected": -3.0717597007751465, + "logps/chosen": -181.60958862304688, + "logps/rejected": -418.63250732421875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.035548448562622, + "rewards/margins": 8.7264404296875, + "rewards/rejected": -10.76198959350586, + "step": 6987 + }, + { + "epoch": 1.09, + "learning_rate": 9.022051973650086e-06, + "logits/chosen": -3.094794988632202, + "logits/rejected": -1.918035626411438, + "logps/chosen": -282.7337646484375, + "logps/rejected": -123.51326751708984, + "loss": 0.1686, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0147600173950195, + "rewards/margins": 1.7968671321868896, + "rewards/rejected": -5.81162691116333, + "step": 6988 + }, + { + "epoch": 1.09, + "learning_rate": 9.021318533118938e-06, + "logits/chosen": -1.777044653892517, + "logits/rejected": -2.692168951034546, + "logps/chosen": -170.16136169433594, + "logps/rejected": -491.67474365234375, + "loss": 0.1486, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.219383716583252, + "rewards/margins": 7.117995738983154, + "rewards/rejected": -13.337379455566406, + "step": 6989 + }, + { + "epoch": 1.09, + "learning_rate": 9.02058509258779e-06, + "logits/chosen": -0.9182853698730469, + "logits/rejected": -2.585512638092041, + "logps/chosen": -195.37872314453125, + "logps/rejected": -527.9235229492188, + "loss": 1.2002, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.520112037658691, + "rewards/margins": 2.672804594039917, + "rewards/rejected": -9.192916870117188, + "step": 6990 + }, + { + "epoch": 1.09, + "learning_rate": 9.019851652056641e-06, + "logits/chosen": -2.83117938041687, + "logits/rejected": -2.905282974243164, + "logps/chosen": -64.4894027709961, + "logps/rejected": -248.77008056640625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5382471084594727, + "rewards/margins": 7.513070583343506, + "rewards/rejected": -10.05131721496582, + "step": 6991 + }, + { + "epoch": 1.09, + "learning_rate": 9.019118211525495e-06, + "logits/chosen": -1.3010863065719604, + "logits/rejected": -2.817230463027954, + "logps/chosen": -77.2630844116211, + "logps/rejected": -535.4718017578125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9774070978164673, + "rewards/margins": 7.531005382537842, + "rewards/rejected": -9.50841236114502, + "step": 6992 + }, + { + "epoch": 1.09, + "learning_rate": 9.018384770994347e-06, + "logits/chosen": -3.07391095161438, + "logits/rejected": -1.2767109870910645, + "logps/chosen": -458.5877990722656, + "logps/rejected": -207.6725616455078, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9158711433410645, + "rewards/margins": 6.635408401489258, + "rewards/rejected": -9.551279067993164, + "step": 6993 + }, + { + "epoch": 1.09, + "learning_rate": 9.017651330463199e-06, + "logits/chosen": -3.096349000930786, + "logits/rejected": -2.988706111907959, + "logps/chosen": -220.45352172851562, + "logps/rejected": -342.87359619140625, + "loss": 0.1699, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4737648963928223, + "rewards/margins": 3.7824044227600098, + "rewards/rejected": -6.256169319152832, + "step": 6994 + }, + { + "epoch": 1.09, + "learning_rate": 9.01691788993205e-06, + "logits/chosen": -2.878392457962036, + "logits/rejected": -1.9763073921203613, + "logps/chosen": -314.4493408203125, + "logps/rejected": -317.5836181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34233057498931885, + "rewards/margins": 10.83946418762207, + "rewards/rejected": -11.181795120239258, + "step": 6995 + }, + { + "epoch": 1.09, + "learning_rate": 9.016184449400904e-06, + "logits/chosen": -2.7143447399139404, + "logits/rejected": -1.967666506767273, + "logps/chosen": -118.77877807617188, + "logps/rejected": -147.40887451171875, + "loss": 0.0303, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.668578147888184, + "rewards/margins": 5.471816062927246, + "rewards/rejected": -10.14039421081543, + "step": 6996 + }, + { + "epoch": 1.09, + "learning_rate": 9.015451008869756e-06, + "logits/chosen": -1.7003642320632935, + "logits/rejected": -3.025813341140747, + "logps/chosen": -74.52967834472656, + "logps/rejected": -312.58099365234375, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.385769844055176, + "rewards/margins": 4.325788974761963, + "rewards/rejected": -9.711559295654297, + "step": 6997 + }, + { + "epoch": 1.09, + "learning_rate": 9.014717568338608e-06, + "logits/chosen": -2.819154977798462, + "logits/rejected": -3.1120636463165283, + "logps/chosen": -134.46307373046875, + "logps/rejected": -289.4745788574219, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2396557331085205, + "rewards/margins": 7.6146931648254395, + "rewards/rejected": -9.854349136352539, + "step": 6998 + }, + { + "epoch": 1.09, + "learning_rate": 9.01398412780746e-06, + "logits/chosen": -2.5196611881256104, + "logits/rejected": -3.0078237056732178, + "logps/chosen": -699.4360961914062, + "logps/rejected": -626.080078125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1917831301689148, + "rewards/margins": 7.783077716827393, + "rewards/rejected": -7.591294288635254, + "step": 6999 + }, + { + "epoch": 1.09, + "learning_rate": 9.013250687276312e-06, + "logits/chosen": -1.6046572923660278, + "logits/rejected": -2.6503453254699707, + "logps/chosen": -148.56561279296875, + "logps/rejected": -455.7823181152344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8166278600692749, + "rewards/margins": 10.327596664428711, + "rewards/rejected": -11.144224166870117, + "step": 7000 + }, + { + "epoch": 1.09, + "learning_rate": 9.012517246745163e-06, + "logits/chosen": -2.2031962871551514, + "logits/rejected": -2.7128262519836426, + "logps/chosen": -174.491943359375, + "logps/rejected": -320.73919677734375, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.713834762573242, + "rewards/margins": 5.361172199249268, + "rewards/rejected": -8.075006484985352, + "step": 7001 + }, + { + "epoch": 1.09, + "learning_rate": 9.011783806214015e-06, + "logits/chosen": -2.4535505771636963, + "logits/rejected": -3.0371198654174805, + "logps/chosen": -641.1670532226562, + "logps/rejected": -625.6235961914062, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4898056387901306, + "rewards/margins": 6.4575042724609375, + "rewards/rejected": -6.947309494018555, + "step": 7002 + }, + { + "epoch": 1.09, + "learning_rate": 9.011050365682867e-06, + "logits/chosen": -2.790536880493164, + "logits/rejected": -2.959907293319702, + "logps/chosen": -305.60675048828125, + "logps/rejected": -715.7681884765625, + "loss": 0.1132, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.126992702484131, + "rewards/margins": 6.399991035461426, + "rewards/rejected": -9.526983261108398, + "step": 7003 + }, + { + "epoch": 1.09, + "learning_rate": 9.010316925151719e-06, + "logits/chosen": -2.8120813369750977, + "logits/rejected": -2.572521448135376, + "logps/chosen": -216.51593017578125, + "logps/rejected": -203.9181671142578, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7832443714141846, + "rewards/margins": 4.968147277832031, + "rewards/rejected": -7.751391887664795, + "step": 7004 + }, + { + "epoch": 1.09, + "learning_rate": 9.009583484620573e-06, + "logits/chosen": -1.7423995733261108, + "logits/rejected": -2.8142783641815186, + "logps/chosen": -106.76097869873047, + "logps/rejected": -307.002685546875, + "loss": 0.9292, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.6365861892700195, + "rewards/margins": 5.29525899887085, + "rewards/rejected": -9.931845664978027, + "step": 7005 + }, + { + "epoch": 1.09, + "learning_rate": 9.008850044089425e-06, + "logits/chosen": -2.1876394748687744, + "logits/rejected": -2.918891429901123, + "logps/chosen": -294.8846435546875, + "logps/rejected": -421.37957763671875, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9842010736465454, + "rewards/margins": 5.53091287612915, + "rewards/rejected": -7.515113830566406, + "step": 7006 + }, + { + "epoch": 1.09, + "learning_rate": 9.008116603558276e-06, + "logits/chosen": -1.5202629566192627, + "logits/rejected": -2.7452316284179688, + "logps/chosen": -127.40495300292969, + "logps/rejected": -462.7469787597656, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8485875129699707, + "rewards/margins": 9.156466484069824, + "rewards/rejected": -13.005054473876953, + "step": 7007 + }, + { + "epoch": 1.09, + "learning_rate": 9.007383163027128e-06, + "logits/chosen": -2.332545042037964, + "logits/rejected": -2.716529130935669, + "logps/chosen": -305.13421630859375, + "logps/rejected": -290.2042236328125, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.562981605529785, + "rewards/margins": 5.9646406173706055, + "rewards/rejected": -10.52762222290039, + "step": 7008 + }, + { + "epoch": 1.09, + "learning_rate": 9.00664972249598e-06, + "logits/chosen": -3.031445264816284, + "logits/rejected": -1.4097868204116821, + "logps/chosen": -527.1995239257812, + "logps/rejected": -239.9971923828125, + "loss": 0.0365, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.481355905532837, + "rewards/margins": 6.73310661315918, + "rewards/rejected": -9.214462280273438, + "step": 7009 + }, + { + "epoch": 1.09, + "learning_rate": 9.005916281964832e-06, + "logits/chosen": -2.1011922359466553, + "logits/rejected": -2.794550895690918, + "logps/chosen": -82.06044006347656, + "logps/rejected": -361.8721923828125, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7214322090148926, + "rewards/margins": 8.036933898925781, + "rewards/rejected": -11.758365631103516, + "step": 7010 + }, + { + "epoch": 1.09, + "learning_rate": 9.005182841433684e-06, + "logits/chosen": -2.996084451675415, + "logits/rejected": -2.779974937438965, + "logps/chosen": -405.188232421875, + "logps/rejected": -649.7360229492188, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.565581798553467, + "rewards/margins": 7.052175045013428, + "rewards/rejected": -10.617756843566895, + "step": 7011 + }, + { + "epoch": 1.09, + "learning_rate": 9.004449400902536e-06, + "logits/chosen": -3.1111104488372803, + "logits/rejected": -1.9601043462753296, + "logps/chosen": -353.1990661621094, + "logps/rejected": -363.0093994140625, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8041291236877441, + "rewards/margins": 6.855964660644531, + "rewards/rejected": -8.660093307495117, + "step": 7012 + }, + { + "epoch": 1.09, + "learning_rate": 9.003715960371388e-06, + "logits/chosen": -2.0412533283233643, + "logits/rejected": -2.877889633178711, + "logps/chosen": -308.34344482421875, + "logps/rejected": -610.7135009765625, + "loss": 0.3084, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.015367031097412, + "rewards/margins": 5.221907138824463, + "rewards/rejected": -10.237274169921875, + "step": 7013 + }, + { + "epoch": 1.09, + "learning_rate": 9.002982519840241e-06, + "logits/chosen": -2.6010046005249023, + "logits/rejected": -2.828409194946289, + "logps/chosen": -87.00337219238281, + "logps/rejected": -253.79727172851562, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9651217460632324, + "rewards/margins": 7.600132942199707, + "rewards/rejected": -10.565255165100098, + "step": 7014 + }, + { + "epoch": 1.09, + "learning_rate": 9.002249079309093e-06, + "logits/chosen": -3.1335322856903076, + "logits/rejected": -2.3223798274993896, + "logps/chosen": -913.1950073242188, + "logps/rejected": -621.4364013671875, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.232818603515625, + "rewards/margins": 5.810140609741211, + "rewards/rejected": -10.042959213256836, + "step": 7015 + }, + { + "epoch": 1.09, + "learning_rate": 9.001515638777945e-06, + "logits/chosen": -2.388845443725586, + "logits/rejected": -2.8251678943634033, + "logps/chosen": -142.83013916015625, + "logps/rejected": -405.9112548828125, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9900600910186768, + "rewards/margins": 8.436393737792969, + "rewards/rejected": -11.426453590393066, + "step": 7016 + }, + { + "epoch": 1.09, + "learning_rate": 9.000782198246797e-06, + "logits/chosen": -2.10343599319458, + "logits/rejected": -2.7001919746398926, + "logps/chosen": -158.66851806640625, + "logps/rejected": -361.1376953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.853424072265625, + "rewards/margins": 8.705838203430176, + "rewards/rejected": -11.5592622756958, + "step": 7017 + }, + { + "epoch": 1.09, + "learning_rate": 9.000048757715649e-06, + "logits/chosen": -2.7874162197113037, + "logits/rejected": -2.4815053939819336, + "logps/chosen": -238.63418579101562, + "logps/rejected": -350.98095703125, + "loss": 0.8499, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.2583608627319336, + "rewards/margins": 2.625709056854248, + "rewards/rejected": -5.884069919586182, + "step": 7018 + }, + { + "epoch": 1.09, + "learning_rate": 8.9993153171845e-06, + "logits/chosen": -2.3537960052490234, + "logits/rejected": -2.8673009872436523, + "logps/chosen": -126.98062133789062, + "logps/rejected": -228.82785034179688, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.443866729736328, + "rewards/margins": 7.834189414978027, + "rewards/rejected": -13.278056144714355, + "step": 7019 + }, + { + "epoch": 1.09, + "learning_rate": 8.998581876653353e-06, + "logits/chosen": -2.904033660888672, + "logits/rejected": -3.045839309692383, + "logps/chosen": -220.07400512695312, + "logps/rejected": -253.72344970703125, + "loss": 0.0317, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6440277099609375, + "rewards/margins": 4.061659336090088, + "rewards/rejected": -7.705687046051025, + "step": 7020 + }, + { + "epoch": 1.09, + "learning_rate": 8.997848436122204e-06, + "logits/chosen": -2.2934045791625977, + "logits/rejected": -2.5506417751312256, + "logps/chosen": -120.26657104492188, + "logps/rejected": -255.14186096191406, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.792919158935547, + "rewards/margins": 6.877907752990723, + "rewards/rejected": -10.670827865600586, + "step": 7021 + }, + { + "epoch": 1.09, + "learning_rate": 8.997114995591056e-06, + "logits/chosen": -1.5923783779144287, + "logits/rejected": -2.880044460296631, + "logps/chosen": -89.36135864257812, + "logps/rejected": -235.75767517089844, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.498947620391846, + "rewards/margins": 4.872878551483154, + "rewards/rejected": -9.371826171875, + "step": 7022 + }, + { + "epoch": 1.09, + "learning_rate": 8.99638155505991e-06, + "logits/chosen": -2.9391608238220215, + "logits/rejected": -3.105938673019409, + "logps/chosen": -116.99913024902344, + "logps/rejected": -290.5003967285156, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.572267532348633, + "rewards/margins": 7.363720417022705, + "rewards/rejected": -9.93598747253418, + "step": 7023 + }, + { + "epoch": 1.09, + "learning_rate": 8.995648114528762e-06, + "logits/chosen": -2.682340621948242, + "logits/rejected": -2.3856425285339355, + "logps/chosen": -219.48291015625, + "logps/rejected": -200.17735290527344, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.119316101074219, + "rewards/margins": 4.837643623352051, + "rewards/rejected": -8.956960678100586, + "step": 7024 + }, + { + "epoch": 1.09, + "learning_rate": 8.994914673997614e-06, + "logits/chosen": -2.7054309844970703, + "logits/rejected": -3.080740451812744, + "logps/chosen": -448.93011474609375, + "logps/rejected": -469.4849548339844, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.831775188446045, + "rewards/margins": 7.318212032318115, + "rewards/rejected": -10.14998722076416, + "step": 7025 + }, + { + "epoch": 1.09, + "learning_rate": 8.994181233466467e-06, + "logits/chosen": -3.053290605545044, + "logits/rejected": -2.1067941188812256, + "logps/chosen": -635.6272583007812, + "logps/rejected": -322.0774230957031, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5597763061523438, + "rewards/margins": 7.58481502532959, + "rewards/rejected": -9.144591331481934, + "step": 7026 + }, + { + "epoch": 1.09, + "learning_rate": 8.993447792935319e-06, + "logits/chosen": -2.850820302963257, + "logits/rejected": -3.0268747806549072, + "logps/chosen": -83.66779327392578, + "logps/rejected": -171.87266540527344, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.094625949859619, + "rewards/margins": 5.183356285095215, + "rewards/rejected": -7.277981758117676, + "step": 7027 + }, + { + "epoch": 1.09, + "learning_rate": 8.992714352404171e-06, + "logits/chosen": -2.3788928985595703, + "logits/rejected": -2.931522846221924, + "logps/chosen": -201.9696044921875, + "logps/rejected": -424.5958251953125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8948454856872559, + "rewards/margins": 6.783975601196289, + "rewards/rejected": -8.678821563720703, + "step": 7028 + }, + { + "epoch": 1.09, + "learning_rate": 8.991980911873023e-06, + "logits/chosen": -2.8778653144836426, + "logits/rejected": -1.477903962135315, + "logps/chosen": -413.8657531738281, + "logps/rejected": -325.72308349609375, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.703370094299316, + "rewards/margins": 6.971259117126465, + "rewards/rejected": -11.674629211425781, + "step": 7029 + }, + { + "epoch": 1.09, + "learning_rate": 8.991247471341875e-06, + "logits/chosen": -2.6566693782806396, + "logits/rejected": -3.111691474914551, + "logps/chosen": -95.36001586914062, + "logps/rejected": -324.3180847167969, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1243605613708496, + "rewards/margins": 5.918076515197754, + "rewards/rejected": -9.042436599731445, + "step": 7030 + }, + { + "epoch": 1.09, + "learning_rate": 8.990514030810727e-06, + "logits/chosen": -2.932194948196411, + "logits/rejected": -2.4396421909332275, + "logps/chosen": -315.1790466308594, + "logps/rejected": -395.2447814941406, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.26288104057312, + "rewards/margins": 5.94720458984375, + "rewards/rejected": -8.21008586883545, + "step": 7031 + }, + { + "epoch": 1.09, + "learning_rate": 8.98978059027958e-06, + "logits/chosen": -1.324393630027771, + "logits/rejected": -2.2071571350097656, + "logps/chosen": -209.53025817871094, + "logps/rejected": -537.6527099609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.000852584838867, + "rewards/margins": 8.5010986328125, + "rewards/rejected": -12.50195026397705, + "step": 7032 + }, + { + "epoch": 1.09, + "learning_rate": 8.989047149748432e-06, + "logits/chosen": -3.165778636932373, + "logits/rejected": -2.434560537338257, + "logps/chosen": -221.9083709716797, + "logps/rejected": -239.51722717285156, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9972469806671143, + "rewards/margins": 7.379391193389893, + "rewards/rejected": -9.376638412475586, + "step": 7033 + }, + { + "epoch": 1.09, + "learning_rate": 8.988313709217284e-06, + "logits/chosen": -1.5216313600540161, + "logits/rejected": -2.8710551261901855, + "logps/chosen": -80.46011352539062, + "logps/rejected": -254.489501953125, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1061527729034424, + "rewards/margins": 4.841814994812012, + "rewards/rejected": -7.947967529296875, + "step": 7034 + }, + { + "epoch": 1.09, + "learning_rate": 8.987580268686136e-06, + "logits/chosen": -2.365685224533081, + "logits/rejected": -1.9134491682052612, + "logps/chosen": -325.5101623535156, + "logps/rejected": -151.1806640625, + "loss": 1.2455, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.4964141845703125, + "rewards/margins": 1.3986269235610962, + "rewards/rejected": -5.895041465759277, + "step": 7035 + }, + { + "epoch": 1.09, + "learning_rate": 8.986846828154988e-06, + "logits/chosen": -1.6121066808700562, + "logits/rejected": -2.9297828674316406, + "logps/chosen": -157.7869110107422, + "logps/rejected": -352.9267578125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.307387590408325, + "rewards/margins": 7.676939010620117, + "rewards/rejected": -10.98432731628418, + "step": 7036 + }, + { + "epoch": 1.09, + "learning_rate": 8.98611338762384e-06, + "logits/chosen": -3.0224170684814453, + "logits/rejected": -1.7985857725143433, + "logps/chosen": -210.5179901123047, + "logps/rejected": -164.38485717773438, + "loss": 0.2163, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5679707527160645, + "rewards/margins": 1.9067983627319336, + "rewards/rejected": -7.474769115447998, + "step": 7037 + }, + { + "epoch": 1.09, + "learning_rate": 8.985379947092691e-06, + "logits/chosen": -2.6559553146362305, + "logits/rejected": -2.7662200927734375, + "logps/chosen": -270.19000244140625, + "logps/rejected": -315.6243896484375, + "loss": 0.6047, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.570662975311279, + "rewards/margins": 3.5542285442352295, + "rewards/rejected": -10.12489128112793, + "step": 7038 + }, + { + "epoch": 1.09, + "learning_rate": 8.984646506561543e-06, + "logits/chosen": -2.0499467849731445, + "logits/rejected": -3.088069438934326, + "logps/chosen": -98.87521362304688, + "logps/rejected": -474.8000183105469, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1295981407165527, + "rewards/margins": 8.167681694030762, + "rewards/rejected": -11.297279357910156, + "step": 7039 + }, + { + "epoch": 1.09, + "learning_rate": 8.983913066030395e-06, + "logits/chosen": -2.985628604888916, + "logits/rejected": -1.889965295791626, + "logps/chosen": -242.65684509277344, + "logps/rejected": -183.900146484375, + "loss": 0.1558, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.674787998199463, + "rewards/margins": 4.858963489532471, + "rewards/rejected": -8.533751487731934, + "step": 7040 + }, + { + "epoch": 1.1, + "learning_rate": 8.983179625499249e-06, + "logits/chosen": -2.9537689685821533, + "logits/rejected": -3.1041808128356934, + "logps/chosen": -488.8023376464844, + "logps/rejected": -546.987548828125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.301679611206055, + "rewards/margins": 6.093935966491699, + "rewards/rejected": -10.395615577697754, + "step": 7041 + }, + { + "epoch": 1.1, + "learning_rate": 8.9824461849681e-06, + "logits/chosen": -3.1568641662597656, + "logits/rejected": -2.551478624343872, + "logps/chosen": -342.70867919921875, + "logps/rejected": -352.1246337890625, + "loss": 0.1668, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.362392902374268, + "rewards/margins": 3.7227749824523926, + "rewards/rejected": -9.08516788482666, + "step": 7042 + }, + { + "epoch": 1.1, + "learning_rate": 8.981712744436953e-06, + "logits/chosen": -2.947303533554077, + "logits/rejected": -3.0118794441223145, + "logps/chosen": -58.80561828613281, + "logps/rejected": -177.45849609375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2841713428497314, + "rewards/margins": 8.329047203063965, + "rewards/rejected": -10.613218307495117, + "step": 7043 + }, + { + "epoch": 1.1, + "learning_rate": 8.980979303905804e-06, + "logits/chosen": -2.897341012954712, + "logits/rejected": -2.3404059410095215, + "logps/chosen": -357.4366760253906, + "logps/rejected": -238.5228271484375, + "loss": 0.0391, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.274216413497925, + "rewards/margins": 3.4055233001708984, + "rewards/rejected": -6.679739475250244, + "step": 7044 + }, + { + "epoch": 1.1, + "learning_rate": 8.980245863374656e-06, + "logits/chosen": -2.5262508392333984, + "logits/rejected": -3.009739637374878, + "logps/chosen": -109.66268920898438, + "logps/rejected": -219.1483612060547, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.995313882827759, + "rewards/margins": 9.06301498413086, + "rewards/rejected": -12.058328628540039, + "step": 7045 + }, + { + "epoch": 1.1, + "learning_rate": 8.979512422843508e-06, + "logits/chosen": -2.9471309185028076, + "logits/rejected": -3.114549398422241, + "logps/chosen": -110.79875183105469, + "logps/rejected": -226.63906860351562, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.950713634490967, + "rewards/margins": 5.676031112670898, + "rewards/rejected": -9.626745223999023, + "step": 7046 + }, + { + "epoch": 1.1, + "learning_rate": 8.97877898231236e-06, + "logits/chosen": -2.890580415725708, + "logits/rejected": -1.7474836111068726, + "logps/chosen": -257.7724914550781, + "logps/rejected": -180.81985473632812, + "loss": 1.4604, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.258821487426758, + "rewards/margins": 1.2857023477554321, + "rewards/rejected": -6.5445237159729, + "step": 7047 + }, + { + "epoch": 1.1, + "learning_rate": 8.978045541781212e-06, + "logits/chosen": -3.016740083694458, + "logits/rejected": -2.7618019580841064, + "logps/chosen": -281.8809814453125, + "logps/rejected": -236.81210327148438, + "loss": 0.4753, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.031530380249023, + "rewards/margins": 3.649838447570801, + "rewards/rejected": -9.681368827819824, + "step": 7048 + }, + { + "epoch": 1.1, + "learning_rate": 8.977312101250064e-06, + "logits/chosen": -2.980323314666748, + "logits/rejected": -2.9146978855133057, + "logps/chosen": -558.0947875976562, + "logps/rejected": -528.994873046875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6072678565979004, + "rewards/margins": 5.559112071990967, + "rewards/rejected": -8.166379928588867, + "step": 7049 + }, + { + "epoch": 1.1, + "learning_rate": 8.976578660718917e-06, + "logits/chosen": -2.502927780151367, + "logits/rejected": -3.1291604042053223, + "logps/chosen": -93.3377685546875, + "logps/rejected": -289.2791748046875, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6378769874572754, + "rewards/margins": 6.113324165344238, + "rewards/rejected": -9.751201629638672, + "step": 7050 + }, + { + "epoch": 1.1, + "learning_rate": 8.97584522018777e-06, + "logits/chosen": -3.1885554790496826, + "logits/rejected": -2.947613477706909, + "logps/chosen": -588.722412109375, + "logps/rejected": -581.5593872070312, + "loss": 0.8499, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.407038688659668, + "rewards/margins": 4.524130821228027, + "rewards/rejected": -7.931169509887695, + "step": 7051 + }, + { + "epoch": 1.1, + "learning_rate": 8.975111779656621e-06, + "logits/chosen": -1.5643517971038818, + "logits/rejected": -2.6670398712158203, + "logps/chosen": -102.98733520507812, + "logps/rejected": -392.0530090332031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.210209846496582, + "rewards/margins": 10.877405166625977, + "rewards/rejected": -16.087615966796875, + "step": 7052 + }, + { + "epoch": 1.1, + "learning_rate": 8.974378339125473e-06, + "logits/chosen": -2.550304412841797, + "logits/rejected": -2.850722312927246, + "logps/chosen": -155.837890625, + "logps/rejected": -272.6076965332031, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7215237021446228, + "rewards/margins": 8.076471328735352, + "rewards/rejected": -8.797994613647461, + "step": 7053 + }, + { + "epoch": 1.1, + "learning_rate": 8.973644898594325e-06, + "logits/chosen": -1.8185051679611206, + "logits/rejected": -2.4064903259277344, + "logps/chosen": -137.12078857421875, + "logps/rejected": -339.161865234375, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9776453971862793, + "rewards/margins": 6.734719276428223, + "rewards/rejected": -10.712364196777344, + "step": 7054 + }, + { + "epoch": 1.1, + "learning_rate": 8.972911458063177e-06, + "logits/chosen": -1.4877806901931763, + "logits/rejected": -2.9025979042053223, + "logps/chosen": -93.62667846679688, + "logps/rejected": -395.2467956542969, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8286426067352295, + "rewards/margins": 7.563442230224609, + "rewards/rejected": -9.392085075378418, + "step": 7055 + }, + { + "epoch": 1.1, + "learning_rate": 8.972178017532029e-06, + "logits/chosen": -2.421527624130249, + "logits/rejected": -3.012515068054199, + "logps/chosen": -105.88256072998047, + "logps/rejected": -217.58917236328125, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.233089923858643, + "rewards/margins": 4.6039934158325195, + "rewards/rejected": -10.83708381652832, + "step": 7056 + }, + { + "epoch": 1.1, + "learning_rate": 8.97144457700088e-06, + "logits/chosen": -3.0226058959960938, + "logits/rejected": -2.7287285327911377, + "logps/chosen": -266.77740478515625, + "logps/rejected": -340.3948669433594, + "loss": 0.0561, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4904556274414062, + "rewards/margins": 4.525835990905762, + "rewards/rejected": -8.016291618347168, + "step": 7057 + }, + { + "epoch": 1.1, + "learning_rate": 8.970711136469734e-06, + "logits/chosen": -1.8879746198654175, + "logits/rejected": -2.7042219638824463, + "logps/chosen": -167.6458740234375, + "logps/rejected": -243.69126892089844, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.59018874168396, + "rewards/margins": 6.407350540161133, + "rewards/rejected": -9.997539520263672, + "step": 7058 + }, + { + "epoch": 1.1, + "learning_rate": 8.969977695938586e-06, + "logits/chosen": -2.9118878841400146, + "logits/rejected": -1.5203790664672852, + "logps/chosen": -204.94192504882812, + "logps/rejected": -116.31625366210938, + "loss": 0.3101, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.189234256744385, + "rewards/margins": 1.1339675188064575, + "rewards/rejected": -6.323201656341553, + "step": 7059 + }, + { + "epoch": 1.1, + "learning_rate": 8.96924425540744e-06, + "logits/chosen": -2.91945743560791, + "logits/rejected": -1.6910539865493774, + "logps/chosen": -429.9605712890625, + "logps/rejected": -392.30401611328125, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9632214307785034, + "rewards/margins": 7.560679912567139, + "rewards/rejected": -9.523900985717773, + "step": 7060 + }, + { + "epoch": 1.1, + "learning_rate": 8.968510814876291e-06, + "logits/chosen": -2.984099864959717, + "logits/rejected": -2.636899471282959, + "logps/chosen": -238.6775665283203, + "logps/rejected": -288.3319396972656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.701787233352661, + "rewards/margins": 8.116598129272461, + "rewards/rejected": -10.81838607788086, + "step": 7061 + }, + { + "epoch": 1.1, + "learning_rate": 8.967777374345143e-06, + "logits/chosen": -3.105790853500366, + "logits/rejected": -2.853355884552002, + "logps/chosen": -475.19134521484375, + "logps/rejected": -399.3091735839844, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4359909296035767, + "rewards/margins": 6.940255641937256, + "rewards/rejected": -8.376246452331543, + "step": 7062 + }, + { + "epoch": 1.1, + "learning_rate": 8.967043933813995e-06, + "logits/chosen": -2.6212267875671387, + "logits/rejected": -3.0796849727630615, + "logps/chosen": -140.64820861816406, + "logps/rejected": -377.94989013671875, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.386970520019531, + "rewards/margins": 7.2308807373046875, + "rewards/rejected": -11.617851257324219, + "step": 7063 + }, + { + "epoch": 1.1, + "learning_rate": 8.966310493282847e-06, + "logits/chosen": -1.4827096462249756, + "logits/rejected": -2.7869367599487305, + "logps/chosen": -64.72692108154297, + "logps/rejected": -324.20538330078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.835751533508301, + "rewards/margins": 8.264766693115234, + "rewards/rejected": -12.100519180297852, + "step": 7064 + }, + { + "epoch": 1.1, + "learning_rate": 8.965577052751699e-06, + "logits/chosen": -3.0478694438934326, + "logits/rejected": -2.5478434562683105, + "logps/chosen": -149.216552734375, + "logps/rejected": -154.34815979003906, + "loss": 0.81, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.697842597961426, + "rewards/margins": 2.45835542678833, + "rewards/rejected": -8.156198501586914, + "step": 7065 + }, + { + "epoch": 1.1, + "learning_rate": 8.96484361222055e-06, + "logits/chosen": -1.683489441871643, + "logits/rejected": -2.849418878555298, + "logps/chosen": -88.72541809082031, + "logps/rejected": -304.4131164550781, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.406664848327637, + "rewards/margins": 6.219125747680664, + "rewards/rejected": -11.6257905960083, + "step": 7066 + }, + { + "epoch": 1.1, + "learning_rate": 8.964110171689403e-06, + "logits/chosen": -2.4862818717956543, + "logits/rejected": -2.907487630844116, + "logps/chosen": -144.4571533203125, + "logps/rejected": -396.636962890625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3432512283325195, + "rewards/margins": 8.857114791870117, + "rewards/rejected": -13.20036506652832, + "step": 7067 + }, + { + "epoch": 1.1, + "learning_rate": 8.963376731158256e-06, + "logits/chosen": -0.7895092368125916, + "logits/rejected": -2.7708659172058105, + "logps/chosen": -120.05461883544922, + "logps/rejected": -369.0970458984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.984443187713623, + "rewards/margins": 8.104545593261719, + "rewards/rejected": -12.0889892578125, + "step": 7068 + }, + { + "epoch": 1.1, + "learning_rate": 8.962643290627108e-06, + "logits/chosen": -2.917240858078003, + "logits/rejected": -2.989710807800293, + "logps/chosen": -119.441162109375, + "logps/rejected": -175.11798095703125, + "loss": 0.3622, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.700803279876709, + "rewards/margins": 3.2687361240386963, + "rewards/rejected": -8.969539642333984, + "step": 7069 + }, + { + "epoch": 1.1, + "learning_rate": 8.96190985009596e-06, + "logits/chosen": -2.8415114879608154, + "logits/rejected": -2.947908401489258, + "logps/chosen": -211.8509521484375, + "logps/rejected": -133.8934783935547, + "loss": 0.1517, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.925036430358887, + "rewards/margins": 3.2187390327453613, + "rewards/rejected": -8.14377498626709, + "step": 7070 + }, + { + "epoch": 1.1, + "learning_rate": 8.961176409564812e-06, + "logits/chosen": -2.6845548152923584, + "logits/rejected": -3.073749542236328, + "logps/chosen": -414.20440673828125, + "logps/rejected": -499.4539794921875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2473092079162598, + "rewards/margins": 7.58380651473999, + "rewards/rejected": -8.83111572265625, + "step": 7071 + }, + { + "epoch": 1.1, + "learning_rate": 8.960442969033664e-06, + "logits/chosen": -2.1916537284851074, + "logits/rejected": -3.0449352264404297, + "logps/chosen": -158.72120666503906, + "logps/rejected": -547.9631958007812, + "loss": 0.1934, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.967205047607422, + "rewards/margins": 7.656917095184326, + "rewards/rejected": -13.624122619628906, + "step": 7072 + }, + { + "epoch": 1.1, + "learning_rate": 8.959709528502516e-06, + "logits/chosen": -2.4094927310943604, + "logits/rejected": -3.067909002304077, + "logps/chosen": -522.5751342773438, + "logps/rejected": -695.011962890625, + "loss": 0.0425, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4189534187316895, + "rewards/margins": 4.865545272827148, + "rewards/rejected": -9.28449821472168, + "step": 7073 + }, + { + "epoch": 1.1, + "learning_rate": 8.958976087971368e-06, + "logits/chosen": -2.1994333267211914, + "logits/rejected": -2.5258243083953857, + "logps/chosen": -207.0902099609375, + "logps/rejected": -167.1344451904297, + "loss": 1.9778, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.939375400543213, + "rewards/margins": 1.5552189350128174, + "rewards/rejected": -8.49459457397461, + "step": 7074 + }, + { + "epoch": 1.1, + "learning_rate": 8.95824264744022e-06, + "logits/chosen": -2.779564142227173, + "logits/rejected": -2.9398441314697266, + "logps/chosen": -161.54946899414062, + "logps/rejected": -241.6057891845703, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.849142551422119, + "rewards/margins": 6.498505115509033, + "rewards/rejected": -9.347647666931152, + "step": 7075 + }, + { + "epoch": 1.1, + "learning_rate": 8.957509206909073e-06, + "logits/chosen": -2.033674716949463, + "logits/rejected": -2.882219076156616, + "logps/chosen": -131.2142791748047, + "logps/rejected": -312.0093688964844, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8062713146209717, + "rewards/margins": 8.293628692626953, + "rewards/rejected": -11.099900245666504, + "step": 7076 + }, + { + "epoch": 1.1, + "learning_rate": 8.956775766377925e-06, + "logits/chosen": -3.0743489265441895, + "logits/rejected": -2.7647147178649902, + "logps/chosen": -219.69027709960938, + "logps/rejected": -213.485595703125, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6017951965332031, + "rewards/margins": 5.850701332092285, + "rewards/rejected": -7.452496528625488, + "step": 7077 + }, + { + "epoch": 1.1, + "learning_rate": 8.956042325846777e-06, + "logits/chosen": -2.5626626014709473, + "logits/rejected": -3.0080132484436035, + "logps/chosen": -310.9486083984375, + "logps/rejected": -377.71527099609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4321630001068115, + "rewards/margins": 9.39294147491455, + "rewards/rejected": -11.825104713439941, + "step": 7078 + }, + { + "epoch": 1.1, + "learning_rate": 8.955308885315629e-06, + "logits/chosen": -2.909156084060669, + "logits/rejected": -2.93217396736145, + "logps/chosen": -87.99272155761719, + "logps/rejected": -167.03189086914062, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.495139122009277, + "rewards/margins": 4.402443885803223, + "rewards/rejected": -9.8975830078125, + "step": 7079 + }, + { + "epoch": 1.1, + "learning_rate": 8.95457544478448e-06, + "logits/chosen": -1.8639994859695435, + "logits/rejected": -2.9672353267669678, + "logps/chosen": -325.72802734375, + "logps/rejected": -309.1641540527344, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.677206516265869, + "rewards/margins": 5.978029251098633, + "rewards/rejected": -8.655235290527344, + "step": 7080 + }, + { + "epoch": 1.1, + "learning_rate": 8.953842004253332e-06, + "logits/chosen": -2.2749247550964355, + "logits/rejected": -2.6339354515075684, + "logps/chosen": -172.09893798828125, + "logps/rejected": -220.02349853515625, + "loss": 0.5604, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.479860305786133, + "rewards/margins": 1.9028329849243164, + "rewards/rejected": -10.382692337036133, + "step": 7081 + }, + { + "epoch": 1.1, + "learning_rate": 8.953108563722184e-06, + "logits/chosen": -2.4123969078063965, + "logits/rejected": -2.589279890060425, + "logps/chosen": -426.2546081542969, + "logps/rejected": -540.2254028320312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7131593227386475, + "rewards/margins": 9.421775817871094, + "rewards/rejected": -12.13493537902832, + "step": 7082 + }, + { + "epoch": 1.1, + "learning_rate": 8.952375123191036e-06, + "logits/chosen": -2.922360897064209, + "logits/rejected": -3.0759406089782715, + "logps/chosen": -69.93001556396484, + "logps/rejected": -152.95689392089844, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1077880859375, + "rewards/margins": 4.646710395812988, + "rewards/rejected": -10.754498481750488, + "step": 7083 + }, + { + "epoch": 1.1, + "learning_rate": 8.951641682659888e-06, + "logits/chosen": -2.431349039077759, + "logits/rejected": -2.6801843643188477, + "logps/chosen": -189.157958984375, + "logps/rejected": -392.063720703125, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.376456260681152, + "rewards/margins": 5.853260040283203, + "rewards/rejected": -12.229716300964355, + "step": 7084 + }, + { + "epoch": 1.1, + "learning_rate": 8.950908242128742e-06, + "logits/chosen": -2.9695279598236084, + "logits/rejected": -2.9229650497436523, + "logps/chosen": -383.6735534667969, + "logps/rejected": -621.4136962890625, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5905137062072754, + "rewards/margins": 5.691163539886475, + "rewards/rejected": -8.28167724609375, + "step": 7085 + }, + { + "epoch": 1.1, + "learning_rate": 8.950174801597593e-06, + "logits/chosen": -2.5344064235687256, + "logits/rejected": -2.914547920227051, + "logps/chosen": -170.24990844726562, + "logps/rejected": -282.939208984375, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.319307804107666, + "rewards/margins": 4.752058506011963, + "rewards/rejected": -8.071366310119629, + "step": 7086 + }, + { + "epoch": 1.1, + "learning_rate": 8.949441361066445e-06, + "logits/chosen": -3.055288076400757, + "logits/rejected": -2.691925287246704, + "logps/chosen": -287.85791015625, + "logps/rejected": -196.63955688476562, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.512570381164551, + "rewards/margins": 3.421417713165283, + "rewards/rejected": -7.933988571166992, + "step": 7087 + }, + { + "epoch": 1.1, + "learning_rate": 8.948707920535297e-06, + "logits/chosen": -2.8907663822174072, + "logits/rejected": -2.277263641357422, + "logps/chosen": -403.1717224121094, + "logps/rejected": -316.224365234375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.222332000732422, + "rewards/margins": 6.287281036376953, + "rewards/rejected": -8.509613037109375, + "step": 7088 + }, + { + "epoch": 1.1, + "learning_rate": 8.947974480004149e-06, + "logits/chosen": -2.7374672889709473, + "logits/rejected": -1.3670741319656372, + "logps/chosen": -201.63186645507812, + "logps/rejected": -225.47445678710938, + "loss": 0.2352, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.070627212524414, + "rewards/margins": 4.704709529876709, + "rewards/rejected": -11.775337219238281, + "step": 7089 + }, + { + "epoch": 1.1, + "learning_rate": 8.947241039473001e-06, + "logits/chosen": -1.3222376108169556, + "logits/rejected": -2.8616790771484375, + "logps/chosen": -160.21551513671875, + "logps/rejected": -423.32904052734375, + "loss": 0.1551, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.547515869140625, + "rewards/margins": 3.834836483001709, + "rewards/rejected": -12.382352828979492, + "step": 7090 + }, + { + "epoch": 1.1, + "learning_rate": 8.946507598941853e-06, + "logits/chosen": -3.1098196506500244, + "logits/rejected": -1.4998395442962646, + "logps/chosen": -356.74554443359375, + "logps/rejected": -162.7257080078125, + "loss": 1.5949, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.6891889572143555, + "rewards/margins": -1.0646922588348389, + "rewards/rejected": -3.6244969367980957, + "step": 7091 + }, + { + "epoch": 1.1, + "learning_rate": 8.945774158410706e-06, + "logits/chosen": -2.614305019378662, + "logits/rejected": -3.1160876750946045, + "logps/chosen": -83.67540740966797, + "logps/rejected": -303.254638671875, + "loss": 0.5841, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.963359832763672, + "rewards/margins": 3.0055108070373535, + "rewards/rejected": -8.968870162963867, + "step": 7092 + }, + { + "epoch": 1.1, + "learning_rate": 8.945040717879558e-06, + "logits/chosen": -2.338369846343994, + "logits/rejected": -3.2476541996002197, + "logps/chosen": -715.8193359375, + "logps/rejected": -841.041748046875, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6315430402755737, + "rewards/margins": 4.938514709472656, + "rewards/rejected": -6.5700578689575195, + "step": 7093 + }, + { + "epoch": 1.1, + "learning_rate": 8.944307277348412e-06, + "logits/chosen": -2.6805872917175293, + "logits/rejected": -2.4524099826812744, + "logps/chosen": -147.23936462402344, + "logps/rejected": -256.49615478515625, + "loss": 0.0325, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.401570796966553, + "rewards/margins": 6.37261962890625, + "rewards/rejected": -11.774190902709961, + "step": 7094 + }, + { + "epoch": 1.1, + "learning_rate": 8.943573836817264e-06, + "logits/chosen": -2.791234254837036, + "logits/rejected": -3.072094678878784, + "logps/chosen": -94.78964233398438, + "logps/rejected": -334.7320556640625, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.613064765930176, + "rewards/margins": 7.398593902587891, + "rewards/rejected": -13.011658668518066, + "step": 7095 + }, + { + "epoch": 1.1, + "learning_rate": 8.942840396286116e-06, + "logits/chosen": -1.3659700155258179, + "logits/rejected": -2.6974053382873535, + "logps/chosen": -100.98749542236328, + "logps/rejected": -264.14556884765625, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.115689277648926, + "rewards/margins": 7.149509429931641, + "rewards/rejected": -13.26519775390625, + "step": 7096 + }, + { + "epoch": 1.1, + "learning_rate": 8.942106955754967e-06, + "logits/chosen": -2.4100663661956787, + "logits/rejected": -1.6053308248519897, + "logps/chosen": -242.21905517578125, + "logps/rejected": -197.33766174316406, + "loss": 0.5834, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.604030609130859, + "rewards/margins": 4.933234214782715, + "rewards/rejected": -10.537264823913574, + "step": 7097 + }, + { + "epoch": 1.1, + "learning_rate": 8.94137351522382e-06, + "logits/chosen": -3.1529150009155273, + "logits/rejected": -2.9786622524261475, + "logps/chosen": -495.0419921875, + "logps/rejected": -325.2153625488281, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2906293869018555, + "rewards/margins": 6.290904998779297, + "rewards/rejected": -9.581534385681152, + "step": 7098 + }, + { + "epoch": 1.1, + "learning_rate": 8.940640074692671e-06, + "logits/chosen": -3.034438133239746, + "logits/rejected": -2.8655805587768555, + "logps/chosen": -193.71096801757812, + "logps/rejected": -209.21539306640625, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4886388778686523, + "rewards/margins": 4.111681938171387, + "rewards/rejected": -7.600321292877197, + "step": 7099 + }, + { + "epoch": 1.1, + "learning_rate": 8.939906634161523e-06, + "logits/chosen": -3.1496615409851074, + "logits/rejected": -2.9601588249206543, + "logps/chosen": -121.5860595703125, + "logps/rejected": -90.04454803466797, + "loss": 2.8844, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.450846672058105, + "rewards/margins": -1.6115539073944092, + "rewards/rejected": -6.839292526245117, + "step": 7100 + }, + { + "epoch": 1.1, + "learning_rate": 8.939173193630375e-06, + "logits/chosen": -0.9466270804405212, + "logits/rejected": -3.035767078399658, + "logps/chosen": -171.45870971679688, + "logps/rejected": -668.9205322265625, + "loss": 0.1588, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.632589817047119, + "rewards/margins": 2.073699951171875, + "rewards/rejected": -7.706289768218994, + "step": 7101 + }, + { + "epoch": 1.1, + "learning_rate": 8.938439753099227e-06, + "logits/chosen": -1.7264375686645508, + "logits/rejected": -2.769505739212036, + "logps/chosen": -235.78009033203125, + "logps/rejected": -400.7015075683594, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7682085037231445, + "rewards/margins": 5.064910888671875, + "rewards/rejected": -9.83311939239502, + "step": 7102 + }, + { + "epoch": 1.1, + "learning_rate": 8.93770631256808e-06, + "logits/chosen": -3.1291191577911377, + "logits/rejected": -3.0498509407043457, + "logps/chosen": -333.5082702636719, + "logps/rejected": -295.0493469238281, + "loss": 0.4175, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.810402870178223, + "rewards/margins": 3.133873224258423, + "rewards/rejected": -7.944275856018066, + "step": 7103 + }, + { + "epoch": 1.1, + "learning_rate": 8.936972872036932e-06, + "logits/chosen": -2.648709535598755, + "logits/rejected": -3.153864622116089, + "logps/chosen": -161.59002685546875, + "logps/rejected": -224.30931091308594, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.990687370300293, + "rewards/margins": 5.127467632293701, + "rewards/rejected": -10.118154525756836, + "step": 7104 + }, + { + "epoch": 1.1, + "learning_rate": 8.936239431505784e-06, + "logits/chosen": -3.2413763999938965, + "logits/rejected": -2.2057132720947266, + "logps/chosen": -885.4761962890625, + "logps/rejected": -511.1219482421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2146759033203125, + "rewards/margins": 8.685993194580078, + "rewards/rejected": -9.90066909790039, + "step": 7105 + }, + { + "epoch": 1.11, + "learning_rate": 8.935505990974636e-06, + "logits/chosen": -2.8940534591674805, + "logits/rejected": -3.067945718765259, + "logps/chosen": -87.69326782226562, + "logps/rejected": -240.41043090820312, + "loss": 0.7217, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.70039701461792, + "rewards/margins": 2.471726894378662, + "rewards/rejected": -9.172123908996582, + "step": 7106 + }, + { + "epoch": 1.11, + "learning_rate": 8.934772550443488e-06, + "logits/chosen": -1.9961113929748535, + "logits/rejected": -2.8450655937194824, + "logps/chosen": -131.61622619628906, + "logps/rejected": -373.1682434082031, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.413956880569458, + "rewards/margins": 7.255185604095459, + "rewards/rejected": -10.66914176940918, + "step": 7107 + }, + { + "epoch": 1.11, + "learning_rate": 8.93403910991234e-06, + "logits/chosen": -2.962423086166382, + "logits/rejected": -3.0809621810913086, + "logps/chosen": -411.0166931152344, + "logps/rejected": -337.3123779296875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.162313938140869, + "rewards/margins": 7.214511394500732, + "rewards/rejected": -9.376825332641602, + "step": 7108 + }, + { + "epoch": 1.11, + "learning_rate": 8.933305669381192e-06, + "logits/chosen": -2.56709361076355, + "logits/rejected": -3.071105480194092, + "logps/chosen": -100.58374786376953, + "logps/rejected": -258.3724060058594, + "loss": 0.1554, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.612774848937988, + "rewards/margins": 2.699209451675415, + "rewards/rejected": -8.31198501586914, + "step": 7109 + }, + { + "epoch": 1.11, + "learning_rate": 8.932572228850044e-06, + "logits/chosen": -2.9935951232910156, + "logits/rejected": -2.870814085006714, + "logps/chosen": -209.1820068359375, + "logps/rejected": -315.18743896484375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0399184226989746, + "rewards/margins": 8.07642936706543, + "rewards/rejected": -11.116348266601562, + "step": 7110 + }, + { + "epoch": 1.11, + "learning_rate": 8.931838788318895e-06, + "logits/chosen": -2.9292781352996826, + "logits/rejected": -2.3289027214050293, + "logps/chosen": -416.3846740722656, + "logps/rejected": -370.19866943359375, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.023609161376953, + "rewards/margins": 4.647751808166504, + "rewards/rejected": -9.671360969543457, + "step": 7111 + }, + { + "epoch": 1.11, + "learning_rate": 8.931105347787749e-06, + "logits/chosen": -2.7483832836151123, + "logits/rejected": -2.8701977729797363, + "logps/chosen": -65.12760925292969, + "logps/rejected": -309.88714599609375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.152990341186523, + "rewards/margins": 7.729885101318359, + "rewards/rejected": -11.882875442504883, + "step": 7112 + }, + { + "epoch": 1.11, + "learning_rate": 8.930371907256601e-06, + "logits/chosen": -2.6203927993774414, + "logits/rejected": -2.584324836730957, + "logps/chosen": -83.86911010742188, + "logps/rejected": -179.52085876464844, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.078469753265381, + "rewards/margins": 6.7424540519714355, + "rewards/rejected": -9.820923805236816, + "step": 7113 + }, + { + "epoch": 1.11, + "learning_rate": 8.929638466725453e-06, + "logits/chosen": -2.443481922149658, + "logits/rejected": -3.0574934482574463, + "logps/chosen": -222.62721252441406, + "logps/rejected": -463.9530029296875, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.076785564422607, + "rewards/margins": 5.797138690948486, + "rewards/rejected": -9.873924255371094, + "step": 7114 + }, + { + "epoch": 1.11, + "learning_rate": 8.928905026194305e-06, + "logits/chosen": -1.2867094278335571, + "logits/rejected": -2.9177205562591553, + "logps/chosen": -143.46774291992188, + "logps/rejected": -309.62359619140625, + "loss": 0.2045, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.538656711578369, + "rewards/margins": 3.9027414321899414, + "rewards/rejected": -8.441397666931152, + "step": 7115 + }, + { + "epoch": 1.11, + "learning_rate": 8.928171585663157e-06, + "logits/chosen": -1.84514582157135, + "logits/rejected": -1.9446216821670532, + "logps/chosen": -157.83433532714844, + "logps/rejected": -171.7340087890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.804530143737793, + "rewards/margins": 8.564438819885254, + "rewards/rejected": -11.368968963623047, + "step": 7116 + }, + { + "epoch": 1.11, + "learning_rate": 8.927438145132008e-06, + "logits/chosen": -2.939720630645752, + "logits/rejected": -2.742249011993408, + "logps/chosen": -183.79312133789062, + "logps/rejected": -248.39935302734375, + "loss": 0.0518, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.974454402923584, + "rewards/margins": 3.080240249633789, + "rewards/rejected": -8.054695129394531, + "step": 7117 + }, + { + "epoch": 1.11, + "learning_rate": 8.92670470460086e-06, + "logits/chosen": -2.857790946960449, + "logits/rejected": -3.036444664001465, + "logps/chosen": -126.10365295410156, + "logps/rejected": -272.0486145019531, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.734635353088379, + "rewards/margins": 7.280085563659668, + "rewards/rejected": -12.014720916748047, + "step": 7118 + }, + { + "epoch": 1.11, + "learning_rate": 8.925971264069712e-06, + "logits/chosen": -2.987464189529419, + "logits/rejected": -1.8868215084075928, + "logps/chosen": -400.11566162109375, + "logps/rejected": -162.35658264160156, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5143799781799316, + "rewards/margins": 5.808754920959473, + "rewards/rejected": -8.323135375976562, + "step": 7119 + }, + { + "epoch": 1.11, + "learning_rate": 8.925237823538564e-06, + "logits/chosen": -0.997769832611084, + "logits/rejected": -2.755371332168579, + "logps/chosen": -135.53794860839844, + "logps/rejected": -587.572509765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.782886505126953, + "rewards/margins": 10.509557723999023, + "rewards/rejected": -14.292444229125977, + "step": 7120 + }, + { + "epoch": 1.11, + "learning_rate": 8.924504383007418e-06, + "logits/chosen": -2.8849987983703613, + "logits/rejected": -3.2467455863952637, + "logps/chosen": -113.78765106201172, + "logps/rejected": -204.5565185546875, + "loss": 0.0363, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.205286026000977, + "rewards/margins": 3.398670196533203, + "rewards/rejected": -8.60395622253418, + "step": 7121 + }, + { + "epoch": 1.11, + "learning_rate": 8.92377094247627e-06, + "logits/chosen": -2.465998649597168, + "logits/rejected": -2.942735195159912, + "logps/chosen": -301.43280029296875, + "logps/rejected": -492.2452392578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.240309238433838, + "rewards/margins": 8.43517780303955, + "rewards/rejected": -11.675487518310547, + "step": 7122 + }, + { + "epoch": 1.11, + "learning_rate": 8.923037501945121e-06, + "logits/chosen": -2.683912515640259, + "logits/rejected": -1.3991663455963135, + "logps/chosen": -127.27517700195312, + "logps/rejected": -142.7943115234375, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0980730056762695, + "rewards/margins": 4.9267988204956055, + "rewards/rejected": -12.024871826171875, + "step": 7123 + }, + { + "epoch": 1.11, + "learning_rate": 8.922304061413973e-06, + "logits/chosen": -2.2684576511383057, + "logits/rejected": -2.917306661605835, + "logps/chosen": -133.48033142089844, + "logps/rejected": -235.17820739746094, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8407936096191406, + "rewards/margins": 5.491739273071289, + "rewards/rejected": -9.33253288269043, + "step": 7124 + }, + { + "epoch": 1.11, + "learning_rate": 8.921570620882825e-06, + "logits/chosen": -1.6271523237228394, + "logits/rejected": -2.924821138381958, + "logps/chosen": -139.74966430664062, + "logps/rejected": -496.93414306640625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.901416778564453, + "rewards/margins": 7.672405242919922, + "rewards/rejected": -13.573822021484375, + "step": 7125 + }, + { + "epoch": 1.11, + "learning_rate": 8.920837180351679e-06, + "logits/chosen": -2.4314489364624023, + "logits/rejected": -2.721839427947998, + "logps/chosen": -131.9178924560547, + "logps/rejected": -232.16775512695312, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.103940486907959, + "rewards/margins": 5.605583190917969, + "rewards/rejected": -9.709524154663086, + "step": 7126 + }, + { + "epoch": 1.11, + "learning_rate": 8.92010373982053e-06, + "logits/chosen": -2.4020071029663086, + "logits/rejected": -2.376349687576294, + "logps/chosen": -284.677001953125, + "logps/rejected": -726.9459838867188, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.936822891235352, + "rewards/margins": 6.997757434844971, + "rewards/rejected": -14.934579849243164, + "step": 7127 + }, + { + "epoch": 1.11, + "learning_rate": 8.919370299289382e-06, + "logits/chosen": -1.9544528722763062, + "logits/rejected": -2.919071912765503, + "logps/chosen": -142.88214111328125, + "logps/rejected": -428.4340515136719, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4422101974487305, + "rewards/margins": 7.91484260559082, + "rewards/rejected": -11.35705280303955, + "step": 7128 + }, + { + "epoch": 1.11, + "learning_rate": 8.918636858758234e-06, + "logits/chosen": -3.0557069778442383, + "logits/rejected": -2.2109856605529785, + "logps/chosen": -383.3287048339844, + "logps/rejected": -352.7781066894531, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.953648567199707, + "rewards/margins": 8.468629837036133, + "rewards/rejected": -12.422277450561523, + "step": 7129 + }, + { + "epoch": 1.11, + "learning_rate": 8.917903418227088e-06, + "logits/chosen": -2.148207187652588, + "logits/rejected": -2.7118473052978516, + "logps/chosen": -158.18264770507812, + "logps/rejected": -304.3290710449219, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6226677894592285, + "rewards/margins": 8.198537826538086, + "rewards/rejected": -11.821205139160156, + "step": 7130 + }, + { + "epoch": 1.11, + "learning_rate": 8.91716997769594e-06, + "logits/chosen": -1.809439778327942, + "logits/rejected": -2.780186891555786, + "logps/chosen": -128.243408203125, + "logps/rejected": -462.2388916015625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.213248252868652, + "rewards/margins": 8.953717231750488, + "rewards/rejected": -13.16696548461914, + "step": 7131 + }, + { + "epoch": 1.11, + "learning_rate": 8.916436537164792e-06, + "logits/chosen": -1.3432841300964355, + "logits/rejected": -2.20196533203125, + "logps/chosen": -183.33645629882812, + "logps/rejected": -486.0166320800781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9462671279907227, + "rewards/margins": 10.671449661254883, + "rewards/rejected": -13.617717742919922, + "step": 7132 + }, + { + "epoch": 1.11, + "learning_rate": 8.915703096633644e-06, + "logits/chosen": -1.695730209350586, + "logits/rejected": -2.501100540161133, + "logps/chosen": -172.06588745117188, + "logps/rejected": -353.5687561035156, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.869619369506836, + "rewards/margins": 8.865813255310059, + "rewards/rejected": -13.735432624816895, + "step": 7133 + }, + { + "epoch": 1.11, + "learning_rate": 8.914969656102495e-06, + "logits/chosen": -2.078479528427124, + "logits/rejected": -2.903050422668457, + "logps/chosen": -130.01626586914062, + "logps/rejected": -359.96722412109375, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5345993041992188, + "rewards/margins": 5.891221523284912, + "rewards/rejected": -9.425821304321289, + "step": 7134 + }, + { + "epoch": 1.11, + "learning_rate": 8.914236215571347e-06, + "logits/chosen": -2.3133997917175293, + "logits/rejected": -2.3780410289764404, + "logps/chosen": -508.5118103027344, + "logps/rejected": -473.08575439453125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2699875831604004, + "rewards/margins": 7.613861083984375, + "rewards/rejected": -10.883848190307617, + "step": 7135 + }, + { + "epoch": 1.11, + "learning_rate": 8.9135027750402e-06, + "logits/chosen": -2.9275624752044678, + "logits/rejected": -1.6006911993026733, + "logps/chosen": -571.99658203125, + "logps/rejected": -258.7331237792969, + "loss": 1.0248, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.494490146636963, + "rewards/margins": 2.0275115966796875, + "rewards/rejected": -8.522001266479492, + "step": 7136 + }, + { + "epoch": 1.11, + "learning_rate": 8.912769334509051e-06, + "logits/chosen": -2.9665749073028564, + "logits/rejected": -2.647620677947998, + "logps/chosen": -151.8455047607422, + "logps/rejected": -126.74272155761719, + "loss": 0.7748, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.746877193450928, + "rewards/margins": 3.249504566192627, + "rewards/rejected": -8.996381759643555, + "step": 7137 + }, + { + "epoch": 1.11, + "learning_rate": 8.912035893977903e-06, + "logits/chosen": -3.0272903442382812, + "logits/rejected": -3.2143726348876953, + "logps/chosen": -455.68157958984375, + "logps/rejected": -484.1082763671875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1310837268829346, + "rewards/margins": 7.214627265930176, + "rewards/rejected": -10.345710754394531, + "step": 7138 + }, + { + "epoch": 1.11, + "learning_rate": 8.911302453446757e-06, + "logits/chosen": -2.365186929702759, + "logits/rejected": -2.596153974533081, + "logps/chosen": -174.46942138671875, + "logps/rejected": -285.109375, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0777268409729, + "rewards/margins": 5.247827529907227, + "rewards/rejected": -10.325553894042969, + "step": 7139 + }, + { + "epoch": 1.11, + "learning_rate": 8.910569012915608e-06, + "logits/chosen": -1.8241499662399292, + "logits/rejected": -2.38944673538208, + "logps/chosen": -470.09625244140625, + "logps/rejected": -492.3455810546875, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.552084445953369, + "rewards/margins": 9.908548355102539, + "rewards/rejected": -12.46063232421875, + "step": 7140 + }, + { + "epoch": 1.11, + "learning_rate": 8.90983557238446e-06, + "logits/chosen": -3.030454158782959, + "logits/rejected": -2.899353265762329, + "logps/chosen": -137.83987426757812, + "logps/rejected": -184.2650909423828, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.935789108276367, + "rewards/margins": 7.281816482543945, + "rewards/rejected": -12.217605590820312, + "step": 7141 + }, + { + "epoch": 1.11, + "learning_rate": 8.909102131853312e-06, + "logits/chosen": -2.602193593978882, + "logits/rejected": -3.036017656326294, + "logps/chosen": -264.00018310546875, + "logps/rejected": -142.45834350585938, + "loss": 1.0096, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.836568355560303, + "rewards/margins": -0.4048299789428711, + "rewards/rejected": -7.431738376617432, + "step": 7142 + }, + { + "epoch": 1.11, + "learning_rate": 8.908368691322164e-06, + "logits/chosen": -2.046513319015503, + "logits/rejected": -2.9293391704559326, + "logps/chosen": -359.8277587890625, + "logps/rejected": -1030.246337890625, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.966712951660156, + "rewards/margins": 6.700858116149902, + "rewards/rejected": -11.667572021484375, + "step": 7143 + }, + { + "epoch": 1.11, + "learning_rate": 8.907635250791016e-06, + "logits/chosen": -2.6546239852905273, + "logits/rejected": -2.7354683876037598, + "logps/chosen": -164.36526489257812, + "logps/rejected": -395.3887939453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0905051231384277, + "rewards/margins": 9.881708145141602, + "rewards/rejected": -12.972212791442871, + "step": 7144 + }, + { + "epoch": 1.11, + "learning_rate": 8.906901810259868e-06, + "logits/chosen": -2.966057538986206, + "logits/rejected": -2.8814196586608887, + "logps/chosen": -77.60355377197266, + "logps/rejected": -146.4620361328125, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.909465789794922, + "rewards/margins": 5.36243200302124, + "rewards/rejected": -10.27189826965332, + "step": 7145 + }, + { + "epoch": 1.11, + "learning_rate": 8.90616836972872e-06, + "logits/chosen": -2.7020087242126465, + "logits/rejected": -2.4454476833343506, + "logps/chosen": -161.8611297607422, + "logps/rejected": -156.36245727539062, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.926178932189941, + "rewards/margins": 5.140023231506348, + "rewards/rejected": -11.066202163696289, + "step": 7146 + }, + { + "epoch": 1.11, + "learning_rate": 8.905434929197572e-06, + "logits/chosen": -1.6166305541992188, + "logits/rejected": -2.6370155811309814, + "logps/chosen": -222.820556640625, + "logps/rejected": -443.6443176269531, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.172794342041016, + "rewards/margins": 7.088808059692383, + "rewards/rejected": -12.261602401733398, + "step": 7147 + }, + { + "epoch": 1.11, + "learning_rate": 8.904701488666425e-06, + "logits/chosen": -2.171941041946411, + "logits/rejected": -2.6141159534454346, + "logps/chosen": -181.3284912109375, + "logps/rejected": -204.34922790527344, + "loss": 1.1441, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.31785774230957, + "rewards/margins": 4.283792495727539, + "rewards/rejected": -8.60165023803711, + "step": 7148 + }, + { + "epoch": 1.11, + "learning_rate": 8.903968048135277e-06, + "logits/chosen": -2.820772409439087, + "logits/rejected": -2.7988102436065674, + "logps/chosen": -176.37118530273438, + "logps/rejected": -309.7358703613281, + "loss": 1.2622, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.179903507232666, + "rewards/margins": 3.388988494873047, + "rewards/rejected": -8.568891525268555, + "step": 7149 + }, + { + "epoch": 1.11, + "learning_rate": 8.903234607604129e-06, + "logits/chosen": -2.9346120357513428, + "logits/rejected": -1.6234959363937378, + "logps/chosen": -710.5242919921875, + "logps/rejected": -319.19781494140625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5877456665039062, + "rewards/margins": 7.2927632331848145, + "rewards/rejected": -9.880508422851562, + "step": 7150 + }, + { + "epoch": 1.11, + "learning_rate": 8.90250116707298e-06, + "logits/chosen": -2.821082592010498, + "logits/rejected": -2.707688093185425, + "logps/chosen": -235.22787475585938, + "logps/rejected": -321.340087890625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.902491569519043, + "rewards/margins": 8.150680541992188, + "rewards/rejected": -12.053171157836914, + "step": 7151 + }, + { + "epoch": 1.11, + "learning_rate": 8.901767726541833e-06, + "logits/chosen": -2.375443458557129, + "logits/rejected": -3.206132650375366, + "logps/chosen": -74.74537658691406, + "logps/rejected": -336.6687927246094, + "loss": 0.1419, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.74289608001709, + "rewards/margins": 3.948138475418091, + "rewards/rejected": -6.69103479385376, + "step": 7152 + }, + { + "epoch": 1.11, + "learning_rate": 8.901034286010685e-06, + "logits/chosen": -3.0596580505371094, + "logits/rejected": -2.178981304168701, + "logps/chosen": -513.3741455078125, + "logps/rejected": -239.56185913085938, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2139079570770264, + "rewards/margins": 6.429887771606445, + "rewards/rejected": -7.643795967102051, + "step": 7153 + }, + { + "epoch": 1.11, + "learning_rate": 8.900300845479536e-06, + "logits/chosen": -2.528993606567383, + "logits/rejected": -2.915564775466919, + "logps/chosen": -265.6310119628906, + "logps/rejected": -296.27667236328125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1881604194641113, + "rewards/margins": 6.401511192321777, + "rewards/rejected": -8.589672088623047, + "step": 7154 + }, + { + "epoch": 1.11, + "learning_rate": 8.899567404948388e-06, + "logits/chosen": -1.5349124670028687, + "logits/rejected": -2.7708375453948975, + "logps/chosen": -140.31472778320312, + "logps/rejected": -292.5885925292969, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.651881217956543, + "rewards/margins": 7.197071552276611, + "rewards/rejected": -11.848953247070312, + "step": 7155 + }, + { + "epoch": 1.11, + "learning_rate": 8.89883396441724e-06, + "logits/chosen": -2.916069984436035, + "logits/rejected": -2.467707395553589, + "logps/chosen": -278.8707580566406, + "logps/rejected": -327.051513671875, + "loss": 0.3814, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5992720127105713, + "rewards/margins": 6.287783145904541, + "rewards/rejected": -9.887055397033691, + "step": 7156 + }, + { + "epoch": 1.11, + "learning_rate": 8.898100523886094e-06, + "logits/chosen": -2.2642548084259033, + "logits/rejected": -2.8557288646698, + "logps/chosen": -271.4030456542969, + "logps/rejected": -446.36419677734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.086510419845581, + "rewards/margins": 11.31555461883545, + "rewards/rejected": -13.40206527709961, + "step": 7157 + }, + { + "epoch": 1.11, + "learning_rate": 8.897367083354946e-06, + "logits/chosen": -3.1075048446655273, + "logits/rejected": -2.901155948638916, + "logps/chosen": -201.8756103515625, + "logps/rejected": -288.466552734375, + "loss": 0.2899, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7343029975891113, + "rewards/margins": 3.1246280670166016, + "rewards/rejected": -6.858930587768555, + "step": 7158 + }, + { + "epoch": 1.11, + "learning_rate": 8.896633642823797e-06, + "logits/chosen": -2.9772167205810547, + "logits/rejected": -2.550269842147827, + "logps/chosen": -306.4873046875, + "logps/rejected": -253.61996459960938, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.252755641937256, + "rewards/margins": 6.187355995178223, + "rewards/rejected": -8.44011116027832, + "step": 7159 + }, + { + "epoch": 1.11, + "learning_rate": 8.895900202292651e-06, + "logits/chosen": -1.3135104179382324, + "logits/rejected": -2.9789018630981445, + "logps/chosen": -188.1259307861328, + "logps/rejected": -445.6424560546875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.908637046813965, + "rewards/margins": 7.147917747497559, + "rewards/rejected": -13.056554794311523, + "step": 7160 + }, + { + "epoch": 1.11, + "learning_rate": 8.895166761761503e-06, + "logits/chosen": -1.4189090728759766, + "logits/rejected": -2.816563606262207, + "logps/chosen": -155.04937744140625, + "logps/rejected": -439.5442199707031, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8848373889923096, + "rewards/margins": 6.674513816833496, + "rewards/rejected": -8.559350967407227, + "step": 7161 + }, + { + "epoch": 1.11, + "learning_rate": 8.894433321230355e-06, + "logits/chosen": -2.0886175632476807, + "logits/rejected": -2.901149034500122, + "logps/chosen": -228.28158569335938, + "logps/rejected": -326.2471923828125, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7295989990234375, + "rewards/margins": 5.18034553527832, + "rewards/rejected": -8.909944534301758, + "step": 7162 + }, + { + "epoch": 1.11, + "learning_rate": 8.893699880699207e-06, + "logits/chosen": -2.0246052742004395, + "logits/rejected": -2.900571346282959, + "logps/chosen": -182.32101440429688, + "logps/rejected": -337.71990966796875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7152719497680664, + "rewards/margins": 6.883419036865234, + "rewards/rejected": -9.5986909866333, + "step": 7163 + }, + { + "epoch": 1.11, + "learning_rate": 8.892966440168059e-06, + "logits/chosen": -2.9441702365875244, + "logits/rejected": -2.9712204933166504, + "logps/chosen": -368.75823974609375, + "logps/rejected": -328.41864013671875, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.694986820220947, + "rewards/margins": 4.995368957519531, + "rewards/rejected": -9.69035530090332, + "step": 7164 + }, + { + "epoch": 1.11, + "learning_rate": 8.89223299963691e-06, + "logits/chosen": -2.9738669395446777, + "logits/rejected": -1.7430813312530518, + "logps/chosen": -387.00927734375, + "logps/rejected": -269.3789978027344, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9692292213439941, + "rewards/margins": 6.156067848205566, + "rewards/rejected": -8.125296592712402, + "step": 7165 + }, + { + "epoch": 1.11, + "learning_rate": 8.891499559105764e-06, + "logits/chosen": -2.679685115814209, + "logits/rejected": -2.9861135482788086, + "logps/chosen": -214.06930541992188, + "logps/rejected": -424.9169921875, + "loss": 1.3903, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.264934062957764, + "rewards/margins": 4.520571231842041, + "rewards/rejected": -8.785505294799805, + "step": 7166 + }, + { + "epoch": 1.11, + "learning_rate": 8.890766118574616e-06, + "logits/chosen": -2.7553317546844482, + "logits/rejected": -1.6080400943756104, + "logps/chosen": -183.40972900390625, + "logps/rejected": -121.75303649902344, + "loss": 0.4239, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.781565189361572, + "rewards/margins": 3.3019967079162598, + "rewards/rejected": -8.083561897277832, + "step": 7167 + }, + { + "epoch": 1.11, + "learning_rate": 8.890032678043468e-06, + "logits/chosen": -3.2204532623291016, + "logits/rejected": -3.120997667312622, + "logps/chosen": -567.1337890625, + "logps/rejected": -579.1416015625, + "loss": 0.0961, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3933119773864746, + "rewards/margins": 3.926539897918701, + "rewards/rejected": -7.319851875305176, + "step": 7168 + }, + { + "epoch": 1.11, + "learning_rate": 8.88929923751232e-06, + "logits/chosen": -1.5135937929153442, + "logits/rejected": -2.9301538467407227, + "logps/chosen": -263.2540283203125, + "logps/rejected": -547.0670166015625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5508880615234375, + "rewards/margins": 6.491209030151367, + "rewards/rejected": -8.042097091674805, + "step": 7169 + }, + { + "epoch": 1.12, + "learning_rate": 8.888565796981172e-06, + "logits/chosen": -2.7677316665649414, + "logits/rejected": -2.8483049869537354, + "logps/chosen": -246.90142822265625, + "logps/rejected": -302.5272521972656, + "loss": 1.892, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.145235538482666, + "rewards/margins": 3.1391990184783936, + "rewards/rejected": -7.2844343185424805, + "step": 7170 + }, + { + "epoch": 1.12, + "learning_rate": 8.887832356450023e-06, + "logits/chosen": -1.4431380033493042, + "logits/rejected": -2.536139965057373, + "logps/chosen": -164.37374877929688, + "logps/rejected": -290.24322509765625, + "loss": 0.5716, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.448125839233398, + "rewards/margins": 4.33992338180542, + "rewards/rejected": -9.788049697875977, + "step": 7171 + }, + { + "epoch": 1.12, + "learning_rate": 8.887098915918875e-06, + "logits/chosen": -2.699836492538452, + "logits/rejected": -3.233046770095825, + "logps/chosen": -421.726318359375, + "logps/rejected": -404.6773681640625, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7324869632720947, + "rewards/margins": 4.075399398803711, + "rewards/rejected": -7.807886123657227, + "step": 7172 + }, + { + "epoch": 1.12, + "learning_rate": 8.886365475387727e-06, + "logits/chosen": -1.7458688020706177, + "logits/rejected": -2.7522759437561035, + "logps/chosen": -72.1971206665039, + "logps/rejected": -285.4561767578125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.198754072189331, + "rewards/margins": 7.562215328216553, + "rewards/rejected": -9.760969161987305, + "step": 7173 + }, + { + "epoch": 1.12, + "learning_rate": 8.88563203485658e-06, + "logits/chosen": -2.5867137908935547, + "logits/rejected": -2.6377296447753906, + "logps/chosen": -199.47264099121094, + "logps/rejected": -178.0998077392578, + "loss": 1.1895, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.183874130249023, + "rewards/margins": 1.4529788494110107, + "rewards/rejected": -6.636853218078613, + "step": 7174 + }, + { + "epoch": 1.12, + "learning_rate": 8.884898594325433e-06, + "logits/chosen": -1.7042546272277832, + "logits/rejected": -2.8228917121887207, + "logps/chosen": -244.11264038085938, + "logps/rejected": -661.171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.868170738220215, + "rewards/margins": 10.794746398925781, + "rewards/rejected": -13.66291618347168, + "step": 7175 + }, + { + "epoch": 1.12, + "learning_rate": 8.884165153794285e-06, + "logits/chosen": -1.0811595916748047, + "logits/rejected": -2.913456678390503, + "logps/chosen": -117.43145751953125, + "logps/rejected": -391.27703857421875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6263437271118164, + "rewards/margins": 8.870615005493164, + "rewards/rejected": -11.496959686279297, + "step": 7176 + }, + { + "epoch": 1.12, + "learning_rate": 8.883431713263136e-06, + "logits/chosen": -2.434661626815796, + "logits/rejected": -2.8281352519989014, + "logps/chosen": -286.11566162109375, + "logps/rejected": -323.2830505371094, + "loss": 1.1894, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.021247863769531, + "rewards/margins": 3.3138020038604736, + "rewards/rejected": -8.335050582885742, + "step": 7177 + }, + { + "epoch": 1.12, + "learning_rate": 8.882698272731988e-06, + "logits/chosen": -2.9608030319213867, + "logits/rejected": -1.6484348773956299, + "logps/chosen": -429.626708984375, + "logps/rejected": -162.70425415039062, + "loss": 0.4608, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.0645065307617188, + "rewards/margins": 3.468134880065918, + "rewards/rejected": -5.532641410827637, + "step": 7178 + }, + { + "epoch": 1.12, + "learning_rate": 8.88196483220084e-06, + "logits/chosen": -2.0333645343780518, + "logits/rejected": -2.348632574081421, + "logps/chosen": -196.17144775390625, + "logps/rejected": -622.1336669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4543347358703613, + "rewards/margins": 11.126543045043945, + "rewards/rejected": -13.580877304077148, + "step": 7179 + }, + { + "epoch": 1.12, + "learning_rate": 8.881231391669692e-06, + "logits/chosen": -2.8193042278289795, + "logits/rejected": -2.9231812953948975, + "logps/chosen": -189.3724365234375, + "logps/rejected": -245.1568145751953, + "loss": 1.053, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.424876689910889, + "rewards/margins": 4.778104782104492, + "rewards/rejected": -9.202981948852539, + "step": 7180 + }, + { + "epoch": 1.12, + "learning_rate": 8.880497951138544e-06, + "logits/chosen": -1.787644624710083, + "logits/rejected": -0.7907259464263916, + "logps/chosen": -229.14761352539062, + "logps/rejected": -202.5950164794922, + "loss": 2.107, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.5017218589782715, + "rewards/margins": -0.15452873706817627, + "rewards/rejected": -5.347193241119385, + "step": 7181 + }, + { + "epoch": 1.12, + "learning_rate": 8.879764510607396e-06, + "logits/chosen": -1.6942503452301025, + "logits/rejected": -2.9872894287109375, + "logps/chosen": -170.92950439453125, + "logps/rejected": -535.7037353515625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9696366786956787, + "rewards/margins": 8.057548522949219, + "rewards/rejected": -10.027185440063477, + "step": 7182 + }, + { + "epoch": 1.12, + "learning_rate": 8.87903107007625e-06, + "logits/chosen": -3.1086316108703613, + "logits/rejected": -2.046182870864868, + "logps/chosen": -816.2999267578125, + "logps/rejected": -479.63677978515625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3272278308868408, + "rewards/margins": 6.2955732345581055, + "rewards/rejected": -7.622801303863525, + "step": 7183 + }, + { + "epoch": 1.12, + "learning_rate": 8.878297629545101e-06, + "logits/chosen": -3.0301554203033447, + "logits/rejected": -2.9346227645874023, + "logps/chosen": -310.92498779296875, + "logps/rejected": -275.402587890625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.728827476501465, + "rewards/margins": 6.526767730712891, + "rewards/rejected": -9.255595207214355, + "step": 7184 + }, + { + "epoch": 1.12, + "learning_rate": 8.877564189013953e-06, + "logits/chosen": -2.841125726699829, + "logits/rejected": -2.886411190032959, + "logps/chosen": -380.1565246582031, + "logps/rejected": -357.2264709472656, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.987551212310791, + "rewards/margins": 7.447722434997559, + "rewards/rejected": -9.435274124145508, + "step": 7185 + }, + { + "epoch": 1.12, + "learning_rate": 8.876830748482805e-06, + "logits/chosen": -2.6467955112457275, + "logits/rejected": -3.0132699012756348, + "logps/chosen": -287.4593200683594, + "logps/rejected": -283.7896423339844, + "loss": 0.3166, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.242201328277588, + "rewards/margins": 1.8793870210647583, + "rewards/rejected": -6.121588230133057, + "step": 7186 + }, + { + "epoch": 1.12, + "learning_rate": 8.876097307951657e-06, + "logits/chosen": -1.8657441139221191, + "logits/rejected": -2.9206900596618652, + "logps/chosen": -67.84689331054688, + "logps/rejected": -265.10723876953125, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.303294658660889, + "rewards/margins": 5.575888156890869, + "rewards/rejected": -10.879182815551758, + "step": 7187 + }, + { + "epoch": 1.12, + "learning_rate": 8.875363867420509e-06, + "logits/chosen": -2.5990307331085205, + "logits/rejected": -2.8494391441345215, + "logps/chosen": -203.20982360839844, + "logps/rejected": -298.58343505859375, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.45465087890625, + "rewards/margins": 6.005876541137695, + "rewards/rejected": -8.460527420043945, + "step": 7188 + }, + { + "epoch": 1.12, + "learning_rate": 8.87463042688936e-06, + "logits/chosen": -3.1003782749176025, + "logits/rejected": -2.322733163833618, + "logps/chosen": -728.7509155273438, + "logps/rejected": -567.2791748046875, + "loss": 0.0389, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2668962478637695, + "rewards/margins": 3.250882148742676, + "rewards/rejected": -7.517778396606445, + "step": 7189 + }, + { + "epoch": 1.12, + "learning_rate": 8.873896986358213e-06, + "logits/chosen": -2.9504637718200684, + "logits/rejected": -1.7540313005447388, + "logps/chosen": -645.5015869140625, + "logps/rejected": -491.7076110839844, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2547287940979, + "rewards/margins": 8.018893241882324, + "rewards/rejected": -12.273622512817383, + "step": 7190 + }, + { + "epoch": 1.12, + "learning_rate": 8.873163545827064e-06, + "logits/chosen": -2.542249917984009, + "logits/rejected": -2.9351966381073, + "logps/chosen": -240.38568115234375, + "logps/rejected": -265.6077575683594, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6395087242126465, + "rewards/margins": 5.706486701965332, + "rewards/rejected": -8.34599494934082, + "step": 7191 + }, + { + "epoch": 1.12, + "learning_rate": 8.872430105295918e-06, + "logits/chosen": -2.6234607696533203, + "logits/rejected": -2.8420329093933105, + "logps/chosen": -230.00958251953125, + "logps/rejected": -320.9324951171875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9452438354492188, + "rewards/margins": 6.23543643951416, + "rewards/rejected": -9.180680274963379, + "step": 7192 + }, + { + "epoch": 1.12, + "learning_rate": 8.87169666476477e-06, + "logits/chosen": -3.1166558265686035, + "logits/rejected": -2.9841301441192627, + "logps/chosen": -145.7810821533203, + "logps/rejected": -261.7257385253906, + "loss": 0.1757, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.811448097229004, + "rewards/margins": 3.8836777210235596, + "rewards/rejected": -7.695125579833984, + "step": 7193 + }, + { + "epoch": 1.12, + "learning_rate": 8.870963224233623e-06, + "logits/chosen": -2.829838275909424, + "logits/rejected": -2.1403183937072754, + "logps/chosen": -213.10177612304688, + "logps/rejected": -250.20565795898438, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6409828662872314, + "rewards/margins": 7.994653701782227, + "rewards/rejected": -10.635636329650879, + "step": 7194 + }, + { + "epoch": 1.12, + "learning_rate": 8.870229783702475e-06, + "logits/chosen": -2.81272292137146, + "logits/rejected": -2.98494291305542, + "logps/chosen": -504.0504455566406, + "logps/rejected": -448.5153503417969, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2398738861083984, + "rewards/margins": 5.286038398742676, + "rewards/rejected": -7.525912284851074, + "step": 7195 + }, + { + "epoch": 1.12, + "learning_rate": 8.869496343171327e-06, + "logits/chosen": -3.023160457611084, + "logits/rejected": -3.123765707015991, + "logps/chosen": -105.19338989257812, + "logps/rejected": -301.47918701171875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.653273105621338, + "rewards/margins": 5.809625148773193, + "rewards/rejected": -8.462898254394531, + "step": 7196 + }, + { + "epoch": 1.12, + "learning_rate": 8.868762902640179e-06, + "logits/chosen": -3.056654691696167, + "logits/rejected": -1.5342174768447876, + "logps/chosen": -580.9183959960938, + "logps/rejected": -220.87832641601562, + "loss": 0.0571, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.207585096359253, + "rewards/margins": 5.668757438659668, + "rewards/rejected": -6.8763427734375, + "step": 7197 + }, + { + "epoch": 1.12, + "learning_rate": 8.868029462109031e-06, + "logits/chosen": -2.7602851390838623, + "logits/rejected": -3.021152973175049, + "logps/chosen": -121.27839660644531, + "logps/rejected": -228.85986328125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6404786109924316, + "rewards/margins": 6.299469470977783, + "rewards/rejected": -8.939948081970215, + "step": 7198 + }, + { + "epoch": 1.12, + "learning_rate": 8.867296021577883e-06, + "logits/chosen": -1.5216525793075562, + "logits/rejected": -2.518294334411621, + "logps/chosen": -105.41878509521484, + "logps/rejected": -332.1691589355469, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.166766166687012, + "rewards/margins": 6.371062278747559, + "rewards/rejected": -11.53782844543457, + "step": 7199 + }, + { + "epoch": 1.12, + "learning_rate": 8.866562581046735e-06, + "logits/chosen": -3.0579895973205566, + "logits/rejected": -2.54831600189209, + "logps/chosen": -552.2203979492188, + "logps/rejected": -407.2417297363281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03589475154876709, + "rewards/margins": 10.452476501464844, + "rewards/rejected": -10.488370895385742, + "step": 7200 + }, + { + "epoch": 1.12, + "learning_rate": 8.865829140515588e-06, + "logits/chosen": -1.5433040857315063, + "logits/rejected": -2.677114725112915, + "logps/chosen": -180.285400390625, + "logps/rejected": -336.6385192871094, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.283756256103516, + "rewards/margins": 5.610042572021484, + "rewards/rejected": -9.893798828125, + "step": 7201 + }, + { + "epoch": 1.12, + "learning_rate": 8.86509569998444e-06, + "logits/chosen": -1.722524881362915, + "logits/rejected": -2.858264446258545, + "logps/chosen": -130.92019653320312, + "logps/rejected": -316.2273864746094, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.289529323577881, + "rewards/margins": 7.251129150390625, + "rewards/rejected": -11.540657997131348, + "step": 7202 + }, + { + "epoch": 1.12, + "learning_rate": 8.864362259453292e-06, + "logits/chosen": -1.6645509004592896, + "logits/rejected": -2.677237033843994, + "logps/chosen": -218.81814575195312, + "logps/rejected": -410.602294921875, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3348593711853027, + "rewards/margins": 6.8439435958862305, + "rewards/rejected": -10.178802490234375, + "step": 7203 + }, + { + "epoch": 1.12, + "learning_rate": 8.863628818922144e-06, + "logits/chosen": -2.478217840194702, + "logits/rejected": -2.675994634628296, + "logps/chosen": -289.2978820800781, + "logps/rejected": -148.978515625, + "loss": 4.024, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.402050971984863, + "rewards/margins": -1.418504238128662, + "rewards/rejected": -5.983546733856201, + "step": 7204 + }, + { + "epoch": 1.12, + "learning_rate": 8.862895378390996e-06, + "logits/chosen": -2.534406900405884, + "logits/rejected": -3.1307191848754883, + "logps/chosen": -147.5224609375, + "logps/rejected": -340.3837890625, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.031987428665161, + "rewards/margins": 4.943752765655518, + "rewards/rejected": -7.975740432739258, + "step": 7205 + }, + { + "epoch": 1.12, + "learning_rate": 8.862161937859848e-06, + "logits/chosen": -2.8431596755981445, + "logits/rejected": -2.0688750743865967, + "logps/chosen": -305.98907470703125, + "logps/rejected": -257.21673583984375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8992701768875122, + "rewards/margins": 7.126456260681152, + "rewards/rejected": -9.025726318359375, + "step": 7206 + }, + { + "epoch": 1.12, + "learning_rate": 8.8614284973287e-06, + "logits/chosen": -1.4496028423309326, + "logits/rejected": -2.9510459899902344, + "logps/chosen": -320.291259765625, + "logps/rejected": -403.7825927734375, + "loss": 2.6143, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.6038665771484375, + "rewards/margins": 1.6599466800689697, + "rewards/rejected": -8.263813018798828, + "step": 7207 + }, + { + "epoch": 1.12, + "learning_rate": 8.860695056797551e-06, + "logits/chosen": -2.315448760986328, + "logits/rejected": -2.8955419063568115, + "logps/chosen": -212.85479736328125, + "logps/rejected": -348.9260559082031, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4874191284179688, + "rewards/margins": 4.430912971496582, + "rewards/rejected": -7.918332099914551, + "step": 7208 + }, + { + "epoch": 1.12, + "learning_rate": 8.859961616266403e-06, + "logits/chosen": -2.0535755157470703, + "logits/rejected": -2.8671844005584717, + "logps/chosen": -112.54173278808594, + "logps/rejected": -299.474609375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6967692375183105, + "rewards/margins": 6.194597244262695, + "rewards/rejected": -9.891366958618164, + "step": 7209 + }, + { + "epoch": 1.12, + "learning_rate": 8.859228175735257e-06, + "logits/chosen": -2.1888058185577393, + "logits/rejected": -2.52642822265625, + "logps/chosen": -302.1435852050781, + "logps/rejected": -483.06610107421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9964890480041504, + "rewards/margins": 10.245694160461426, + "rewards/rejected": -13.242183685302734, + "step": 7210 + }, + { + "epoch": 1.12, + "learning_rate": 8.858494735204109e-06, + "logits/chosen": -2.891880512237549, + "logits/rejected": -2.212451219558716, + "logps/chosen": -175.74195861816406, + "logps/rejected": -247.16419982910156, + "loss": 1.2878, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.029833793640137, + "rewards/margins": 1.9473423957824707, + "rewards/rejected": -6.977176666259766, + "step": 7211 + }, + { + "epoch": 1.12, + "learning_rate": 8.85776129467296e-06, + "logits/chosen": -2.927950143814087, + "logits/rejected": -2.1729767322540283, + "logps/chosen": -129.48182678222656, + "logps/rejected": -119.35968017578125, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0535807609558105, + "rewards/margins": 4.6624064445495605, + "rewards/rejected": -7.715987205505371, + "step": 7212 + }, + { + "epoch": 1.12, + "learning_rate": 8.857027854141812e-06, + "logits/chosen": -2.2407069206237793, + "logits/rejected": -3.0452613830566406, + "logps/chosen": -229.47723388671875, + "logps/rejected": -273.82049560546875, + "loss": 0.4507, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.304582118988037, + "rewards/margins": 2.0040361881256104, + "rewards/rejected": -7.308618545532227, + "step": 7213 + }, + { + "epoch": 1.12, + "learning_rate": 8.856294413610664e-06, + "logits/chosen": -2.936397075653076, + "logits/rejected": -2.8697509765625, + "logps/chosen": -422.4018859863281, + "logps/rejected": -310.7364807128906, + "loss": 3.3122, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.534364700317383, + "rewards/margins": 1.0213401317596436, + "rewards/rejected": -6.5557050704956055, + "step": 7214 + }, + { + "epoch": 1.12, + "learning_rate": 8.855560973079516e-06, + "logits/chosen": -2.8275372982025146, + "logits/rejected": -2.5046885013580322, + "logps/chosen": -211.16050720214844, + "logps/rejected": -205.58897399902344, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6201164722442627, + "rewards/margins": 7.031433582305908, + "rewards/rejected": -9.65155029296875, + "step": 7215 + }, + { + "epoch": 1.12, + "learning_rate": 8.854827532548368e-06, + "logits/chosen": -1.7231929302215576, + "logits/rejected": -3.1138811111450195, + "logps/chosen": -215.36770629882812, + "logps/rejected": -481.66082763671875, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9070210456848145, + "rewards/margins": 6.143542289733887, + "rewards/rejected": -11.05056381225586, + "step": 7216 + }, + { + "epoch": 1.12, + "learning_rate": 8.85409409201722e-06, + "logits/chosen": -2.8905227184295654, + "logits/rejected": -3.08829927444458, + "logps/chosen": -116.21504211425781, + "logps/rejected": -264.16607666015625, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1761605739593506, + "rewards/margins": 5.914376735687256, + "rewards/rejected": -9.090537071228027, + "step": 7217 + }, + { + "epoch": 1.12, + "learning_rate": 8.853360651486072e-06, + "logits/chosen": -1.8315845727920532, + "logits/rejected": -2.9961483478546143, + "logps/chosen": -278.18499755859375, + "logps/rejected": -319.4629821777344, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.363307237625122, + "rewards/margins": 7.663790702819824, + "rewards/rejected": -9.027097702026367, + "step": 7218 + }, + { + "epoch": 1.12, + "learning_rate": 8.852627210954925e-06, + "logits/chosen": -1.7259563207626343, + "logits/rejected": -2.838404655456543, + "logps/chosen": -147.96401977539062, + "logps/rejected": -300.2371520996094, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2249484062194824, + "rewards/margins": 6.122470855712891, + "rewards/rejected": -9.347419738769531, + "step": 7219 + }, + { + "epoch": 1.12, + "learning_rate": 8.851893770423777e-06, + "logits/chosen": -2.7566065788269043, + "logits/rejected": -2.950990676879883, + "logps/chosen": -314.0853271484375, + "logps/rejected": -337.1834716796875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2972397804260254, + "rewards/margins": 5.7898454666137695, + "rewards/rejected": -9.087085723876953, + "step": 7220 + }, + { + "epoch": 1.12, + "learning_rate": 8.85116032989263e-06, + "logits/chosen": -2.4673290252685547, + "logits/rejected": -2.8979506492614746, + "logps/chosen": -131.63893127441406, + "logps/rejected": -413.43817138671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1008609533309937, + "rewards/margins": 9.068405151367188, + "rewards/rejected": -10.169265747070312, + "step": 7221 + }, + { + "epoch": 1.12, + "learning_rate": 8.850426889361481e-06, + "logits/chosen": -2.907834053039551, + "logits/rejected": -3.0995187759399414, + "logps/chosen": -240.59555053710938, + "logps/rejected": -400.3093566894531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3671233654022217, + "rewards/margins": 9.452059745788574, + "rewards/rejected": -10.819183349609375, + "step": 7222 + }, + { + "epoch": 1.12, + "learning_rate": 8.849693448830333e-06, + "logits/chosen": -2.9710395336151123, + "logits/rejected": -1.246206521987915, + "logps/chosen": -281.4598693847656, + "logps/rejected": -191.89073181152344, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9949551820755005, + "rewards/margins": 7.624375343322754, + "rewards/rejected": -9.619330406188965, + "step": 7223 + }, + { + "epoch": 1.12, + "learning_rate": 8.848960008299185e-06, + "logits/chosen": -2.306649684906006, + "logits/rejected": -2.7808635234832764, + "logps/chosen": -389.09259033203125, + "logps/rejected": -434.3395080566406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.033708095550537, + "rewards/margins": 9.607364654541016, + "rewards/rejected": -12.641072273254395, + "step": 7224 + }, + { + "epoch": 1.12, + "learning_rate": 8.848226567768037e-06, + "logits/chosen": -1.4625002145767212, + "logits/rejected": -2.914797306060791, + "logps/chosen": -114.84654235839844, + "logps/rejected": -341.19915771484375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.11027455329895, + "rewards/margins": 7.243622779846191, + "rewards/rejected": -9.353897094726562, + "step": 7225 + }, + { + "epoch": 1.12, + "learning_rate": 8.847493127236889e-06, + "logits/chosen": -2.3821804523468018, + "logits/rejected": -3.1005213260650635, + "logps/chosen": -91.69261169433594, + "logps/rejected": -169.9124755859375, + "loss": 1.056, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.2592997550964355, + "rewards/margins": 1.7286009788513184, + "rewards/rejected": -8.987900733947754, + "step": 7226 + }, + { + "epoch": 1.12, + "learning_rate": 8.846759686705742e-06, + "logits/chosen": -2.836002826690674, + "logits/rejected": -2.5055556297302246, + "logps/chosen": -119.75569152832031, + "logps/rejected": -308.20208740234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.595590829849243, + "rewards/margins": 9.1578369140625, + "rewards/rejected": -11.753427505493164, + "step": 7227 + }, + { + "epoch": 1.12, + "learning_rate": 8.846026246174594e-06, + "logits/chosen": -3.069857120513916, + "logits/rejected": -2.980569839477539, + "logps/chosen": -619.8804931640625, + "logps/rejected": -495.3554382324219, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.937652587890625, + "rewards/margins": 5.824465751647949, + "rewards/rejected": -8.762118339538574, + "step": 7228 + }, + { + "epoch": 1.12, + "learning_rate": 8.845292805643448e-06, + "logits/chosen": -3.003109931945801, + "logits/rejected": -2.768434524536133, + "logps/chosen": -586.075439453125, + "logps/rejected": -472.8199768066406, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3527798652648926, + "rewards/margins": 4.237628936767578, + "rewards/rejected": -7.5904083251953125, + "step": 7229 + }, + { + "epoch": 1.12, + "learning_rate": 8.8445593651123e-06, + "logits/chosen": -3.084714889526367, + "logits/rejected": -2.7819437980651855, + "logps/chosen": -416.9213562011719, + "logps/rejected": -407.4093933105469, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.455183982849121, + "rewards/margins": 5.585729598999023, + "rewards/rejected": -8.040912628173828, + "step": 7230 + }, + { + "epoch": 1.12, + "learning_rate": 8.843825924581151e-06, + "logits/chosen": -3.111208915710449, + "logits/rejected": -3.246873378753662, + "logps/chosen": -132.1851348876953, + "logps/rejected": -214.68582153320312, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7931550741195679, + "rewards/margins": 7.320549964904785, + "rewards/rejected": -9.113704681396484, + "step": 7231 + }, + { + "epoch": 1.12, + "learning_rate": 8.843092484050003e-06, + "logits/chosen": -2.5750787258148193, + "logits/rejected": -2.826242446899414, + "logps/chosen": -263.4975280761719, + "logps/rejected": -379.76287841796875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.586206078529358, + "rewards/margins": 7.563070297241211, + "rewards/rejected": -9.149276733398438, + "step": 7232 + }, + { + "epoch": 1.12, + "learning_rate": 8.842359043518855e-06, + "logits/chosen": -2.9030191898345947, + "logits/rejected": -2.008101463317871, + "logps/chosen": -140.36903381347656, + "logps/rejected": -168.5924072265625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.158527374267578, + "rewards/margins": 7.696913719177246, + "rewards/rejected": -9.85544204711914, + "step": 7233 + }, + { + "epoch": 1.13, + "learning_rate": 8.841625602987707e-06, + "logits/chosen": -3.188055992126465, + "logits/rejected": -2.3479745388031006, + "logps/chosen": -744.6427612304688, + "logps/rejected": -546.5013427734375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.379873752593994, + "rewards/margins": 6.272793769836426, + "rewards/rejected": -9.652667999267578, + "step": 7234 + }, + { + "epoch": 1.13, + "learning_rate": 8.840892162456559e-06, + "logits/chosen": -2.1431081295013428, + "logits/rejected": -2.776740312576294, + "logps/chosen": -137.72471618652344, + "logps/rejected": -525.204345703125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.87445068359375, + "rewards/margins": 6.258485794067383, + "rewards/rejected": -11.132936477661133, + "step": 7235 + }, + { + "epoch": 1.13, + "learning_rate": 8.84015872192541e-06, + "logits/chosen": -1.8992172479629517, + "logits/rejected": -2.35343337059021, + "logps/chosen": -348.263671875, + "logps/rejected": -484.862548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6772162914276123, + "rewards/margins": 12.575899124145508, + "rewards/rejected": -14.253114700317383, + "step": 7236 + }, + { + "epoch": 1.13, + "learning_rate": 8.839425281394264e-06, + "logits/chosen": -0.7532551288604736, + "logits/rejected": -2.9520955085754395, + "logps/chosen": -114.94422912597656, + "logps/rejected": -436.7013244628906, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.042988300323486, + "rewards/margins": 5.160348892211914, + "rewards/rejected": -9.203337669372559, + "step": 7237 + }, + { + "epoch": 1.13, + "learning_rate": 8.838691840863116e-06, + "logits/chosen": -2.3562541007995605, + "logits/rejected": -3.0479862689971924, + "logps/chosen": -77.67906188964844, + "logps/rejected": -322.9872131347656, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1589436531066895, + "rewards/margins": 6.806241989135742, + "rewards/rejected": -9.965185165405273, + "step": 7238 + }, + { + "epoch": 1.13, + "learning_rate": 8.837958400331968e-06, + "logits/chosen": -2.0937891006469727, + "logits/rejected": -2.8540139198303223, + "logps/chosen": -285.38006591796875, + "logps/rejected": -279.3341064453125, + "loss": 0.0638, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.137143611907959, + "rewards/margins": 4.042719841003418, + "rewards/rejected": -7.179863929748535, + "step": 7239 + }, + { + "epoch": 1.13, + "learning_rate": 8.83722495980082e-06, + "logits/chosen": -3.073370933532715, + "logits/rejected": -2.0178639888763428, + "logps/chosen": -273.2071228027344, + "logps/rejected": -166.74317932128906, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3308494091033936, + "rewards/margins": 5.950611591339111, + "rewards/rejected": -8.281460762023926, + "step": 7240 + }, + { + "epoch": 1.13, + "learning_rate": 8.836491519269672e-06, + "logits/chosen": -1.280982494354248, + "logits/rejected": -2.853489875793457, + "logps/chosen": -88.67788696289062, + "logps/rejected": -409.3213806152344, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.405064582824707, + "rewards/margins": 5.4747819900512695, + "rewards/rejected": -10.879846572875977, + "step": 7241 + }, + { + "epoch": 1.13, + "learning_rate": 8.835758078738524e-06, + "logits/chosen": -2.1405627727508545, + "logits/rejected": -3.0433578491210938, + "logps/chosen": -490.2075500488281, + "logps/rejected": -467.51751708984375, + "loss": 0.8119, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.279152870178223, + "rewards/margins": 2.6527366638183594, + "rewards/rejected": -8.931889533996582, + "step": 7242 + }, + { + "epoch": 1.13, + "learning_rate": 8.835024638207376e-06, + "logits/chosen": -2.7937095165252686, + "logits/rejected": -2.5220303535461426, + "logps/chosen": -289.8155212402344, + "logps/rejected": -251.74447631835938, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8457573652267456, + "rewards/margins": 8.896499633789062, + "rewards/rejected": -10.742257118225098, + "step": 7243 + }, + { + "epoch": 1.13, + "learning_rate": 8.834291197676227e-06, + "logits/chosen": -2.639608144760132, + "logits/rejected": -3.1741929054260254, + "logps/chosen": -66.55281829833984, + "logps/rejected": -330.60296630859375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.23115348815918, + "rewards/margins": 6.7515764236450195, + "rewards/rejected": -10.9827299118042, + "step": 7244 + }, + { + "epoch": 1.13, + "learning_rate": 8.83355775714508e-06, + "logits/chosen": -1.84274160861969, + "logits/rejected": -2.666036367416382, + "logps/chosen": -114.06713104248047, + "logps/rejected": -202.92572021484375, + "loss": 0.4363, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.70284366607666, + "rewards/margins": 3.0685601234436035, + "rewards/rejected": -9.771404266357422, + "step": 7245 + }, + { + "epoch": 1.13, + "learning_rate": 8.832824316613933e-06, + "logits/chosen": -2.8348641395568848, + "logits/rejected": -3.1373848915100098, + "logps/chosen": -127.49051666259766, + "logps/rejected": -302.5868225097656, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.457278251647949, + "rewards/margins": 4.4287238121032715, + "rewards/rejected": -7.8860015869140625, + "step": 7246 + }, + { + "epoch": 1.13, + "learning_rate": 8.832090876082785e-06, + "logits/chosen": -3.1946182250976562, + "logits/rejected": -2.9905946254730225, + "logps/chosen": -125.27566528320312, + "logps/rejected": -198.84205627441406, + "loss": 0.8772, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.055853843688965, + "rewards/margins": 2.0538649559020996, + "rewards/rejected": -7.109719276428223, + "step": 7247 + }, + { + "epoch": 1.13, + "learning_rate": 8.831357435551637e-06, + "logits/chosen": -2.890734910964966, + "logits/rejected": -3.048837661743164, + "logps/chosen": -181.06607055664062, + "logps/rejected": -364.5576477050781, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0296857357025146, + "rewards/margins": 6.892266750335693, + "rewards/rejected": -9.921953201293945, + "step": 7248 + }, + { + "epoch": 1.13, + "learning_rate": 8.830623995020489e-06, + "logits/chosen": -2.212346076965332, + "logits/rejected": -2.775934934616089, + "logps/chosen": -250.87184143066406, + "logps/rejected": -510.7103576660156, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7788171768188477, + "rewards/margins": 7.896969318389893, + "rewards/rejected": -11.675786018371582, + "step": 7249 + }, + { + "epoch": 1.13, + "learning_rate": 8.82989055448934e-06, + "logits/chosen": -2.6134963035583496, + "logits/rejected": -2.944389820098877, + "logps/chosen": -276.81365966796875, + "logps/rejected": -600.6788330078125, + "loss": 0.1816, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.513398170471191, + "rewards/margins": 4.068819999694824, + "rewards/rejected": -9.582218170166016, + "step": 7250 + }, + { + "epoch": 1.13, + "learning_rate": 8.829157113958192e-06, + "logits/chosen": -2.100860834121704, + "logits/rejected": -3.0929312705993652, + "logps/chosen": -101.89183044433594, + "logps/rejected": -450.06439208984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.736660957336426, + "rewards/margins": 9.352880477905273, + "rewards/rejected": -12.089542388916016, + "step": 7251 + }, + { + "epoch": 1.13, + "learning_rate": 8.828423673427044e-06, + "logits/chosen": -2.8067667484283447, + "logits/rejected": -2.1380012035369873, + "logps/chosen": -322.5147399902344, + "logps/rejected": -301.61822509765625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4734199047088623, + "rewards/margins": 6.646957874298096, + "rewards/rejected": -10.120378494262695, + "step": 7252 + }, + { + "epoch": 1.13, + "learning_rate": 8.827690232895896e-06, + "logits/chosen": -1.8511953353881836, + "logits/rejected": -2.578579902648926, + "logps/chosen": -260.57257080078125, + "logps/rejected": -537.7418823242188, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.42159366607666, + "rewards/margins": 6.529618263244629, + "rewards/rejected": -10.951211929321289, + "step": 7253 + }, + { + "epoch": 1.13, + "learning_rate": 8.826956792364748e-06, + "logits/chosen": -2.7444565296173096, + "logits/rejected": -3.0432634353637695, + "logps/chosen": -399.31414794921875, + "logps/rejected": -495.38720703125, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3300206661224365, + "rewards/margins": 5.184211254119873, + "rewards/rejected": -8.51423168182373, + "step": 7254 + }, + { + "epoch": 1.13, + "learning_rate": 8.826223351833602e-06, + "logits/chosen": -1.772834062576294, + "logits/rejected": -2.5110158920288086, + "logps/chosen": -220.3854217529297, + "logps/rejected": -527.0072021484375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8878021240234375, + "rewards/margins": 7.94156551361084, + "rewards/rejected": -10.829366683959961, + "step": 7255 + }, + { + "epoch": 1.13, + "learning_rate": 8.825489911302453e-06, + "logits/chosen": -1.6274102926254272, + "logits/rejected": -2.126155376434326, + "logps/chosen": -55.146976470947266, + "logps/rejected": -209.74456787109375, + "loss": 0.064, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.37236213684082, + "rewards/margins": 4.246729850769043, + "rewards/rejected": -8.619091987609863, + "step": 7256 + }, + { + "epoch": 1.13, + "learning_rate": 8.824756470771305e-06, + "logits/chosen": -3.061749219894409, + "logits/rejected": -3.031970262527466, + "logps/chosen": -108.56262969970703, + "logps/rejected": -452.0894470214844, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2674384117126465, + "rewards/margins": 5.807006359100342, + "rewards/rejected": -10.074444770812988, + "step": 7257 + }, + { + "epoch": 1.13, + "learning_rate": 8.824023030240157e-06, + "logits/chosen": -2.962069034576416, + "logits/rejected": -2.6607937812805176, + "logps/chosen": -224.49658203125, + "logps/rejected": -236.56793212890625, + "loss": 1.6895, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.828128814697266, + "rewards/margins": 4.125712871551514, + "rewards/rejected": -8.953842163085938, + "step": 7258 + }, + { + "epoch": 1.13, + "learning_rate": 8.823289589709009e-06, + "logits/chosen": -2.4416160583496094, + "logits/rejected": -2.948610782623291, + "logps/chosen": -121.5297622680664, + "logps/rejected": -197.8568115234375, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4134531021118164, + "rewards/margins": 5.666769027709961, + "rewards/rejected": -9.080222129821777, + "step": 7259 + }, + { + "epoch": 1.13, + "learning_rate": 8.822556149177861e-06, + "logits/chosen": -2.835982084274292, + "logits/rejected": -2.8937699794769287, + "logps/chosen": -237.04928588867188, + "logps/rejected": -210.310546875, + "loss": 0.0951, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4714927673339844, + "rewards/margins": 3.844787359237671, + "rewards/rejected": -7.316280364990234, + "step": 7260 + }, + { + "epoch": 1.13, + "learning_rate": 8.821822708646714e-06, + "logits/chosen": -1.1263796091079712, + "logits/rejected": -2.6673219203948975, + "logps/chosen": -112.3031005859375, + "logps/rejected": -447.7061767578125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.89520788192749, + "rewards/margins": 7.594630718231201, + "rewards/rejected": -12.489838600158691, + "step": 7261 + }, + { + "epoch": 1.13, + "learning_rate": 8.821089268115566e-06, + "logits/chosen": -1.2799952030181885, + "logits/rejected": -2.8048081398010254, + "logps/chosen": -67.25215911865234, + "logps/rejected": -309.82440185546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8845713138580322, + "rewards/margins": 7.943707466125488, + "rewards/rejected": -10.828278541564941, + "step": 7262 + }, + { + "epoch": 1.13, + "learning_rate": 8.820355827584418e-06, + "logits/chosen": -2.9144442081451416, + "logits/rejected": -3.0235812664031982, + "logps/chosen": -257.111083984375, + "logps/rejected": -308.8919982910156, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.618814945220947, + "rewards/margins": 6.21933126449585, + "rewards/rejected": -10.838146209716797, + "step": 7263 + }, + { + "epoch": 1.13, + "learning_rate": 8.819622387053272e-06, + "logits/chosen": -2.908031702041626, + "logits/rejected": -3.0245447158813477, + "logps/chosen": -51.02729415893555, + "logps/rejected": -226.87254333496094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1823554039001465, + "rewards/margins": 9.562631607055664, + "rewards/rejected": -11.744987487792969, + "step": 7264 + }, + { + "epoch": 1.13, + "learning_rate": 8.818888946522124e-06, + "logits/chosen": -1.7364192008972168, + "logits/rejected": -3.1068575382232666, + "logps/chosen": -131.7624053955078, + "logps/rejected": -555.051513671875, + "loss": 0.2687, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.286460876464844, + "rewards/margins": 1.3295671939849854, + "rewards/rejected": -5.61602783203125, + "step": 7265 + }, + { + "epoch": 1.13, + "learning_rate": 8.818155505990976e-06, + "logits/chosen": -0.5966993570327759, + "logits/rejected": -2.6155035495758057, + "logps/chosen": -99.8785171508789, + "logps/rejected": -368.96514892578125, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.224259853363037, + "rewards/margins": 9.250688552856445, + "rewards/rejected": -12.47494888305664, + "step": 7266 + }, + { + "epoch": 1.13, + "learning_rate": 8.817422065459827e-06, + "logits/chosen": -2.822427749633789, + "logits/rejected": -2.499903678894043, + "logps/chosen": -352.64654541015625, + "logps/rejected": -459.4427490234375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.150869846343994, + "rewards/margins": 7.554797172546387, + "rewards/rejected": -10.705667495727539, + "step": 7267 + }, + { + "epoch": 1.13, + "learning_rate": 8.81668862492868e-06, + "logits/chosen": -2.192101240158081, + "logits/rejected": -2.956163167953491, + "logps/chosen": -53.58633804321289, + "logps/rejected": -209.94448852539062, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.827658176422119, + "rewards/margins": 7.263348579406738, + "rewards/rejected": -10.091007232666016, + "step": 7268 + }, + { + "epoch": 1.13, + "learning_rate": 8.815955184397531e-06, + "logits/chosen": -1.9766079187393188, + "logits/rejected": -2.975546360015869, + "logps/chosen": -77.09809875488281, + "logps/rejected": -511.374267578125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5434317588806152, + "rewards/margins": 7.807882308959961, + "rewards/rejected": -10.351314544677734, + "step": 7269 + }, + { + "epoch": 1.13, + "learning_rate": 8.815221743866383e-06, + "logits/chosen": -1.5105371475219727, + "logits/rejected": -2.2353515625, + "logps/chosen": -141.50811767578125, + "logps/rejected": -170.85763549804688, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.702051162719727, + "rewards/margins": 4.143031120300293, + "rewards/rejected": -9.84508228302002, + "step": 7270 + }, + { + "epoch": 1.13, + "learning_rate": 8.814488303335235e-06, + "logits/chosen": -2.9967825412750244, + "logits/rejected": -3.0194337368011475, + "logps/chosen": -307.24554443359375, + "logps/rejected": -229.59210205078125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.365973711013794, + "rewards/margins": 6.133263111114502, + "rewards/rejected": -8.499237060546875, + "step": 7271 + }, + { + "epoch": 1.13, + "learning_rate": 8.813754862804089e-06, + "logits/chosen": -1.0126546621322632, + "logits/rejected": -2.2587692737579346, + "logps/chosen": -259.00360107421875, + "logps/rejected": -775.4884643554688, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.392009735107422, + "rewards/margins": 13.816499710083008, + "rewards/rejected": -18.20850944519043, + "step": 7272 + }, + { + "epoch": 1.13, + "learning_rate": 8.81302142227294e-06, + "logits/chosen": -2.801311492919922, + "logits/rejected": -2.614482879638672, + "logps/chosen": -259.25958251953125, + "logps/rejected": -348.5352478027344, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1713457107543945, + "rewards/margins": 7.579607963562012, + "rewards/rejected": -10.750953674316406, + "step": 7273 + }, + { + "epoch": 1.13, + "learning_rate": 8.812287981741792e-06, + "logits/chosen": -1.2997972965240479, + "logits/rejected": -2.710348606109619, + "logps/chosen": -130.9153289794922, + "logps/rejected": -434.15496826171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7777156829833984, + "rewards/margins": 8.937566757202148, + "rewards/rejected": -11.715282440185547, + "step": 7274 + }, + { + "epoch": 1.13, + "learning_rate": 8.811554541210644e-06, + "logits/chosen": -2.5601625442504883, + "logits/rejected": -2.3600385189056396, + "logps/chosen": -94.81082153320312, + "logps/rejected": -158.658203125, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.66478157043457, + "rewards/margins": 6.434748649597168, + "rewards/rejected": -11.099529266357422, + "step": 7275 + }, + { + "epoch": 1.13, + "learning_rate": 8.810821100679496e-06, + "logits/chosen": -2.207730770111084, + "logits/rejected": -3.212878704071045, + "logps/chosen": -81.01160430908203, + "logps/rejected": -316.6318664550781, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.747828006744385, + "rewards/margins": 6.451427459716797, + "rewards/rejected": -11.199254989624023, + "step": 7276 + }, + { + "epoch": 1.13, + "learning_rate": 8.810087660148348e-06, + "logits/chosen": -2.7499849796295166, + "logits/rejected": -2.8372697830200195, + "logps/chosen": -192.43898010253906, + "logps/rejected": -207.13214111328125, + "loss": 0.3458, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.43487548828125, + "rewards/margins": 4.169841766357422, + "rewards/rejected": -9.604717254638672, + "step": 7277 + }, + { + "epoch": 1.13, + "learning_rate": 8.8093542196172e-06, + "logits/chosen": -2.0825250148773193, + "logits/rejected": -2.730989694595337, + "logps/chosen": -262.7386169433594, + "logps/rejected": -319.6300048828125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.792076587677002, + "rewards/margins": 6.4593915939331055, + "rewards/rejected": -10.251468658447266, + "step": 7278 + }, + { + "epoch": 1.13, + "learning_rate": 8.808620779086052e-06, + "logits/chosen": -1.8068058490753174, + "logits/rejected": -2.5817301273345947, + "logps/chosen": -348.049560546875, + "logps/rejected": -505.96881103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0930497646331787, + "rewards/margins": 12.808219909667969, + "rewards/rejected": -14.901269912719727, + "step": 7279 + }, + { + "epoch": 1.13, + "learning_rate": 8.807887338554904e-06, + "logits/chosen": -0.8053684830665588, + "logits/rejected": -2.8109254837036133, + "logps/chosen": -124.64564514160156, + "logps/rejected": -564.2506713867188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5850167274475098, + "rewards/margins": 8.803239822387695, + "rewards/rejected": -12.388256072998047, + "step": 7280 + }, + { + "epoch": 1.13, + "learning_rate": 8.807153898023757e-06, + "logits/chosen": -2.6421759128570557, + "logits/rejected": -2.917144298553467, + "logps/chosen": -293.39501953125, + "logps/rejected": -429.2453308105469, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6168277263641357, + "rewards/margins": 8.641900062561035, + "rewards/rejected": -12.25872802734375, + "step": 7281 + }, + { + "epoch": 1.13, + "learning_rate": 8.806420457492609e-06, + "logits/chosen": -1.4894121885299683, + "logits/rejected": -2.841540575027466, + "logps/chosen": -88.5269775390625, + "logps/rejected": -255.25213623046875, + "loss": 0.2296, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.101561069488525, + "rewards/margins": 3.8184022903442383, + "rewards/rejected": -7.919963359832764, + "step": 7282 + }, + { + "epoch": 1.13, + "learning_rate": 8.805687016961461e-06, + "logits/chosen": -2.979898452758789, + "logits/rejected": -3.016317367553711, + "logps/chosen": -223.7933349609375, + "logps/rejected": -377.3694152832031, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.235454082489014, + "rewards/margins": 6.608727931976318, + "rewards/rejected": -11.844182014465332, + "step": 7283 + }, + { + "epoch": 1.13, + "learning_rate": 8.804953576430313e-06, + "logits/chosen": -1.9678109884262085, + "logits/rejected": -2.6773667335510254, + "logps/chosen": -65.47523498535156, + "logps/rejected": -330.01953125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5684385299682617, + "rewards/margins": 8.283421516418457, + "rewards/rejected": -10.851860046386719, + "step": 7284 + }, + { + "epoch": 1.13, + "learning_rate": 8.804220135899165e-06, + "logits/chosen": -1.6313709020614624, + "logits/rejected": -2.65122652053833, + "logps/chosen": -110.67710876464844, + "logps/rejected": -401.98388671875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.549278259277344, + "rewards/margins": 7.430320739746094, + "rewards/rejected": -11.979598999023438, + "step": 7285 + }, + { + "epoch": 1.13, + "learning_rate": 8.803486695368017e-06, + "logits/chosen": -2.9492580890655518, + "logits/rejected": -2.678776502609253, + "logps/chosen": -99.54550170898438, + "logps/rejected": -192.1320343017578, + "loss": 0.3247, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.048312664031982, + "rewards/margins": 3.2448902130126953, + "rewards/rejected": -7.293203353881836, + "step": 7286 + }, + { + "epoch": 1.13, + "learning_rate": 8.802753254836868e-06, + "logits/chosen": -2.3989689350128174, + "logits/rejected": -2.981755495071411, + "logps/chosen": -82.29879760742188, + "logps/rejected": -214.64617919921875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7555065155029297, + "rewards/margins": 6.646178245544434, + "rewards/rejected": -10.401684761047363, + "step": 7287 + }, + { + "epoch": 1.13, + "learning_rate": 8.80201981430572e-06, + "logits/chosen": -1.3591440916061401, + "logits/rejected": -2.6874210834503174, + "logps/chosen": -91.21908569335938, + "logps/rejected": -329.0113830566406, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.332507133483887, + "rewards/margins": 7.991098880767822, + "rewards/rejected": -12.323606491088867, + "step": 7288 + }, + { + "epoch": 1.13, + "learning_rate": 8.801286373774572e-06, + "logits/chosen": -1.794728398323059, + "logits/rejected": -2.911836862564087, + "logps/chosen": -482.47711181640625, + "logps/rejected": -611.862060546875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.443730354309082, + "rewards/margins": 6.602039813995361, + "rewards/rejected": -11.045770645141602, + "step": 7289 + }, + { + "epoch": 1.13, + "learning_rate": 8.800552933243426e-06, + "logits/chosen": -2.905543088912964, + "logits/rejected": -2.6841461658477783, + "logps/chosen": -187.83233642578125, + "logps/rejected": -320.83245849609375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0896716117858887, + "rewards/margins": 7.484806060791016, + "rewards/rejected": -10.574478149414062, + "step": 7290 + }, + { + "epoch": 1.13, + "learning_rate": 8.799819492712278e-06, + "logits/chosen": -2.658975839614868, + "logits/rejected": -3.2411842346191406, + "logps/chosen": -223.61593627929688, + "logps/rejected": -460.9122009277344, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.748828172683716, + "rewards/margins": 7.321881294250488, + "rewards/rejected": -10.070709228515625, + "step": 7291 + }, + { + "epoch": 1.13, + "learning_rate": 8.79908605218113e-06, + "logits/chosen": -1.93631112575531, + "logits/rejected": -2.700326442718506, + "logps/chosen": -185.4905242919922, + "logps/rejected": -601.837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.941049098968506, + "rewards/margins": 11.24413776397705, + "rewards/rejected": -15.185186386108398, + "step": 7292 + }, + { + "epoch": 1.13, + "learning_rate": 8.798352611649981e-06, + "logits/chosen": -2.744493246078491, + "logits/rejected": -2.870021343231201, + "logps/chosen": -108.38246154785156, + "logps/rejected": -294.7515869140625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.222073793411255, + "rewards/margins": 8.220279693603516, + "rewards/rejected": -11.442354202270508, + "step": 7293 + }, + { + "epoch": 1.13, + "learning_rate": 8.797619171118833e-06, + "logits/chosen": -2.721135377883911, + "logits/rejected": -2.938109874725342, + "logps/chosen": -78.37818908691406, + "logps/rejected": -325.7971496582031, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8122965097427368, + "rewards/margins": 9.700977325439453, + "rewards/rejected": -11.513274192810059, + "step": 7294 + }, + { + "epoch": 1.13, + "learning_rate": 8.796885730587687e-06, + "logits/chosen": -2.7544190883636475, + "logits/rejected": -2.911322593688965, + "logps/chosen": -494.0641174316406, + "logps/rejected": -647.0421142578125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8051475882530212, + "rewards/margins": 11.63650894165039, + "rewards/rejected": -12.441657066345215, + "step": 7295 + }, + { + "epoch": 1.13, + "learning_rate": 8.796152290056539e-06, + "logits/chosen": -2.8754751682281494, + "logits/rejected": -3.058133363723755, + "logps/chosen": -198.8082275390625, + "logps/rejected": -256.8428955078125, + "loss": 0.0611, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.493917942047119, + "rewards/margins": 4.05035924911499, + "rewards/rejected": -10.54427719116211, + "step": 7296 + }, + { + "epoch": 1.13, + "learning_rate": 8.79541884952539e-06, + "logits/chosen": -1.4307923316955566, + "logits/rejected": -2.8543057441711426, + "logps/chosen": -157.83221435546875, + "logps/rejected": -429.487548828125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.468777656555176, + "rewards/margins": 8.035443305969238, + "rewards/rejected": -13.504220962524414, + "step": 7297 + }, + { + "epoch": 1.13, + "learning_rate": 8.794685408994242e-06, + "logits/chosen": -3.12902569770813, + "logits/rejected": -2.7353062629699707, + "logps/chosen": -174.421875, + "logps/rejected": -202.16500854492188, + "loss": 0.5525, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3975167274475098, + "rewards/margins": 2.349238157272339, + "rewards/rejected": -5.7467546463012695, + "step": 7298 + }, + { + "epoch": 1.14, + "learning_rate": 8.793951968463096e-06, + "logits/chosen": -2.2536633014678955, + "logits/rejected": -2.6661219596862793, + "logps/chosen": -121.26301574707031, + "logps/rejected": -296.9976806640625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.022416353225708, + "rewards/margins": 8.65219497680664, + "rewards/rejected": -10.674612045288086, + "step": 7299 + }, + { + "epoch": 1.14, + "learning_rate": 8.793218527931948e-06, + "logits/chosen": -1.5200647115707397, + "logits/rejected": -2.7765681743621826, + "logps/chosen": -266.4234619140625, + "logps/rejected": -540.5247802734375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1640663146972656, + "rewards/margins": 10.137737274169922, + "rewards/rejected": -13.301803588867188, + "step": 7300 + }, + { + "epoch": 1.14, + "learning_rate": 8.7924850874008e-06, + "logits/chosen": -2.9028165340423584, + "logits/rejected": -3.0311458110809326, + "logps/chosen": -140.84375, + "logps/rejected": -210.6698760986328, + "loss": 1.2573, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.975647449493408, + "rewards/margins": 2.8345224857330322, + "rewards/rejected": -7.8101701736450195, + "step": 7301 + }, + { + "epoch": 1.14, + "learning_rate": 8.791751646869652e-06, + "logits/chosen": -3.0835814476013184, + "logits/rejected": -2.8575494289398193, + "logps/chosen": -426.462646484375, + "logps/rejected": -328.94415283203125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.396838665008545, + "rewards/margins": 6.237796783447266, + "rewards/rejected": -8.634635925292969, + "step": 7302 + }, + { + "epoch": 1.14, + "learning_rate": 8.791018206338504e-06, + "logits/chosen": -3.029829263687134, + "logits/rejected": -2.983109474182129, + "logps/chosen": -142.79139709472656, + "logps/rejected": -230.19680786132812, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4678378105163574, + "rewards/margins": 7.508440017700195, + "rewards/rejected": -10.976278305053711, + "step": 7303 + }, + { + "epoch": 1.14, + "learning_rate": 8.790284765807355e-06, + "logits/chosen": -2.8390607833862305, + "logits/rejected": -2.867488145828247, + "logps/chosen": -586.6145629882812, + "logps/rejected": -452.0456237792969, + "loss": 0.4443, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7062134742736816, + "rewards/margins": 4.539925575256348, + "rewards/rejected": -8.246139526367188, + "step": 7304 + }, + { + "epoch": 1.14, + "learning_rate": 8.789551325276207e-06, + "logits/chosen": -2.9676873683929443, + "logits/rejected": -2.9840426445007324, + "logps/chosen": -93.06790924072266, + "logps/rejected": -160.89430236816406, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7211354970932007, + "rewards/margins": 6.853241920471191, + "rewards/rejected": -8.574377059936523, + "step": 7305 + }, + { + "epoch": 1.14, + "learning_rate": 8.78881788474506e-06, + "logits/chosen": -2.4449639320373535, + "logits/rejected": -2.8115522861480713, + "logps/chosen": -88.80024719238281, + "logps/rejected": -228.4832763671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.746405124664307, + "rewards/margins": 9.492338180541992, + "rewards/rejected": -14.23874282836914, + "step": 7306 + }, + { + "epoch": 1.14, + "learning_rate": 8.788084444213911e-06, + "logits/chosen": -2.8940534591674805, + "logits/rejected": -1.7041219472885132, + "logps/chosen": -294.304443359375, + "logps/rejected": -239.80709838867188, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.170027494430542, + "rewards/margins": 8.969948768615723, + "rewards/rejected": -11.139976501464844, + "step": 7307 + }, + { + "epoch": 1.14, + "learning_rate": 8.787351003682765e-06, + "logits/chosen": -2.296598196029663, + "logits/rejected": -2.9694459438323975, + "logps/chosen": -144.0166015625, + "logps/rejected": -407.383544921875, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.466238021850586, + "rewards/margins": 6.98960018157959, + "rewards/rejected": -9.455838203430176, + "step": 7308 + }, + { + "epoch": 1.14, + "learning_rate": 8.786617563151617e-06, + "logits/chosen": -1.9210108518600464, + "logits/rejected": -2.7963666915893555, + "logps/chosen": -118.42855834960938, + "logps/rejected": -225.4657745361328, + "loss": 2.9181, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.928891181945801, + "rewards/margins": 1.688951015472412, + "rewards/rejected": -7.617842197418213, + "step": 7309 + }, + { + "epoch": 1.14, + "learning_rate": 8.785884122620468e-06, + "logits/chosen": -2.7753806114196777, + "logits/rejected": -2.6108055114746094, + "logps/chosen": -193.90719604492188, + "logps/rejected": -258.8497009277344, + "loss": 0.5662, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.873279571533203, + "rewards/margins": 1.2258362770080566, + "rewards/rejected": -8.099116325378418, + "step": 7310 + }, + { + "epoch": 1.14, + "learning_rate": 8.78515068208932e-06, + "logits/chosen": -1.7193266153335571, + "logits/rejected": -2.9287636280059814, + "logps/chosen": -144.25660705566406, + "logps/rejected": -332.34185791015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.613390922546387, + "rewards/margins": 8.68490982055664, + "rewards/rejected": -13.298301696777344, + "step": 7311 + }, + { + "epoch": 1.14, + "learning_rate": 8.784417241558172e-06, + "logits/chosen": -2.776113986968994, + "logits/rejected": -3.054452419281006, + "logps/chosen": -50.284061431884766, + "logps/rejected": -188.22390747070312, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.749208927154541, + "rewards/margins": 8.72314453125, + "rewards/rejected": -12.472352981567383, + "step": 7312 + }, + { + "epoch": 1.14, + "learning_rate": 8.783683801027024e-06, + "logits/chosen": -2.674973964691162, + "logits/rejected": -2.751905918121338, + "logps/chosen": -131.6088409423828, + "logps/rejected": -267.6220703125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6521663665771484, + "rewards/margins": 8.03374195098877, + "rewards/rejected": -11.685907363891602, + "step": 7313 + }, + { + "epoch": 1.14, + "learning_rate": 8.782950360495876e-06, + "logits/chosen": -2.1095685958862305, + "logits/rejected": -2.4040167331695557, + "logps/chosen": -229.0989532470703, + "logps/rejected": -402.44140625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9108402729034424, + "rewards/margins": 7.7869062423706055, + "rewards/rejected": -11.697746276855469, + "step": 7314 + }, + { + "epoch": 1.14, + "learning_rate": 8.782216919964728e-06, + "logits/chosen": -2.1425364017486572, + "logits/rejected": -2.9627509117126465, + "logps/chosen": -44.67033386230469, + "logps/rejected": -302.2704772949219, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5516581535339355, + "rewards/margins": 6.617249011993408, + "rewards/rejected": -10.168907165527344, + "step": 7315 + }, + { + "epoch": 1.14, + "learning_rate": 8.78148347943358e-06, + "logits/chosen": -1.1717517375946045, + "logits/rejected": -2.5864431858062744, + "logps/chosen": -62.947776794433594, + "logps/rejected": -318.89990234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.311735153198242, + "rewards/margins": 9.26470947265625, + "rewards/rejected": -12.576444625854492, + "step": 7316 + }, + { + "epoch": 1.14, + "learning_rate": 8.780750038902433e-06, + "logits/chosen": -2.9745075702667236, + "logits/rejected": -3.103640079498291, + "logps/chosen": -330.01605224609375, + "logps/rejected": -523.921875, + "loss": 0.2238, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.339056491851807, + "rewards/margins": 4.125539779663086, + "rewards/rejected": -9.46459674835205, + "step": 7317 + }, + { + "epoch": 1.14, + "learning_rate": 8.780016598371285e-06, + "logits/chosen": -2.8152260780334473, + "logits/rejected": -2.985379695892334, + "logps/chosen": -143.52630615234375, + "logps/rejected": -296.65399169921875, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7287895679473877, + "rewards/margins": 4.700584411621094, + "rewards/rejected": -7.4293742179870605, + "step": 7318 + }, + { + "epoch": 1.14, + "learning_rate": 8.779283157840137e-06, + "logits/chosen": -2.9183411598205566, + "logits/rejected": -2.740480661392212, + "logps/chosen": -291.335205078125, + "logps/rejected": -209.33456420898438, + "loss": 2.5946, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.899195671081543, + "rewards/margins": 0.5832645893096924, + "rewards/rejected": -6.4824604988098145, + "step": 7319 + }, + { + "epoch": 1.14, + "learning_rate": 8.778549717308989e-06, + "logits/chosen": -2.064035177230835, + "logits/rejected": -2.9360158443450928, + "logps/chosen": -122.79875183105469, + "logps/rejected": -338.9564514160156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.264011859893799, + "rewards/margins": 8.976106643676758, + "rewards/rejected": -12.240118026733398, + "step": 7320 + }, + { + "epoch": 1.14, + "learning_rate": 8.77781627677784e-06, + "logits/chosen": -2.722825050354004, + "logits/rejected": -2.9878439903259277, + "logps/chosen": -63.719093322753906, + "logps/rejected": -207.48141479492188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8643574714660645, + "rewards/margins": 9.458446502685547, + "rewards/rejected": -13.322803497314453, + "step": 7321 + }, + { + "epoch": 1.14, + "learning_rate": 8.777082836246693e-06, + "logits/chosen": -3.1002700328826904, + "logits/rejected": -3.070861577987671, + "logps/chosen": -80.21808624267578, + "logps/rejected": -148.4333038330078, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.044424295425415, + "rewards/margins": 6.555665969848633, + "rewards/rejected": -8.600090026855469, + "step": 7322 + }, + { + "epoch": 1.14, + "learning_rate": 8.776349395715545e-06, + "logits/chosen": -2.8008244037628174, + "logits/rejected": -2.8102331161499023, + "logps/chosen": -118.35952758789062, + "logps/rejected": -184.47909545898438, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4491872787475586, + "rewards/margins": 5.773802757263184, + "rewards/rejected": -9.222990036010742, + "step": 7323 + }, + { + "epoch": 1.14, + "learning_rate": 8.775615955184396e-06, + "logits/chosen": -2.5149829387664795, + "logits/rejected": -2.7948224544525146, + "logps/chosen": -161.9469451904297, + "logps/rejected": -198.47677612304688, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.530275583267212, + "rewards/margins": 6.720392227172852, + "rewards/rejected": -10.250667572021484, + "step": 7324 + }, + { + "epoch": 1.14, + "learning_rate": 8.774882514653248e-06, + "logits/chosen": -2.835751533508301, + "logits/rejected": -3.0581114292144775, + "logps/chosen": -231.68716430664062, + "logps/rejected": -259.7333068847656, + "loss": 0.0941, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.698148488998413, + "rewards/margins": 3.9255571365356445, + "rewards/rejected": -7.623705863952637, + "step": 7325 + }, + { + "epoch": 1.14, + "learning_rate": 8.774149074122102e-06, + "logits/chosen": -2.490863800048828, + "logits/rejected": -2.7805087566375732, + "logps/chosen": -207.2928924560547, + "logps/rejected": -387.02374267578125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7627005577087402, + "rewards/margins": 8.288679122924805, + "rewards/rejected": -11.051379203796387, + "step": 7326 + }, + { + "epoch": 1.14, + "learning_rate": 8.773415633590954e-06, + "logits/chosen": -1.868245244026184, + "logits/rejected": -2.8952407836914062, + "logps/chosen": -473.427734375, + "logps/rejected": -548.8493041992188, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3728713989257812, + "rewards/margins": 7.508817672729492, + "rewards/rejected": -10.881689071655273, + "step": 7327 + }, + { + "epoch": 1.14, + "learning_rate": 8.772682193059806e-06, + "logits/chosen": -2.780945301055908, + "logits/rejected": -2.941502809524536, + "logps/chosen": -532.724609375, + "logps/rejected": -727.856201171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.257627964019775, + "rewards/margins": 8.596797943115234, + "rewards/rejected": -12.854425430297852, + "step": 7328 + }, + { + "epoch": 1.14, + "learning_rate": 8.77194875252866e-06, + "logits/chosen": -2.9870481491088867, + "logits/rejected": -1.7964049577713013, + "logps/chosen": -298.82781982421875, + "logps/rejected": -217.8618927001953, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6426048278808594, + "rewards/margins": 9.333474159240723, + "rewards/rejected": -11.976078987121582, + "step": 7329 + }, + { + "epoch": 1.14, + "learning_rate": 8.771215311997511e-06, + "logits/chosen": -2.6561121940612793, + "logits/rejected": -3.0124282836914062, + "logps/chosen": -179.07749938964844, + "logps/rejected": -270.64849853515625, + "loss": 0.0877, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3638458251953125, + "rewards/margins": 5.528866767883301, + "rewards/rejected": -7.892712593078613, + "step": 7330 + }, + { + "epoch": 1.14, + "learning_rate": 8.770481871466363e-06, + "logits/chosen": -1.9451844692230225, + "logits/rejected": -2.824024200439453, + "logps/chosen": -224.36032104492188, + "logps/rejected": -521.4598388671875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.599022388458252, + "rewards/margins": 6.4723615646362305, + "rewards/rejected": -11.07138442993164, + "step": 7331 + }, + { + "epoch": 1.14, + "learning_rate": 8.769748430935215e-06, + "logits/chosen": -2.4598803520202637, + "logits/rejected": -2.9314050674438477, + "logps/chosen": -192.79452514648438, + "logps/rejected": -210.23849487304688, + "loss": 0.6998, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.001049995422363, + "rewards/margins": 2.8495800495147705, + "rewards/rejected": -7.850629806518555, + "step": 7332 + }, + { + "epoch": 1.14, + "learning_rate": 8.769014990404067e-06, + "logits/chosen": -2.9094297885894775, + "logits/rejected": -1.5665044784545898, + "logps/chosen": -255.9748077392578, + "logps/rejected": -139.80064392089844, + "loss": 0.05, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2703075408935547, + "rewards/margins": 3.917304754257202, + "rewards/rejected": -7.187612533569336, + "step": 7333 + }, + { + "epoch": 1.14, + "learning_rate": 8.768281549872919e-06, + "logits/chosen": -2.797240734100342, + "logits/rejected": -2.334845781326294, + "logps/chosen": -376.92181396484375, + "logps/rejected": -415.0426025390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.611349105834961, + "rewards/margins": 9.893415451049805, + "rewards/rejected": -11.504764556884766, + "step": 7334 + }, + { + "epoch": 1.14, + "learning_rate": 8.767548109341772e-06, + "logits/chosen": -3.0026772022247314, + "logits/rejected": -2.5194640159606934, + "logps/chosen": -664.8692016601562, + "logps/rejected": -423.0670166015625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0799643993377686, + "rewards/margins": 6.326565742492676, + "rewards/rejected": -8.406530380249023, + "step": 7335 + }, + { + "epoch": 1.14, + "learning_rate": 8.766814668810624e-06, + "logits/chosen": -2.5596132278442383, + "logits/rejected": -3.001053810119629, + "logps/chosen": -338.0179443359375, + "logps/rejected": -625.7390747070312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1184334754943848, + "rewards/margins": 8.603655815124512, + "rewards/rejected": -11.722088813781738, + "step": 7336 + }, + { + "epoch": 1.14, + "learning_rate": 8.766081228279476e-06, + "logits/chosen": -2.4480602741241455, + "logits/rejected": -3.094940185546875, + "logps/chosen": -66.3871841430664, + "logps/rejected": -256.8943786621094, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8697214126586914, + "rewards/margins": 6.420958518981934, + "rewards/rejected": -9.290679931640625, + "step": 7337 + }, + { + "epoch": 1.14, + "learning_rate": 8.765347787748328e-06, + "logits/chosen": -3.099778652191162, + "logits/rejected": -2.5406367778778076, + "logps/chosen": -119.40554809570312, + "logps/rejected": -126.99803924560547, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.358288288116455, + "rewards/margins": 5.358492851257324, + "rewards/rejected": -8.716781616210938, + "step": 7338 + }, + { + "epoch": 1.14, + "learning_rate": 8.76461434721718e-06, + "logits/chosen": -1.3388724327087402, + "logits/rejected": -2.430910110473633, + "logps/chosen": -347.08709716796875, + "logps/rejected": -250.43240356445312, + "loss": 0.8458, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.204112529754639, + "rewards/margins": 4.448836803436279, + "rewards/rejected": -10.652949333190918, + "step": 7339 + }, + { + "epoch": 1.14, + "learning_rate": 8.763880906686032e-06, + "logits/chosen": -2.9872775077819824, + "logits/rejected": -2.7513930797576904, + "logps/chosen": -582.0235595703125, + "logps/rejected": -357.8179016113281, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.222917079925537, + "rewards/margins": 5.999392509460449, + "rewards/rejected": -9.222309112548828, + "step": 7340 + }, + { + "epoch": 1.14, + "learning_rate": 8.763147466154883e-06, + "logits/chosen": -2.8577473163604736, + "logits/rejected": -3.019411087036133, + "logps/chosen": -64.06431579589844, + "logps/rejected": -188.21267700195312, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.182868719100952, + "rewards/margins": 7.759111404418945, + "rewards/rejected": -10.941980361938477, + "step": 7341 + }, + { + "epoch": 1.14, + "learning_rate": 8.762414025623735e-06, + "logits/chosen": -2.697985887527466, + "logits/rejected": -3.2426061630249023, + "logps/chosen": -120.62773132324219, + "logps/rejected": -165.6944580078125, + "loss": 1.0093, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.125181674957275, + "rewards/margins": 3.211714744567871, + "rewards/rejected": -8.336896896362305, + "step": 7342 + }, + { + "epoch": 1.14, + "learning_rate": 8.761680585092587e-06, + "logits/chosen": -1.8370612859725952, + "logits/rejected": -2.572596788406372, + "logps/chosen": -175.41246032714844, + "logps/rejected": -533.4716796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7149527072906494, + "rewards/margins": 10.114326477050781, + "rewards/rejected": -13.829278945922852, + "step": 7343 + }, + { + "epoch": 1.14, + "learning_rate": 8.76094714456144e-06, + "logits/chosen": -2.6845459938049316, + "logits/rejected": -2.953659772872925, + "logps/chosen": -84.61768341064453, + "logps/rejected": -259.32232666015625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7508201599121094, + "rewards/margins": 6.620560169219971, + "rewards/rejected": -9.371379852294922, + "step": 7344 + }, + { + "epoch": 1.14, + "learning_rate": 8.760213704030293e-06, + "logits/chosen": -2.3402955532073975, + "logits/rejected": -2.867494821548462, + "logps/chosen": -101.07139587402344, + "logps/rejected": -325.62371826171875, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4408228397369385, + "rewards/margins": 6.518351078033447, + "rewards/rejected": -9.959173202514648, + "step": 7345 + }, + { + "epoch": 1.14, + "learning_rate": 8.759480263499144e-06, + "logits/chosen": -1.9481359720230103, + "logits/rejected": -2.685678243637085, + "logps/chosen": -216.81109619140625, + "logps/rejected": -619.595458984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8668248653411865, + "rewards/margins": 10.00222396850586, + "rewards/rejected": -11.869049072265625, + "step": 7346 + }, + { + "epoch": 1.14, + "learning_rate": 8.758746822967996e-06, + "logits/chosen": -1.8917611837387085, + "logits/rejected": -2.803920030593872, + "logps/chosen": -111.58705139160156, + "logps/rejected": -145.457763671875, + "loss": 0.7894, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.235443115234375, + "rewards/margins": 1.02134108543396, + "rewards/rejected": -6.256783962249756, + "step": 7347 + }, + { + "epoch": 1.14, + "learning_rate": 8.758013382436848e-06, + "logits/chosen": -1.3238887786865234, + "logits/rejected": -3.0078954696655273, + "logps/chosen": -272.4103698730469, + "logps/rejected": -566.1527099609375, + "loss": 0.0957, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9671521186828613, + "rewards/margins": 5.078183174133301, + "rewards/rejected": -9.04533576965332, + "step": 7348 + }, + { + "epoch": 1.14, + "learning_rate": 8.7572799419057e-06, + "logits/chosen": -1.5381755828857422, + "logits/rejected": -2.7420434951782227, + "logps/chosen": -92.51808166503906, + "logps/rejected": -333.7220458984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.526674270629883, + "rewards/margins": 8.738717079162598, + "rewards/rejected": -13.265390396118164, + "step": 7349 + }, + { + "epoch": 1.14, + "learning_rate": 8.756546501374552e-06, + "logits/chosen": -2.3072402477264404, + "logits/rejected": -2.8446526527404785, + "logps/chosen": -294.6954345703125, + "logps/rejected": -400.87646484375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5848147869110107, + "rewards/margins": 6.939275741577148, + "rewards/rejected": -9.524090766906738, + "step": 7350 + }, + { + "epoch": 1.14, + "learning_rate": 8.755813060843404e-06, + "logits/chosen": -1.9326515197753906, + "logits/rejected": -2.698035955429077, + "logps/chosen": -131.06842041015625, + "logps/rejected": -325.4259033203125, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.235939025878906, + "rewards/margins": 4.867558479309082, + "rewards/rejected": -11.103497505187988, + "step": 7351 + }, + { + "epoch": 1.14, + "learning_rate": 8.755079620312256e-06, + "logits/chosen": -3.088948965072632, + "logits/rejected": -3.176539659500122, + "logps/chosen": -197.20611572265625, + "logps/rejected": -237.35923767089844, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.965660810470581, + "rewards/margins": 5.777239799499512, + "rewards/rejected": -7.742900371551514, + "step": 7352 + }, + { + "epoch": 1.14, + "learning_rate": 8.75434617978111e-06, + "logits/chosen": -3.0125110149383545, + "logits/rejected": -1.9718743562698364, + "logps/chosen": -260.3411865234375, + "logps/rejected": -236.35829162597656, + "loss": 0.9825, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.223293304443359, + "rewards/margins": 1.050152063369751, + "rewards/rejected": -6.273445129394531, + "step": 7353 + }, + { + "epoch": 1.14, + "learning_rate": 8.753612739249961e-06, + "logits/chosen": -2.8924367427825928, + "logits/rejected": -2.899672269821167, + "logps/chosen": -260.2786865234375, + "logps/rejected": -464.5885925292969, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.119772911071777, + "rewards/margins": 6.115664482116699, + "rewards/rejected": -10.235437393188477, + "step": 7354 + }, + { + "epoch": 1.14, + "learning_rate": 8.752879298718813e-06, + "logits/chosen": -2.4904544353485107, + "logits/rejected": -2.645937204360962, + "logps/chosen": -83.51306915283203, + "logps/rejected": -277.3816223144531, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.486881256103516, + "rewards/margins": 7.4478302001953125, + "rewards/rejected": -12.934711456298828, + "step": 7355 + }, + { + "epoch": 1.14, + "learning_rate": 8.752145858187665e-06, + "logits/chosen": -2.875237226486206, + "logits/rejected": -3.264676809310913, + "logps/chosen": -53.09762954711914, + "logps/rejected": -167.15097045898438, + "loss": 0.0394, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.318410396575928, + "rewards/margins": 3.563872814178467, + "rewards/rejected": -7.8822832107543945, + "step": 7356 + }, + { + "epoch": 1.14, + "learning_rate": 8.751412417656517e-06, + "logits/chosen": -2.858018636703491, + "logits/rejected": -2.9597363471984863, + "logps/chosen": -120.17999267578125, + "logps/rejected": -280.924560546875, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.168308734893799, + "rewards/margins": 6.276785850524902, + "rewards/rejected": -11.44509506225586, + "step": 7357 + }, + { + "epoch": 1.14, + "learning_rate": 8.750678977125369e-06, + "logits/chosen": -2.580901622772217, + "logits/rejected": -3.176862955093384, + "logps/chosen": -124.93608093261719, + "logps/rejected": -267.1003112792969, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8734493255615234, + "rewards/margins": 4.987394332885742, + "rewards/rejected": -7.860843658447266, + "step": 7358 + }, + { + "epoch": 1.14, + "learning_rate": 8.74994553659422e-06, + "logits/chosen": -3.0360093116760254, + "logits/rejected": -2.76826548576355, + "logps/chosen": -180.85191345214844, + "logps/rejected": -250.32737731933594, + "loss": 0.4783, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.702048301696777, + "rewards/margins": 3.3041086196899414, + "rewards/rejected": -9.006156921386719, + "step": 7359 + }, + { + "epoch": 1.14, + "learning_rate": 8.749212096063072e-06, + "logits/chosen": -1.536258339881897, + "logits/rejected": -2.769448757171631, + "logps/chosen": -87.80623626708984, + "logps/rejected": -256.3673095703125, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.468204498291016, + "rewards/margins": 6.179110527038574, + "rewards/rejected": -10.647314071655273, + "step": 7360 + }, + { + "epoch": 1.14, + "learning_rate": 8.748478655531926e-06, + "logits/chosen": -2.837435722351074, + "logits/rejected": -3.0489912033081055, + "logps/chosen": -759.3095703125, + "logps/rejected": -654.274169921875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.228902816772461, + "rewards/margins": 5.812739372253418, + "rewards/rejected": -10.041642189025879, + "step": 7361 + }, + { + "epoch": 1.14, + "learning_rate": 8.747745215000778e-06, + "logits/chosen": -2.0175070762634277, + "logits/rejected": -2.92439341545105, + "logps/chosen": -477.96063232421875, + "logps/rejected": -671.9918212890625, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08380889892578125, + "rewards/margins": 9.022589683532715, + "rewards/rejected": -8.93878173828125, + "step": 7362 + }, + { + "epoch": 1.15, + "learning_rate": 8.747011774469631e-06, + "logits/chosen": -3.0662994384765625, + "logits/rejected": -2.3921029567718506, + "logps/chosen": -158.25912475585938, + "logps/rejected": -118.37339782714844, + "loss": 1.196, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.873320579528809, + "rewards/margins": 3.070563554763794, + "rewards/rejected": -7.943884372711182, + "step": 7363 + }, + { + "epoch": 1.15, + "learning_rate": 8.746278333938483e-06, + "logits/chosen": -1.3422013521194458, + "logits/rejected": -2.3890597820281982, + "logps/chosen": -188.058349609375, + "logps/rejected": -465.64068603515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0525717735290527, + "rewards/margins": 9.4404296875, + "rewards/rejected": -12.493000984191895, + "step": 7364 + }, + { + "epoch": 1.15, + "learning_rate": 8.745544893407335e-06, + "logits/chosen": -2.6683554649353027, + "logits/rejected": -2.674513816833496, + "logps/chosen": -363.2949523925781, + "logps/rejected": -517.6583862304688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.393383979797363, + "rewards/margins": 8.61074161529541, + "rewards/rejected": -14.004125595092773, + "step": 7365 + }, + { + "epoch": 1.15, + "learning_rate": 8.744811452876187e-06, + "logits/chosen": -2.8011536598205566, + "logits/rejected": -1.6405161619186401, + "logps/chosen": -153.57080078125, + "logps/rejected": -237.42056274414062, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.731321334838867, + "rewards/margins": 6.0433454513549805, + "rewards/rejected": -10.774666786193848, + "step": 7366 + }, + { + "epoch": 1.15, + "learning_rate": 8.744078012345039e-06, + "logits/chosen": -3.0072708129882812, + "logits/rejected": -2.4474990367889404, + "logps/chosen": -169.13128662109375, + "logps/rejected": -210.6829833984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6554862260818481, + "rewards/margins": 9.441452980041504, + "rewards/rejected": -10.096939086914062, + "step": 7367 + }, + { + "epoch": 1.15, + "learning_rate": 8.743344571813891e-06, + "logits/chosen": -2.7353363037109375, + "logits/rejected": -3.010317802429199, + "logps/chosen": -319.51470947265625, + "logps/rejected": -519.8687744140625, + "loss": 0.0378, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7551941871643066, + "rewards/margins": 5.326162815093994, + "rewards/rejected": -8.0813570022583, + "step": 7368 + }, + { + "epoch": 1.15, + "learning_rate": 8.742611131282743e-06, + "logits/chosen": -2.9104485511779785, + "logits/rejected": -2.6875, + "logps/chosen": -119.42229461669922, + "logps/rejected": -164.54351806640625, + "loss": 0.3619, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.870028495788574, + "rewards/margins": 3.405799150466919, + "rewards/rejected": -9.275827407836914, + "step": 7369 + }, + { + "epoch": 1.15, + "learning_rate": 8.741877690751596e-06, + "logits/chosen": -2.2300846576690674, + "logits/rejected": -2.354440212249756, + "logps/chosen": -265.9295959472656, + "logps/rejected": -387.5919494628906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1846070289611816, + "rewards/margins": 9.813992500305176, + "rewards/rejected": -12.998600006103516, + "step": 7370 + }, + { + "epoch": 1.15, + "learning_rate": 8.741144250220448e-06, + "logits/chosen": -2.6961710453033447, + "logits/rejected": -1.3189727067947388, + "logps/chosen": -198.5352325439453, + "logps/rejected": -176.69725036621094, + "loss": 0.5027, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4952774047851562, + "rewards/margins": 4.367893218994141, + "rewards/rejected": -7.863170623779297, + "step": 7371 + }, + { + "epoch": 1.15, + "learning_rate": 8.7404108096893e-06, + "logits/chosen": -2.0925681591033936, + "logits/rejected": -2.9999520778656006, + "logps/chosen": -175.598876953125, + "logps/rejected": -388.73529052734375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.438352108001709, + "rewards/margins": 8.528860092163086, + "rewards/rejected": -12.967212677001953, + "step": 7372 + }, + { + "epoch": 1.15, + "learning_rate": 8.739677369158152e-06, + "logits/chosen": -2.4514126777648926, + "logits/rejected": -3.033231496810913, + "logps/chosen": -330.7462158203125, + "logps/rejected": -307.6880187988281, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6733145713806152, + "rewards/margins": 4.863914489746094, + "rewards/rejected": -8.537229537963867, + "step": 7373 + }, + { + "epoch": 1.15, + "learning_rate": 8.738943928627004e-06, + "logits/chosen": -1.268176794052124, + "logits/rejected": -3.0276708602905273, + "logps/chosen": -161.39126586914062, + "logps/rejected": -503.69097900390625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7619519233703613, + "rewards/margins": 7.353137969970703, + "rewards/rejected": -11.115089416503906, + "step": 7374 + }, + { + "epoch": 1.15, + "learning_rate": 8.738210488095856e-06, + "logits/chosen": -2.5106475353240967, + "logits/rejected": -2.6796998977661133, + "logps/chosen": -155.05410766601562, + "logps/rejected": -203.33599853515625, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.659994602203369, + "rewards/margins": 5.018359661102295, + "rewards/rejected": -8.678354263305664, + "step": 7375 + }, + { + "epoch": 1.15, + "learning_rate": 8.737477047564708e-06, + "logits/chosen": -2.094792127609253, + "logits/rejected": -2.88356614112854, + "logps/chosen": -69.54314422607422, + "logps/rejected": -326.66571044921875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0366740226745605, + "rewards/margins": 8.060710906982422, + "rewards/rejected": -11.097384452819824, + "step": 7376 + }, + { + "epoch": 1.15, + "learning_rate": 8.73674360703356e-06, + "logits/chosen": -2.4759345054626465, + "logits/rejected": -3.0043158531188965, + "logps/chosen": -180.1175079345703, + "logps/rejected": -345.1551513671875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.705923557281494, + "rewards/margins": 6.84829044342041, + "rewards/rejected": -10.554214477539062, + "step": 7377 + }, + { + "epoch": 1.15, + "learning_rate": 8.736010166502411e-06, + "logits/chosen": -1.387945532798767, + "logits/rejected": -2.5257139205932617, + "logps/chosen": -161.6748046875, + "logps/rejected": -390.7149658203125, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.717197895050049, + "rewards/margins": 6.7025885581970215, + "rewards/rejected": -10.41978645324707, + "step": 7378 + }, + { + "epoch": 1.15, + "learning_rate": 8.735276725971265e-06, + "logits/chosen": -2.960965633392334, + "logits/rejected": -1.0243947505950928, + "logps/chosen": -303.959716796875, + "logps/rejected": -98.5177230834961, + "loss": 0.262, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.212164402008057, + "rewards/margins": 1.8437260389328003, + "rewards/rejected": -7.0558905601501465, + "step": 7379 + }, + { + "epoch": 1.15, + "learning_rate": 8.734543285440117e-06, + "logits/chosen": -2.8024063110351562, + "logits/rejected": -1.852076768875122, + "logps/chosen": -466.294189453125, + "logps/rejected": -406.5473937988281, + "loss": 0.2103, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.627893447875977, + "rewards/margins": 3.2270445823669434, + "rewards/rejected": -7.854937553405762, + "step": 7380 + }, + { + "epoch": 1.15, + "learning_rate": 8.733809844908969e-06, + "logits/chosen": -2.3400402069091797, + "logits/rejected": -2.921335220336914, + "logps/chosen": -111.67123413085938, + "logps/rejected": -254.4349365234375, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.358451843261719, + "rewards/margins": 6.268575668334961, + "rewards/rejected": -11.62702751159668, + "step": 7381 + }, + { + "epoch": 1.15, + "learning_rate": 8.73307640437782e-06, + "logits/chosen": -2.94282865524292, + "logits/rejected": -2.7369511127471924, + "logps/chosen": -561.251953125, + "logps/rejected": -568.9868774414062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0160839557647705, + "rewards/margins": 9.547972679138184, + "rewards/rejected": -11.564056396484375, + "step": 7382 + }, + { + "epoch": 1.15, + "learning_rate": 8.732342963846672e-06, + "logits/chosen": -2.343156099319458, + "logits/rejected": -2.774007797241211, + "logps/chosen": -317.7301025390625, + "logps/rejected": -526.6530151367188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.643185615539551, + "rewards/margins": 11.71034049987793, + "rewards/rejected": -15.353527069091797, + "step": 7383 + }, + { + "epoch": 1.15, + "learning_rate": 8.731609523315524e-06, + "logits/chosen": -2.765862226486206, + "logits/rejected": -2.4180877208709717, + "logps/chosen": -225.30178833007812, + "logps/rejected": -239.98788452148438, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.446408271789551, + "rewards/margins": 6.708539009094238, + "rewards/rejected": -10.154947280883789, + "step": 7384 + }, + { + "epoch": 1.15, + "learning_rate": 8.730876082784376e-06, + "logits/chosen": -1.9742013216018677, + "logits/rejected": -2.954780340194702, + "logps/chosen": -123.76873779296875, + "logps/rejected": -306.15643310546875, + "loss": 0.9445, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.649975776672363, + "rewards/margins": 2.3119993209838867, + "rewards/rejected": -8.96197509765625, + "step": 7385 + }, + { + "epoch": 1.15, + "learning_rate": 8.730142642253228e-06, + "logits/chosen": -2.945201873779297, + "logits/rejected": -2.1548309326171875, + "logps/chosen": -275.9306945800781, + "logps/rejected": -211.03192138671875, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.727769374847412, + "rewards/margins": 4.4888458251953125, + "rewards/rejected": -9.216615676879883, + "step": 7386 + }, + { + "epoch": 1.15, + "learning_rate": 8.72940920172208e-06, + "logits/chosen": -1.7898024320602417, + "logits/rejected": -2.7838737964630127, + "logps/chosen": -101.93128967285156, + "logps/rejected": -269.3031921386719, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.259511947631836, + "rewards/margins": 7.273968696594238, + "rewards/rejected": -11.533479690551758, + "step": 7387 + }, + { + "epoch": 1.15, + "learning_rate": 8.728675761190934e-06, + "logits/chosen": -2.8867290019989014, + "logits/rejected": -2.959782838821411, + "logps/chosen": -309.30078125, + "logps/rejected": -365.2146911621094, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2569122314453125, + "rewards/margins": 5.20344352722168, + "rewards/rejected": -9.460355758666992, + "step": 7388 + }, + { + "epoch": 1.15, + "learning_rate": 8.727942320659785e-06, + "logits/chosen": -2.126910924911499, + "logits/rejected": -3.053112030029297, + "logps/chosen": -160.7235565185547, + "logps/rejected": -572.3922729492188, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.793557643890381, + "rewards/margins": 7.120129585266113, + "rewards/rejected": -10.913686752319336, + "step": 7389 + }, + { + "epoch": 1.15, + "learning_rate": 8.727208880128637e-06, + "logits/chosen": -2.5414133071899414, + "logits/rejected": -2.523603677749634, + "logps/chosen": -199.0782012939453, + "logps/rejected": -229.63015747070312, + "loss": 1.9153, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.109285354614258, + "rewards/margins": 1.5293176174163818, + "rewards/rejected": -6.6386027336120605, + "step": 7390 + }, + { + "epoch": 1.15, + "learning_rate": 8.72647543959749e-06, + "logits/chosen": -1.6289070844650269, + "logits/rejected": -2.51137375831604, + "logps/chosen": -109.70834350585938, + "logps/rejected": -453.1275939941406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.029135704040527, + "rewards/margins": 13.53041934967041, + "rewards/rejected": -18.559555053710938, + "step": 7391 + }, + { + "epoch": 1.15, + "learning_rate": 8.725741999066341e-06, + "logits/chosen": -3.227206230163574, + "logits/rejected": -3.248171329498291, + "logps/chosen": -182.92115783691406, + "logps/rejected": -316.4305114746094, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.601414203643799, + "rewards/margins": 5.713279724121094, + "rewards/rejected": -9.314693450927734, + "step": 7392 + }, + { + "epoch": 1.15, + "learning_rate": 8.725008558535193e-06, + "logits/chosen": -2.301760673522949, + "logits/rejected": -2.7002618312835693, + "logps/chosen": -166.6025848388672, + "logps/rejected": -338.6888122558594, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9813133478164673, + "rewards/margins": 6.11889123916626, + "rewards/rejected": -8.100204467773438, + "step": 7393 + }, + { + "epoch": 1.15, + "learning_rate": 8.724275118004045e-06, + "logits/chosen": -2.736053705215454, + "logits/rejected": -2.838322639465332, + "logps/chosen": -514.706787109375, + "logps/rejected": -418.895263671875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.354588508605957, + "rewards/margins": 5.9102020263671875, + "rewards/rejected": -10.264790534973145, + "step": 7394 + }, + { + "epoch": 1.15, + "learning_rate": 8.723541677472898e-06, + "logits/chosen": -1.4682704210281372, + "logits/rejected": -2.770670175552368, + "logps/chosen": -79.54688262939453, + "logps/rejected": -403.27252197265625, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7082934379577637, + "rewards/margins": 10.855113983154297, + "rewards/rejected": -14.563407897949219, + "step": 7395 + }, + { + "epoch": 1.15, + "learning_rate": 8.72280823694175e-06, + "logits/chosen": -2.2342536449432373, + "logits/rejected": -3.167356014251709, + "logps/chosen": -70.89451599121094, + "logps/rejected": -286.4715270996094, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.466632604598999, + "rewards/margins": 5.468974590301514, + "rewards/rejected": -8.93560791015625, + "step": 7396 + }, + { + "epoch": 1.15, + "learning_rate": 8.722074796410604e-06, + "logits/chosen": -2.896836757659912, + "logits/rejected": -2.4269614219665527, + "logps/chosen": -167.7589569091797, + "logps/rejected": -228.11085510253906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.051754951477051, + "rewards/margins": 10.79580020904541, + "rewards/rejected": -12.847555160522461, + "step": 7397 + }, + { + "epoch": 1.15, + "learning_rate": 8.721341355879456e-06, + "logits/chosen": -2.9783763885498047, + "logits/rejected": -2.9737720489501953, + "logps/chosen": -384.9752197265625, + "logps/rejected": -433.76385498046875, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.809573173522949, + "rewards/margins": 6.5109148025512695, + "rewards/rejected": -10.320487976074219, + "step": 7398 + }, + { + "epoch": 1.15, + "learning_rate": 8.720607915348308e-06, + "logits/chosen": -2.0448157787323, + "logits/rejected": -2.7591562271118164, + "logps/chosen": -238.72854614257812, + "logps/rejected": -284.6959533691406, + "loss": 1.3677, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.066418647766113, + "rewards/margins": 2.3818695545196533, + "rewards/rejected": -7.4482879638671875, + "step": 7399 + }, + { + "epoch": 1.15, + "learning_rate": 8.71987447481716e-06, + "logits/chosen": -2.6858999729156494, + "logits/rejected": -2.7121505737304688, + "logps/chosen": -368.1336364746094, + "logps/rejected": -688.57275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.300102710723877, + "rewards/margins": 13.470721244812012, + "rewards/rejected": -17.770824432373047, + "step": 7400 + }, + { + "epoch": 1.15, + "learning_rate": 8.719141034286011e-06, + "logits/chosen": -0.6056111454963684, + "logits/rejected": -2.229975700378418, + "logps/chosen": -95.35039520263672, + "logps/rejected": -530.373291015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9557108879089355, + "rewards/margins": 10.53434944152832, + "rewards/rejected": -13.490060806274414, + "step": 7401 + }, + { + "epoch": 1.15, + "learning_rate": 8.718407593754863e-06, + "logits/chosen": -1.4377113580703735, + "logits/rejected": -2.9149184226989746, + "logps/chosen": -130.7464141845703, + "logps/rejected": -274.8253173828125, + "loss": 1.4761, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.398503303527832, + "rewards/margins": 1.0074172019958496, + "rewards/rejected": -6.405920505523682, + "step": 7402 + }, + { + "epoch": 1.15, + "learning_rate": 8.717674153223715e-06, + "logits/chosen": -2.7557966709136963, + "logits/rejected": -2.9407858848571777, + "logps/chosen": -149.39358520507812, + "logps/rejected": -286.1867370605469, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.987306594848633, + "rewards/margins": 7.165689945220947, + "rewards/rejected": -11.152996063232422, + "step": 7403 + }, + { + "epoch": 1.15, + "learning_rate": 8.716940712692567e-06, + "logits/chosen": -2.277574062347412, + "logits/rejected": -2.860776901245117, + "logps/chosen": -208.8853302001953, + "logps/rejected": -394.3707275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.301455020904541, + "rewards/margins": 10.031086921691895, + "rewards/rejected": -14.332542419433594, + "step": 7404 + }, + { + "epoch": 1.15, + "learning_rate": 8.716207272161419e-06, + "logits/chosen": -2.762716054916382, + "logits/rejected": -1.4430638551712036, + "logps/chosen": -299.9490661621094, + "logps/rejected": -287.4703063964844, + "loss": 0.7646, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.5884013175964355, + "rewards/margins": 2.735802412033081, + "rewards/rejected": -8.324203491210938, + "step": 7405 + }, + { + "epoch": 1.15, + "learning_rate": 8.715473831630272e-06, + "logits/chosen": -2.974677801132202, + "logits/rejected": -3.11938738822937, + "logps/chosen": -357.1786193847656, + "logps/rejected": -237.95574951171875, + "loss": 1.5048, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.661024570465088, + "rewards/margins": 1.528716802597046, + "rewards/rejected": -6.189741611480713, + "step": 7406 + }, + { + "epoch": 1.15, + "learning_rate": 8.714740391099124e-06, + "logits/chosen": -3.137600898742676, + "logits/rejected": -2.313767671585083, + "logps/chosen": -337.4771728515625, + "logps/rejected": -230.9915008544922, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3070008754730225, + "rewards/margins": 5.87308406829834, + "rewards/rejected": -8.180084228515625, + "step": 7407 + }, + { + "epoch": 1.15, + "learning_rate": 8.714006950567976e-06, + "logits/chosen": -3.0023694038391113, + "logits/rejected": -2.0616323947906494, + "logps/chosen": -263.70941162109375, + "logps/rejected": -201.58688354492188, + "loss": 0.1987, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.264337539672852, + "rewards/margins": 2.7544972896575928, + "rewards/rejected": -8.018835067749023, + "step": 7408 + }, + { + "epoch": 1.15, + "learning_rate": 8.713273510036828e-06, + "logits/chosen": -1.3847349882125854, + "logits/rejected": -2.795900821685791, + "logps/chosen": -195.89398193359375, + "logps/rejected": -361.0355224609375, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.299917697906494, + "rewards/margins": 7.550292015075684, + "rewards/rejected": -9.850210189819336, + "step": 7409 + }, + { + "epoch": 1.15, + "learning_rate": 8.71254006950568e-06, + "logits/chosen": -0.9357559680938721, + "logits/rejected": -2.9298415184020996, + "logps/chosen": -146.4634246826172, + "logps/rejected": -638.4910888671875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6792306900024414, + "rewards/margins": 6.77216911315918, + "rewards/rejected": -10.451400756835938, + "step": 7410 + }, + { + "epoch": 1.15, + "learning_rate": 8.711806628974532e-06, + "logits/chosen": -2.7948923110961914, + "logits/rejected": -2.6619913578033447, + "logps/chosen": -500.3782043457031, + "logps/rejected": -489.93231201171875, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.673741340637207, + "rewards/margins": 6.961910247802734, + "rewards/rejected": -10.635650634765625, + "step": 7411 + }, + { + "epoch": 1.15, + "learning_rate": 8.711073188443384e-06, + "logits/chosen": -2.837078809738159, + "logits/rejected": -2.4290664196014404, + "logps/chosen": -227.2849884033203, + "logps/rejected": -214.07757568359375, + "loss": 2.2853, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.921304702758789, + "rewards/margins": -1.6223127841949463, + "rewards/rejected": -7.298992156982422, + "step": 7412 + }, + { + "epoch": 1.15, + "learning_rate": 8.710339747912236e-06, + "logits/chosen": -1.572745442390442, + "logits/rejected": -2.864281177520752, + "logps/chosen": -278.91461181640625, + "logps/rejected": -386.982421875, + "loss": 2.1559, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.8206682205200195, + "rewards/margins": 2.9593374729156494, + "rewards/rejected": -9.78000545501709, + "step": 7413 + }, + { + "epoch": 1.15, + "learning_rate": 8.709606307381087e-06, + "logits/chosen": -1.8385318517684937, + "logits/rejected": -2.820831298828125, + "logps/chosen": -124.0674057006836, + "logps/rejected": -240.22293090820312, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6360931396484375, + "rewards/margins": 5.166600227355957, + "rewards/rejected": -9.802693367004395, + "step": 7414 + }, + { + "epoch": 1.15, + "learning_rate": 8.708872866849941e-06, + "logits/chosen": -1.9243099689483643, + "logits/rejected": -2.761507511138916, + "logps/chosen": -135.39605712890625, + "logps/rejected": -355.57586669921875, + "loss": 0.0624, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.181325912475586, + "rewards/margins": 7.166818141937256, + "rewards/rejected": -11.348143577575684, + "step": 7415 + }, + { + "epoch": 1.15, + "learning_rate": 8.708139426318793e-06, + "logits/chosen": -2.705671787261963, + "logits/rejected": -3.1079232692718506, + "logps/chosen": -141.72885131835938, + "logps/rejected": -216.79534912109375, + "loss": 2.2716, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.101228713989258, + "rewards/margins": 0.39717531204223633, + "rewards/rejected": -5.498404026031494, + "step": 7416 + }, + { + "epoch": 1.15, + "learning_rate": 8.707405985787645e-06, + "logits/chosen": -1.6715881824493408, + "logits/rejected": -2.9613826274871826, + "logps/chosen": -430.718017578125, + "logps/rejected": -607.8517456054688, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6739501953125, + "rewards/margins": 7.088144302368164, + "rewards/rejected": -11.762094497680664, + "step": 7417 + }, + { + "epoch": 1.15, + "learning_rate": 8.706672545256497e-06, + "logits/chosen": -2.924180269241333, + "logits/rejected": -3.1232588291168213, + "logps/chosen": -153.26568603515625, + "logps/rejected": -183.29629516601562, + "loss": 1.5492, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.427240371704102, + "rewards/margins": 1.801419734954834, + "rewards/rejected": -9.228659629821777, + "step": 7418 + }, + { + "epoch": 1.15, + "learning_rate": 8.705939104725349e-06, + "logits/chosen": -3.037353515625, + "logits/rejected": -2.8390185832977295, + "logps/chosen": -151.26914978027344, + "logps/rejected": -226.07058715820312, + "loss": 0.0583, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.47288703918457, + "rewards/margins": 5.486878871917725, + "rewards/rejected": -9.959765434265137, + "step": 7419 + }, + { + "epoch": 1.15, + "learning_rate": 8.7052056641942e-06, + "logits/chosen": -2.1572492122650146, + "logits/rejected": -2.8641653060913086, + "logps/chosen": -186.99685668945312, + "logps/rejected": -332.9185791015625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3354315757751465, + "rewards/margins": 6.76854133605957, + "rewards/rejected": -10.103973388671875, + "step": 7420 + }, + { + "epoch": 1.15, + "learning_rate": 8.704472223663052e-06, + "logits/chosen": -2.198669910430908, + "logits/rejected": -2.9669716358184814, + "logps/chosen": -247.4048309326172, + "logps/rejected": -374.1720275878906, + "loss": 0.4222, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.176072120666504, + "rewards/margins": 4.609205722808838, + "rewards/rejected": -10.785277366638184, + "step": 7421 + }, + { + "epoch": 1.15, + "learning_rate": 8.703738783131904e-06, + "logits/chosen": -1.666284441947937, + "logits/rejected": -2.9742696285247803, + "logps/chosen": -233.63888549804688, + "logps/rejected": -558.1893310546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.656407356262207, + "rewards/margins": 12.158045768737793, + "rewards/rejected": -15.814453125, + "step": 7422 + }, + { + "epoch": 1.15, + "learning_rate": 8.703005342600756e-06, + "logits/chosen": -2.7908542156219482, + "logits/rejected": -2.9890129566192627, + "logps/chosen": -273.0413818359375, + "logps/rejected": -342.2874450683594, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.305185079574585, + "rewards/margins": 6.7063493728637695, + "rewards/rejected": -10.011533737182617, + "step": 7423 + }, + { + "epoch": 1.15, + "learning_rate": 8.70227190206961e-06, + "logits/chosen": -3.1414263248443604, + "logits/rejected": -3.077115297317505, + "logps/chosen": -205.67074584960938, + "logps/rejected": -210.2327880859375, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5168380737304688, + "rewards/margins": 6.4062395095825195, + "rewards/rejected": -7.923077583312988, + "step": 7424 + }, + { + "epoch": 1.15, + "learning_rate": 8.701538461538461e-06, + "logits/chosen": -2.7058136463165283, + "logits/rejected": -3.004218101501465, + "logps/chosen": -68.6490249633789, + "logps/rejected": -251.1140899658203, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.529380798339844, + "rewards/margins": 6.523263931274414, + "rewards/rejected": -11.052644729614258, + "step": 7425 + }, + { + "epoch": 1.15, + "learning_rate": 8.700805021007313e-06, + "logits/chosen": -3.052361488342285, + "logits/rejected": -2.9505064487457275, + "logps/chosen": -128.39224243164062, + "logps/rejected": -285.62841796875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.276678562164307, + "rewards/margins": 7.537189483642578, + "rewards/rejected": -11.813867568969727, + "step": 7426 + }, + { + "epoch": 1.16, + "learning_rate": 8.700071580476165e-06, + "logits/chosen": -2.809354782104492, + "logits/rejected": -2.7826569080352783, + "logps/chosen": -157.32794189453125, + "logps/rejected": -196.70388793945312, + "loss": 0.4203, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.391412734985352, + "rewards/margins": 3.5306599140167236, + "rewards/rejected": -7.922072410583496, + "step": 7427 + }, + { + "epoch": 1.16, + "learning_rate": 8.699338139945017e-06, + "logits/chosen": -1.025495171546936, + "logits/rejected": -2.59125018119812, + "logps/chosen": -100.9554443359375, + "logps/rejected": -596.7052001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.659013271331787, + "rewards/margins": 11.460762023925781, + "rewards/rejected": -14.119775772094727, + "step": 7428 + }, + { + "epoch": 1.16, + "learning_rate": 8.69860469941387e-06, + "logits/chosen": -3.117980718612671, + "logits/rejected": -3.15824031829834, + "logps/chosen": -252.8946990966797, + "logps/rejected": -277.9496765136719, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.680266857147217, + "rewards/margins": 6.322050094604492, + "rewards/rejected": -10.002317428588867, + "step": 7429 + }, + { + "epoch": 1.16, + "learning_rate": 8.697871258882723e-06, + "logits/chosen": -1.135848879814148, + "logits/rejected": -2.83644962310791, + "logps/chosen": -227.828857421875, + "logps/rejected": -301.25042724609375, + "loss": 1.5006, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.33079195022583, + "rewards/margins": 0.31276679039001465, + "rewards/rejected": -5.643558502197266, + "step": 7430 + }, + { + "epoch": 1.16, + "learning_rate": 8.697137818351574e-06, + "logits/chosen": -1.021429419517517, + "logits/rejected": -2.6454951763153076, + "logps/chosen": -87.84822082519531, + "logps/rejected": -320.65399169921875, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.526716232299805, + "rewards/margins": 4.879956245422363, + "rewards/rejected": -10.406672477722168, + "step": 7431 + }, + { + "epoch": 1.16, + "learning_rate": 8.696404377820426e-06, + "logits/chosen": -2.864574909210205, + "logits/rejected": -1.290681004524231, + "logps/chosen": -303.9737548828125, + "logps/rejected": -96.6882095336914, + "loss": 1.9872, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.877638339996338, + "rewards/margins": -1.8357915878295898, + "rewards/rejected": -5.041846752166748, + "step": 7432 + }, + { + "epoch": 1.16, + "learning_rate": 8.69567093728928e-06, + "logits/chosen": -2.0994374752044678, + "logits/rejected": -3.183612108230591, + "logps/chosen": -113.89027404785156, + "logps/rejected": -367.35748291015625, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.094999313354492, + "rewards/margins": 7.053797721862793, + "rewards/rejected": -11.148797035217285, + "step": 7433 + }, + { + "epoch": 1.16, + "learning_rate": 8.694937496758132e-06, + "logits/chosen": -2.465726613998413, + "logits/rejected": -2.761197566986084, + "logps/chosen": -133.83616638183594, + "logps/rejected": -270.42498779296875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5281593799591064, + "rewards/margins": 7.159724235534668, + "rewards/rejected": -9.687883377075195, + "step": 7434 + }, + { + "epoch": 1.16, + "learning_rate": 8.694204056226984e-06, + "logits/chosen": -3.0031514167785645, + "logits/rejected": -2.2776072025299072, + "logps/chosen": -224.7953338623047, + "logps/rejected": -203.8005828857422, + "loss": 0.4379, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.733695983886719, + "rewards/margins": 4.249974250793457, + "rewards/rejected": -8.983670234680176, + "step": 7435 + }, + { + "epoch": 1.16, + "learning_rate": 8.693470615695836e-06, + "logits/chosen": -2.5076966285705566, + "logits/rejected": -2.826374053955078, + "logps/chosen": -134.01492309570312, + "logps/rejected": -170.78448486328125, + "loss": 0.1571, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0695600509643555, + "rewards/margins": 4.732869625091553, + "rewards/rejected": -9.80242919921875, + "step": 7436 + }, + { + "epoch": 1.16, + "learning_rate": 8.692737175164687e-06, + "logits/chosen": -2.572533130645752, + "logits/rejected": -3.1173322200775146, + "logps/chosen": -418.2086181640625, + "logps/rejected": -566.2310180664062, + "loss": 0.0422, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9827041625976562, + "rewards/margins": 3.8460311889648438, + "rewards/rejected": -7.8287353515625, + "step": 7437 + }, + { + "epoch": 1.16, + "learning_rate": 8.69200373463354e-06, + "logits/chosen": -3.0489468574523926, + "logits/rejected": -2.4618520736694336, + "logps/chosen": -175.94570922851562, + "logps/rejected": -215.61654663085938, + "loss": 0.0549, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6286568641662598, + "rewards/margins": 5.39445686340332, + "rewards/rejected": -9.023114204406738, + "step": 7438 + }, + { + "epoch": 1.16, + "learning_rate": 8.691270294102391e-06, + "logits/chosen": -2.654921054840088, + "logits/rejected": -2.969958782196045, + "logps/chosen": -102.86904907226562, + "logps/rejected": -347.7743225097656, + "loss": 0.0235, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.505337715148926, + "rewards/margins": 5.976593017578125, + "rewards/rejected": -11.48193073272705, + "step": 7439 + }, + { + "epoch": 1.16, + "learning_rate": 8.690536853571243e-06, + "logits/chosen": -3.003326654434204, + "logits/rejected": -3.0413174629211426, + "logps/chosen": -198.591552734375, + "logps/rejected": -233.04562377929688, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0832061767578125, + "rewards/margins": 5.951803684234619, + "rewards/rejected": -10.035009384155273, + "step": 7440 + }, + { + "epoch": 1.16, + "learning_rate": 8.689803413040095e-06, + "logits/chosen": -3.071017026901245, + "logits/rejected": -3.0816006660461426, + "logps/chosen": -167.01654052734375, + "logps/rejected": -232.03570556640625, + "loss": 0.3534, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.723665714263916, + "rewards/margins": 5.187410831451416, + "rewards/rejected": -9.911076545715332, + "step": 7441 + }, + { + "epoch": 1.16, + "learning_rate": 8.689069972508949e-06, + "logits/chosen": -3.0943572521209717, + "logits/rejected": -2.892631769180298, + "logps/chosen": -208.89776611328125, + "logps/rejected": -241.92933654785156, + "loss": 0.2485, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4102966785430908, + "rewards/margins": 4.4969482421875, + "rewards/rejected": -5.907244682312012, + "step": 7442 + }, + { + "epoch": 1.16, + "learning_rate": 8.6883365319778e-06, + "logits/chosen": -2.261715888977051, + "logits/rejected": -3.0277884006500244, + "logps/chosen": -239.10536193847656, + "logps/rejected": -258.9984436035156, + "loss": 0.3829, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.180391788482666, + "rewards/margins": 4.879250526428223, + "rewards/rejected": -8.059642791748047, + "step": 7443 + }, + { + "epoch": 1.16, + "learning_rate": 8.687603091446652e-06, + "logits/chosen": -1.2014939785003662, + "logits/rejected": -2.8563296794891357, + "logps/chosen": -95.21180725097656, + "logps/rejected": -241.14974975585938, + "loss": 0.0522, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1895670890808105, + "rewards/margins": 4.72261905670166, + "rewards/rejected": -8.912185668945312, + "step": 7444 + }, + { + "epoch": 1.16, + "learning_rate": 8.686869650915504e-06, + "logits/chosen": -2.6410515308380127, + "logits/rejected": -1.9187370538711548, + "logps/chosen": -139.5452423095703, + "logps/rejected": -137.78619384765625, + "loss": 0.9637, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.696157932281494, + "rewards/margins": 1.2102386951446533, + "rewards/rejected": -6.906396865844727, + "step": 7445 + }, + { + "epoch": 1.16, + "learning_rate": 8.686136210384356e-06, + "logits/chosen": -1.7870880365371704, + "logits/rejected": -2.814661979675293, + "logps/chosen": -96.00916290283203, + "logps/rejected": -237.87709045410156, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9175310134887695, + "rewards/margins": 4.331010341644287, + "rewards/rejected": -8.248541831970215, + "step": 7446 + }, + { + "epoch": 1.16, + "learning_rate": 8.685402769853208e-06, + "logits/chosen": -1.872934341430664, + "logits/rejected": -3.1512176990509033, + "logps/chosen": -95.13388061523438, + "logps/rejected": -400.48968505859375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.952272415161133, + "rewards/margins": 6.452136993408203, + "rewards/rejected": -12.404409408569336, + "step": 7447 + }, + { + "epoch": 1.16, + "learning_rate": 8.68466932932206e-06, + "logits/chosen": -2.5651962757110596, + "logits/rejected": -2.7883198261260986, + "logps/chosen": -206.99917602539062, + "logps/rejected": -226.32199096679688, + "loss": 0.0725, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.014484405517578, + "rewards/margins": 5.893181800842285, + "rewards/rejected": -9.907665252685547, + "step": 7448 + }, + { + "epoch": 1.16, + "learning_rate": 8.683935888790912e-06, + "logits/chosen": -3.2205374240875244, + "logits/rejected": -2.7458608150482178, + "logps/chosen": -622.3322143554688, + "logps/rejected": -573.5126342773438, + "loss": 0.0732, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6856170892715454, + "rewards/margins": 9.918329238891602, + "rewards/rejected": -11.603946685791016, + "step": 7449 + }, + { + "epoch": 1.16, + "learning_rate": 8.683202448259764e-06, + "logits/chosen": -2.2134454250335693, + "logits/rejected": -3.1676666736602783, + "logps/chosen": -91.42259216308594, + "logps/rejected": -557.6824951171875, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2406091690063477, + "rewards/margins": 7.901764869689941, + "rewards/rejected": -11.142374038696289, + "step": 7450 + }, + { + "epoch": 1.16, + "learning_rate": 8.682469007728617e-06, + "logits/chosen": -2.7062370777130127, + "logits/rejected": -3.129692316055298, + "logps/chosen": -300.53533935546875, + "logps/rejected": -483.88916015625, + "loss": 0.1314, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.025237083435059, + "rewards/margins": 3.629131317138672, + "rewards/rejected": -8.65436840057373, + "step": 7451 + }, + { + "epoch": 1.16, + "learning_rate": 8.681735567197469e-06, + "logits/chosen": -2.9740090370178223, + "logits/rejected": -3.076873540878296, + "logps/chosen": -98.63248443603516, + "logps/rejected": -166.34341430664062, + "loss": 0.1046, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4674878120422363, + "rewards/margins": 4.948722839355469, + "rewards/rejected": -8.416210174560547, + "step": 7452 + }, + { + "epoch": 1.16, + "learning_rate": 8.681002126666321e-06, + "logits/chosen": -2.917431116104126, + "logits/rejected": -2.5354435443878174, + "logps/chosen": -188.3416748046875, + "logps/rejected": -329.01263427734375, + "loss": 0.1341, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.246070623397827, + "rewards/margins": 4.114907741546631, + "rewards/rejected": -7.360978126525879, + "step": 7453 + }, + { + "epoch": 1.16, + "learning_rate": 8.680268686135173e-06, + "logits/chosen": -2.979928731918335, + "logits/rejected": -3.199700355529785, + "logps/chosen": -259.43487548828125, + "logps/rejected": -468.7115478515625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1282302886247635, + "rewards/margins": 6.849874973297119, + "rewards/rejected": -6.978105545043945, + "step": 7454 + }, + { + "epoch": 1.16, + "learning_rate": 8.679535245604025e-06, + "logits/chosen": -2.922654390335083, + "logits/rejected": -2.744147777557373, + "logps/chosen": -395.0523681640625, + "logps/rejected": -511.30133056640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.182210683822632, + "rewards/margins": 9.468179702758789, + "rewards/rejected": -11.650390625, + "step": 7455 + }, + { + "epoch": 1.16, + "learning_rate": 8.678801805072876e-06, + "logits/chosen": -2.1555140018463135, + "logits/rejected": -3.0521240234375, + "logps/chosen": -120.93048095703125, + "logps/rejected": -287.3984375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.541815757751465, + "rewards/margins": 7.47606086730957, + "rewards/rejected": -11.017876625061035, + "step": 7456 + }, + { + "epoch": 1.16, + "learning_rate": 8.678068364541728e-06, + "logits/chosen": -3.1130003929138184, + "logits/rejected": -2.6901674270629883, + "logps/chosen": -542.6792602539062, + "logps/rejected": -498.8345947265625, + "loss": 0.064, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.925629138946533, + "rewards/margins": 4.5642991065979, + "rewards/rejected": -7.489928245544434, + "step": 7457 + }, + { + "epoch": 1.16, + "learning_rate": 8.67733492401058e-06, + "logits/chosen": -1.518249273300171, + "logits/rejected": -3.047004461288452, + "logps/chosen": -68.58090209960938, + "logps/rejected": -313.6401672363281, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.947265148162842, + "rewards/margins": 6.544622421264648, + "rewards/rejected": -9.491888046264648, + "step": 7458 + }, + { + "epoch": 1.16, + "learning_rate": 8.676601483479434e-06, + "logits/chosen": -2.7553608417510986, + "logits/rejected": -3.2072930335998535, + "logps/chosen": -289.144775390625, + "logps/rejected": -428.6990661621094, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.645141065120697, + "rewards/margins": 7.31693172454834, + "rewards/rejected": -7.962072849273682, + "step": 7459 + }, + { + "epoch": 1.16, + "learning_rate": 8.675868042948286e-06, + "logits/chosen": -1.420335292816162, + "logits/rejected": -2.7478013038635254, + "logps/chosen": -116.52491760253906, + "logps/rejected": -254.43638610839844, + "loss": 0.4743, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5262610912323, + "rewards/margins": 4.650997161865234, + "rewards/rejected": -8.177258491516113, + "step": 7460 + }, + { + "epoch": 1.16, + "learning_rate": 8.675134602417138e-06, + "logits/chosen": -1.835878610610962, + "logits/rejected": -3.168621063232422, + "logps/chosen": -157.79745483398438, + "logps/rejected": -359.7301330566406, + "loss": 0.0396, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.395383596420288, + "rewards/margins": 3.536142349243164, + "rewards/rejected": -6.931526184082031, + "step": 7461 + }, + { + "epoch": 1.16, + "learning_rate": 8.67440116188599e-06, + "logits/chosen": -2.019066333770752, + "logits/rejected": -2.788086175918579, + "logps/chosen": -158.58055114746094, + "logps/rejected": -397.2568664550781, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.862053394317627, + "rewards/margins": 6.798432350158691, + "rewards/rejected": -8.660486221313477, + "step": 7462 + }, + { + "epoch": 1.16, + "learning_rate": 8.673667721354843e-06, + "logits/chosen": -2.9162871837615967, + "logits/rejected": -3.023125171661377, + "logps/chosen": -99.30106353759766, + "logps/rejected": -335.91290283203125, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.026390075683594, + "rewards/margins": 6.375856399536133, + "rewards/rejected": -10.402246475219727, + "step": 7463 + }, + { + "epoch": 1.16, + "learning_rate": 8.672934280823695e-06, + "logits/chosen": -0.82932448387146, + "logits/rejected": -2.905662775039673, + "logps/chosen": -93.82505798339844, + "logps/rejected": -368.7984619140625, + "loss": 1.3108, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.10577392578125, + "rewards/margins": 2.135751724243164, + "rewards/rejected": -10.241525650024414, + "step": 7464 + }, + { + "epoch": 1.16, + "learning_rate": 8.672200840292547e-06, + "logits/chosen": -2.9797465801239014, + "logits/rejected": -2.267483711242676, + "logps/chosen": -225.86993408203125, + "logps/rejected": -230.92178344726562, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.877513408660889, + "rewards/margins": 4.219869136810303, + "rewards/rejected": -9.097382545471191, + "step": 7465 + }, + { + "epoch": 1.16, + "learning_rate": 8.671467399761399e-06, + "logits/chosen": -2.986858606338501, + "logits/rejected": -3.141624927520752, + "logps/chosen": -81.65941619873047, + "logps/rejected": -161.95668029785156, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9569973945617676, + "rewards/margins": 5.720134258270264, + "rewards/rejected": -9.677131652832031, + "step": 7466 + }, + { + "epoch": 1.16, + "learning_rate": 8.67073395923025e-06, + "logits/chosen": -1.7891772985458374, + "logits/rejected": -2.887188196182251, + "logps/chosen": -137.37074279785156, + "logps/rejected": -467.85418701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7532532215118408, + "rewards/margins": 11.140480041503906, + "rewards/rejected": -12.893733978271484, + "step": 7467 + }, + { + "epoch": 1.16, + "learning_rate": 8.670000518699104e-06, + "logits/chosen": -1.652543067932129, + "logits/rejected": -1.7674469947814941, + "logps/chosen": -465.5643005371094, + "logps/rejected": -509.28271484375, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6695899963378906, + "rewards/margins": 9.612377166748047, + "rewards/rejected": -11.281967163085938, + "step": 7468 + }, + { + "epoch": 1.16, + "learning_rate": 8.669267078167956e-06, + "logits/chosen": -2.5007388591766357, + "logits/rejected": -3.0457613468170166, + "logps/chosen": -73.35600280761719, + "logps/rejected": -173.69981384277344, + "loss": 0.3231, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9861979484558105, + "rewards/margins": 3.0207297801971436, + "rewards/rejected": -8.006927490234375, + "step": 7469 + }, + { + "epoch": 1.16, + "learning_rate": 8.668533637636808e-06, + "logits/chosen": -1.1823445558547974, + "logits/rejected": -2.8316147327423096, + "logps/chosen": -142.28794860839844, + "logps/rejected": -468.3078308105469, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.599589824676514, + "rewards/margins": 8.332383155822754, + "rewards/rejected": -13.93197250366211, + "step": 7470 + }, + { + "epoch": 1.16, + "learning_rate": 8.66780019710566e-06, + "logits/chosen": -2.859588623046875, + "logits/rejected": -1.3784780502319336, + "logps/chosen": -203.52406311035156, + "logps/rejected": -111.17794799804688, + "loss": 0.7414, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.7913498878479, + "rewards/margins": 1.0233474969863892, + "rewards/rejected": -5.814697265625, + "step": 7471 + }, + { + "epoch": 1.16, + "learning_rate": 8.667066756574512e-06, + "logits/chosen": -1.5466187000274658, + "logits/rejected": -2.8781988620758057, + "logps/chosen": -174.42938232421875, + "logps/rejected": -378.12615966796875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.67570161819458, + "rewards/margins": 7.923384189605713, + "rewards/rejected": -11.599085807800293, + "step": 7472 + }, + { + "epoch": 1.16, + "learning_rate": 8.666333316043364e-06, + "logits/chosen": -3.269986629486084, + "logits/rejected": -1.9407742023468018, + "logps/chosen": -597.6739501953125, + "logps/rejected": -235.24720764160156, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5831421613693237, + "rewards/margins": 5.4924540519714355, + "rewards/rejected": -7.075595855712891, + "step": 7473 + }, + { + "epoch": 1.16, + "learning_rate": 8.665599875512215e-06, + "logits/chosen": -2.869473934173584, + "logits/rejected": -1.9849932193756104, + "logps/chosen": -200.17306518554688, + "logps/rejected": -187.8450164794922, + "loss": 0.3408, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.935826301574707, + "rewards/margins": 2.65799880027771, + "rewards/rejected": -7.593824863433838, + "step": 7474 + }, + { + "epoch": 1.16, + "learning_rate": 8.664866434981067e-06, + "logits/chosen": -2.983807325363159, + "logits/rejected": -3.0555551052093506, + "logps/chosen": -248.66107177734375, + "logps/rejected": -178.92465209960938, + "loss": 0.0948, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2038462162017822, + "rewards/margins": 4.319828510284424, + "rewards/rejected": -7.523674964904785, + "step": 7475 + }, + { + "epoch": 1.16, + "learning_rate": 8.664132994449919e-06, + "logits/chosen": -2.500074625015259, + "logits/rejected": -2.9380245208740234, + "logps/chosen": -150.7126007080078, + "logps/rejected": -193.64036560058594, + "loss": 0.4749, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.777322769165039, + "rewards/margins": 2.8710827827453613, + "rewards/rejected": -8.648405075073242, + "step": 7476 + }, + { + "epoch": 1.16, + "learning_rate": 8.663399553918773e-06, + "logits/chosen": -1.3006060123443604, + "logits/rejected": -2.764296770095825, + "logps/chosen": -134.11770629882812, + "logps/rejected": -356.5753173828125, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.428959846496582, + "rewards/margins": 7.809016227722168, + "rewards/rejected": -12.23797607421875, + "step": 7477 + }, + { + "epoch": 1.16, + "learning_rate": 8.662666113387625e-06, + "logits/chosen": -1.8130385875701904, + "logits/rejected": -2.7785379886627197, + "logps/chosen": -150.25115966796875, + "logps/rejected": -225.08242797851562, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7092397212982178, + "rewards/margins": 6.299528121948242, + "rewards/rejected": -10.008768081665039, + "step": 7478 + }, + { + "epoch": 1.16, + "learning_rate": 8.661932672856476e-06, + "logits/chosen": -2.1900699138641357, + "logits/rejected": -3.17988920211792, + "logps/chosen": -203.64747619628906, + "logps/rejected": -486.6485900878906, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0281991958618164, + "rewards/margins": 6.470688343048096, + "rewards/rejected": -9.49888801574707, + "step": 7479 + }, + { + "epoch": 1.16, + "learning_rate": 8.661199232325328e-06, + "logits/chosen": -2.624485969543457, + "logits/rejected": -2.9462711811065674, + "logps/chosen": -164.89987182617188, + "logps/rejected": -346.2420349121094, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7102315425872803, + "rewards/margins": 6.125297546386719, + "rewards/rejected": -8.835529327392578, + "step": 7480 + }, + { + "epoch": 1.16, + "learning_rate": 8.66046579179418e-06, + "logits/chosen": -2.9415700435638428, + "logits/rejected": -1.6867990493774414, + "logps/chosen": -556.5693969726562, + "logps/rejected": -290.4497375488281, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.272816181182861, + "rewards/margins": 5.812035083770752, + "rewards/rejected": -11.084851264953613, + "step": 7481 + }, + { + "epoch": 1.16, + "learning_rate": 8.659732351263032e-06, + "logits/chosen": -2.077327251434326, + "logits/rejected": -2.3787901401519775, + "logps/chosen": -146.66015625, + "logps/rejected": -368.62579345703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.588410377502441, + "rewards/margins": 9.311793327331543, + "rewards/rejected": -15.900203704833984, + "step": 7482 + }, + { + "epoch": 1.16, + "learning_rate": 8.658998910731884e-06, + "logits/chosen": -1.546647548675537, + "logits/rejected": -2.596627712249756, + "logps/chosen": -172.18310546875, + "logps/rejected": -397.99090576171875, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.236232280731201, + "rewards/margins": 7.707551002502441, + "rewards/rejected": -12.943782806396484, + "step": 7483 + }, + { + "epoch": 1.16, + "learning_rate": 8.658265470200736e-06, + "logits/chosen": -2.762662887573242, + "logits/rejected": -2.0210986137390137, + "logps/chosen": -431.57061767578125, + "logps/rejected": -365.201416015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9500243663787842, + "rewards/margins": 10.16817855834961, + "rewards/rejected": -12.118202209472656, + "step": 7484 + }, + { + "epoch": 1.16, + "learning_rate": 8.657532029669588e-06, + "logits/chosen": -1.7504459619522095, + "logits/rejected": -3.016834020614624, + "logps/chosen": -255.35108947753906, + "logps/rejected": -391.2882385253906, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6583619117736816, + "rewards/margins": 8.621709823608398, + "rewards/rejected": -11.280072212219238, + "step": 7485 + }, + { + "epoch": 1.16, + "learning_rate": 8.656798589138441e-06, + "logits/chosen": -0.9986833930015564, + "logits/rejected": -2.6915345191955566, + "logps/chosen": -72.15754699707031, + "logps/rejected": -384.63714599609375, + "loss": 0.0425, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1421799659729, + "rewards/margins": 6.550652503967285, + "rewards/rejected": -11.692832946777344, + "step": 7486 + }, + { + "epoch": 1.16, + "learning_rate": 8.656065148607293e-06, + "logits/chosen": -2.276456117630005, + "logits/rejected": -2.5404186248779297, + "logps/chosen": -77.48971557617188, + "logps/rejected": -184.4097900390625, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.694948196411133, + "rewards/margins": 5.211413383483887, + "rewards/rejected": -9.90636157989502, + "step": 7487 + }, + { + "epoch": 1.16, + "learning_rate": 8.655331708076145e-06, + "logits/chosen": -2.791294813156128, + "logits/rejected": -2.1830546855926514, + "logps/chosen": -113.18264770507812, + "logps/rejected": -249.2699432373047, + "loss": 0.9993, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.8578715324401855, + "rewards/margins": 5.873132705688477, + "rewards/rejected": -10.73100471496582, + "step": 7488 + }, + { + "epoch": 1.16, + "learning_rate": 8.654598267544997e-06, + "logits/chosen": -2.6226158142089844, + "logits/rejected": -3.0184926986694336, + "logps/chosen": -416.93853759765625, + "logps/rejected": -379.5238037109375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.669431209564209, + "rewards/margins": 7.9897027015686035, + "rewards/rejected": -12.659133911132812, + "step": 7489 + }, + { + "epoch": 1.16, + "learning_rate": 8.653864827013849e-06, + "logits/chosen": -3.057769536972046, + "logits/rejected": -2.770771026611328, + "logps/chosen": -566.39111328125, + "logps/rejected": -416.6923828125, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.849012851715088, + "rewards/margins": 5.235325813293457, + "rewards/rejected": -10.084339141845703, + "step": 7490 + }, + { + "epoch": 1.17, + "learning_rate": 8.6531313864827e-06, + "logits/chosen": -2.6002635955810547, + "logits/rejected": -3.2734992504119873, + "logps/chosen": -164.5376434326172, + "logps/rejected": -295.211669921875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6177186965942383, + "rewards/margins": 6.185304641723633, + "rewards/rejected": -8.803022384643555, + "step": 7491 + }, + { + "epoch": 1.17, + "learning_rate": 8.652397945951553e-06, + "logits/chosen": -3.0635857582092285, + "logits/rejected": -2.837618589401245, + "logps/chosen": -379.1446838378906, + "logps/rejected": -524.9169311523438, + "loss": 0.5304, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.269911766052246, + "rewards/margins": 3.9883217811584473, + "rewards/rejected": -10.258234024047852, + "step": 7492 + }, + { + "epoch": 1.17, + "learning_rate": 8.651664505420404e-06, + "logits/chosen": -2.78200364112854, + "logits/rejected": -3.0066702365875244, + "logps/chosen": -614.98828125, + "logps/rejected": -659.7523193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.247233510017395, + "rewards/margins": 11.272819519042969, + "rewards/rejected": -12.520052909851074, + "step": 7493 + }, + { + "epoch": 1.17, + "learning_rate": 8.650931064889256e-06, + "logits/chosen": -2.660355806350708, + "logits/rejected": -3.0714521408081055, + "logps/chosen": -100.41098022460938, + "logps/rejected": -344.8694763183594, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9680263996124268, + "rewards/margins": 7.542965888977051, + "rewards/rejected": -10.510992050170898, + "step": 7494 + }, + { + "epoch": 1.17, + "learning_rate": 8.65019762435811e-06, + "logits/chosen": -1.4315983057022095, + "logits/rejected": -2.641324043273926, + "logps/chosen": -178.68124389648438, + "logps/rejected": -266.51123046875, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2977452278137207, + "rewards/margins": 4.786260604858398, + "rewards/rejected": -8.084006309509277, + "step": 7495 + }, + { + "epoch": 1.17, + "learning_rate": 8.649464183826962e-06, + "logits/chosen": -2.960693359375, + "logits/rejected": -2.8292770385742188, + "logps/chosen": -660.1484985351562, + "logps/rejected": -568.809326171875, + "loss": 0.4866, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.093407154083252, + "rewards/margins": 4.4274821281433105, + "rewards/rejected": -10.520889282226562, + "step": 7496 + }, + { + "epoch": 1.17, + "learning_rate": 8.648730743295815e-06, + "logits/chosen": -2.5324454307556152, + "logits/rejected": -2.8145670890808105, + "logps/chosen": -107.33848571777344, + "logps/rejected": -334.3057861328125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.871276378631592, + "rewards/margins": 10.681602478027344, + "rewards/rejected": -15.552878379821777, + "step": 7497 + }, + { + "epoch": 1.17, + "learning_rate": 8.647997302764667e-06, + "logits/chosen": -2.3414883613586426, + "logits/rejected": -2.9384243488311768, + "logps/chosen": -135.21620178222656, + "logps/rejected": -232.90060424804688, + "loss": 0.0341, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.721073627471924, + "rewards/margins": 3.405637741088867, + "rewards/rejected": -8.12671184539795, + "step": 7498 + }, + { + "epoch": 1.17, + "learning_rate": 8.647263862233519e-06, + "logits/chosen": -2.0980312824249268, + "logits/rejected": -3.0386128425598145, + "logps/chosen": -372.4527282714844, + "logps/rejected": -605.087890625, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1693878173828125, + "rewards/margins": 6.818563461303711, + "rewards/rejected": -10.987951278686523, + "step": 7499 + }, + { + "epoch": 1.17, + "learning_rate": 8.646530421702371e-06, + "logits/chosen": -1.7971891164779663, + "logits/rejected": -2.8104519844055176, + "logps/chosen": -98.99929809570312, + "logps/rejected": -230.31460571289062, + "loss": 0.0521, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.790450572967529, + "rewards/margins": 4.96574592590332, + "rewards/rejected": -11.756196975708008, + "step": 7500 + }, + { + "epoch": 1.17, + "learning_rate": 8.645796981171223e-06, + "logits/chosen": -2.646423101425171, + "logits/rejected": -2.583845615386963, + "logps/chosen": -154.43345642089844, + "logps/rejected": -418.8365478515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.006776809692383, + "rewards/margins": 9.639156341552734, + "rewards/rejected": -11.645933151245117, + "step": 7501 + }, + { + "epoch": 1.17, + "learning_rate": 8.645063540640075e-06, + "logits/chosen": -2.9942612648010254, + "logits/rejected": -2.9334876537323, + "logps/chosen": -697.805419921875, + "logps/rejected": -684.9503173828125, + "loss": 0.2638, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5386078357696533, + "rewards/margins": 3.971975803375244, + "rewards/rejected": -6.510583877563477, + "step": 7502 + }, + { + "epoch": 1.17, + "learning_rate": 8.644330100108927e-06, + "logits/chosen": -2.6110641956329346, + "logits/rejected": -3.065725326538086, + "logps/chosen": -114.3466567993164, + "logps/rejected": -150.46214294433594, + "loss": 1.5169, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.541509628295898, + "rewards/margins": 2.13153338432312, + "rewards/rejected": -7.673043251037598, + "step": 7503 + }, + { + "epoch": 1.17, + "learning_rate": 8.64359665957778e-06, + "logits/chosen": -2.8716061115264893, + "logits/rejected": -1.4609583616256714, + "logps/chosen": -723.8427734375, + "logps/rejected": -316.54278564453125, + "loss": 2.1526, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.301116943359375, + "rewards/margins": 0.6825466156005859, + "rewards/rejected": -5.983663558959961, + "step": 7504 + }, + { + "epoch": 1.17, + "learning_rate": 8.642863219046632e-06, + "logits/chosen": -3.017937421798706, + "logits/rejected": -2.6162681579589844, + "logps/chosen": -172.9365692138672, + "logps/rejected": -76.76836395263672, + "loss": 1.5575, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.51695442199707, + "rewards/margins": -0.802959680557251, + "rewards/rejected": -6.71399450302124, + "step": 7505 + }, + { + "epoch": 1.17, + "learning_rate": 8.642129778515484e-06, + "logits/chosen": -2.8590047359466553, + "logits/rejected": -2.5488266944885254, + "logps/chosen": -163.34210205078125, + "logps/rejected": -138.4614715576172, + "loss": 0.0409, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.72166109085083, + "rewards/margins": 4.356245994567871, + "rewards/rejected": -8.07790756225586, + "step": 7506 + }, + { + "epoch": 1.17, + "learning_rate": 8.641396337984336e-06, + "logits/chosen": -2.943661689758301, + "logits/rejected": -3.1793322563171387, + "logps/chosen": -267.17657470703125, + "logps/rejected": -170.13015747070312, + "loss": 0.7165, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.918746471405029, + "rewards/margins": 1.1544417142868042, + "rewards/rejected": -6.073187828063965, + "step": 7507 + }, + { + "epoch": 1.17, + "learning_rate": 8.640662897453188e-06, + "logits/chosen": -2.417215585708618, + "logits/rejected": -3.023984432220459, + "logps/chosen": -274.98846435546875, + "logps/rejected": -275.3169860839844, + "loss": 2.6398, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.2861328125, + "rewards/margins": -2.553772211074829, + "rewards/rejected": -5.732359886169434, + "step": 7508 + }, + { + "epoch": 1.17, + "learning_rate": 8.63992945692204e-06, + "logits/chosen": -2.8795437812805176, + "logits/rejected": -3.0397942066192627, + "logps/chosen": -186.74932861328125, + "logps/rejected": -172.6774444580078, + "loss": 0.4979, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5309863090515137, + "rewards/margins": 1.8646299839019775, + "rewards/rejected": -5.39561653137207, + "step": 7509 + }, + { + "epoch": 1.17, + "learning_rate": 8.639196016390891e-06, + "logits/chosen": -2.903933048248291, + "logits/rejected": -3.0159289836883545, + "logps/chosen": -357.60302734375, + "logps/rejected": -437.9474182128906, + "loss": 0.0154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5492671728134155, + "rewards/margins": 8.318426132202148, + "rewards/rejected": -7.769159317016602, + "step": 7510 + }, + { + "epoch": 1.17, + "learning_rate": 8.638462575859743e-06, + "logits/chosen": -1.8059576749801636, + "logits/rejected": -2.765493869781494, + "logps/chosen": -684.2762451171875, + "logps/rejected": -680.2625732421875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.459277153015137, + "rewards/margins": 7.115048408508301, + "rewards/rejected": -12.574325561523438, + "step": 7511 + }, + { + "epoch": 1.17, + "learning_rate": 8.637729135328595e-06, + "logits/chosen": -0.9378662109375, + "logits/rejected": -2.9205164909362793, + "logps/chosen": -158.8443603515625, + "logps/rejected": -646.0716552734375, + "loss": 0.8611, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.951473712921143, + "rewards/margins": 2.93961238861084, + "rewards/rejected": -7.891085624694824, + "step": 7512 + }, + { + "epoch": 1.17, + "learning_rate": 8.636995694797449e-06, + "logits/chosen": -2.5716283321380615, + "logits/rejected": -2.480311393737793, + "logps/chosen": -237.781982421875, + "logps/rejected": -383.10833740234375, + "loss": 0.6031, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.453935146331787, + "rewards/margins": 7.100588798522949, + "rewards/rejected": -11.554524421691895, + "step": 7513 + }, + { + "epoch": 1.17, + "learning_rate": 8.6362622542663e-06, + "logits/chosen": -2.796980857849121, + "logits/rejected": -2.9095990657806396, + "logps/chosen": -314.0168151855469, + "logps/rejected": -366.181396484375, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5103821754455566, + "rewards/margins": 4.671270847320557, + "rewards/rejected": -7.181653022766113, + "step": 7514 + }, + { + "epoch": 1.17, + "learning_rate": 8.635528813735153e-06, + "logits/chosen": -2.9230964183807373, + "logits/rejected": -3.069784641265869, + "logps/chosen": -51.93527603149414, + "logps/rejected": -238.93020629882812, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.097193717956543, + "rewards/margins": 5.29268741607666, + "rewards/rejected": -8.389881134033203, + "step": 7515 + }, + { + "epoch": 1.17, + "learning_rate": 8.634795373204004e-06, + "logits/chosen": -2.886871099472046, + "logits/rejected": -2.2140953540802, + "logps/chosen": -346.210693359375, + "logps/rejected": -485.9148254394531, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.133706092834473, + "rewards/margins": 9.15073013305664, + "rewards/rejected": -13.284436225891113, + "step": 7516 + }, + { + "epoch": 1.17, + "learning_rate": 8.634061932672856e-06, + "logits/chosen": -0.7673380374908447, + "logits/rejected": -2.9881515502929688, + "logps/chosen": -252.21240234375, + "logps/rejected": -437.60626220703125, + "loss": 0.4259, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.153659343719482, + "rewards/margins": 9.006837844848633, + "rewards/rejected": -13.160497665405273, + "step": 7517 + }, + { + "epoch": 1.17, + "learning_rate": 8.633328492141708e-06, + "logits/chosen": -1.8977909088134766, + "logits/rejected": -2.921624183654785, + "logps/chosen": -176.64703369140625, + "logps/rejected": -473.1213684082031, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8633012771606445, + "rewards/margins": 6.450826644897461, + "rewards/rejected": -10.314127922058105, + "step": 7518 + }, + { + "epoch": 1.17, + "learning_rate": 8.63259505161056e-06, + "logits/chosen": -2.7939751148223877, + "logits/rejected": -3.1257412433624268, + "logps/chosen": -110.86990356445312, + "logps/rejected": -214.3176727294922, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5257759094238281, + "rewards/margins": 6.772165775299072, + "rewards/rejected": -8.297941207885742, + "step": 7519 + }, + { + "epoch": 1.17, + "learning_rate": 8.631861611079412e-06, + "logits/chosen": -2.7193875312805176, + "logits/rejected": -2.1698853969573975, + "logps/chosen": -180.7584228515625, + "logps/rejected": -216.70724487304688, + "loss": 0.0736, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0007925033569336, + "rewards/margins": 5.614781379699707, + "rewards/rejected": -8.61557388305664, + "step": 7520 + }, + { + "epoch": 1.17, + "learning_rate": 8.631128170548264e-06, + "logits/chosen": -2.9326772689819336, + "logits/rejected": -2.611142635345459, + "logps/chosen": -119.38519287109375, + "logps/rejected": -152.34857177734375, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8233537673950195, + "rewards/margins": 5.667201042175293, + "rewards/rejected": -11.490554809570312, + "step": 7521 + }, + { + "epoch": 1.17, + "learning_rate": 8.630394730017117e-06, + "logits/chosen": -3.0686111450195312, + "logits/rejected": -3.158935546875, + "logps/chosen": -616.7247314453125, + "logps/rejected": -398.7284240722656, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5696258544921875, + "rewards/margins": 5.224802017211914, + "rewards/rejected": -7.794427871704102, + "step": 7522 + }, + { + "epoch": 1.17, + "learning_rate": 8.62966128948597e-06, + "logits/chosen": -3.0539212226867676, + "logits/rejected": -2.4419963359832764, + "logps/chosen": -443.43499755859375, + "logps/rejected": -536.2891845703125, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.847085475921631, + "rewards/margins": 5.13359260559082, + "rewards/rejected": -7.980678081512451, + "step": 7523 + }, + { + "epoch": 1.17, + "learning_rate": 8.628927848954821e-06, + "logits/chosen": -2.3257393836975098, + "logits/rejected": -3.0456602573394775, + "logps/chosen": -131.80908203125, + "logps/rejected": -325.80926513671875, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9363481998443604, + "rewards/margins": 5.4686994552612305, + "rewards/rejected": -8.405048370361328, + "step": 7524 + }, + { + "epoch": 1.17, + "learning_rate": 8.628194408423673e-06, + "logits/chosen": -2.9571001529693604, + "logits/rejected": -1.1961315870285034, + "logps/chosen": -753.7530517578125, + "logps/rejected": -270.673583984375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5981407165527344, + "rewards/margins": 8.380489349365234, + "rewards/rejected": -7.782349586486816, + "step": 7525 + }, + { + "epoch": 1.17, + "learning_rate": 8.627460967892525e-06, + "logits/chosen": -2.258098840713501, + "logits/rejected": -2.8400120735168457, + "logps/chosen": -94.41879272460938, + "logps/rejected": -221.30384826660156, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.253819704055786, + "rewards/margins": 5.701626777648926, + "rewards/rejected": -8.955446243286133, + "step": 7526 + }, + { + "epoch": 1.17, + "learning_rate": 8.626727527361377e-06, + "logits/chosen": -2.1528632640838623, + "logits/rejected": -2.860863208770752, + "logps/chosen": -140.5131072998047, + "logps/rejected": -373.6193542480469, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6680784225463867, + "rewards/margins": 5.891124725341797, + "rewards/rejected": -8.5592041015625, + "step": 7527 + }, + { + "epoch": 1.17, + "learning_rate": 8.625994086830229e-06, + "logits/chosen": -1.5799576044082642, + "logits/rejected": -3.1055634021759033, + "logps/chosen": -203.95669555664062, + "logps/rejected": -608.6375732421875, + "loss": 0.202, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.497466087341309, + "rewards/margins": 2.33894944190979, + "rewards/rejected": -9.83641529083252, + "step": 7528 + }, + { + "epoch": 1.17, + "learning_rate": 8.625260646299082e-06, + "logits/chosen": -3.0302910804748535, + "logits/rejected": -1.7860002517700195, + "logps/chosen": -504.43341064453125, + "logps/rejected": -360.3289489746094, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6629638671875, + "rewards/margins": 5.518679618835449, + "rewards/rejected": -9.18164348602295, + "step": 7529 + }, + { + "epoch": 1.17, + "learning_rate": 8.624527205767934e-06, + "logits/chosen": -2.164738893508911, + "logits/rejected": -2.8106234073638916, + "logps/chosen": -87.62908172607422, + "logps/rejected": -257.072265625, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.679788112640381, + "rewards/margins": 7.2281341552734375, + "rewards/rejected": -11.90792179107666, + "step": 7530 + }, + { + "epoch": 1.17, + "learning_rate": 8.623793765236788e-06, + "logits/chosen": -3.1271209716796875, + "logits/rejected": -2.1187777519226074, + "logps/chosen": -267.2686767578125, + "logps/rejected": -78.79436492919922, + "loss": 0.5942, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.812417984008789, + "rewards/margins": 1.116899013519287, + "rewards/rejected": -5.929316520690918, + "step": 7531 + }, + { + "epoch": 1.17, + "learning_rate": 8.62306032470564e-06, + "logits/chosen": -2.7614259719848633, + "logits/rejected": -3.1484375, + "logps/chosen": -141.64865112304688, + "logps/rejected": -235.72723388671875, + "loss": 0.0224, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0338668823242188, + "rewards/margins": 5.100827693939209, + "rewards/rejected": -7.134694576263428, + "step": 7532 + }, + { + "epoch": 1.17, + "learning_rate": 8.622326884174491e-06, + "logits/chosen": -3.105374336242676, + "logits/rejected": -2.226015329360962, + "logps/chosen": -655.7064819335938, + "logps/rejected": -407.16412353515625, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25624388456344604, + "rewards/margins": 7.394720077514648, + "rewards/rejected": -7.1384758949279785, + "step": 7533 + }, + { + "epoch": 1.17, + "learning_rate": 8.621593443643343e-06, + "logits/chosen": -0.48876526951789856, + "logits/rejected": -2.8269760608673096, + "logps/chosen": -102.73401641845703, + "logps/rejected": -636.3062744140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.809257984161377, + "rewards/margins": 10.864425659179688, + "rewards/rejected": -14.673683166503906, + "step": 7534 + }, + { + "epoch": 1.17, + "learning_rate": 8.620860003112195e-06, + "logits/chosen": -3.0653090476989746, + "logits/rejected": -2.9305856227874756, + "logps/chosen": -285.0557556152344, + "logps/rejected": -274.84112548828125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8931527137756348, + "rewards/margins": 6.40826416015625, + "rewards/rejected": -9.301416397094727, + "step": 7535 + }, + { + "epoch": 1.17, + "learning_rate": 8.620126562581047e-06, + "logits/chosen": -3.086254596710205, + "logits/rejected": -2.3362815380096436, + "logps/chosen": -523.7384033203125, + "logps/rejected": -335.17791748046875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0227370262146, + "rewards/margins": 6.366018772125244, + "rewards/rejected": -10.388755798339844, + "step": 7536 + }, + { + "epoch": 1.17, + "learning_rate": 8.619393122049899e-06, + "logits/chosen": -2.507768392562866, + "logits/rejected": -2.9831643104553223, + "logps/chosen": -185.78610229492188, + "logps/rejected": -160.17843627929688, + "loss": 0.4435, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.0542826652526855, + "rewards/margins": 3.1216468811035156, + "rewards/rejected": -8.17593002319336, + "step": 7537 + }, + { + "epoch": 1.17, + "learning_rate": 8.618659681518751e-06, + "logits/chosen": -3.1682567596435547, + "logits/rejected": -3.149916410446167, + "logps/chosen": -406.9361267089844, + "logps/rejected": -419.97998046875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6572258472442627, + "rewards/margins": 7.932467460632324, + "rewards/rejected": -10.589693069458008, + "step": 7538 + }, + { + "epoch": 1.17, + "learning_rate": 8.617926240987603e-06, + "logits/chosen": -2.5279042720794678, + "logits/rejected": -3.152737617492676, + "logps/chosen": -101.83879089355469, + "logps/rejected": -328.5168762207031, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8941330909729004, + "rewards/margins": 5.984300136566162, + "rewards/rejected": -8.878433227539062, + "step": 7539 + }, + { + "epoch": 1.17, + "learning_rate": 8.617192800456456e-06, + "logits/chosen": -2.0820140838623047, + "logits/rejected": -2.8428292274475098, + "logps/chosen": -66.30279541015625, + "logps/rejected": -259.4559020996094, + "loss": 0.1095, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.511578559875488, + "rewards/margins": 3.739375352859497, + "rewards/rejected": -9.250953674316406, + "step": 7540 + }, + { + "epoch": 1.17, + "learning_rate": 8.616459359925308e-06, + "logits/chosen": -2.9032764434814453, + "logits/rejected": -2.9236230850219727, + "logps/chosen": -143.2698974609375, + "logps/rejected": -325.0224609375, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7766395211219788, + "rewards/margins": 7.14073371887207, + "rewards/rejected": -7.917373180389404, + "step": 7541 + }, + { + "epoch": 1.17, + "learning_rate": 8.61572591939416e-06, + "logits/chosen": -2.2489328384399414, + "logits/rejected": -3.198749303817749, + "logps/chosen": -631.8458862304688, + "logps/rejected": -717.225341796875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9116110801696777, + "rewards/margins": 6.208624839782715, + "rewards/rejected": -8.12023639678955, + "step": 7542 + }, + { + "epoch": 1.17, + "learning_rate": 8.614992478863012e-06, + "logits/chosen": -2.150261878967285, + "logits/rejected": -2.7112350463867188, + "logps/chosen": -254.12811279296875, + "logps/rejected": -255.18972778320312, + "loss": 0.6355, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.33860969543457, + "rewards/margins": 4.5583038330078125, + "rewards/rejected": -8.896913528442383, + "step": 7543 + }, + { + "epoch": 1.17, + "learning_rate": 8.614259038331864e-06, + "logits/chosen": -1.4184650182724, + "logits/rejected": -2.801309823989868, + "logps/chosen": -135.61407470703125, + "logps/rejected": -720.0091552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.235003471374512, + "rewards/margins": 11.607980728149414, + "rewards/rejected": -15.842985153198242, + "step": 7544 + }, + { + "epoch": 1.17, + "learning_rate": 8.613525597800716e-06, + "logits/chosen": -1.6524057388305664, + "logits/rejected": -2.45283842086792, + "logps/chosen": -119.25443267822266, + "logps/rejected": -256.4560546875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.855113983154297, + "rewards/margins": 7.881809234619141, + "rewards/rejected": -11.736923217773438, + "step": 7545 + }, + { + "epoch": 1.17, + "learning_rate": 8.612792157269568e-06, + "logits/chosen": -2.213811159133911, + "logits/rejected": -2.938373327255249, + "logps/chosen": -228.2694091796875, + "logps/rejected": -398.89337158203125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.30391263961792, + "rewards/margins": 6.843752861022949, + "rewards/rejected": -9.147665023803711, + "step": 7546 + }, + { + "epoch": 1.17, + "learning_rate": 8.61205871673842e-06, + "logits/chosen": -2.002143621444702, + "logits/rejected": -2.814903974533081, + "logps/chosen": -98.35919189453125, + "logps/rejected": -383.668212890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.337409019470215, + "rewards/margins": 8.66729736328125, + "rewards/rejected": -11.004705429077148, + "step": 7547 + }, + { + "epoch": 1.17, + "learning_rate": 8.611325276207273e-06, + "logits/chosen": -1.5453397035598755, + "logits/rejected": -1.5026826858520508, + "logps/chosen": -395.24774169921875, + "logps/rejected": -496.30511474609375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.080829381942749, + "rewards/margins": 9.183552742004395, + "rewards/rejected": -11.264382362365723, + "step": 7548 + }, + { + "epoch": 1.17, + "learning_rate": 8.610591835676125e-06, + "logits/chosen": -2.8372721672058105, + "logits/rejected": -3.199636697769165, + "logps/chosen": -242.14450073242188, + "logps/rejected": -469.6783447265625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.667748212814331, + "rewards/margins": 7.813126564025879, + "rewards/rejected": -10.480875015258789, + "step": 7549 + }, + { + "epoch": 1.17, + "learning_rate": 8.609858395144977e-06, + "logits/chosen": -1.781292200088501, + "logits/rejected": -2.6706204414367676, + "logps/chosen": -133.90386962890625, + "logps/rejected": -410.8172302246094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4246442317962646, + "rewards/margins": 9.243322372436523, + "rewards/rejected": -12.667966842651367, + "step": 7550 + }, + { + "epoch": 1.17, + "learning_rate": 8.609124954613829e-06, + "logits/chosen": -3.2331223487854004, + "logits/rejected": -2.9176387786865234, + "logps/chosen": -177.15359497070312, + "logps/rejected": -105.65288543701172, + "loss": 2.595, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.587344169616699, + "rewards/margins": -0.14870619773864746, + "rewards/rejected": -6.438638210296631, + "step": 7551 + }, + { + "epoch": 1.17, + "learning_rate": 8.60839151408268e-06, + "logits/chosen": -2.6350653171539307, + "logits/rejected": -2.9840259552001953, + "logps/chosen": -637.57861328125, + "logps/rejected": -626.0745849609375, + "loss": 0.0324, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.354921817779541, + "rewards/margins": 5.219487190246582, + "rewards/rejected": -9.574409484863281, + "step": 7552 + }, + { + "epoch": 1.17, + "learning_rate": 8.607658073551532e-06, + "logits/chosen": -2.265852451324463, + "logits/rejected": -2.9296488761901855, + "logps/chosen": -291.7261657714844, + "logps/rejected": -419.9363708496094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.352529525756836, + "rewards/margins": 8.380487442016602, + "rewards/rejected": -10.733016967773438, + "step": 7553 + }, + { + "epoch": 1.17, + "learning_rate": 8.606924633020384e-06, + "logits/chosen": -2.106745958328247, + "logits/rejected": -3.004467248916626, + "logps/chosen": -138.15179443359375, + "logps/rejected": -323.10723876953125, + "loss": 2.5504, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.196658611297607, + "rewards/margins": 1.2343356609344482, + "rewards/rejected": -6.430994510650635, + "step": 7554 + }, + { + "epoch": 1.17, + "learning_rate": 8.606191192489236e-06, + "logits/chosen": -2.850947856903076, + "logits/rejected": -2.948213815689087, + "logps/chosen": -196.54969787597656, + "logps/rejected": -256.43731689453125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3282418251037598, + "rewards/margins": 7.597603797912598, + "rewards/rejected": -10.925846099853516, + "step": 7555 + }, + { + "epoch": 1.18, + "learning_rate": 8.605457751958088e-06, + "logits/chosen": -3.264761209487915, + "logits/rejected": -3.2836618423461914, + "logps/chosen": -101.17742919921875, + "logps/rejected": -118.5462875366211, + "loss": 0.0304, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.374424457550049, + "rewards/margins": 5.023564338684082, + "rewards/rejected": -8.397989273071289, + "step": 7556 + }, + { + "epoch": 1.18, + "learning_rate": 8.604724311426942e-06, + "logits/chosen": -3.118433952331543, + "logits/rejected": -1.8563650846481323, + "logps/chosen": -259.97894287109375, + "logps/rejected": -200.3117218017578, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7236328125, + "rewards/margins": 5.19077205657959, + "rewards/rejected": -7.91440486907959, + "step": 7557 + }, + { + "epoch": 1.18, + "learning_rate": 8.603990870895793e-06, + "logits/chosen": -2.523905038833618, + "logits/rejected": -1.7549484968185425, + "logps/chosen": -1840.776611328125, + "logps/rejected": -320.17803955078125, + "loss": 0.3983, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.206172943115234, + "rewards/margins": 2.1150760650634766, + "rewards/rejected": -11.321249008178711, + "step": 7558 + }, + { + "epoch": 1.18, + "learning_rate": 8.603257430364645e-06, + "logits/chosen": -2.980196952819824, + "logits/rejected": -3.1487338542938232, + "logps/chosen": -95.04605102539062, + "logps/rejected": -181.22219848632812, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.75832462310791, + "rewards/margins": 6.423213958740234, + "rewards/rejected": -9.181537628173828, + "step": 7559 + }, + { + "epoch": 1.18, + "learning_rate": 8.602523989833497e-06, + "logits/chosen": -2.968048572540283, + "logits/rejected": -3.195971727371216, + "logps/chosen": -210.85781860351562, + "logps/rejected": -177.605712890625, + "loss": 0.0973, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2104249000549316, + "rewards/margins": 4.481579780578613, + "rewards/rejected": -6.692004680633545, + "step": 7560 + }, + { + "epoch": 1.18, + "learning_rate": 8.601790549302349e-06, + "logits/chosen": -2.0733284950256348, + "logits/rejected": -2.7290706634521484, + "logps/chosen": -156.86614990234375, + "logps/rejected": -301.47576904296875, + "loss": 0.0925, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.450616836547852, + "rewards/margins": 3.0902373790740967, + "rewards/rejected": -7.540853977203369, + "step": 7561 + }, + { + "epoch": 1.18, + "learning_rate": 8.601057108771201e-06, + "logits/chosen": -2.5436127185821533, + "logits/rejected": -2.806253433227539, + "logps/chosen": -106.81458282470703, + "logps/rejected": -220.6513671875, + "loss": 0.5687, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.003470420837402, + "rewards/margins": 1.8019468784332275, + "rewards/rejected": -6.805417060852051, + "step": 7562 + }, + { + "epoch": 1.18, + "learning_rate": 8.600323668240055e-06, + "logits/chosen": -1.7439857721328735, + "logits/rejected": -2.9692811965942383, + "logps/chosen": -103.92214965820312, + "logps/rejected": -338.9448547363281, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.556705951690674, + "rewards/margins": 8.084546089172363, + "rewards/rejected": -11.641252517700195, + "step": 7563 + }, + { + "epoch": 1.18, + "learning_rate": 8.599590227708906e-06, + "logits/chosen": -2.1811859607696533, + "logits/rejected": -3.099794626235962, + "logps/chosen": -210.15504455566406, + "logps/rejected": -390.28570556640625, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.614858627319336, + "rewards/margins": 6.794719696044922, + "rewards/rejected": -10.409578323364258, + "step": 7564 + }, + { + "epoch": 1.18, + "learning_rate": 8.598856787177758e-06, + "logits/chosen": -1.8903592824935913, + "logits/rejected": -2.9187355041503906, + "logps/chosen": -225.85211181640625, + "logps/rejected": -325.6416015625, + "loss": 0.7447, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.634631156921387, + "rewards/margins": 3.711367607116699, + "rewards/rejected": -9.345998764038086, + "step": 7565 + }, + { + "epoch": 1.18, + "learning_rate": 8.598123346646612e-06, + "logits/chosen": -1.309949278831482, + "logits/rejected": -2.941354990005493, + "logps/chosen": -112.52348327636719, + "logps/rejected": -418.88299560546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.631657600402832, + "rewards/margins": 8.231344223022461, + "rewards/rejected": -11.86300277709961, + "step": 7566 + }, + { + "epoch": 1.18, + "learning_rate": 8.597389906115464e-06, + "logits/chosen": -2.8190829753875732, + "logits/rejected": -2.056475877761841, + "logps/chosen": -227.75868225097656, + "logps/rejected": -172.30892944335938, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6545486450195312, + "rewards/margins": 6.650564193725586, + "rewards/rejected": -7.305112838745117, + "step": 7567 + }, + { + "epoch": 1.18, + "learning_rate": 8.596656465584316e-06, + "logits/chosen": -2.270965099334717, + "logits/rejected": -2.815617084503174, + "logps/chosen": -331.80157470703125, + "logps/rejected": -445.2388610839844, + "loss": 0.0231, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6333529949188232, + "rewards/margins": 5.124517917633057, + "rewards/rejected": -7.757871150970459, + "step": 7568 + }, + { + "epoch": 1.18, + "learning_rate": 8.595923025053168e-06, + "logits/chosen": -2.9398651123046875, + "logits/rejected": -1.9417535066604614, + "logps/chosen": -275.1185302734375, + "logps/rejected": -240.32632446289062, + "loss": 0.0231, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6299242973327637, + "rewards/margins": 4.659145355224609, + "rewards/rejected": -7.289069652557373, + "step": 7569 + }, + { + "epoch": 1.18, + "learning_rate": 8.59518958452202e-06, + "logits/chosen": -2.273998260498047, + "logits/rejected": -3.0650417804718018, + "logps/chosen": -114.06844329833984, + "logps/rejected": -262.7140197753906, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7743959426879883, + "rewards/margins": 4.848250389099121, + "rewards/rejected": -8.62264633178711, + "step": 7570 + }, + { + "epoch": 1.18, + "learning_rate": 8.594456143990871e-06, + "logits/chosen": -2.6265316009521484, + "logits/rejected": -1.721185326576233, + "logps/chosen": -669.6788330078125, + "logps/rejected": -487.1146545410156, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6606550216674805, + "rewards/margins": 6.7761430740356445, + "rewards/rejected": -12.436798095703125, + "step": 7571 + }, + { + "epoch": 1.18, + "learning_rate": 8.593722703459723e-06, + "logits/chosen": -2.6550538539886475, + "logits/rejected": -2.6393959522247314, + "logps/chosen": -259.79052734375, + "logps/rejected": -437.0743103027344, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.069505214691162, + "rewards/margins": 6.812250137329102, + "rewards/rejected": -9.881755828857422, + "step": 7572 + }, + { + "epoch": 1.18, + "learning_rate": 8.592989262928575e-06, + "logits/chosen": -2.1747875213623047, + "logits/rejected": -3.068084478378296, + "logps/chosen": -76.09558868408203, + "logps/rejected": -234.47695922851562, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.297887086868286, + "rewards/margins": 6.869145393371582, + "rewards/rejected": -10.167032241821289, + "step": 7573 + }, + { + "epoch": 1.18, + "learning_rate": 8.592255822397427e-06, + "logits/chosen": -2.9541072845458984, + "logits/rejected": -2.6526100635528564, + "logps/chosen": -135.25677490234375, + "logps/rejected": -117.19823455810547, + "loss": 0.8522, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.772490501403809, + "rewards/margins": 2.0476832389831543, + "rewards/rejected": -6.820173740386963, + "step": 7574 + }, + { + "epoch": 1.18, + "learning_rate": 8.59152238186628e-06, + "logits/chosen": -2.7055656909942627, + "logits/rejected": -1.676445484161377, + "logps/chosen": -417.9547119140625, + "logps/rejected": -330.72100830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.10986328125, + "rewards/margins": 9.661005020141602, + "rewards/rejected": -10.770868301391602, + "step": 7575 + }, + { + "epoch": 1.18, + "learning_rate": 8.590788941335132e-06, + "logits/chosen": -2.5876169204711914, + "logits/rejected": -3.0276248455047607, + "logps/chosen": -276.3016357421875, + "logps/rejected": -279.669677734375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3140978813171387, + "rewards/margins": 7.46784782409668, + "rewards/rejected": -9.781946182250977, + "step": 7576 + }, + { + "epoch": 1.18, + "learning_rate": 8.590055500803984e-06, + "logits/chosen": -3.117420196533203, + "logits/rejected": -3.2160778045654297, + "logps/chosen": -99.8488540649414, + "logps/rejected": -178.9064178466797, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2255630493164062, + "rewards/margins": 5.646716117858887, + "rewards/rejected": -8.872279167175293, + "step": 7577 + }, + { + "epoch": 1.18, + "learning_rate": 8.589322060272836e-06, + "logits/chosen": -2.299405813217163, + "logits/rejected": -3.142426013946533, + "logps/chosen": -92.62028503417969, + "logps/rejected": -238.27391052246094, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.021602630615234, + "rewards/margins": 5.120675563812256, + "rewards/rejected": -9.142278671264648, + "step": 7578 + }, + { + "epoch": 1.18, + "learning_rate": 8.588588619741688e-06, + "logits/chosen": -3.0391829013824463, + "logits/rejected": -1.6510941982269287, + "logps/chosen": -390.5039367675781, + "logps/rejected": -384.3514404296875, + "loss": 0.4435, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.322767734527588, + "rewards/margins": 1.6703554391860962, + "rewards/rejected": -6.9931230545043945, + "step": 7579 + }, + { + "epoch": 1.18, + "learning_rate": 8.58785517921054e-06, + "logits/chosen": -3.098517656326294, + "logits/rejected": -2.909496545791626, + "logps/chosen": -295.1184387207031, + "logps/rejected": -234.46969604492188, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2035393714904785, + "rewards/margins": 6.602072238922119, + "rewards/rejected": -9.805611610412598, + "step": 7580 + }, + { + "epoch": 1.18, + "learning_rate": 8.587121738679392e-06, + "logits/chosen": -2.2598812580108643, + "logits/rejected": -2.9521677494049072, + "logps/chosen": -236.65261840820312, + "logps/rejected": -284.1963195800781, + "loss": 0.1119, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.821173667907715, + "rewards/margins": 4.121403694152832, + "rewards/rejected": -6.942577362060547, + "step": 7581 + }, + { + "epoch": 1.18, + "learning_rate": 8.586388298148244e-06, + "logits/chosen": -2.594768524169922, + "logits/rejected": -3.0670270919799805, + "logps/chosen": -553.5281372070312, + "logps/rejected": -336.29534912109375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8016555309295654, + "rewards/margins": 7.171158313751221, + "rewards/rejected": -10.972813606262207, + "step": 7582 + }, + { + "epoch": 1.18, + "learning_rate": 8.585654857617096e-06, + "logits/chosen": -2.55313777923584, + "logits/rejected": -2.962402105331421, + "logps/chosen": -142.093994140625, + "logps/rejected": -171.783447265625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.917149543762207, + "rewards/margins": 7.629940986633301, + "rewards/rejected": -11.547090530395508, + "step": 7583 + }, + { + "epoch": 1.18, + "learning_rate": 8.584921417085949e-06, + "logits/chosen": -2.3443922996520996, + "logits/rejected": -2.9606995582580566, + "logps/chosen": -356.9200439453125, + "logps/rejected": -488.55859375, + "loss": 0.7362, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.795705795288086, + "rewards/margins": 5.969848155975342, + "rewards/rejected": -11.765554428100586, + "step": 7584 + }, + { + "epoch": 1.18, + "learning_rate": 8.584187976554801e-06, + "logits/chosen": -2.580566883087158, + "logits/rejected": -3.124128580093384, + "logps/chosen": -88.64859008789062, + "logps/rejected": -180.46109008789062, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.965747833251953, + "rewards/margins": 6.7952985763549805, + "rewards/rejected": -10.761046409606934, + "step": 7585 + }, + { + "epoch": 1.18, + "learning_rate": 8.583454536023653e-06, + "logits/chosen": -2.5938408374786377, + "logits/rejected": -3.085259437561035, + "logps/chosen": -1258.5703125, + "logps/rejected": -1065.775390625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.431732177734375, + "rewards/margins": 9.151834487915039, + "rewards/rejected": -11.583566665649414, + "step": 7586 + }, + { + "epoch": 1.18, + "learning_rate": 8.582721095492505e-06, + "logits/chosen": -2.1594057083129883, + "logits/rejected": -3.0733742713928223, + "logps/chosen": -83.35835266113281, + "logps/rejected": -238.42642211914062, + "loss": 2.576, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.191802978515625, + "rewards/margins": -0.4171445369720459, + "rewards/rejected": -6.774658203125, + "step": 7587 + }, + { + "epoch": 1.18, + "learning_rate": 8.581987654961357e-06, + "logits/chosen": -2.3860068321228027, + "logits/rejected": -2.7023658752441406, + "logps/chosen": -228.3490447998047, + "logps/rejected": -481.2162780761719, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2451679706573486, + "rewards/margins": 5.891327857971191, + "rewards/rejected": -8.136495590209961, + "step": 7588 + }, + { + "epoch": 1.18, + "learning_rate": 8.581254214430208e-06, + "logits/chosen": -3.085926055908203, + "logits/rejected": -2.968871593475342, + "logps/chosen": -109.9041748046875, + "logps/rejected": -209.70394897460938, + "loss": 0.0376, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7781877517700195, + "rewards/margins": 5.1920905113220215, + "rewards/rejected": -8.9702787399292, + "step": 7589 + }, + { + "epoch": 1.18, + "learning_rate": 8.58052077389906e-06, + "logits/chosen": -2.3659398555755615, + "logits/rejected": -2.8534321784973145, + "logps/chosen": -229.59033203125, + "logps/rejected": -465.252197265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2058136463165283, + "rewards/margins": 8.146293640136719, + "rewards/rejected": -11.352108001708984, + "step": 7590 + }, + { + "epoch": 1.18, + "learning_rate": 8.579787333367912e-06, + "logits/chosen": -2.929135322570801, + "logits/rejected": -3.0101547241210938, + "logps/chosen": -148.2838134765625, + "logps/rejected": -272.46087646484375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1636104583740234, + "rewards/margins": 6.949456214904785, + "rewards/rejected": -10.113067626953125, + "step": 7591 + }, + { + "epoch": 1.18, + "learning_rate": 8.579053892836764e-06, + "logits/chosen": -2.268160820007324, + "logits/rejected": -2.998703718185425, + "logps/chosen": -209.4813690185547, + "logps/rejected": -486.1522216796875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4016677141189575, + "rewards/margins": 6.940620422363281, + "rewards/rejected": -8.342288970947266, + "step": 7592 + }, + { + "epoch": 1.18, + "learning_rate": 8.578320452305618e-06, + "logits/chosen": -2.4110171794891357, + "logits/rejected": -2.769904136657715, + "logps/chosen": -194.3419189453125, + "logps/rejected": -333.67950439453125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7627789974212646, + "rewards/margins": 7.276663780212402, + "rewards/rejected": -10.039443016052246, + "step": 7593 + }, + { + "epoch": 1.18, + "learning_rate": 8.57758701177447e-06, + "logits/chosen": -2.4691476821899414, + "logits/rejected": -2.6923272609710693, + "logps/chosen": -77.58313751220703, + "logps/rejected": -242.65167236328125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.137773513793945, + "rewards/margins": 5.792326927185059, + "rewards/rejected": -9.930099487304688, + "step": 7594 + }, + { + "epoch": 1.18, + "learning_rate": 8.576853571243321e-06, + "logits/chosen": -2.374469041824341, + "logits/rejected": -2.9153077602386475, + "logps/chosen": -202.68307495117188, + "logps/rejected": -150.30250549316406, + "loss": 2.9243, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.2379560470581055, + "rewards/margins": -0.3597841262817383, + "rewards/rejected": -6.878171920776367, + "step": 7595 + }, + { + "epoch": 1.18, + "learning_rate": 8.576120130712173e-06, + "logits/chosen": -2.9740827083587646, + "logits/rejected": -3.071683168411255, + "logps/chosen": -64.4752197265625, + "logps/rejected": -222.85354614257812, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.453037738800049, + "rewards/margins": 5.972172737121582, + "rewards/rejected": -8.425210952758789, + "step": 7596 + }, + { + "epoch": 1.18, + "learning_rate": 8.575386690181025e-06, + "logits/chosen": -2.71718430519104, + "logits/rejected": -2.959648609161377, + "logps/chosen": -642.6600341796875, + "logps/rejected": -678.384521484375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8007917404174805, + "rewards/margins": 7.748654365539551, + "rewards/rejected": -10.549446105957031, + "step": 7597 + }, + { + "epoch": 1.18, + "learning_rate": 8.574653249649879e-06, + "logits/chosen": -1.7951998710632324, + "logits/rejected": -2.587679862976074, + "logps/chosen": -126.31414031982422, + "logps/rejected": -227.8787384033203, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8047404289245605, + "rewards/margins": 7.429326057434082, + "rewards/rejected": -10.234066009521484, + "step": 7598 + }, + { + "epoch": 1.18, + "learning_rate": 8.57391980911873e-06, + "logits/chosen": -2.6971733570098877, + "logits/rejected": -3.128113269805908, + "logps/chosen": -767.9072875976562, + "logps/rejected": -737.1681518554688, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.551229238510132, + "rewards/margins": 6.17955207824707, + "rewards/rejected": -9.730781555175781, + "step": 7599 + }, + { + "epoch": 1.18, + "learning_rate": 8.573186368587583e-06, + "logits/chosen": -1.6930330991744995, + "logits/rejected": -2.8199257850646973, + "logps/chosen": -358.3775634765625, + "logps/rejected": -579.5730590820312, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3759796619415283, + "rewards/margins": 9.302691459655762, + "rewards/rejected": -12.678670883178711, + "step": 7600 + }, + { + "epoch": 1.18, + "learning_rate": 8.572452928056434e-06, + "logits/chosen": -2.1180994510650635, + "logits/rejected": -2.921293020248413, + "logps/chosen": -173.0526580810547, + "logps/rejected": -239.8748016357422, + "loss": 2.0682, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.770064353942871, + "rewards/margins": -0.4161853790283203, + "rewards/rejected": -8.35387897491455, + "step": 7601 + }, + { + "epoch": 1.18, + "learning_rate": 8.571719487525288e-06, + "logits/chosen": -3.004613161087036, + "logits/rejected": -2.9068946838378906, + "logps/chosen": -116.59712219238281, + "logps/rejected": -132.82156372070312, + "loss": 1.0468, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.686766624450684, + "rewards/margins": 1.1998575925827026, + "rewards/rejected": -5.886624336242676, + "step": 7602 + }, + { + "epoch": 1.18, + "learning_rate": 8.57098604699414e-06, + "logits/chosen": -2.9298036098480225, + "logits/rejected": -1.930648922920227, + "logps/chosen": -297.697998046875, + "logps/rejected": -371.81964111328125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9354798793792725, + "rewards/margins": 8.4822998046875, + "rewards/rejected": -11.417779922485352, + "step": 7603 + }, + { + "epoch": 1.18, + "learning_rate": 8.570252606462992e-06, + "logits/chosen": -2.4253363609313965, + "logits/rejected": -3.0252702236175537, + "logps/chosen": -416.8034973144531, + "logps/rejected": -666.4431762695312, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6100845336914062, + "rewards/margins": 7.978428840637207, + "rewards/rejected": -10.588513374328613, + "step": 7604 + }, + { + "epoch": 1.18, + "learning_rate": 8.569519165931844e-06, + "logits/chosen": -2.134798526763916, + "logits/rejected": -2.8789424896240234, + "logps/chosen": -166.47018432617188, + "logps/rejected": -391.6347961425781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7095513343811035, + "rewards/margins": 10.34263801574707, + "rewards/rejected": -12.052188873291016, + "step": 7605 + }, + { + "epoch": 1.18, + "learning_rate": 8.568785725400696e-06, + "logits/chosen": -2.882718801498413, + "logits/rejected": -2.1862258911132812, + "logps/chosen": -171.89100646972656, + "logps/rejected": -234.40451049804688, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4384350776672363, + "rewards/margins": 6.31268310546875, + "rewards/rejected": -9.751118659973145, + "step": 7606 + }, + { + "epoch": 1.18, + "learning_rate": 8.568052284869547e-06, + "logits/chosen": -2.651338815689087, + "logits/rejected": -2.8413097858428955, + "logps/chosen": -280.8399658203125, + "logps/rejected": -319.33892822265625, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.568786144256592, + "rewards/margins": 4.920543670654297, + "rewards/rejected": -8.48932933807373, + "step": 7607 + }, + { + "epoch": 1.18, + "learning_rate": 8.5673188443384e-06, + "logits/chosen": -1.6182923316955566, + "logits/rejected": -2.851954221725464, + "logps/chosen": -111.46490478515625, + "logps/rejected": -585.5534057617188, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.569772481918335, + "rewards/margins": 8.346179008483887, + "rewards/rejected": -10.9159517288208, + "step": 7608 + }, + { + "epoch": 1.18, + "learning_rate": 8.566585403807251e-06, + "logits/chosen": -3.0566041469573975, + "logits/rejected": -2.9992105960845947, + "logps/chosen": -159.8744659423828, + "logps/rejected": -303.98846435546875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.223775863647461, + "rewards/margins": 5.948331356048584, + "rewards/rejected": -9.172107696533203, + "step": 7609 + }, + { + "epoch": 1.18, + "learning_rate": 8.565851963276103e-06, + "logits/chosen": -3.0155129432678223, + "logits/rejected": -2.9399352073669434, + "logps/chosen": -776.9688110351562, + "logps/rejected": -842.6217041015625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0473875999450684, + "rewards/margins": 6.405520915985107, + "rewards/rejected": -9.452908515930176, + "step": 7610 + }, + { + "epoch": 1.18, + "learning_rate": 8.565118522744957e-06, + "logits/chosen": -3.0704870223999023, + "logits/rejected": -3.1861579418182373, + "logps/chosen": -411.523193359375, + "logps/rejected": -360.49847412109375, + "loss": 0.0784, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5626060962677, + "rewards/margins": 3.7501754760742188, + "rewards/rejected": -7.31278133392334, + "step": 7611 + }, + { + "epoch": 1.18, + "learning_rate": 8.564385082213808e-06, + "logits/chosen": -2.734572410583496, + "logits/rejected": -2.6545205116271973, + "logps/chosen": -280.134521484375, + "logps/rejected": -422.5472412109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.605729818344116, + "rewards/margins": 8.450702667236328, + "rewards/rejected": -12.056432723999023, + "step": 7612 + }, + { + "epoch": 1.18, + "learning_rate": 8.56365164168266e-06, + "logits/chosen": -2.4149556159973145, + "logits/rejected": -2.8146958351135254, + "logps/chosen": -197.38323974609375, + "logps/rejected": -259.0792541503906, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.051544189453125, + "rewards/margins": 6.232113838195801, + "rewards/rejected": -10.283658027648926, + "step": 7613 + }, + { + "epoch": 1.18, + "learning_rate": 8.562918201151512e-06, + "logits/chosen": -3.0838067531585693, + "logits/rejected": -2.152899980545044, + "logps/chosen": -294.53753662109375, + "logps/rejected": -156.92166137695312, + "loss": 4.4476, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.078525543212891, + "rewards/margins": -4.413445949554443, + "rewards/rejected": -2.6650795936584473, + "step": 7614 + }, + { + "epoch": 1.18, + "learning_rate": 8.562184760620364e-06, + "logits/chosen": -1.9481698274612427, + "logits/rejected": -3.019716501235962, + "logps/chosen": -141.0345916748047, + "logps/rejected": -365.7330322265625, + "loss": 0.0473, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.466607093811035, + "rewards/margins": 3.0632424354553223, + "rewards/rejected": -7.529849052429199, + "step": 7615 + }, + { + "epoch": 1.18, + "learning_rate": 8.561451320089216e-06, + "logits/chosen": -1.128747582435608, + "logits/rejected": -2.438976287841797, + "logps/chosen": -91.43685150146484, + "logps/rejected": -450.78533935546875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4526445865631104, + "rewards/margins": 8.599466323852539, + "rewards/rejected": -12.05211067199707, + "step": 7616 + }, + { + "epoch": 1.18, + "learning_rate": 8.560717879558068e-06, + "logits/chosen": -2.57593035697937, + "logits/rejected": -2.176370143890381, + "logps/chosen": -444.0692443847656, + "logps/rejected": -520.3224487304688, + "loss": 0.0363, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.074967384338379, + "rewards/margins": 3.5571420192718506, + "rewards/rejected": -8.632108688354492, + "step": 7617 + }, + { + "epoch": 1.18, + "learning_rate": 8.55998443902692e-06, + "logits/chosen": -1.9667596817016602, + "logits/rejected": -2.887505054473877, + "logps/chosen": -110.79273223876953, + "logps/rejected": -200.66851806640625, + "loss": 0.1007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.979368209838867, + "rewards/margins": 3.171738862991333, + "rewards/rejected": -9.151106834411621, + "step": 7618 + }, + { + "epoch": 1.18, + "learning_rate": 8.559250998495772e-06, + "logits/chosen": -2.732138156890869, + "logits/rejected": -2.91007661819458, + "logps/chosen": -380.3678894042969, + "logps/rejected": -531.8392333984375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3197031021118164, + "rewards/margins": 8.34564208984375, + "rewards/rejected": -10.665345191955566, + "step": 7619 + }, + { + "epoch": 1.19, + "learning_rate": 8.558517557964625e-06, + "logits/chosen": -2.165414333343506, + "logits/rejected": -2.6531739234924316, + "logps/chosen": -283.51446533203125, + "logps/rejected": -685.16015625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.747959613800049, + "rewards/margins": 8.658671379089355, + "rewards/rejected": -13.406631469726562, + "step": 7620 + }, + { + "epoch": 1.19, + "learning_rate": 8.557784117433477e-06, + "logits/chosen": -1.8262450695037842, + "logits/rejected": -2.896772623062134, + "logps/chosen": -157.42469787597656, + "logps/rejected": -367.62652587890625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4299750328063965, + "rewards/margins": 8.043681144714355, + "rewards/rejected": -11.473655700683594, + "step": 7621 + }, + { + "epoch": 1.19, + "learning_rate": 8.557050676902329e-06, + "logits/chosen": -2.013594150543213, + "logits/rejected": -2.8093652725219727, + "logps/chosen": -177.32005310058594, + "logps/rejected": -358.21240234375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.423032760620117, + "rewards/margins": 7.0516204833984375, + "rewards/rejected": -9.474653244018555, + "step": 7622 + }, + { + "epoch": 1.19, + "learning_rate": 8.55631723637118e-06, + "logits/chosen": -2.778158664703369, + "logits/rejected": -2.8352344036102295, + "logps/chosen": -240.69314575195312, + "logps/rejected": -322.9481506347656, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9483070373535156, + "rewards/margins": 6.232468605041504, + "rewards/rejected": -10.180776596069336, + "step": 7623 + }, + { + "epoch": 1.19, + "learning_rate": 8.555583795840033e-06, + "logits/chosen": -1.0314148664474487, + "logits/rejected": -2.4720640182495117, + "logps/chosen": -103.34142303466797, + "logps/rejected": -193.52243041992188, + "loss": 0.3765, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.88219690322876, + "rewards/margins": 3.215272903442383, + "rewards/rejected": -9.097469329833984, + "step": 7624 + }, + { + "epoch": 1.19, + "learning_rate": 8.554850355308885e-06, + "logits/chosen": -1.8489209413528442, + "logits/rejected": -2.824387788772583, + "logps/chosen": -177.16278076171875, + "logps/rejected": -365.3933410644531, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3928070068359375, + "rewards/margins": 7.354284763336182, + "rewards/rejected": -9.747091293334961, + "step": 7625 + }, + { + "epoch": 1.19, + "learning_rate": 8.554116914777736e-06, + "logits/chosen": -2.5455548763275146, + "logits/rejected": -2.9835455417633057, + "logps/chosen": -122.03925323486328, + "logps/rejected": -249.9488525390625, + "loss": 0.0472, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8393964767456055, + "rewards/margins": 6.263261795043945, + "rewards/rejected": -11.102657318115234, + "step": 7626 + }, + { + "epoch": 1.19, + "learning_rate": 8.553383474246588e-06, + "logits/chosen": -2.5642387866973877, + "logits/rejected": -3.1577022075653076, + "logps/chosen": -76.8262939453125, + "logps/rejected": -361.4619445800781, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4777021408081055, + "rewards/margins": 7.7364702224731445, + "rewards/rejected": -11.21417236328125, + "step": 7627 + }, + { + "epoch": 1.19, + "learning_rate": 8.55265003371544e-06, + "logits/chosen": -2.8696303367614746, + "logits/rejected": -3.1373541355133057, + "logps/chosen": -114.01883697509766, + "logps/rejected": -242.30137634277344, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.034951210021973, + "rewards/margins": 6.077847957611084, + "rewards/rejected": -10.112798690795898, + "step": 7628 + }, + { + "epoch": 1.19, + "learning_rate": 8.551916593184294e-06, + "logits/chosen": -2.867971420288086, + "logits/rejected": -2.5966756343841553, + "logps/chosen": -129.29486083984375, + "logps/rejected": -166.5042724609375, + "loss": 1.9873, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.065463066101074, + "rewards/margins": 0.43611180782318115, + "rewards/rejected": -6.501574993133545, + "step": 7629 + }, + { + "epoch": 1.19, + "learning_rate": 8.551183152653146e-06, + "logits/chosen": -2.9396724700927734, + "logits/rejected": -2.4334535598754883, + "logps/chosen": -404.74737548828125, + "logps/rejected": -262.6141662597656, + "loss": 0.0365, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.74479079246521, + "rewards/margins": 4.309700012207031, + "rewards/rejected": -7.05449104309082, + "step": 7630 + }, + { + "epoch": 1.19, + "learning_rate": 8.550449712121998e-06, + "logits/chosen": -2.467137098312378, + "logits/rejected": -3.093920946121216, + "logps/chosen": -155.94175720214844, + "logps/rejected": -275.93865966796875, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.275547742843628, + "rewards/margins": 6.3984375, + "rewards/rejected": -8.673985481262207, + "step": 7631 + }, + { + "epoch": 1.19, + "learning_rate": 8.549716271590851e-06, + "logits/chosen": -2.975151538848877, + "logits/rejected": -2.815433979034424, + "logps/chosen": -66.26148986816406, + "logps/rejected": -169.87271118164062, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.469813585281372, + "rewards/margins": 7.220820426940918, + "rewards/rejected": -8.690633773803711, + "step": 7632 + }, + { + "epoch": 1.19, + "learning_rate": 8.548982831059703e-06, + "logits/chosen": -2.9765379428863525, + "logits/rejected": -2.88377046585083, + "logps/chosen": -646.8806762695312, + "logps/rejected": -599.9385986328125, + "loss": 0.0482, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.335253953933716, + "rewards/margins": 5.0362443923950195, + "rewards/rejected": -8.371498107910156, + "step": 7633 + }, + { + "epoch": 1.19, + "learning_rate": 8.548249390528555e-06, + "logits/chosen": -2.9942286014556885, + "logits/rejected": -2.1395652294158936, + "logps/chosen": -249.80628967285156, + "logps/rejected": -115.38734436035156, + "loss": 1.6608, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.079801559448242, + "rewards/margins": -1.443739414215088, + "rewards/rejected": -3.636061906814575, + "step": 7634 + }, + { + "epoch": 1.19, + "learning_rate": 8.547515949997407e-06, + "logits/chosen": -2.028108835220337, + "logits/rejected": -2.7417733669281006, + "logps/chosen": -137.6375274658203, + "logps/rejected": -369.6332092285156, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7542309761047363, + "rewards/margins": 9.892818450927734, + "rewards/rejected": -13.647050857543945, + "step": 7635 + }, + { + "epoch": 1.19, + "learning_rate": 8.546782509466259e-06, + "logits/chosen": -2.8468189239501953, + "logits/rejected": -2.9454026222229004, + "logps/chosen": -186.55197143554688, + "logps/rejected": -201.87660217285156, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7979440689086914, + "rewards/margins": 6.200598239898682, + "rewards/rejected": -9.998541831970215, + "step": 7636 + }, + { + "epoch": 1.19, + "learning_rate": 8.54604906893511e-06, + "logits/chosen": -2.8643693923950195, + "logits/rejected": -2.5385398864746094, + "logps/chosen": -238.3031463623047, + "logps/rejected": -339.4329833984375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2860560417175293, + "rewards/margins": 7.814260959625244, + "rewards/rejected": -11.100317001342773, + "step": 7637 + }, + { + "epoch": 1.19, + "learning_rate": 8.545315628403964e-06, + "logits/chosen": -2.305608034133911, + "logits/rejected": -2.664842128753662, + "logps/chosen": -204.699951171875, + "logps/rejected": -235.78018188476562, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9084343910217285, + "rewards/margins": 8.306930541992188, + "rewards/rejected": -12.215364456176758, + "step": 7638 + }, + { + "epoch": 1.19, + "learning_rate": 8.544582187872816e-06, + "logits/chosen": -2.628243923187256, + "logits/rejected": -2.890805959701538, + "logps/chosen": -309.2910461425781, + "logps/rejected": -370.19329833984375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9994634389877319, + "rewards/margins": 7.176304340362549, + "rewards/rejected": -8.17576789855957, + "step": 7639 + }, + { + "epoch": 1.19, + "learning_rate": 8.543848747341668e-06, + "logits/chosen": -3.071401834487915, + "logits/rejected": -3.103943109512329, + "logps/chosen": -189.68121337890625, + "logps/rejected": -235.3890838623047, + "loss": 0.0361, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.415902614593506, + "rewards/margins": 3.8800902366638184, + "rewards/rejected": -7.295992851257324, + "step": 7640 + }, + { + "epoch": 1.19, + "learning_rate": 8.54311530681052e-06, + "logits/chosen": -2.659186363220215, + "logits/rejected": -2.955843210220337, + "logps/chosen": -155.72073364257812, + "logps/rejected": -259.3016357421875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7289005517959595, + "rewards/margins": 6.109147071838379, + "rewards/rejected": -7.838047027587891, + "step": 7641 + }, + { + "epoch": 1.19, + "learning_rate": 8.542381866279372e-06, + "logits/chosen": -2.3760814666748047, + "logits/rejected": -2.8122599124908447, + "logps/chosen": -53.523162841796875, + "logps/rejected": -150.122314453125, + "loss": 0.0541, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.249918460845947, + "rewards/margins": 3.4400827884674072, + "rewards/rejected": -7.690001487731934, + "step": 7642 + }, + { + "epoch": 1.19, + "learning_rate": 8.541648425748223e-06, + "logits/chosen": -1.9865977764129639, + "logits/rejected": -2.9880013465881348, + "logps/chosen": -206.32901000976562, + "logps/rejected": -466.20068359375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9681406021118164, + "rewards/margins": 6.397612571716309, + "rewards/rejected": -9.365753173828125, + "step": 7643 + }, + { + "epoch": 1.19, + "learning_rate": 8.540914985217075e-06, + "logits/chosen": -2.789320468902588, + "logits/rejected": -2.5286262035369873, + "logps/chosen": -453.72528076171875, + "logps/rejected": -501.95172119140625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4987624883651733, + "rewards/margins": 9.868901252746582, + "rewards/rejected": -11.367664337158203, + "step": 7644 + }, + { + "epoch": 1.19, + "learning_rate": 8.540181544685927e-06, + "logits/chosen": -1.4640787839889526, + "logits/rejected": -2.8111536502838135, + "logps/chosen": -230.820068359375, + "logps/rejected": -501.4930114746094, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.443833589553833, + "rewards/margins": 8.49614143371582, + "rewards/rejected": -9.93997573852539, + "step": 7645 + }, + { + "epoch": 1.19, + "learning_rate": 8.53944810415478e-06, + "logits/chosen": -2.486313581466675, + "logits/rejected": -2.9098973274230957, + "logps/chosen": -511.21563720703125, + "logps/rejected": -514.0907592773438, + "loss": 0.4593, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.362881660461426, + "rewards/margins": 2.0168349742889404, + "rewards/rejected": -6.379716396331787, + "step": 7646 + }, + { + "epoch": 1.19, + "learning_rate": 8.538714663623633e-06, + "logits/chosen": -2.2128312587738037, + "logits/rejected": -3.0276317596435547, + "logps/chosen": -205.30886840820312, + "logps/rejected": -469.07476806640625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0023033618927, + "rewards/margins": 7.713167190551758, + "rewards/rejected": -10.715471267700195, + "step": 7647 + }, + { + "epoch": 1.19, + "learning_rate": 8.537981223092485e-06, + "logits/chosen": -2.8063273429870605, + "logits/rejected": -1.6966222524642944, + "logps/chosen": -436.8997802734375, + "logps/rejected": -279.2056884765625, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.791360378265381, + "rewards/margins": 4.6946845054626465, + "rewards/rejected": -7.486044883728027, + "step": 7648 + }, + { + "epoch": 1.19, + "learning_rate": 8.537247782561336e-06, + "logits/chosen": -2.478118896484375, + "logits/rejected": -2.966675043106079, + "logps/chosen": -439.1587829589844, + "logps/rejected": -206.18472290039062, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0021843910217285, + "rewards/margins": 7.3852620124816895, + "rewards/rejected": -8.387446403503418, + "step": 7649 + }, + { + "epoch": 1.19, + "learning_rate": 8.536514342030188e-06, + "logits/chosen": -2.477085590362549, + "logits/rejected": -2.897773504257202, + "logps/chosen": -143.74700927734375, + "logps/rejected": -275.47784423828125, + "loss": 0.0417, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.709857940673828, + "rewards/margins": 5.367037296295166, + "rewards/rejected": -9.076895713806152, + "step": 7650 + }, + { + "epoch": 1.19, + "learning_rate": 8.53578090149904e-06, + "logits/chosen": -2.9262611865997314, + "logits/rejected": -2.897047996520996, + "logps/chosen": -250.7801513671875, + "logps/rejected": -282.5086669921875, + "loss": 0.1197, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.715038299560547, + "rewards/margins": 2.1258978843688965, + "rewards/rejected": -5.840936183929443, + "step": 7651 + }, + { + "epoch": 1.19, + "learning_rate": 8.535047460967892e-06, + "logits/chosen": -2.8385121822357178, + "logits/rejected": -2.43807315826416, + "logps/chosen": -132.16200256347656, + "logps/rejected": -222.33721923828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1487793922424316, + "rewards/margins": 7.6260986328125, + "rewards/rejected": -9.774877548217773, + "step": 7652 + }, + { + "epoch": 1.19, + "learning_rate": 8.534314020436744e-06, + "logits/chosen": -2.6757102012634277, + "logits/rejected": -3.2108254432678223, + "logps/chosen": -98.62147521972656, + "logps/rejected": -252.14999389648438, + "loss": 1.8482, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9380829334259033, + "rewards/margins": 0.999693751335144, + "rewards/rejected": -4.937776565551758, + "step": 7653 + }, + { + "epoch": 1.19, + "learning_rate": 8.533580579905596e-06, + "logits/chosen": -2.4701571464538574, + "logits/rejected": -3.146399974822998, + "logps/chosen": -537.7642211914062, + "logps/rejected": -499.84832763671875, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.259829521179199, + "rewards/margins": 7.266880035400391, + "rewards/rejected": -12.526710510253906, + "step": 7654 + }, + { + "epoch": 1.19, + "learning_rate": 8.53284713937445e-06, + "logits/chosen": -1.6860002279281616, + "logits/rejected": -2.6900503635406494, + "logps/chosen": -130.78997802734375, + "logps/rejected": -405.26708984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0286996364593506, + "rewards/margins": 10.899251937866211, + "rewards/rejected": -13.92795181274414, + "step": 7655 + }, + { + "epoch": 1.19, + "learning_rate": 8.532113698843301e-06, + "logits/chosen": -2.8192155361175537, + "logits/rejected": -2.3741648197174072, + "logps/chosen": -192.7296142578125, + "logps/rejected": -259.7147521972656, + "loss": 0.5941, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.259654521942139, + "rewards/margins": 1.9598296880722046, + "rewards/rejected": -6.219484329223633, + "step": 7656 + }, + { + "epoch": 1.19, + "learning_rate": 8.531380258312153e-06, + "logits/chosen": -2.8219945430755615, + "logits/rejected": -2.6314594745635986, + "logps/chosen": -255.72021484375, + "logps/rejected": -270.1666564941406, + "loss": 0.0771, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.002314567565918, + "rewards/margins": 2.888455629348755, + "rewards/rejected": -6.890770435333252, + "step": 7657 + }, + { + "epoch": 1.19, + "learning_rate": 8.530646817781005e-06, + "logits/chosen": -2.8527708053588867, + "logits/rejected": -2.113981008529663, + "logps/chosen": -277.6645202636719, + "logps/rejected": -288.93353271484375, + "loss": 0.2002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2000131607055664, + "rewards/margins": 4.9753499031066895, + "rewards/rejected": -8.175363540649414, + "step": 7658 + }, + { + "epoch": 1.19, + "learning_rate": 8.529913377249857e-06, + "logits/chosen": -2.7677371501922607, + "logits/rejected": -3.072970390319824, + "logps/chosen": -260.07110595703125, + "logps/rejected": -438.7002258300781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2160768508911133, + "rewards/margins": 8.816067695617676, + "rewards/rejected": -11.032144546508789, + "step": 7659 + }, + { + "epoch": 1.19, + "learning_rate": 8.529179936718709e-06, + "logits/chosen": -2.89032244682312, + "logits/rejected": -3.171982765197754, + "logps/chosen": -94.2301025390625, + "logps/rejected": -305.411865234375, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.712538957595825, + "rewards/margins": 4.612758636474609, + "rewards/rejected": -7.3252973556518555, + "step": 7660 + }, + { + "epoch": 1.19, + "learning_rate": 8.52844649618756e-06, + "logits/chosen": -2.532386064529419, + "logits/rejected": -3.062217950820923, + "logps/chosen": -98.21778869628906, + "logps/rejected": -362.37353515625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.920227527618408, + "rewards/margins": 7.259580612182617, + "rewards/rejected": -10.179807662963867, + "step": 7661 + }, + { + "epoch": 1.19, + "learning_rate": 8.527713055656413e-06, + "logits/chosen": -2.915938138961792, + "logits/rejected": -2.2106921672821045, + "logps/chosen": -211.75540161132812, + "logps/rejected": -135.07669067382812, + "loss": 0.9991, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3328857421875, + "rewards/margins": 0.8148010969161987, + "rewards/rejected": -4.147686958312988, + "step": 7662 + }, + { + "epoch": 1.19, + "learning_rate": 8.526979615125264e-06, + "logits/chosen": -1.6759603023529053, + "logits/rejected": -2.7063887119293213, + "logps/chosen": -247.50071716308594, + "logps/rejected": -537.86083984375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3664116859436035, + "rewards/margins": 7.769537925720215, + "rewards/rejected": -12.135950088500977, + "step": 7663 + }, + { + "epoch": 1.19, + "learning_rate": 8.526246174594118e-06, + "logits/chosen": -2.8016998767852783, + "logits/rejected": -1.7103807926177979, + "logps/chosen": -469.76055908203125, + "logps/rejected": -289.8814697265625, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.342916965484619, + "rewards/margins": 5.610631942749023, + "rewards/rejected": -9.9535493850708, + "step": 7664 + }, + { + "epoch": 1.19, + "learning_rate": 8.52551273406297e-06, + "logits/chosen": -2.7279467582702637, + "logits/rejected": -2.8729138374328613, + "logps/chosen": -80.1728515625, + "logps/rejected": -251.8251953125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3697004318237305, + "rewards/margins": 6.6940155029296875, + "rewards/rejected": -11.063715934753418, + "step": 7665 + }, + { + "epoch": 1.19, + "learning_rate": 8.524779293531823e-06, + "logits/chosen": -3.0271880626678467, + "logits/rejected": -3.303849458694458, + "logps/chosen": -86.1323471069336, + "logps/rejected": -168.7381134033203, + "loss": 0.0342, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5481789112091064, + "rewards/margins": 3.375659227371216, + "rewards/rejected": -5.923838138580322, + "step": 7666 + }, + { + "epoch": 1.19, + "learning_rate": 8.524045853000675e-06, + "logits/chosen": -1.6342986822128296, + "logits/rejected": -2.795842170715332, + "logps/chosen": -333.35260009765625, + "logps/rejected": -542.365966796875, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.079789638519287, + "rewards/margins": 6.75401496887207, + "rewards/rejected": -8.8338041305542, + "step": 7667 + }, + { + "epoch": 1.19, + "learning_rate": 8.523312412469527e-06, + "logits/chosen": -2.53222393989563, + "logits/rejected": -2.895789384841919, + "logps/chosen": -94.7821044921875, + "logps/rejected": -262.604736328125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.761514186859131, + "rewards/margins": 7.061979293823242, + "rewards/rejected": -10.823493957519531, + "step": 7668 + }, + { + "epoch": 1.19, + "learning_rate": 8.522578971938379e-06, + "logits/chosen": -2.5586867332458496, + "logits/rejected": -3.1653995513916016, + "logps/chosen": -532.234130859375, + "logps/rejected": -523.525390625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2444820404052734, + "rewards/margins": 7.548625946044922, + "rewards/rejected": -10.793107986450195, + "step": 7669 + }, + { + "epoch": 1.19, + "learning_rate": 8.521845531407231e-06, + "logits/chosen": -0.6126477718353271, + "logits/rejected": -1.9149171113967896, + "logps/chosen": -210.3189697265625, + "logps/rejected": -530.5819702148438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.432635545730591, + "rewards/margins": 11.577567100524902, + "rewards/rejected": -15.010202407836914, + "step": 7670 + }, + { + "epoch": 1.19, + "learning_rate": 8.521112090876083e-06, + "logits/chosen": -1.9604148864746094, + "logits/rejected": -2.734355926513672, + "logps/chosen": -184.8014678955078, + "logps/rejected": -406.65771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1249631643295288, + "rewards/margins": 11.120574951171875, + "rewards/rejected": -12.245538711547852, + "step": 7671 + }, + { + "epoch": 1.19, + "learning_rate": 8.520378650344935e-06, + "logits/chosen": -2.1649935245513916, + "logits/rejected": -3.042835235595703, + "logps/chosen": -184.22927856445312, + "logps/rejected": -413.8841552734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8991384506225586, + "rewards/margins": 7.944360733032227, + "rewards/rejected": -10.843498229980469, + "step": 7672 + }, + { + "epoch": 1.19, + "learning_rate": 8.519645209813788e-06, + "logits/chosen": -2.6994638442993164, + "logits/rejected": -2.87312650680542, + "logps/chosen": -93.07821655273438, + "logps/rejected": -298.35211181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.190883159637451, + "rewards/margins": 9.96231746673584, + "rewards/rejected": -12.153200149536133, + "step": 7673 + }, + { + "epoch": 1.19, + "learning_rate": 8.51891176928264e-06, + "logits/chosen": -3.1520631313323975, + "logits/rejected": -2.8465237617492676, + "logps/chosen": -119.85456848144531, + "logps/rejected": -195.641357421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4833741188049316, + "rewards/margins": 10.166816711425781, + "rewards/rejected": -12.650190353393555, + "step": 7674 + }, + { + "epoch": 1.19, + "learning_rate": 8.518178328751492e-06, + "logits/chosen": -1.3608499765396118, + "logits/rejected": -2.6363580226898193, + "logps/chosen": -73.582763671875, + "logps/rejected": -191.19825744628906, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.41485071182251, + "rewards/margins": 5.442923545837402, + "rewards/rejected": -9.85777473449707, + "step": 7675 + }, + { + "epoch": 1.19, + "learning_rate": 8.517444888220344e-06, + "logits/chosen": -1.7739180326461792, + "logits/rejected": -2.602323532104492, + "logps/chosen": -211.78269958496094, + "logps/rejected": -356.27618408203125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.030076503753662, + "rewards/margins": 6.967439651489258, + "rewards/rejected": -9.997515678405762, + "step": 7676 + }, + { + "epoch": 1.19, + "learning_rate": 8.516711447689196e-06, + "logits/chosen": -2.935880422592163, + "logits/rejected": -2.479698419570923, + "logps/chosen": -654.9339599609375, + "logps/rejected": -445.431640625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.327394962310791, + "rewards/margins": 5.756974220275879, + "rewards/rejected": -8.084369659423828, + "step": 7677 + }, + { + "epoch": 1.19, + "learning_rate": 8.515978007158048e-06, + "logits/chosen": -2.7372210025787354, + "logits/rejected": -2.87265944480896, + "logps/chosen": -315.70611572265625, + "logps/rejected": -351.529052734375, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.75592041015625, + "rewards/margins": 5.742426872253418, + "rewards/rejected": -9.498347282409668, + "step": 7678 + }, + { + "epoch": 1.19, + "learning_rate": 8.5152445666269e-06, + "logits/chosen": -2.9067635536193848, + "logits/rejected": -3.0558438301086426, + "logps/chosen": -734.485107421875, + "logps/rejected": -819.3126220703125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.130286455154419, + "rewards/margins": 9.045555114746094, + "rewards/rejected": -12.17584228515625, + "step": 7679 + }, + { + "epoch": 1.19, + "learning_rate": 8.514511126095751e-06, + "logits/chosen": -1.1327732801437378, + "logits/rejected": -2.4863381385803223, + "logps/chosen": -259.89404296875, + "logps/rejected": -373.6573791503906, + "loss": 1.0261, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.674405097961426, + "rewards/margins": 3.475703716278076, + "rewards/rejected": -8.150108337402344, + "step": 7680 + }, + { + "epoch": 1.19, + "learning_rate": 8.513777685564603e-06, + "logits/chosen": -3.0708765983581543, + "logits/rejected": -2.94640851020813, + "logps/chosen": -324.73089599609375, + "logps/rejected": -185.29946899414062, + "loss": 2.0387, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.1389780044555664, + "rewards/margins": 1.6961288452148438, + "rewards/rejected": -4.83510684967041, + "step": 7681 + }, + { + "epoch": 1.19, + "learning_rate": 8.513044245033457e-06, + "logits/chosen": -2.3264453411102295, + "logits/rejected": -2.8205277919769287, + "logps/chosen": -140.63633728027344, + "logps/rejected": -278.17730712890625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.67875599861145, + "rewards/margins": 7.371757507324219, + "rewards/rejected": -11.050514221191406, + "step": 7682 + }, + { + "epoch": 1.19, + "learning_rate": 8.512310804502309e-06, + "logits/chosen": -1.1416724920272827, + "logits/rejected": -2.936737298965454, + "logps/chosen": -327.40087890625, + "logps/rejected": -584.9421997070312, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.547537326812744, + "rewards/margins": 7.932977676391602, + "rewards/rejected": -12.480514526367188, + "step": 7683 + }, + { + "epoch": 1.2, + "learning_rate": 8.51157736397116e-06, + "logits/chosen": -1.175175666809082, + "logits/rejected": -2.834113359451294, + "logps/chosen": -140.06112670898438, + "logps/rejected": -471.48223876953125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7686004638671875, + "rewards/margins": 6.267782688140869, + "rewards/rejected": -10.036382675170898, + "step": 7684 + }, + { + "epoch": 1.2, + "learning_rate": 8.510843923440013e-06, + "logits/chosen": -2.527266263961792, + "logits/rejected": -2.711608409881592, + "logps/chosen": -177.3484649658203, + "logps/rejected": -273.2294006347656, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.699911117553711, + "rewards/margins": 7.466320037841797, + "rewards/rejected": -10.166231155395508, + "step": 7685 + }, + { + "epoch": 1.2, + "learning_rate": 8.510110482908864e-06, + "logits/chosen": -2.94687557220459, + "logits/rejected": -2.885894536972046, + "logps/chosen": -110.97208404541016, + "logps/rejected": -119.47554016113281, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.167795658111572, + "rewards/margins": 5.209517478942871, + "rewards/rejected": -9.377313613891602, + "step": 7686 + }, + { + "epoch": 1.2, + "learning_rate": 8.509377042377716e-06, + "logits/chosen": -2.8257179260253906, + "logits/rejected": -2.2180073261260986, + "logps/chosen": -165.21517944335938, + "logps/rejected": -178.28236389160156, + "loss": 1.8917, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.456269264221191, + "rewards/margins": 2.889348268508911, + "rewards/rejected": -9.345617294311523, + "step": 7687 + }, + { + "epoch": 1.2, + "learning_rate": 8.508643601846568e-06, + "logits/chosen": -2.4106087684631348, + "logits/rejected": -3.056793451309204, + "logps/chosen": -295.01812744140625, + "logps/rejected": -603.5737915039062, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.982903480529785, + "rewards/margins": 3.678621768951416, + "rewards/rejected": -6.661524772644043, + "step": 7688 + }, + { + "epoch": 1.2, + "learning_rate": 8.50791016131542e-06, + "logits/chosen": -2.5218496322631836, + "logits/rejected": -2.9677696228027344, + "logps/chosen": -326.46502685546875, + "logps/rejected": -325.1148681640625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5856354236602783, + "rewards/margins": 6.667802333831787, + "rewards/rejected": -8.253437995910645, + "step": 7689 + }, + { + "epoch": 1.2, + "learning_rate": 8.507176720784272e-06, + "logits/chosen": -3.0120275020599365, + "logits/rejected": -2.118175745010376, + "logps/chosen": -419.5330810546875, + "logps/rejected": -273.1208801269531, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6474380493164062, + "rewards/margins": 6.976261615753174, + "rewards/rejected": -9.623699188232422, + "step": 7690 + }, + { + "epoch": 1.2, + "learning_rate": 8.506443280253125e-06, + "logits/chosen": -1.4416955709457397, + "logits/rejected": -2.8902735710144043, + "logps/chosen": -194.80743408203125, + "logps/rejected": -363.08251953125, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.408722400665283, + "rewards/margins": 5.620382308959961, + "rewards/rejected": -9.029104232788086, + "step": 7691 + }, + { + "epoch": 1.2, + "learning_rate": 8.505709839721977e-06, + "logits/chosen": -1.8737105131149292, + "logits/rejected": -3.05070161819458, + "logps/chosen": -115.41651153564453, + "logps/rejected": -352.02618408203125, + "loss": 0.0154, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.519212245941162, + "rewards/margins": 4.414161682128906, + "rewards/rejected": -7.933374404907227, + "step": 7692 + }, + { + "epoch": 1.2, + "learning_rate": 8.50497639919083e-06, + "logits/chosen": -3.078369617462158, + "logits/rejected": -3.0787370204925537, + "logps/chosen": -109.21806335449219, + "logps/rejected": -293.8759460449219, + "loss": 0.0493, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1645121574401855, + "rewards/margins": 5.365383625030518, + "rewards/rejected": -9.529895782470703, + "step": 7693 + }, + { + "epoch": 1.2, + "learning_rate": 8.504242958659681e-06, + "logits/chosen": -2.649733304977417, + "logits/rejected": -1.7077271938323975, + "logps/chosen": -322.48516845703125, + "logps/rejected": -289.4791259765625, + "loss": 0.2648, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.628659248352051, + "rewards/margins": 3.7965190410614014, + "rewards/rejected": -7.425178050994873, + "step": 7694 + }, + { + "epoch": 1.2, + "learning_rate": 8.503509518128533e-06, + "logits/chosen": -2.528797149658203, + "logits/rejected": -2.925143241882324, + "logps/chosen": -250.862548828125, + "logps/rejected": -334.2613830566406, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.352062225341797, + "rewards/margins": 4.192183017730713, + "rewards/rejected": -7.544245719909668, + "step": 7695 + }, + { + "epoch": 1.2, + "learning_rate": 8.502776077597385e-06, + "logits/chosen": -3.080942153930664, + "logits/rejected": -1.9625499248504639, + "logps/chosen": -897.0062255859375, + "logps/rejected": -389.4600830078125, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8688628673553467, + "rewards/margins": 5.1339616775512695, + "rewards/rejected": -7.002824783325195, + "step": 7696 + }, + { + "epoch": 1.2, + "learning_rate": 8.502042637066237e-06, + "logits/chosen": -2.4455318450927734, + "logits/rejected": -2.004316568374634, + "logps/chosen": -166.46087646484375, + "logps/rejected": -266.00054931640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8330235481262207, + "rewards/margins": 11.118648529052734, + "rewards/rejected": -13.951671600341797, + "step": 7697 + }, + { + "epoch": 1.2, + "learning_rate": 8.50130919653509e-06, + "logits/chosen": -2.936640739440918, + "logits/rejected": -1.0314890146255493, + "logps/chosen": -376.64202880859375, + "logps/rejected": -189.44544982910156, + "loss": 0.0336, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6088314056396484, + "rewards/margins": 5.874051094055176, + "rewards/rejected": -8.482882499694824, + "step": 7698 + }, + { + "epoch": 1.2, + "learning_rate": 8.500575756003942e-06, + "logits/chosen": -2.654214859008789, + "logits/rejected": -2.9798996448516846, + "logps/chosen": -34.974117279052734, + "logps/rejected": -173.54754638671875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.339799165725708, + "rewards/margins": 5.923488140106201, + "rewards/rejected": -8.263287544250488, + "step": 7699 + }, + { + "epoch": 1.2, + "learning_rate": 8.499842315472796e-06, + "logits/chosen": -1.5079971551895142, + "logits/rejected": -2.58617901802063, + "logps/chosen": -170.95159912109375, + "logps/rejected": -451.9417419433594, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3372600078582764, + "rewards/margins": 8.387043952941895, + "rewards/rejected": -11.72430419921875, + "step": 7700 + }, + { + "epoch": 1.2, + "learning_rate": 8.499108874941648e-06, + "logits/chosen": -2.227309465408325, + "logits/rejected": -2.1316561698913574, + "logps/chosen": -239.8379669189453, + "logps/rejected": -536.3013916015625, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.130723476409912, + "rewards/margins": 7.9669189453125, + "rewards/rejected": -12.09764289855957, + "step": 7701 + }, + { + "epoch": 1.2, + "learning_rate": 8.4983754344105e-06, + "logits/chosen": -2.608751058578491, + "logits/rejected": -2.9960806369781494, + "logps/chosen": -29.057212829589844, + "logps/rejected": -134.72055053710938, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0004539489746094, + "rewards/margins": 5.8578925132751465, + "rewards/rejected": -7.858346462249756, + "step": 7702 + }, + { + "epoch": 1.2, + "learning_rate": 8.497641993879351e-06, + "logits/chosen": -1.6902029514312744, + "logits/rejected": -2.8873887062072754, + "logps/chosen": -147.86216735839844, + "logps/rejected": -293.3041687011719, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9181017875671387, + "rewards/margins": 6.087039947509766, + "rewards/rejected": -10.005141258239746, + "step": 7703 + }, + { + "epoch": 1.2, + "learning_rate": 8.496908553348203e-06, + "logits/chosen": -1.0842725038528442, + "logits/rejected": -2.7770349979400635, + "logps/chosen": -66.7596435546875, + "logps/rejected": -343.86480712890625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1145524978637695, + "rewards/margins": 6.594426155090332, + "rewards/rejected": -10.708978652954102, + "step": 7704 + }, + { + "epoch": 1.2, + "learning_rate": 8.496175112817055e-06, + "logits/chosen": -2.5052988529205322, + "logits/rejected": -3.05308198928833, + "logps/chosen": -114.54495239257812, + "logps/rejected": -419.2294006347656, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8797518014907837, + "rewards/margins": 6.716256141662598, + "rewards/rejected": -8.59600830078125, + "step": 7705 + }, + { + "epoch": 1.2, + "learning_rate": 8.495441672285907e-06, + "logits/chosen": -2.5947067737579346, + "logits/rejected": -3.0024852752685547, + "logps/chosen": -182.22030639648438, + "logps/rejected": -508.7667236328125, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.034928321838379, + "rewards/margins": 4.334343910217285, + "rewards/rejected": -10.369272232055664, + "step": 7706 + }, + { + "epoch": 1.2, + "learning_rate": 8.494708231754759e-06, + "logits/chosen": -2.9233899116516113, + "logits/rejected": -2.123166561126709, + "logps/chosen": -801.4989624023438, + "logps/rejected": -477.869384765625, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.477229356765747, + "rewards/margins": 5.404177665710449, + "rewards/rejected": -8.881406784057617, + "step": 7707 + }, + { + "epoch": 1.2, + "learning_rate": 8.49397479122361e-06, + "logits/chosen": -2.51605224609375, + "logits/rejected": -1.6613056659698486, + "logps/chosen": -384.0221862792969, + "logps/rejected": -398.54376220703125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.122915029525757, + "rewards/margins": 7.250757217407227, + "rewards/rejected": -10.373672485351562, + "step": 7708 + }, + { + "epoch": 1.2, + "learning_rate": 8.493241350692464e-06, + "logits/chosen": -1.7975471019744873, + "logits/rejected": -2.4477431774139404, + "logps/chosen": -198.18991088867188, + "logps/rejected": -314.3795166015625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.708634376525879, + "rewards/margins": 6.2967658042907715, + "rewards/rejected": -11.005399703979492, + "step": 7709 + }, + { + "epoch": 1.2, + "learning_rate": 8.492507910161316e-06, + "logits/chosen": -2.891651153564453, + "logits/rejected": -1.748321533203125, + "logps/chosen": -299.8660583496094, + "logps/rejected": -315.3316650390625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.183238983154297, + "rewards/margins": 9.72974967956543, + "rewards/rejected": -11.912988662719727, + "step": 7710 + }, + { + "epoch": 1.2, + "learning_rate": 8.491774469630168e-06, + "logits/chosen": -2.1339871883392334, + "logits/rejected": -2.970900058746338, + "logps/chosen": -383.4622802734375, + "logps/rejected": -485.77001953125, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.414768218994141, + "rewards/margins": 7.405376434326172, + "rewards/rejected": -11.820144653320312, + "step": 7711 + }, + { + "epoch": 1.2, + "learning_rate": 8.49104102909902e-06, + "logits/chosen": -1.028804063796997, + "logits/rejected": -2.282374858856201, + "logps/chosen": -146.484375, + "logps/rejected": -360.5115051269531, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7549571990966797, + "rewards/margins": 8.010594367980957, + "rewards/rejected": -10.765551567077637, + "step": 7712 + }, + { + "epoch": 1.2, + "learning_rate": 8.490307588567872e-06, + "logits/chosen": -2.154146194458008, + "logits/rejected": -2.721884250640869, + "logps/chosen": -220.25392150878906, + "logps/rejected": -179.84840393066406, + "loss": 0.5833, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.489526271820068, + "rewards/margins": 2.048535108566284, + "rewards/rejected": -6.538061618804932, + "step": 7713 + }, + { + "epoch": 1.2, + "learning_rate": 8.489574148036724e-06, + "logits/chosen": -2.896867036819458, + "logits/rejected": -2.1893796920776367, + "logps/chosen": -196.61550903320312, + "logps/rejected": -365.4359130859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34619140625, + "rewards/margins": 10.72622299194336, + "rewards/rejected": -11.07241439819336, + "step": 7714 + }, + { + "epoch": 1.2, + "learning_rate": 8.488840707505576e-06, + "logits/chosen": -1.7875057458877563, + "logits/rejected": -2.8869974613189697, + "logps/chosen": -101.64564514160156, + "logps/rejected": -347.3942565917969, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1862587928771973, + "rewards/margins": 6.473931312561035, + "rewards/rejected": -9.66019058227539, + "step": 7715 + }, + { + "epoch": 1.2, + "learning_rate": 8.488107266974428e-06, + "logits/chosen": -1.9967097043991089, + "logits/rejected": -3.09423828125, + "logps/chosen": -75.8502426147461, + "logps/rejected": -232.37887573242188, + "loss": 0.2981, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.161070823669434, + "rewards/margins": 3.7453463077545166, + "rewards/rejected": -8.906416893005371, + "step": 7716 + }, + { + "epoch": 1.2, + "learning_rate": 8.48737382644328e-06, + "logits/chosen": -1.316487193107605, + "logits/rejected": -2.5842092037200928, + "logps/chosen": -142.27210998535156, + "logps/rejected": -272.1444091796875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.994328022003174, + "rewards/margins": 5.990543365478516, + "rewards/rejected": -9.984870910644531, + "step": 7717 + }, + { + "epoch": 1.2, + "learning_rate": 8.486640385912133e-06, + "logits/chosen": -2.5747649669647217, + "logits/rejected": -2.1280157566070557, + "logps/chosen": -218.4490203857422, + "logps/rejected": -329.8897705078125, + "loss": 0.0344, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.152825832366943, + "rewards/margins": 5.7111945152282715, + "rewards/rejected": -9.864020347595215, + "step": 7718 + }, + { + "epoch": 1.2, + "learning_rate": 8.485906945380985e-06, + "logits/chosen": -2.802936315536499, + "logits/rejected": -2.6322827339172363, + "logps/chosen": -605.4417724609375, + "logps/rejected": -383.36419677734375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.735403537750244, + "rewards/margins": 6.6005120277404785, + "rewards/rejected": -9.335915565490723, + "step": 7719 + }, + { + "epoch": 1.2, + "learning_rate": 8.485173504849837e-06, + "logits/chosen": -1.8626134395599365, + "logits/rejected": -2.9840240478515625, + "logps/chosen": -122.47933959960938, + "logps/rejected": -296.3179931640625, + "loss": 0.3547, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.609331130981445, + "rewards/margins": 1.3384807109832764, + "rewards/rejected": -6.947812080383301, + "step": 7720 + }, + { + "epoch": 1.2, + "learning_rate": 8.484440064318689e-06, + "logits/chosen": -2.037280559539795, + "logits/rejected": -2.4599685668945312, + "logps/chosen": -88.6946792602539, + "logps/rejected": -351.19158935546875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4130101203918457, + "rewards/margins": 7.5368781089782715, + "rewards/rejected": -9.949888229370117, + "step": 7721 + }, + { + "epoch": 1.2, + "learning_rate": 8.48370662378754e-06, + "logits/chosen": -2.5949759483337402, + "logits/rejected": -2.878826379776001, + "logps/chosen": -160.00184631347656, + "logps/rejected": -257.3576965332031, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8983514308929443, + "rewards/margins": 7.280478477478027, + "rewards/rejected": -10.17883014678955, + "step": 7722 + }, + { + "epoch": 1.2, + "learning_rate": 8.482973183256392e-06, + "logits/chosen": -1.9293584823608398, + "logits/rejected": -2.5655181407928467, + "logps/chosen": -131.7235870361328, + "logps/rejected": -387.5029296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.707047462463379, + "rewards/margins": 9.016104698181152, + "rewards/rejected": -12.723152160644531, + "step": 7723 + }, + { + "epoch": 1.2, + "learning_rate": 8.482239742725244e-06, + "logits/chosen": -2.578456163406372, + "logits/rejected": -2.765512228012085, + "logps/chosen": -78.85066223144531, + "logps/rejected": -171.02613830566406, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.423962354660034, + "rewards/margins": 6.454256057739258, + "rewards/rejected": -9.878217697143555, + "step": 7724 + }, + { + "epoch": 1.2, + "learning_rate": 8.481506302194096e-06, + "logits/chosen": -1.3265329599380493, + "logits/rejected": -2.9646058082580566, + "logps/chosen": -143.0835418701172, + "logps/rejected": -759.0654907226562, + "loss": 0.6354, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.813464641571045, + "rewards/margins": 1.5702364444732666, + "rewards/rejected": -6.383701324462891, + "step": 7725 + }, + { + "epoch": 1.2, + "learning_rate": 8.480772861662948e-06, + "logits/chosen": -1.812469244003296, + "logits/rejected": -2.570730447769165, + "logps/chosen": -183.2821044921875, + "logps/rejected": -240.5537109375, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.212517738342285, + "rewards/margins": 5.816033363342285, + "rewards/rejected": -10.02855110168457, + "step": 7726 + }, + { + "epoch": 1.2, + "learning_rate": 8.480039421131802e-06, + "logits/chosen": -2.8789615631103516, + "logits/rejected": -1.692112922668457, + "logps/chosen": -382.5352783203125, + "logps/rejected": -218.98297119140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3150074481964111, + "rewards/margins": 9.96002197265625, + "rewards/rejected": -11.275029182434082, + "step": 7727 + }, + { + "epoch": 1.2, + "learning_rate": 8.479305980600653e-06, + "logits/chosen": -2.114525079727173, + "logits/rejected": -2.530221462249756, + "logps/chosen": -84.30913543701172, + "logps/rejected": -347.70452880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7568349838256836, + "rewards/margins": 10.689868927001953, + "rewards/rejected": -12.446704864501953, + "step": 7728 + }, + { + "epoch": 1.2, + "learning_rate": 8.478572540069505e-06, + "logits/chosen": -2.769113540649414, + "logits/rejected": -2.2116527557373047, + "logps/chosen": -538.5779418945312, + "logps/rejected": -498.1473083496094, + "loss": 0.0669, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.423404693603516, + "rewards/margins": 2.717617988586426, + "rewards/rejected": -9.141022682189941, + "step": 7729 + }, + { + "epoch": 1.2, + "learning_rate": 8.477839099538357e-06, + "logits/chosen": -2.9238693714141846, + "logits/rejected": -2.7672383785247803, + "logps/chosen": -241.08026123046875, + "logps/rejected": -310.84368896484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7101502418518066, + "rewards/margins": 9.844648361206055, + "rewards/rejected": -11.55479907989502, + "step": 7730 + }, + { + "epoch": 1.2, + "learning_rate": 8.477105659007209e-06, + "logits/chosen": -2.7350966930389404, + "logits/rejected": -2.3995745182037354, + "logps/chosen": -262.59442138671875, + "logps/rejected": -460.8431091308594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8221756219863892, + "rewards/margins": 9.35650634765625, + "rewards/rejected": -11.178682327270508, + "step": 7731 + }, + { + "epoch": 1.2, + "learning_rate": 8.476372218476063e-06, + "logits/chosen": -2.5740957260131836, + "logits/rejected": -2.6479668617248535, + "logps/chosen": -138.16700744628906, + "logps/rejected": -225.9901123046875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5408060550689697, + "rewards/margins": 7.134408950805664, + "rewards/rejected": -9.675214767456055, + "step": 7732 + }, + { + "epoch": 1.2, + "learning_rate": 8.475638777944915e-06, + "logits/chosen": -3.104466199874878, + "logits/rejected": -2.59515118598938, + "logps/chosen": -614.233154296875, + "logps/rejected": -365.219970703125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3703784942626953, + "rewards/margins": 7.494503974914551, + "rewards/rejected": -10.864882469177246, + "step": 7733 + }, + { + "epoch": 1.2, + "learning_rate": 8.474905337413766e-06, + "logits/chosen": -0.9077993035316467, + "logits/rejected": -2.166043519973755, + "logps/chosen": -225.5133056640625, + "logps/rejected": -673.620849609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3654847145080566, + "rewards/margins": 14.10175609588623, + "rewards/rejected": -17.467241287231445, + "step": 7734 + }, + { + "epoch": 1.2, + "learning_rate": 8.47417189688262e-06, + "logits/chosen": -2.2026872634887695, + "logits/rejected": -1.926504135131836, + "logps/chosen": -298.80126953125, + "logps/rejected": -499.81341552734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5042152404785156, + "rewards/margins": 11.563916206359863, + "rewards/rejected": -15.068132400512695, + "step": 7735 + }, + { + "epoch": 1.2, + "learning_rate": 8.473438456351472e-06, + "logits/chosen": -2.0009453296661377, + "logits/rejected": -2.438443422317505, + "logps/chosen": -187.11822509765625, + "logps/rejected": -345.731689453125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7573323249816895, + "rewards/margins": 8.681646347045898, + "rewards/rejected": -13.43897819519043, + "step": 7736 + }, + { + "epoch": 1.2, + "learning_rate": 8.472705015820324e-06, + "logits/chosen": -2.7182435989379883, + "logits/rejected": -2.195998191833496, + "logps/chosen": -224.799560546875, + "logps/rejected": -302.79473876953125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2168262004852295, + "rewards/margins": 7.9505934715271, + "rewards/rejected": -11.16741943359375, + "step": 7737 + }, + { + "epoch": 1.2, + "learning_rate": 8.471971575289176e-06, + "logits/chosen": -0.7441193461418152, + "logits/rejected": -1.6613718271255493, + "logps/chosen": -177.19528198242188, + "logps/rejected": -642.9208984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0882346630096436, + "rewards/margins": 13.983772277832031, + "rewards/rejected": -17.072006225585938, + "step": 7738 + }, + { + "epoch": 1.2, + "learning_rate": 8.471238134758028e-06, + "logits/chosen": -2.842625617980957, + "logits/rejected": -2.8831584453582764, + "logps/chosen": -365.78009033203125, + "logps/rejected": -331.54931640625, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.759591579437256, + "rewards/margins": 4.4073567390441895, + "rewards/rejected": -9.166948318481445, + "step": 7739 + }, + { + "epoch": 1.2, + "learning_rate": 8.47050469422688e-06, + "logits/chosen": -2.7349183559417725, + "logits/rejected": -3.0893733501434326, + "logps/chosen": -178.44720458984375, + "logps/rejected": -192.8412628173828, + "loss": 0.6749, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.680424690246582, + "rewards/margins": 3.202545642852783, + "rewards/rejected": -7.882970809936523, + "step": 7740 + }, + { + "epoch": 1.2, + "learning_rate": 8.469771253695731e-06, + "logits/chosen": -2.2978591918945312, + "logits/rejected": -3.1192967891693115, + "logps/chosen": -406.76416015625, + "logps/rejected": -626.01611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.884474277496338, + "rewards/margins": 10.099519729614258, + "rewards/rejected": -12.983993530273438, + "step": 7741 + }, + { + "epoch": 1.2, + "learning_rate": 8.469037813164583e-06, + "logits/chosen": -1.358055591583252, + "logits/rejected": -2.589024305343628, + "logps/chosen": -99.24726104736328, + "logps/rejected": -401.692626953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7980737686157227, + "rewards/margins": 8.336037635803223, + "rewards/rejected": -11.134111404418945, + "step": 7742 + }, + { + "epoch": 1.2, + "learning_rate": 8.468304372633435e-06, + "logits/chosen": -2.3395862579345703, + "logits/rejected": -2.755983829498291, + "logps/chosen": -517.4500732421875, + "logps/rejected": -532.1475830078125, + "loss": 0.6551, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7660605907440186, + "rewards/margins": 3.509425640106201, + "rewards/rejected": -6.275485992431641, + "step": 7743 + }, + { + "epoch": 1.2, + "learning_rate": 8.467570932102289e-06, + "logits/chosen": -2.522272825241089, + "logits/rejected": -2.7202515602111816, + "logps/chosen": -149.62307739257812, + "logps/rejected": -281.65435791015625, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5433189868927, + "rewards/margins": 6.583221435546875, + "rewards/rejected": -10.126540184020996, + "step": 7744 + }, + { + "epoch": 1.2, + "learning_rate": 8.46683749157114e-06, + "logits/chosen": -2.9287898540496826, + "logits/rejected": -2.5696444511413574, + "logps/chosen": -802.3955078125, + "logps/rejected": -653.7684326171875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.364717721939087, + "rewards/margins": 6.425571441650391, + "rewards/rejected": -9.790288925170898, + "step": 7745 + }, + { + "epoch": 1.2, + "learning_rate": 8.466104051039992e-06, + "logits/chosen": -2.7112374305725098, + "logits/rejected": -2.2487971782684326, + "logps/chosen": -256.5788269042969, + "logps/rejected": -335.88494873046875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4902632236480713, + "rewards/margins": 7.520934104919434, + "rewards/rejected": -10.011198043823242, + "step": 7746 + }, + { + "epoch": 1.2, + "learning_rate": 8.465370610508844e-06, + "logits/chosen": -2.847334861755371, + "logits/rejected": -2.1853692531585693, + "logps/chosen": -618.8101806640625, + "logps/rejected": -419.7045593261719, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3132195472717285, + "rewards/margins": 4.442163467407227, + "rewards/rejected": -8.755382537841797, + "step": 7747 + }, + { + "epoch": 1.2, + "learning_rate": 8.464637169977696e-06, + "logits/chosen": -2.5823395252227783, + "logits/rejected": -2.9594242572784424, + "logps/chosen": -148.2095947265625, + "logps/rejected": -326.1310729980469, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9405899047851562, + "rewards/margins": 10.219488143920898, + "rewards/rejected": -12.160078048706055, + "step": 7748 + }, + { + "epoch": 1.21, + "learning_rate": 8.463903729446548e-06, + "logits/chosen": -2.722026824951172, + "logits/rejected": -0.6520558595657349, + "logps/chosen": -237.13897705078125, + "logps/rejected": -113.44906616210938, + "loss": 0.0621, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.067711353302002, + "rewards/margins": 3.7961041927337646, + "rewards/rejected": -5.8638153076171875, + "step": 7749 + }, + { + "epoch": 1.21, + "learning_rate": 8.4631702889154e-06, + "logits/chosen": -2.371872663497925, + "logits/rejected": -2.033020496368408, + "logps/chosen": -193.12196350097656, + "logps/rejected": -143.88560485839844, + "loss": 2.6984, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.395498275756836, + "rewards/margins": 0.927802562713623, + "rewards/rejected": -7.323300838470459, + "step": 7750 + }, + { + "epoch": 1.21, + "learning_rate": 8.462436848384252e-06, + "logits/chosen": -1.9383070468902588, + "logits/rejected": -2.936189651489258, + "logps/chosen": -307.7285461425781, + "logps/rejected": -245.93051147460938, + "loss": 3.1823, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.120776176452637, + "rewards/margins": -0.8516683578491211, + "rewards/rejected": -6.269107818603516, + "step": 7751 + }, + { + "epoch": 1.21, + "learning_rate": 8.461703407853104e-06, + "logits/chosen": -1.683348298072815, + "logits/rejected": -2.574220895767212, + "logps/chosen": -119.61359405517578, + "logps/rejected": -363.3565368652344, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3219082355499268, + "rewards/margins": 7.0282745361328125, + "rewards/rejected": -10.35018253326416, + "step": 7752 + }, + { + "epoch": 1.21, + "learning_rate": 8.460969967321957e-06, + "logits/chosen": -2.8403100967407227, + "logits/rejected": -1.8246432542800903, + "logps/chosen": -911.5284423828125, + "logps/rejected": -372.48175048828125, + "loss": 0.1129, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.532031297683716, + "rewards/margins": 4.039392471313477, + "rewards/rejected": -6.571423530578613, + "step": 7753 + }, + { + "epoch": 1.21, + "learning_rate": 8.460236526790809e-06, + "logits/chosen": -2.7302260398864746, + "logits/rejected": -2.641523838043213, + "logps/chosen": -273.80035400390625, + "logps/rejected": -304.7378234863281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.415105104446411, + "rewards/margins": 8.664091110229492, + "rewards/rejected": -12.07919692993164, + "step": 7754 + }, + { + "epoch": 1.21, + "learning_rate": 8.459503086259661e-06, + "logits/chosen": -2.9177517890930176, + "logits/rejected": -2.9178085327148438, + "logps/chosen": -112.81159973144531, + "logps/rejected": -200.9864501953125, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8038105964660645, + "rewards/margins": 5.901719093322754, + "rewards/rejected": -9.70552921295166, + "step": 7755 + }, + { + "epoch": 1.21, + "learning_rate": 8.458769645728513e-06, + "logits/chosen": -2.5231056213378906, + "logits/rejected": -2.757970094680786, + "logps/chosen": -100.84706115722656, + "logps/rejected": -339.82293701171875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0259459018707275, + "rewards/margins": 10.898292541503906, + "rewards/rejected": -12.924238204956055, + "step": 7756 + }, + { + "epoch": 1.21, + "learning_rate": 8.458036205197365e-06, + "logits/chosen": -2.713071346282959, + "logits/rejected": -2.9067013263702393, + "logps/chosen": -68.45040893554688, + "logps/rejected": -230.0983123779297, + "loss": 0.028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.106799125671387, + "rewards/margins": 5.037047386169434, + "rewards/rejected": -9.14384651184082, + "step": 7757 + }, + { + "epoch": 1.21, + "learning_rate": 8.457302764666217e-06, + "logits/chosen": -2.2141330242156982, + "logits/rejected": -2.728848934173584, + "logps/chosen": -124.01842498779297, + "logps/rejected": -317.642822265625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.478093147277832, + "rewards/margins": 9.08302116394043, + "rewards/rejected": -11.561113357543945, + "step": 7758 + }, + { + "epoch": 1.21, + "learning_rate": 8.456569324135068e-06, + "logits/chosen": -1.6908113956451416, + "logits/rejected": -2.5761425495147705, + "logps/chosen": -142.18446350097656, + "logps/rejected": -303.44439697265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4255162477493286, + "rewards/margins": 9.310159683227539, + "rewards/rejected": -10.735675811767578, + "step": 7759 + }, + { + "epoch": 1.21, + "learning_rate": 8.45583588360392e-06, + "logits/chosen": -2.977882146835327, + "logits/rejected": -2.171405792236328, + "logps/chosen": -520.04248046875, + "logps/rejected": -437.35400390625, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.225205421447754, + "rewards/margins": 6.4079365730285645, + "rewards/rejected": -10.63314151763916, + "step": 7760 + }, + { + "epoch": 1.21, + "learning_rate": 8.455102443072772e-06, + "logits/chosen": -1.0045498609542847, + "logits/rejected": -2.4450345039367676, + "logps/chosen": -112.7109375, + "logps/rejected": -218.63278198242188, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.490006923675537, + "rewards/margins": 6.594303607940674, + "rewards/rejected": -10.084310531616211, + "step": 7761 + }, + { + "epoch": 1.21, + "learning_rate": 8.454369002541626e-06, + "logits/chosen": -2.3310439586639404, + "logits/rejected": -2.8025567531585693, + "logps/chosen": -177.88446044921875, + "logps/rejected": -365.2724914550781, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3495216369628906, + "rewards/margins": 8.059431076049805, + "rewards/rejected": -11.408952713012695, + "step": 7762 + }, + { + "epoch": 1.21, + "learning_rate": 8.453635562010478e-06, + "logits/chosen": -2.209383726119995, + "logits/rejected": -2.9291136264801025, + "logps/chosen": -41.34918975830078, + "logps/rejected": -235.66763305664062, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2558634281158447, + "rewards/margins": 6.794139385223389, + "rewards/rejected": -10.050003051757812, + "step": 7763 + }, + { + "epoch": 1.21, + "learning_rate": 8.45290212147933e-06, + "logits/chosen": -2.7875070571899414, + "logits/rejected": -3.002082586288452, + "logps/chosen": -99.72470092773438, + "logps/rejected": -328.9058532714844, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.539546966552734, + "rewards/margins": 7.960447311401367, + "rewards/rejected": -12.499994277954102, + "step": 7764 + }, + { + "epoch": 1.21, + "learning_rate": 8.452168680948181e-06, + "logits/chosen": -3.0319693088531494, + "logits/rejected": -3.0767929553985596, + "logps/chosen": -264.3887634277344, + "logps/rejected": -388.58465576171875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3938448429107666, + "rewards/margins": 7.636678695678711, + "rewards/rejected": -11.030523300170898, + "step": 7765 + }, + { + "epoch": 1.21, + "learning_rate": 8.451435240417035e-06, + "logits/chosen": -1.5296587944030762, + "logits/rejected": -2.649120330810547, + "logps/chosen": -133.4337158203125, + "logps/rejected": -517.1927490234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.674605369567871, + "rewards/margins": 10.60405158996582, + "rewards/rejected": -16.278656005859375, + "step": 7766 + }, + { + "epoch": 1.21, + "learning_rate": 8.450701799885887e-06, + "logits/chosen": -3.1738364696502686, + "logits/rejected": -3.1705214977264404, + "logps/chosen": -332.8609619140625, + "logps/rejected": -257.7677917480469, + "loss": 0.0596, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.681321620941162, + "rewards/margins": 6.2096757888793945, + "rewards/rejected": -10.890996932983398, + "step": 7767 + }, + { + "epoch": 1.21, + "learning_rate": 8.449968359354739e-06, + "logits/chosen": -2.5409958362579346, + "logits/rejected": -2.9134669303894043, + "logps/chosen": -203.73486328125, + "logps/rejected": -361.35693359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.03385066986084, + "rewards/margins": 8.705300331115723, + "rewards/rejected": -12.739151000976562, + "step": 7768 + }, + { + "epoch": 1.21, + "learning_rate": 8.44923491882359e-06, + "logits/chosen": -2.8925774097442627, + "logits/rejected": -3.1439268589019775, + "logps/chosen": -457.2641296386719, + "logps/rejected": -499.6639404296875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2796902656555176, + "rewards/margins": 7.096156597137451, + "rewards/rejected": -9.375846862792969, + "step": 7769 + }, + { + "epoch": 1.21, + "learning_rate": 8.448501478292443e-06, + "logits/chosen": -2.0460691452026367, + "logits/rejected": -2.6689956188201904, + "logps/chosen": -141.6725616455078, + "logps/rejected": -308.2503356933594, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.689496994018555, + "rewards/margins": 7.889431953430176, + "rewards/rejected": -12.578927993774414, + "step": 7770 + }, + { + "epoch": 1.21, + "learning_rate": 8.447768037761296e-06, + "logits/chosen": -1.231705665588379, + "logits/rejected": -2.492112874984741, + "logps/chosen": -82.24478912353516, + "logps/rejected": -274.58624267578125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.85291051864624, + "rewards/margins": 5.86250638961792, + "rewards/rejected": -10.71541690826416, + "step": 7771 + }, + { + "epoch": 1.21, + "learning_rate": 8.447034597230148e-06, + "logits/chosen": -2.7970798015594482, + "logits/rejected": -2.7051010131835938, + "logps/chosen": -321.88714599609375, + "logps/rejected": -216.45069885253906, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2253360748291016, + "rewards/margins": 5.002374172210693, + "rewards/rejected": -8.227709770202637, + "step": 7772 + }, + { + "epoch": 1.21, + "learning_rate": 8.446301156699e-06, + "logits/chosen": -2.992981195449829, + "logits/rejected": -3.1214213371276855, + "logps/chosen": -189.44677734375, + "logps/rejected": -308.3496398925781, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.636503219604492, + "rewards/margins": 6.147507667541504, + "rewards/rejected": -10.784010887145996, + "step": 7773 + }, + { + "epoch": 1.21, + "learning_rate": 8.445567716167852e-06, + "logits/chosen": -2.9713895320892334, + "logits/rejected": -2.87878680229187, + "logps/chosen": -803.0155029296875, + "logps/rejected": -593.32470703125, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.763559103012085, + "rewards/margins": 6.892816066741943, + "rewards/rejected": -9.656375885009766, + "step": 7774 + }, + { + "epoch": 1.21, + "learning_rate": 8.444834275636704e-06, + "logits/chosen": -2.9200217723846436, + "logits/rejected": -2.6640169620513916, + "logps/chosen": -182.60752868652344, + "logps/rejected": -249.45498657226562, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8876380920410156, + "rewards/margins": 7.957543849945068, + "rewards/rejected": -10.845182418823242, + "step": 7775 + }, + { + "epoch": 1.21, + "learning_rate": 8.444100835105555e-06, + "logits/chosen": -2.5970098972320557, + "logits/rejected": -2.295008420944214, + "logps/chosen": -257.3929443359375, + "logps/rejected": -368.3101501464844, + "loss": 1.1131, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.575260162353516, + "rewards/margins": 3.2921879291534424, + "rewards/rejected": -12.867448806762695, + "step": 7776 + }, + { + "epoch": 1.21, + "learning_rate": 8.443367394574407e-06, + "logits/chosen": -2.7253036499023438, + "logits/rejected": -3.060258388519287, + "logps/chosen": -354.8888244628906, + "logps/rejected": -491.44061279296875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.360318422317505, + "rewards/margins": 7.175197124481201, + "rewards/rejected": -9.535515785217285, + "step": 7777 + }, + { + "epoch": 1.21, + "learning_rate": 8.44263395404326e-06, + "logits/chosen": -2.7505383491516113, + "logits/rejected": -2.9367523193359375, + "logps/chosen": -124.33422088623047, + "logps/rejected": -293.0968017578125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.79549503326416, + "rewards/margins": 7.0142388343811035, + "rewards/rejected": -11.809734344482422, + "step": 7778 + }, + { + "epoch": 1.21, + "learning_rate": 8.441900513512111e-06, + "logits/chosen": -2.9146926403045654, + "logits/rejected": -2.016159772872925, + "logps/chosen": -683.302734375, + "logps/rejected": -314.7739562988281, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9494752883911133, + "rewards/margins": 7.46055793762207, + "rewards/rejected": -11.410032272338867, + "step": 7779 + }, + { + "epoch": 1.21, + "learning_rate": 8.441167072980965e-06, + "logits/chosen": -3.0876307487487793, + "logits/rejected": -2.8304717540740967, + "logps/chosen": -108.78144073486328, + "logps/rejected": -154.28152465820312, + "loss": 0.042, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9416937828063965, + "rewards/margins": 5.127956390380859, + "rewards/rejected": -8.069650650024414, + "step": 7780 + }, + { + "epoch": 1.21, + "learning_rate": 8.440433632449817e-06, + "logits/chosen": -2.5424063205718994, + "logits/rejected": -2.0153555870056152, + "logps/chosen": -120.76782989501953, + "logps/rejected": -297.76702880859375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7950170040130615, + "rewards/margins": 9.339317321777344, + "rewards/rejected": -13.134334564208984, + "step": 7781 + }, + { + "epoch": 1.21, + "learning_rate": 8.439700191918668e-06, + "logits/chosen": -2.845869541168213, + "logits/rejected": -2.782470703125, + "logps/chosen": -294.2981872558594, + "logps/rejected": -297.95819091796875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.861598014831543, + "rewards/margins": 6.801328182220459, + "rewards/rejected": -9.662925720214844, + "step": 7782 + }, + { + "epoch": 1.21, + "learning_rate": 8.43896675138752e-06, + "logits/chosen": -2.8996236324310303, + "logits/rejected": -3.030090093612671, + "logps/chosen": -417.513916015625, + "logps/rejected": -357.1024475097656, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.179753303527832, + "rewards/margins": 6.410247802734375, + "rewards/rejected": -10.590001106262207, + "step": 7783 + }, + { + "epoch": 1.21, + "learning_rate": 8.438233310856372e-06, + "logits/chosen": -2.231825590133667, + "logits/rejected": -2.785085439682007, + "logps/chosen": -279.5497741699219, + "logps/rejected": -609.8133544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9625864028930664, + "rewards/margins": 11.041181564331055, + "rewards/rejected": -14.003767013549805, + "step": 7784 + }, + { + "epoch": 1.21, + "learning_rate": 8.437499870325224e-06, + "logits/chosen": -0.9188721776008606, + "logits/rejected": -1.3229857683181763, + "logps/chosen": -256.6312255859375, + "logps/rejected": -425.47265625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.336277008056641, + "rewards/margins": 8.832782745361328, + "rewards/rejected": -13.169059753417969, + "step": 7785 + }, + { + "epoch": 1.21, + "learning_rate": 8.436766429794076e-06, + "logits/chosen": -2.8516855239868164, + "logits/rejected": -2.799116611480713, + "logps/chosen": -399.0508728027344, + "logps/rejected": -521.1737060546875, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0967743396759033, + "rewards/margins": 4.641394138336182, + "rewards/rejected": -7.738168716430664, + "step": 7786 + }, + { + "epoch": 1.21, + "learning_rate": 8.436032989262928e-06, + "logits/chosen": -2.899081230163574, + "logits/rejected": -3.029947519302368, + "logps/chosen": -184.8363037109375, + "logps/rejected": -254.38558959960938, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.677584171295166, + "rewards/margins": 7.016797065734863, + "rewards/rejected": -10.694380760192871, + "step": 7787 + }, + { + "epoch": 1.21, + "learning_rate": 8.43529954873178e-06, + "logits/chosen": -1.5383195877075195, + "logits/rejected": -2.659858465194702, + "logps/chosen": -308.7427673339844, + "logps/rejected": -541.0146484375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2306108474731445, + "rewards/margins": 7.9561614990234375, + "rewards/rejected": -11.186772346496582, + "step": 7788 + }, + { + "epoch": 1.21, + "learning_rate": 8.434566108200633e-06, + "logits/chosen": -1.092424750328064, + "logits/rejected": -3.0308032035827637, + "logps/chosen": -226.20863342285156, + "logps/rejected": -467.08477783203125, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.698163032531738, + "rewards/margins": 6.095452785491943, + "rewards/rejected": -10.793615341186523, + "step": 7789 + }, + { + "epoch": 1.21, + "learning_rate": 8.433832667669485e-06, + "logits/chosen": -2.228391647338867, + "logits/rejected": -2.7284276485443115, + "logps/chosen": -298.3780212402344, + "logps/rejected": -473.60296630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5194902420043945, + "rewards/margins": 9.361297607421875, + "rewards/rejected": -11.88078784942627, + "step": 7790 + }, + { + "epoch": 1.21, + "learning_rate": 8.433099227138337e-06, + "logits/chosen": -2.5333197116851807, + "logits/rejected": -2.945258140563965, + "logps/chosen": -268.5604248046875, + "logps/rejected": -293.3452453613281, + "loss": 1.3857, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.3989763259887695, + "rewards/margins": 0.6083365678787231, + "rewards/rejected": -6.007312774658203, + "step": 7791 + }, + { + "epoch": 1.21, + "learning_rate": 8.432365786607189e-06, + "logits/chosen": -2.0000367164611816, + "logits/rejected": -2.5144731998443604, + "logps/chosen": -239.1469268798828, + "logps/rejected": -346.61480712890625, + "loss": 0.0892, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6237688064575195, + "rewards/margins": 8.298047065734863, + "rewards/rejected": -13.921815872192383, + "step": 7792 + }, + { + "epoch": 1.21, + "learning_rate": 8.43163234607604e-06, + "logits/chosen": -3.047351360321045, + "logits/rejected": -3.1479952335357666, + "logps/chosen": -307.69384765625, + "logps/rejected": -285.9048156738281, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4665617942810059, + "rewards/margins": 8.127232551574707, + "rewards/rejected": -9.593793869018555, + "step": 7793 + }, + { + "epoch": 1.21, + "learning_rate": 8.430898905544893e-06, + "logits/chosen": -2.5591726303100586, + "logits/rejected": -1.6074138879776, + "logps/chosen": -354.189453125, + "logps/rejected": -318.7567138671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1479411125183105, + "rewards/margins": 8.087549209594727, + "rewards/rejected": -10.235490798950195, + "step": 7794 + }, + { + "epoch": 1.21, + "learning_rate": 8.430165465013745e-06, + "logits/chosen": -3.044255018234253, + "logits/rejected": -2.623617172241211, + "logps/chosen": -819.7540283203125, + "logps/rejected": -634.154296875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.743597388267517, + "rewards/margins": 7.751889228820801, + "rewards/rejected": -9.495487213134766, + "step": 7795 + }, + { + "epoch": 1.21, + "learning_rate": 8.429432024482596e-06, + "logits/chosen": -0.7648580074310303, + "logits/rejected": -1.9932669401168823, + "logps/chosen": -109.02267456054688, + "logps/rejected": -382.49847412109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.499966144561768, + "rewards/margins": 9.906378746032715, + "rewards/rejected": -14.40634536743164, + "step": 7796 + }, + { + "epoch": 1.21, + "learning_rate": 8.428698583951448e-06, + "logits/chosen": -3.05294132232666, + "logits/rejected": -1.9808040857315063, + "logps/chosen": -542.6968994140625, + "logps/rejected": -114.73941040039062, + "loss": 3.8184, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.755758762359619, + "rewards/margins": -0.9437527656555176, + "rewards/rejected": -6.812005996704102, + "step": 7797 + }, + { + "epoch": 1.21, + "learning_rate": 8.427965143420302e-06, + "logits/chosen": -2.4471487998962402, + "logits/rejected": -2.432614803314209, + "logps/chosen": -82.00984191894531, + "logps/rejected": -321.40704345703125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1114301681518555, + "rewards/margins": 7.804037570953369, + "rewards/rejected": -9.915468215942383, + "step": 7798 + }, + { + "epoch": 1.21, + "learning_rate": 8.427231702889154e-06, + "logits/chosen": -1.3553777933120728, + "logits/rejected": -2.4949264526367188, + "logps/chosen": -222.74227905273438, + "logps/rejected": -520.344970703125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.503836154937744, + "rewards/margins": 6.186192035675049, + "rewards/rejected": -11.690028190612793, + "step": 7799 + }, + { + "epoch": 1.21, + "learning_rate": 8.426498262358007e-06, + "logits/chosen": -1.3605256080627441, + "logits/rejected": -2.7030858993530273, + "logps/chosen": -105.20531463623047, + "logps/rejected": -309.74005126953125, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.367302894592285, + "rewards/margins": 5.370203018188477, + "rewards/rejected": -9.737505912780762, + "step": 7800 + }, + { + "epoch": 1.21, + "learning_rate": 8.42576482182686e-06, + "logits/chosen": -2.9144721031188965, + "logits/rejected": -2.3596534729003906, + "logps/chosen": -455.66119384765625, + "logps/rejected": -445.9273681640625, + "loss": 0.2145, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.443225383758545, + "rewards/margins": 4.520655632019043, + "rewards/rejected": -7.963881015777588, + "step": 7801 + }, + { + "epoch": 1.21, + "learning_rate": 8.425031381295711e-06, + "logits/chosen": -3.025709629058838, + "logits/rejected": -2.8760898113250732, + "logps/chosen": -193.58950805664062, + "logps/rejected": -351.8594970703125, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.156881809234619, + "rewards/margins": 6.631643295288086, + "rewards/rejected": -9.788524627685547, + "step": 7802 + }, + { + "epoch": 1.21, + "learning_rate": 8.424297940764563e-06, + "logits/chosen": -2.8758838176727295, + "logits/rejected": -2.4018607139587402, + "logps/chosen": -334.02838134765625, + "logps/rejected": -380.95703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.378373861312866, + "rewards/margins": 8.70468521118164, + "rewards/rejected": -12.083059310913086, + "step": 7803 + }, + { + "epoch": 1.21, + "learning_rate": 8.423564500233415e-06, + "logits/chosen": -1.9244868755340576, + "logits/rejected": -2.7604305744171143, + "logps/chosen": -132.02587890625, + "logps/rejected": -167.64382934570312, + "loss": 2.6068, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.5502729415893555, + "rewards/margins": -0.658115029335022, + "rewards/rejected": -5.892157554626465, + "step": 7804 + }, + { + "epoch": 1.21, + "learning_rate": 8.422831059702267e-06, + "logits/chosen": -2.0220096111297607, + "logits/rejected": -2.951822519302368, + "logps/chosen": -472.0972900390625, + "logps/rejected": -482.601318359375, + "loss": 1.7448, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.534625053405762, + "rewards/margins": 2.176976203918457, + "rewards/rejected": -9.711601257324219, + "step": 7805 + }, + { + "epoch": 1.21, + "learning_rate": 8.422097619171119e-06, + "logits/chosen": -2.6848909854888916, + "logits/rejected": -3.0167794227600098, + "logps/chosen": -193.28172302246094, + "logps/rejected": -122.39232635498047, + "loss": 0.3347, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.319854736328125, + "rewards/margins": 2.496673822402954, + "rewards/rejected": -5.8165283203125, + "step": 7806 + }, + { + "epoch": 1.21, + "learning_rate": 8.421364178639972e-06, + "logits/chosen": -1.5072401762008667, + "logits/rejected": -2.6353297233581543, + "logps/chosen": -100.41751861572266, + "logps/rejected": -406.4915466308594, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0837483406066895, + "rewards/margins": 8.486504554748535, + "rewards/rejected": -11.570253372192383, + "step": 7807 + }, + { + "epoch": 1.21, + "learning_rate": 8.420630738108824e-06, + "logits/chosen": -3.0669374465942383, + "logits/rejected": -2.350271224975586, + "logps/chosen": -263.7603759765625, + "logps/rejected": -294.84521484375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9363242983818054, + "rewards/margins": 9.469440460205078, + "rewards/rejected": -10.405765533447266, + "step": 7808 + }, + { + "epoch": 1.21, + "learning_rate": 8.419897297577676e-06, + "logits/chosen": -3.0976080894470215, + "logits/rejected": -1.6338380575180054, + "logps/chosen": -423.5378723144531, + "logps/rejected": -204.5599365234375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.593327760696411, + "rewards/margins": 6.556751251220703, + "rewards/rejected": -9.150078773498535, + "step": 7809 + }, + { + "epoch": 1.21, + "learning_rate": 8.419163857046528e-06, + "logits/chosen": -1.92889404296875, + "logits/rejected": -2.579557180404663, + "logps/chosen": -177.36508178710938, + "logps/rejected": -431.9569091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5448029041290283, + "rewards/margins": 11.257670402526855, + "rewards/rejected": -13.802473068237305, + "step": 7810 + }, + { + "epoch": 1.21, + "learning_rate": 8.41843041651538e-06, + "logits/chosen": -2.3426811695098877, + "logits/rejected": -2.8258566856384277, + "logps/chosen": -72.21064758300781, + "logps/rejected": -203.91506958007812, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.371624231338501, + "rewards/margins": 6.079162120819092, + "rewards/rejected": -8.450786590576172, + "step": 7811 + }, + { + "epoch": 1.21, + "learning_rate": 8.417696975984232e-06, + "logits/chosen": -2.3865010738372803, + "logits/rejected": -2.7931792736053467, + "logps/chosen": -154.78480529785156, + "logps/rejected": -337.2530517578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.562128782272339, + "rewards/margins": 9.521610260009766, + "rewards/rejected": -13.083740234375, + "step": 7812 + }, + { + "epoch": 1.22, + "learning_rate": 8.416963535453083e-06, + "logits/chosen": -2.4108798503875732, + "logits/rejected": -3.1015641689300537, + "logps/chosen": -72.109375, + "logps/rejected": -223.90072631835938, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.318329811096191, + "rewards/margins": 4.579181671142578, + "rewards/rejected": -8.89751148223877, + "step": 7813 + }, + { + "epoch": 1.22, + "learning_rate": 8.416230094921935e-06, + "logits/chosen": -1.5473793745040894, + "logits/rejected": -3.02363657951355, + "logps/chosen": -140.10284423828125, + "logps/rejected": -536.2732543945312, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.354278087615967, + "rewards/margins": 7.284047603607178, + "rewards/rejected": -11.638325691223145, + "step": 7814 + }, + { + "epoch": 1.22, + "learning_rate": 8.415496654390787e-06, + "logits/chosen": -2.914581537246704, + "logits/rejected": -2.3636860847473145, + "logps/chosen": -295.53668212890625, + "logps/rejected": -286.53350830078125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4675369262695312, + "rewards/margins": 7.221006393432617, + "rewards/rejected": -10.688543319702148, + "step": 7815 + }, + { + "epoch": 1.22, + "learning_rate": 8.41476321385964e-06, + "logits/chosen": -2.796574115753174, + "logits/rejected": -3.0625290870666504, + "logps/chosen": -182.03871154785156, + "logps/rejected": -340.6643371582031, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.854036331176758, + "rewards/margins": 7.385520935058594, + "rewards/rejected": -10.239557266235352, + "step": 7816 + }, + { + "epoch": 1.22, + "learning_rate": 8.414029773328493e-06, + "logits/chosen": -1.945235252380371, + "logits/rejected": -2.6699655055999756, + "logps/chosen": -163.76834106445312, + "logps/rejected": -348.2695007324219, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.938641309738159, + "rewards/margins": 6.503235816955566, + "rewards/rejected": -9.441877365112305, + "step": 7817 + }, + { + "epoch": 1.22, + "learning_rate": 8.413296332797345e-06, + "logits/chosen": -2.90725040435791, + "logits/rejected": -3.116478443145752, + "logps/chosen": -53.39888000488281, + "logps/rejected": -190.35150146484375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.612619876861572, + "rewards/margins": 6.50881814956665, + "rewards/rejected": -11.121438026428223, + "step": 7818 + }, + { + "epoch": 1.22, + "learning_rate": 8.412562892266196e-06, + "logits/chosen": -2.3825626373291016, + "logits/rejected": -2.3345630168914795, + "logps/chosen": -321.0041198730469, + "logps/rejected": -468.1298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5270737409591675, + "rewards/margins": 13.819072723388672, + "rewards/rejected": -14.346145629882812, + "step": 7819 + }, + { + "epoch": 1.22, + "learning_rate": 8.411829451735048e-06, + "logits/chosen": -1.4317257404327393, + "logits/rejected": -2.2077476978302, + "logps/chosen": -449.75225830078125, + "logps/rejected": -684.1468505859375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5511474609375, + "rewards/margins": 10.53373908996582, + "rewards/rejected": -15.08488655090332, + "step": 7820 + }, + { + "epoch": 1.22, + "learning_rate": 8.4110960112039e-06, + "logits/chosen": -2.2220616340637207, + "logits/rejected": -1.3277839422225952, + "logps/chosen": -689.733642578125, + "logps/rejected": -402.9647216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.075200080871582, + "rewards/margins": 10.494707107543945, + "rewards/rejected": -14.569907188415527, + "step": 7821 + }, + { + "epoch": 1.22, + "learning_rate": 8.410362570672752e-06, + "logits/chosen": -2.525273561477661, + "logits/rejected": -2.896225929260254, + "logps/chosen": -28.000289916992188, + "logps/rejected": -124.83721923828125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9846606254577637, + "rewards/margins": 6.974910736083984, + "rewards/rejected": -8.959571838378906, + "step": 7822 + }, + { + "epoch": 1.22, + "learning_rate": 8.409629130141604e-06, + "logits/chosen": -2.9823856353759766, + "logits/rejected": -1.8479033708572388, + "logps/chosen": -375.9527893066406, + "logps/rejected": -249.74008178710938, + "loss": 1.6819, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.347412109375, + "rewards/margins": 1.508573293685913, + "rewards/rejected": -7.855985641479492, + "step": 7823 + }, + { + "epoch": 1.22, + "learning_rate": 8.408895689610456e-06, + "logits/chosen": -2.8627657890319824, + "logits/rejected": -1.5756946802139282, + "logps/chosen": -230.13577270507812, + "logps/rejected": -278.8261413574219, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4967265129089355, + "rewards/margins": 5.895658493041992, + "rewards/rejected": -9.39238452911377, + "step": 7824 + }, + { + "epoch": 1.22, + "learning_rate": 8.40816224907931e-06, + "logits/chosen": -0.7063875198364258, + "logits/rejected": -2.8740382194519043, + "logps/chosen": -124.23600769042969, + "logps/rejected": -830.6214599609375, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.511246681213379, + "rewards/margins": 5.904802322387695, + "rewards/rejected": -10.416049003601074, + "step": 7825 + }, + { + "epoch": 1.22, + "learning_rate": 8.407428808548161e-06, + "logits/chosen": -2.000340700149536, + "logits/rejected": -2.6702072620391846, + "logps/chosen": -216.6121826171875, + "logps/rejected": -368.063720703125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4095988273620605, + "rewards/margins": 7.289658546447754, + "rewards/rejected": -10.699256896972656, + "step": 7826 + }, + { + "epoch": 1.22, + "learning_rate": 8.406695368017013e-06, + "logits/chosen": -1.74969482421875, + "logits/rejected": -1.9606252908706665, + "logps/chosen": -231.76107788085938, + "logps/rejected": -273.2707214355469, + "loss": 1.1324, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.414356231689453, + "rewards/margins": 3.248814582824707, + "rewards/rejected": -9.66317081451416, + "step": 7827 + }, + { + "epoch": 1.22, + "learning_rate": 8.405961927485865e-06, + "logits/chosen": -2.1590628623962402, + "logits/rejected": -2.9193592071533203, + "logps/chosen": -207.88140869140625, + "logps/rejected": -395.2786865234375, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2092037200927734, + "rewards/margins": 4.9701642990112305, + "rewards/rejected": -8.179368019104004, + "step": 7828 + }, + { + "epoch": 1.22, + "learning_rate": 8.405228486954717e-06, + "logits/chosen": -2.6750125885009766, + "logits/rejected": -2.3632254600524902, + "logps/chosen": -223.66445922851562, + "logps/rejected": -258.0840759277344, + "loss": 0.3601, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.610806941986084, + "rewards/margins": 2.668038845062256, + "rewards/rejected": -7.27884578704834, + "step": 7829 + }, + { + "epoch": 1.22, + "learning_rate": 8.404495046423569e-06, + "logits/chosen": -2.4927239418029785, + "logits/rejected": -3.1236414909362793, + "logps/chosen": -66.36356353759766, + "logps/rejected": -395.179443359375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1718902587890625, + "rewards/margins": 6.685003757476807, + "rewards/rejected": -9.856893539428711, + "step": 7830 + }, + { + "epoch": 1.22, + "learning_rate": 8.40376160589242e-06, + "logits/chosen": -1.8664300441741943, + "logits/rejected": -3.041635751724243, + "logps/chosen": -253.98614501953125, + "logps/rejected": -460.322021484375, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.71745491027832, + "rewards/margins": 5.317514419555664, + "rewards/rejected": -10.034969329833984, + "step": 7831 + }, + { + "epoch": 1.22, + "learning_rate": 8.403028165361274e-06, + "logits/chosen": -1.416304111480713, + "logits/rejected": -2.8321757316589355, + "logps/chosen": -255.01651000976562, + "logps/rejected": -422.107666015625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7888917922973633, + "rewards/margins": 8.975781440734863, + "rewards/rejected": -11.764673233032227, + "step": 7832 + }, + { + "epoch": 1.22, + "learning_rate": 8.402294724830126e-06, + "logits/chosen": -2.5546224117279053, + "logits/rejected": -2.8263723850250244, + "logps/chosen": -470.72833251953125, + "logps/rejected": -425.1611328125, + "loss": 0.2124, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.10327672958374, + "rewards/margins": 2.7192542552948, + "rewards/rejected": -7.822531223297119, + "step": 7833 + }, + { + "epoch": 1.22, + "learning_rate": 8.40156128429898e-06, + "logits/chosen": -1.5307859182357788, + "logits/rejected": -2.9368231296539307, + "logps/chosen": -150.32662963867188, + "logps/rejected": -505.8774719238281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9342739582061768, + "rewards/margins": 9.702925682067871, + "rewards/rejected": -11.637199401855469, + "step": 7834 + }, + { + "epoch": 1.22, + "learning_rate": 8.400827843767832e-06, + "logits/chosen": -3.1050920486450195, + "logits/rejected": -3.1539669036865234, + "logps/chosen": -96.60974884033203, + "logps/rejected": -188.52618408203125, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.220095157623291, + "rewards/margins": 5.505615234375, + "rewards/rejected": -9.725709915161133, + "step": 7835 + }, + { + "epoch": 1.22, + "learning_rate": 8.400094403236683e-06, + "logits/chosen": -3.0698060989379883, + "logits/rejected": -2.775404214859009, + "logps/chosen": -82.34721374511719, + "logps/rejected": -159.13165283203125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.969515323638916, + "rewards/margins": 7.356391906738281, + "rewards/rejected": -10.325906753540039, + "step": 7836 + }, + { + "epoch": 1.22, + "learning_rate": 8.399360962705535e-06, + "logits/chosen": -2.896409034729004, + "logits/rejected": -0.6947360634803772, + "logps/chosen": -904.0308227539062, + "logps/rejected": -391.48663330078125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.42360782623291, + "rewards/margins": 6.611410140991211, + "rewards/rejected": -10.035017013549805, + "step": 7837 + }, + { + "epoch": 1.22, + "learning_rate": 8.398627522174387e-06, + "logits/chosen": -1.3834730386734009, + "logits/rejected": -2.7384517192840576, + "logps/chosen": -186.98377990722656, + "logps/rejected": -445.1241455078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.528852939605713, + "rewards/margins": 8.373071670532227, + "rewards/rejected": -12.901924133300781, + "step": 7838 + }, + { + "epoch": 1.22, + "learning_rate": 8.397894081643239e-06, + "logits/chosen": -2.9324450492858887, + "logits/rejected": -3.041212320327759, + "logps/chosen": -128.42117309570312, + "logps/rejected": -262.0501708984375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.72156023979187, + "rewards/margins": 7.481101989746094, + "rewards/rejected": -11.202661514282227, + "step": 7839 + }, + { + "epoch": 1.22, + "learning_rate": 8.397160641112091e-06, + "logits/chosen": -2.5648744106292725, + "logits/rejected": -1.579738736152649, + "logps/chosen": -223.36044311523438, + "logps/rejected": -291.51568603515625, + "loss": 0.0353, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.524162769317627, + "rewards/margins": 6.200712203979492, + "rewards/rejected": -10.724875450134277, + "step": 7840 + }, + { + "epoch": 1.22, + "learning_rate": 8.396427200580943e-06, + "logits/chosen": -1.0574816465377808, + "logits/rejected": -2.655855655670166, + "logps/chosen": -287.527099609375, + "logps/rejected": -788.3131103515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.389701843261719, + "rewards/margins": 10.757818222045898, + "rewards/rejected": -15.147520065307617, + "step": 7841 + }, + { + "epoch": 1.22, + "learning_rate": 8.395693760049796e-06, + "logits/chosen": -2.8836474418640137, + "logits/rejected": -3.0590996742248535, + "logps/chosen": -380.60260009765625, + "logps/rejected": -314.72747802734375, + "loss": 2.7221, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.2096452713012695, + "rewards/margins": 1.407243251800537, + "rewards/rejected": -7.616888999938965, + "step": 7842 + }, + { + "epoch": 1.22, + "learning_rate": 8.394960319518648e-06, + "logits/chosen": -2.4831953048706055, + "logits/rejected": -2.883826494216919, + "logps/chosen": -183.756103515625, + "logps/rejected": -333.3453369140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6172866821289062, + "rewards/margins": 8.902495384216309, + "rewards/rejected": -11.519782066345215, + "step": 7843 + }, + { + "epoch": 1.22, + "learning_rate": 8.3942268789875e-06, + "logits/chosen": -2.4520328044891357, + "logits/rejected": -3.0853374004364014, + "logps/chosen": -337.9615478515625, + "logps/rejected": -319.00897216796875, + "loss": 0.7153, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9799699783325195, + "rewards/margins": 4.9328460693359375, + "rewards/rejected": -8.912816047668457, + "step": 7844 + }, + { + "epoch": 1.22, + "learning_rate": 8.393493438456352e-06, + "logits/chosen": -2.647172212600708, + "logits/rejected": -3.0898592472076416, + "logps/chosen": -735.2389526367188, + "logps/rejected": -905.8011474609375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.924687385559082, + "rewards/margins": 8.877116203308105, + "rewards/rejected": -11.801803588867188, + "step": 7845 + }, + { + "epoch": 1.22, + "learning_rate": 8.392759997925204e-06, + "logits/chosen": -2.7966156005859375, + "logits/rejected": -2.5431771278381348, + "logps/chosen": -189.62184143066406, + "logps/rejected": -330.0180969238281, + "loss": 0.7851, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.235518932342529, + "rewards/margins": 4.991808891296387, + "rewards/rejected": -11.227327346801758, + "step": 7846 + }, + { + "epoch": 1.22, + "learning_rate": 8.392026557394056e-06, + "logits/chosen": -2.5946974754333496, + "logits/rejected": -2.9659664630889893, + "logps/chosen": -100.7926254272461, + "logps/rejected": -232.54336547851562, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2824554443359375, + "rewards/margins": 4.460277557373047, + "rewards/rejected": -8.742733001708984, + "step": 7847 + }, + { + "epoch": 1.22, + "learning_rate": 8.391293116862908e-06, + "logits/chosen": -2.281053304672241, + "logits/rejected": -3.007812261581421, + "logps/chosen": -58.03189468383789, + "logps/rejected": -230.57119750976562, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.128845691680908, + "rewards/margins": 5.550727844238281, + "rewards/rejected": -8.679573059082031, + "step": 7848 + }, + { + "epoch": 1.22, + "learning_rate": 8.39055967633176e-06, + "logits/chosen": -2.4120981693267822, + "logits/rejected": -1.0826466083526611, + "logps/chosen": -379.4875793457031, + "logps/rejected": -129.41357421875, + "loss": 2.6562, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.669526100158691, + "rewards/margins": 0.4547111988067627, + "rewards/rejected": -8.124237060546875, + "step": 7849 + }, + { + "epoch": 1.22, + "learning_rate": 8.389826235800611e-06, + "logits/chosen": -1.0951673984527588, + "logits/rejected": -2.47644305229187, + "logps/chosen": -59.51203155517578, + "logps/rejected": -289.888671875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4990127086639404, + "rewards/margins": 5.696893692016602, + "rewards/rejected": -9.195906639099121, + "step": 7850 + }, + { + "epoch": 1.22, + "learning_rate": 8.389092795269465e-06, + "logits/chosen": -2.9275870323181152, + "logits/rejected": -2.7960219383239746, + "logps/chosen": -962.6632080078125, + "logps/rejected": -743.0604248046875, + "loss": 0.069, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.521365165710449, + "rewards/margins": 3.445343255996704, + "rewards/rejected": -7.966708660125732, + "step": 7851 + }, + { + "epoch": 1.22, + "learning_rate": 8.388359354738317e-06, + "logits/chosen": -2.7901344299316406, + "logits/rejected": -3.023463487625122, + "logps/chosen": -247.86451721191406, + "logps/rejected": -431.40386962890625, + "loss": 0.5989, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.466965675354004, + "rewards/margins": 1.8798773288726807, + "rewards/rejected": -9.346842765808105, + "step": 7852 + }, + { + "epoch": 1.22, + "learning_rate": 8.387625914207169e-06, + "logits/chosen": -2.9975063800811768, + "logits/rejected": -2.9150233268737793, + "logps/chosen": -276.6648254394531, + "logps/rejected": -247.7304229736328, + "loss": 0.8372, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.968395233154297, + "rewards/margins": 2.024282693862915, + "rewards/rejected": -7.992677688598633, + "step": 7853 + }, + { + "epoch": 1.22, + "learning_rate": 8.38689247367602e-06, + "logits/chosen": -1.7158201932907104, + "logits/rejected": -2.6518239974975586, + "logps/chosen": -187.41156005859375, + "logps/rejected": -362.1320495605469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.634387493133545, + "rewards/margins": 9.736170768737793, + "rewards/rejected": -12.37055778503418, + "step": 7854 + }, + { + "epoch": 1.22, + "learning_rate": 8.386159033144872e-06, + "logits/chosen": -2.89365291595459, + "logits/rejected": -2.978097677230835, + "logps/chosen": -33.97971725463867, + "logps/rejected": -282.66058349609375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.063835859298706, + "rewards/margins": 10.81712818145752, + "rewards/rejected": -12.880964279174805, + "step": 7855 + }, + { + "epoch": 1.22, + "learning_rate": 8.385425592613724e-06, + "logits/chosen": -2.524199962615967, + "logits/rejected": -2.6798923015594482, + "logps/chosen": -367.6640625, + "logps/rejected": -496.4913635253906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.882908582687378, + "rewards/margins": 8.937970161437988, + "rewards/rejected": -10.820878982543945, + "step": 7856 + }, + { + "epoch": 1.22, + "learning_rate": 8.384692152082576e-06, + "logits/chosen": -3.026468515396118, + "logits/rejected": -2.235900402069092, + "logps/chosen": -380.38201904296875, + "logps/rejected": -343.313720703125, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2243432998657227, + "rewards/margins": 5.931943416595459, + "rewards/rejected": -9.156286239624023, + "step": 7857 + }, + { + "epoch": 1.22, + "learning_rate": 8.383958711551428e-06, + "logits/chosen": -2.9534597396850586, + "logits/rejected": -2.7695980072021484, + "logps/chosen": -197.51458740234375, + "logps/rejected": -282.7950134277344, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.964151859283447, + "rewards/margins": 5.263686180114746, + "rewards/rejected": -10.227838516235352, + "step": 7858 + }, + { + "epoch": 1.22, + "learning_rate": 8.38322527102028e-06, + "logits/chosen": -2.887986183166504, + "logits/rejected": -2.499237060546875, + "logps/chosen": -637.9547729492188, + "logps/rejected": -435.9645690917969, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.124147415161133, + "rewards/margins": 6.94745397567749, + "rewards/rejected": -11.071600914001465, + "step": 7859 + }, + { + "epoch": 1.22, + "learning_rate": 8.382491830489134e-06, + "logits/chosen": -2.44832706451416, + "logits/rejected": -2.3117239475250244, + "logps/chosen": -162.53982543945312, + "logps/rejected": -260.4747009277344, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.892852306365967, + "rewards/margins": 6.746053218841553, + "rewards/rejected": -12.63890552520752, + "step": 7860 + }, + { + "epoch": 1.22, + "learning_rate": 8.381758389957985e-06, + "logits/chosen": -2.6240103244781494, + "logits/rejected": -2.976064443588257, + "logps/chosen": -71.6796875, + "logps/rejected": -390.5960998535156, + "loss": 0.0673, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.549093246459961, + "rewards/margins": 5.363093376159668, + "rewards/rejected": -10.912186622619629, + "step": 7861 + }, + { + "epoch": 1.22, + "learning_rate": 8.381024949426837e-06, + "logits/chosen": -0.9902950525283813, + "logits/rejected": -2.7529966831207275, + "logps/chosen": -204.3076934814453, + "logps/rejected": -505.384521484375, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.549231052398682, + "rewards/margins": 5.82200813293457, + "rewards/rejected": -10.371238708496094, + "step": 7862 + }, + { + "epoch": 1.22, + "learning_rate": 8.38029150889569e-06, + "logits/chosen": -3.0270144939422607, + "logits/rejected": -2.3256447315216064, + "logps/chosen": -576.751953125, + "logps/rejected": -896.4769287109375, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4913437366485596, + "rewards/margins": 7.0982561111450195, + "rewards/rejected": -9.589599609375, + "step": 7863 + }, + { + "epoch": 1.22, + "learning_rate": 8.379558068364541e-06, + "logits/chosen": -3.1016316413879395, + "logits/rejected": -2.7127575874328613, + "logps/chosen": -484.4085693359375, + "logps/rejected": -602.7012939453125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4440298080444336, + "rewards/margins": 6.795046806335449, + "rewards/rejected": -10.239076614379883, + "step": 7864 + }, + { + "epoch": 1.22, + "learning_rate": 8.378824627833393e-06, + "logits/chosen": -2.7604081630706787, + "logits/rejected": -2.892232894897461, + "logps/chosen": -85.22325897216797, + "logps/rejected": -211.6835174560547, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.297933578491211, + "rewards/margins": 7.604222297668457, + "rewards/rejected": -11.902154922485352, + "step": 7865 + }, + { + "epoch": 1.22, + "learning_rate": 8.378091187302247e-06, + "logits/chosen": -2.8853392601013184, + "logits/rejected": -1.8277146816253662, + "logps/chosen": -120.6190185546875, + "logps/rejected": -190.71435546875, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.261096954345703, + "rewards/margins": 5.884405136108398, + "rewards/rejected": -9.145502090454102, + "step": 7866 + }, + { + "epoch": 1.22, + "learning_rate": 8.377357746771098e-06, + "logits/chosen": -3.0909643173217773, + "logits/rejected": -2.737527847290039, + "logps/chosen": -121.69226837158203, + "logps/rejected": -161.14894104003906, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.714113235473633, + "rewards/margins": 8.12044906616211, + "rewards/rejected": -10.834562301635742, + "step": 7867 + }, + { + "epoch": 1.22, + "learning_rate": 8.37662430623995e-06, + "logits/chosen": -2.627274751663208, + "logits/rejected": -2.872817039489746, + "logps/chosen": -139.9450225830078, + "logps/rejected": -321.47039794921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.546476364135742, + "rewards/margins": 8.799308776855469, + "rewards/rejected": -13.345785140991211, + "step": 7868 + }, + { + "epoch": 1.22, + "learning_rate": 8.375890865708804e-06, + "logits/chosen": -2.686566114425659, + "logits/rejected": -2.861938953399658, + "logps/chosen": -537.150390625, + "logps/rejected": -634.2785034179688, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.617337703704834, + "rewards/margins": 4.044544696807861, + "rewards/rejected": -9.661882400512695, + "step": 7869 + }, + { + "epoch": 1.22, + "learning_rate": 8.375157425177656e-06, + "logits/chosen": -2.292051315307617, + "logits/rejected": -2.9839580059051514, + "logps/chosen": -160.89756774902344, + "logps/rejected": -286.2939758300781, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.830535888671875, + "rewards/margins": 4.417747497558594, + "rewards/rejected": -8.248283386230469, + "step": 7870 + }, + { + "epoch": 1.22, + "learning_rate": 8.374423984646508e-06, + "logits/chosen": -2.902888298034668, + "logits/rejected": -1.554739236831665, + "logps/chosen": -644.121337890625, + "logps/rejected": -420.6136474609375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9574586153030396, + "rewards/margins": 5.696268081665039, + "rewards/rejected": -7.653726577758789, + "step": 7871 + }, + { + "epoch": 1.22, + "learning_rate": 8.37369054411536e-06, + "logits/chosen": -1.3270376920700073, + "logits/rejected": -2.631258249282837, + "logps/chosen": -166.64791870117188, + "logps/rejected": -352.8558349609375, + "loss": 0.2424, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9910964965820312, + "rewards/margins": 3.3703057765960693, + "rewards/rejected": -7.36140251159668, + "step": 7872 + }, + { + "epoch": 1.22, + "learning_rate": 8.372957103584211e-06, + "logits/chosen": -2.226249933242798, + "logits/rejected": -2.792734384536743, + "logps/chosen": -267.06317138671875, + "logps/rejected": -427.8236999511719, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.320528507232666, + "rewards/margins": 8.486068725585938, + "rewards/rejected": -11.806596755981445, + "step": 7873 + }, + { + "epoch": 1.22, + "learning_rate": 8.372223663053063e-06, + "logits/chosen": -2.9069361686706543, + "logits/rejected": -2.8281021118164062, + "logps/chosen": -109.70906066894531, + "logps/rejected": -149.83245849609375, + "loss": 1.2908, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.047394752502441, + "rewards/margins": 2.8793046474456787, + "rewards/rejected": -7.926699638366699, + "step": 7874 + }, + { + "epoch": 1.22, + "learning_rate": 8.371490222521915e-06, + "logits/chosen": -0.7501401305198669, + "logits/rejected": -2.8532588481903076, + "logps/chosen": -102.14559936523438, + "logps/rejected": -513.78857421875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.159695625305176, + "rewards/margins": 7.667681694030762, + "rewards/rejected": -12.827377319335938, + "step": 7875 + }, + { + "epoch": 1.22, + "learning_rate": 8.370756781990767e-06, + "logits/chosen": -1.7907590866088867, + "logits/rejected": -2.6822190284729004, + "logps/chosen": -134.2323760986328, + "logps/rejected": -413.3145751953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.247982025146484, + "rewards/margins": 10.930776596069336, + "rewards/rejected": -15.17875862121582, + "step": 7876 + }, + { + "epoch": 1.23, + "learning_rate": 8.370023341459619e-06, + "logits/chosen": -2.408477306365967, + "logits/rejected": -2.7697949409484863, + "logps/chosen": -60.267574310302734, + "logps/rejected": -224.11544799804688, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7361514568328857, + "rewards/margins": 7.203888893127441, + "rewards/rejected": -10.940040588378906, + "step": 7877 + }, + { + "epoch": 1.23, + "learning_rate": 8.369289900928472e-06, + "logits/chosen": -2.533006429672241, + "logits/rejected": -3.0337328910827637, + "logps/chosen": -153.25772094726562, + "logps/rejected": -220.943115234375, + "loss": 1.8064, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.868023872375488, + "rewards/margins": -1.3310362100601196, + "rewards/rejected": -3.536987781524658, + "step": 7878 + }, + { + "epoch": 1.23, + "learning_rate": 8.368556460397324e-06, + "logits/chosen": -1.5744906663894653, + "logits/rejected": -2.6646153926849365, + "logps/chosen": -255.09713745117188, + "logps/rejected": -460.39044189453125, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.366151809692383, + "rewards/margins": 9.208003997802734, + "rewards/rejected": -12.574155807495117, + "step": 7879 + }, + { + "epoch": 1.23, + "learning_rate": 8.367823019866176e-06, + "logits/chosen": -2.961639165878296, + "logits/rejected": -2.5939929485321045, + "logps/chosen": -351.1800537109375, + "logps/rejected": -224.86045837402344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3986313343048096, + "rewards/margins": 9.233797073364258, + "rewards/rejected": -10.632427215576172, + "step": 7880 + }, + { + "epoch": 1.23, + "learning_rate": 8.367089579335028e-06, + "logits/chosen": -2.7582521438598633, + "logits/rejected": -2.82423734664917, + "logps/chosen": -169.90298461914062, + "logps/rejected": -224.31570434570312, + "loss": 1.0844, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.332156181335449, + "rewards/margins": 2.1709060668945312, + "rewards/rejected": -7.5030622482299805, + "step": 7881 + }, + { + "epoch": 1.23, + "learning_rate": 8.36635613880388e-06, + "logits/chosen": -2.649968147277832, + "logits/rejected": -2.980882406234741, + "logps/chosen": -247.90570068359375, + "logps/rejected": -216.1549835205078, + "loss": 0.0333, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.683945655822754, + "rewards/margins": 3.449842691421509, + "rewards/rejected": -6.133788108825684, + "step": 7882 + }, + { + "epoch": 1.23, + "learning_rate": 8.365622698272732e-06, + "logits/chosen": -2.798365354537964, + "logits/rejected": -1.5329595804214478, + "logps/chosen": -193.41635131835938, + "logps/rejected": -151.75746154785156, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5026350021362305, + "rewards/margins": 3.530231475830078, + "rewards/rejected": -6.032866477966309, + "step": 7883 + }, + { + "epoch": 1.23, + "learning_rate": 8.364889257741584e-06, + "logits/chosen": -2.8189120292663574, + "logits/rejected": -2.5973727703094482, + "logps/chosen": -120.96895599365234, + "logps/rejected": -204.00857543945312, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4836288690567017, + "rewards/margins": 7.2987470626831055, + "rewards/rejected": -8.782376289367676, + "step": 7884 + }, + { + "epoch": 1.23, + "learning_rate": 8.364155817210436e-06, + "logits/chosen": -2.516207456588745, + "logits/rejected": -3.1310958862304688, + "logps/chosen": -685.661376953125, + "logps/rejected": -660.3433837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1332643032073975, + "rewards/margins": 10.641523361206055, + "rewards/rejected": -12.774787902832031, + "step": 7885 + }, + { + "epoch": 1.23, + "learning_rate": 8.363422376679288e-06, + "logits/chosen": -2.825205087661743, + "logits/rejected": -2.7763030529022217, + "logps/chosen": -51.031524658203125, + "logps/rejected": -181.09075927734375, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3317975997924805, + "rewards/margins": 5.5486884117126465, + "rewards/rejected": -8.880486488342285, + "step": 7886 + }, + { + "epoch": 1.23, + "learning_rate": 8.362688936148141e-06, + "logits/chosen": -2.6081881523132324, + "logits/rejected": -2.9230687618255615, + "logps/chosen": -484.0845947265625, + "logps/rejected": -542.1455078125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.254409074783325, + "rewards/margins": 7.548093318939209, + "rewards/rejected": -9.802502632141113, + "step": 7887 + }, + { + "epoch": 1.23, + "learning_rate": 8.361955495616993e-06, + "logits/chosen": -2.933823585510254, + "logits/rejected": -2.2722318172454834, + "logps/chosen": -382.30755615234375, + "logps/rejected": -367.18682861328125, + "loss": 1.543, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.861914157867432, + "rewards/margins": 4.263617515563965, + "rewards/rejected": -10.125532150268555, + "step": 7888 + }, + { + "epoch": 1.23, + "learning_rate": 8.361222055085845e-06, + "logits/chosen": -3.030022382736206, + "logits/rejected": -2.483166217803955, + "logps/chosen": -897.0611572265625, + "logps/rejected": -712.1888427734375, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.761504888534546, + "rewards/margins": 4.0556111335754395, + "rewards/rejected": -7.817115783691406, + "step": 7889 + }, + { + "epoch": 1.23, + "learning_rate": 8.360488614554697e-06, + "logits/chosen": -1.9608546495437622, + "logits/rejected": -2.6037824153900146, + "logps/chosen": -278.31634521484375, + "logps/rejected": -631.117919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.247753858566284, + "rewards/margins": 13.029989242553711, + "rewards/rejected": -15.27774429321289, + "step": 7890 + }, + { + "epoch": 1.23, + "learning_rate": 8.359755174023549e-06, + "logits/chosen": -2.981722831726074, + "logits/rejected": -2.3849902153015137, + "logps/chosen": -571.0079345703125, + "logps/rejected": -418.6913146972656, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.09340763092041, + "rewards/margins": 5.94288444519043, + "rewards/rejected": -8.036291122436523, + "step": 7891 + }, + { + "epoch": 1.23, + "learning_rate": 8.3590217334924e-06, + "logits/chosen": -2.829627513885498, + "logits/rejected": -2.9944887161254883, + "logps/chosen": -121.34230041503906, + "logps/rejected": -198.23406982421875, + "loss": 0.0464, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.287428617477417, + "rewards/margins": 4.611891746520996, + "rewards/rejected": -7.899320602416992, + "step": 7892 + }, + { + "epoch": 1.23, + "learning_rate": 8.358288292961252e-06, + "logits/chosen": -1.7958827018737793, + "logits/rejected": -2.8552298545837402, + "logps/chosen": -526.7566528320312, + "logps/rejected": -616.9190063476562, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.246455192565918, + "rewards/margins": 6.5792317390441895, + "rewards/rejected": -12.82568645477295, + "step": 7893 + }, + { + "epoch": 1.23, + "learning_rate": 8.357554852430104e-06, + "logits/chosen": -2.8486392498016357, + "logits/rejected": -2.388230323791504, + "logps/chosen": -177.3043670654297, + "logps/rejected": -210.56912231445312, + "loss": 1.1103, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.966912269592285, + "rewards/margins": 3.846147060394287, + "rewards/rejected": -8.813058853149414, + "step": 7894 + }, + { + "epoch": 1.23, + "learning_rate": 8.356821411898956e-06, + "logits/chosen": -2.1163132190704346, + "logits/rejected": -3.023040771484375, + "logps/chosen": -51.706214904785156, + "logps/rejected": -307.353759765625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4399218559265137, + "rewards/margins": 6.799618721008301, + "rewards/rejected": -10.239540100097656, + "step": 7895 + }, + { + "epoch": 1.23, + "learning_rate": 8.35608797136781e-06, + "logits/chosen": -3.0326929092407227, + "logits/rejected": -2.8317699432373047, + "logps/chosen": -604.7366333007812, + "logps/rejected": -561.842041015625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1392836570739746, + "rewards/margins": 6.742685317993164, + "rewards/rejected": -9.881969451904297, + "step": 7896 + }, + { + "epoch": 1.23, + "learning_rate": 8.355354530836662e-06, + "logits/chosen": -2.1596710681915283, + "logits/rejected": -2.7105491161346436, + "logps/chosen": -141.1214599609375, + "logps/rejected": -450.31884765625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.034207820892334, + "rewards/margins": 6.164224624633789, + "rewards/rejected": -9.198432922363281, + "step": 7897 + }, + { + "epoch": 1.23, + "learning_rate": 8.354621090305513e-06, + "logits/chosen": -1.2968684434890747, + "logits/rejected": -2.199589490890503, + "logps/chosen": -55.85443878173828, + "logps/rejected": -308.1295166015625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.181858539581299, + "rewards/margins": 9.129372596740723, + "rewards/rejected": -13.311230659484863, + "step": 7898 + }, + { + "epoch": 1.23, + "learning_rate": 8.353887649774365e-06, + "logits/chosen": -2.468087911605835, + "logits/rejected": -1.6124415397644043, + "logps/chosen": -317.2522277832031, + "logps/rejected": -331.39483642578125, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.219694137573242, + "rewards/margins": 6.418906211853027, + "rewards/rejected": -10.638599395751953, + "step": 7899 + }, + { + "epoch": 1.23, + "learning_rate": 8.353154209243219e-06, + "logits/chosen": -1.608689785003662, + "logits/rejected": -2.706491470336914, + "logps/chosen": -86.26364135742188, + "logps/rejected": -227.03810119628906, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3157148361206055, + "rewards/margins": 6.838013172149658, + "rewards/rejected": -11.153728485107422, + "step": 7900 + }, + { + "epoch": 1.23, + "learning_rate": 8.35242076871207e-06, + "logits/chosen": -2.90191912651062, + "logits/rejected": -2.3302319049835205, + "logps/chosen": -611.9343872070312, + "logps/rejected": -440.40301513671875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.025109767913818, + "rewards/margins": 7.599897384643555, + "rewards/rejected": -11.625007629394531, + "step": 7901 + }, + { + "epoch": 1.23, + "learning_rate": 8.351687328180923e-06, + "logits/chosen": -2.138720750808716, + "logits/rejected": -2.941631317138672, + "logps/chosen": -538.7650146484375, + "logps/rejected": -498.19561767578125, + "loss": 0.1612, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.219060897827148, + "rewards/margins": 4.33796501159668, + "rewards/rejected": -10.557025909423828, + "step": 7902 + }, + { + "epoch": 1.23, + "learning_rate": 8.350953887649775e-06, + "logits/chosen": -2.7827205657958984, + "logits/rejected": -2.813249111175537, + "logps/chosen": -360.5762634277344, + "logps/rejected": -488.4031066894531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4409724473953247, + "rewards/margins": 11.900875091552734, + "rewards/rejected": -12.34184741973877, + "step": 7903 + }, + { + "epoch": 1.23, + "learning_rate": 8.350220447118626e-06, + "logits/chosen": -1.485561490058899, + "logits/rejected": -2.6053307056427, + "logps/chosen": -197.10391235351562, + "logps/rejected": -354.591064453125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.569361925125122, + "rewards/margins": 7.353281021118164, + "rewards/rejected": -10.922642707824707, + "step": 7904 + }, + { + "epoch": 1.23, + "learning_rate": 8.34948700658748e-06, + "logits/chosen": -2.816986083984375, + "logits/rejected": -2.8932955265045166, + "logps/chosen": -316.5393371582031, + "logps/rejected": -134.16653442382812, + "loss": 0.2974, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.259817600250244, + "rewards/margins": 3.6797516345977783, + "rewards/rejected": -6.939569473266602, + "step": 7905 + }, + { + "epoch": 1.23, + "learning_rate": 8.348753566056332e-06, + "logits/chosen": -2.8413350582122803, + "logits/rejected": -2.911914110183716, + "logps/chosen": -125.65630340576172, + "logps/rejected": -154.58596801757812, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.14650821685791, + "rewards/margins": 5.737303733825684, + "rewards/rejected": -11.883811950683594, + "step": 7906 + }, + { + "epoch": 1.23, + "learning_rate": 8.348020125525184e-06, + "logits/chosen": -2.9572622776031494, + "logits/rejected": -3.2616055011749268, + "logps/chosen": -189.4406280517578, + "logps/rejected": -264.35931396484375, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.411794185638428, + "rewards/margins": 5.063164710998535, + "rewards/rejected": -11.474958419799805, + "step": 7907 + }, + { + "epoch": 1.23, + "learning_rate": 8.347286684994036e-06, + "logits/chosen": -2.1679043769836426, + "logits/rejected": -2.704460382461548, + "logps/chosen": -390.5673828125, + "logps/rejected": -477.0882263183594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.901410102844238, + "rewards/margins": 8.983935356140137, + "rewards/rejected": -13.885345458984375, + "step": 7908 + }, + { + "epoch": 1.23, + "learning_rate": 8.346553244462887e-06, + "logits/chosen": -2.533381938934326, + "logits/rejected": -2.743162155151367, + "logps/chosen": -116.23915100097656, + "logps/rejected": -209.95559692382812, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.389130115509033, + "rewards/margins": 4.343286037445068, + "rewards/rejected": -7.732416152954102, + "step": 7909 + }, + { + "epoch": 1.23, + "learning_rate": 8.34581980393174e-06, + "logits/chosen": -2.9046311378479004, + "logits/rejected": -3.0229175090789795, + "logps/chosen": -106.53988647460938, + "logps/rejected": -288.06231689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6371989250183105, + "rewards/margins": 9.13712215423584, + "rewards/rejected": -12.774320602416992, + "step": 7910 + }, + { + "epoch": 1.23, + "learning_rate": 8.345086363400591e-06, + "logits/chosen": -2.686065196990967, + "logits/rejected": -2.9983789920806885, + "logps/chosen": -177.66165161132812, + "logps/rejected": -449.6203918457031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.86409854888916, + "rewards/margins": 10.109915733337402, + "rewards/rejected": -14.974014282226562, + "step": 7911 + }, + { + "epoch": 1.23, + "learning_rate": 8.344352922869443e-06, + "logits/chosen": -2.817298412322998, + "logits/rejected": -2.5710220336914062, + "logps/chosen": -188.91726684570312, + "logps/rejected": -218.8733367919922, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2087838649749756, + "rewards/margins": 5.767078399658203, + "rewards/rejected": -7.975862503051758, + "step": 7912 + }, + { + "epoch": 1.23, + "learning_rate": 8.343619482338295e-06, + "logits/chosen": -3.052523374557495, + "logits/rejected": -2.9349911212921143, + "logps/chosen": -51.62651062011719, + "logps/rejected": -118.49655151367188, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2642831802368164, + "rewards/margins": 5.282788276672363, + "rewards/rejected": -8.54707145690918, + "step": 7913 + }, + { + "epoch": 1.23, + "learning_rate": 8.342886041807149e-06, + "logits/chosen": -2.5307466983795166, + "logits/rejected": -2.6339128017425537, + "logps/chosen": -180.5706024169922, + "logps/rejected": -277.1784362792969, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2068209648132324, + "rewards/margins": 7.576029300689697, + "rewards/rejected": -10.78285026550293, + "step": 7914 + }, + { + "epoch": 1.23, + "learning_rate": 8.342152601276e-06, + "logits/chosen": -1.5070643424987793, + "logits/rejected": -2.741217851638794, + "logps/chosen": -212.87738037109375, + "logps/rejected": -416.3616638183594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.880284070968628, + "rewards/margins": 7.982267379760742, + "rewards/rejected": -11.862550735473633, + "step": 7915 + }, + { + "epoch": 1.23, + "learning_rate": 8.341419160744852e-06, + "logits/chosen": -2.1742281913757324, + "logits/rejected": -2.88199520111084, + "logps/chosen": -561.3821411132812, + "logps/rejected": -680.9273681640625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.316667079925537, + "rewards/margins": 8.483589172363281, + "rewards/rejected": -11.800256729125977, + "step": 7916 + }, + { + "epoch": 1.23, + "learning_rate": 8.340685720213704e-06, + "logits/chosen": -1.1837657690048218, + "logits/rejected": -2.612752676010132, + "logps/chosen": -182.45681762695312, + "logps/rejected": -553.630126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6899514198303223, + "rewards/margins": 10.59178352355957, + "rewards/rejected": -13.28173542022705, + "step": 7917 + }, + { + "epoch": 1.23, + "learning_rate": 8.339952279682556e-06, + "logits/chosen": -2.821392774581909, + "logits/rejected": -2.9661667346954346, + "logps/chosen": -149.08184814453125, + "logps/rejected": -247.51303100585938, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.111618995666504, + "rewards/margins": 5.529880523681641, + "rewards/rejected": -8.641499519348145, + "step": 7918 + }, + { + "epoch": 1.23, + "learning_rate": 8.339218839151408e-06, + "logits/chosen": -2.192990303039551, + "logits/rejected": -2.9789743423461914, + "logps/chosen": -219.98289489746094, + "logps/rejected": -379.6886901855469, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0194783210754395, + "rewards/margins": 8.72118091583252, + "rewards/rejected": -12.740659713745117, + "step": 7919 + }, + { + "epoch": 1.23, + "learning_rate": 8.33848539862026e-06, + "logits/chosen": -1.6268799304962158, + "logits/rejected": -2.8476450443267822, + "logps/chosen": -423.87725830078125, + "logps/rejected": -659.0059814453125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7026185989379883, + "rewards/margins": 7.657121181488037, + "rewards/rejected": -11.359739303588867, + "step": 7920 + }, + { + "epoch": 1.23, + "learning_rate": 8.337751958089112e-06, + "logits/chosen": -2.760129928588867, + "logits/rejected": -2.993344306945801, + "logps/chosen": -551.9230346679688, + "logps/rejected": -540.2976684570312, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4287428855895996, + "rewards/margins": 8.669519424438477, + "rewards/rejected": -11.098261833190918, + "step": 7921 + }, + { + "epoch": 1.23, + "learning_rate": 8.337018517557965e-06, + "logits/chosen": -1.1196327209472656, + "logits/rejected": -2.6320323944091797, + "logps/chosen": -122.73081970214844, + "logps/rejected": -522.9052124023438, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6093268394470215, + "rewards/margins": 8.505802154541016, + "rewards/rejected": -14.115129470825195, + "step": 7922 + }, + { + "epoch": 1.23, + "learning_rate": 8.336285077026817e-06, + "logits/chosen": -2.037015914916992, + "logits/rejected": -2.617690086364746, + "logps/chosen": -295.749267578125, + "logps/rejected": -534.0327758789062, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.053121089935303, + "rewards/margins": 8.38144302368164, + "rewards/rejected": -12.434564590454102, + "step": 7923 + }, + { + "epoch": 1.23, + "learning_rate": 8.335551636495669e-06, + "logits/chosen": -2.487832546234131, + "logits/rejected": -2.7568743228912354, + "logps/chosen": -274.7177429199219, + "logps/rejected": -342.8128662109375, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4664757251739502, + "rewards/margins": 5.174319267272949, + "rewards/rejected": -6.64079475402832, + "step": 7924 + }, + { + "epoch": 1.23, + "learning_rate": 8.334818195964521e-06, + "logits/chosen": -1.6815590858459473, + "logits/rejected": -3.0066730976104736, + "logps/chosen": -130.47573852539062, + "logps/rejected": -292.70709228515625, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.053139686584473, + "rewards/margins": 5.611213684082031, + "rewards/rejected": -10.66435432434082, + "step": 7925 + }, + { + "epoch": 1.23, + "learning_rate": 8.334084755433373e-06, + "logits/chosen": -2.8099067211151123, + "logits/rejected": -2.3448758125305176, + "logps/chosen": -492.33343505859375, + "logps/rejected": -673.76513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3349761962890625, + "rewards/margins": 11.602527618408203, + "rewards/rejected": -12.937503814697266, + "step": 7926 + }, + { + "epoch": 1.23, + "learning_rate": 8.333351314902225e-06, + "logits/chosen": -2.500504493713379, + "logits/rejected": -3.1090879440307617, + "logps/chosen": -44.228065490722656, + "logps/rejected": -309.9624938964844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4951038360595703, + "rewards/margins": 8.881898880004883, + "rewards/rejected": -12.377002716064453, + "step": 7927 + }, + { + "epoch": 1.23, + "learning_rate": 8.332617874371077e-06, + "logits/chosen": -2.720364809036255, + "logits/rejected": -2.1299924850463867, + "logps/chosen": -357.0429992675781, + "logps/rejected": -290.5816345214844, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.780890464782715, + "rewards/margins": 7.384379863739014, + "rewards/rejected": -11.16526985168457, + "step": 7928 + }, + { + "epoch": 1.23, + "learning_rate": 8.331884433839928e-06, + "logits/chosen": -3.013218641281128, + "logits/rejected": -2.780452251434326, + "logps/chosen": -481.4291076660156, + "logps/rejected": -415.52435302734375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8499174118041992, + "rewards/margins": 7.967257976531982, + "rewards/rejected": -9.817174911499023, + "step": 7929 + }, + { + "epoch": 1.23, + "learning_rate": 8.33115099330878e-06, + "logits/chosen": -2.194441795349121, + "logits/rejected": -2.850557565689087, + "logps/chosen": -139.13316345214844, + "logps/rejected": -329.7664794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3014400005340576, + "rewards/margins": 9.826024055480957, + "rewards/rejected": -12.127464294433594, + "step": 7930 + }, + { + "epoch": 1.23, + "learning_rate": 8.330417552777634e-06, + "logits/chosen": -2.940741539001465, + "logits/rejected": -2.716151237487793, + "logps/chosen": -202.63511657714844, + "logps/rejected": -349.4258117675781, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.502682685852051, + "rewards/margins": 5.223882675170898, + "rewards/rejected": -8.72656536102295, + "step": 7931 + }, + { + "epoch": 1.23, + "learning_rate": 8.329684112246486e-06, + "logits/chosen": -2.862734317779541, + "logits/rejected": -2.2338547706604004, + "logps/chosen": -290.3799133300781, + "logps/rejected": -230.670654296875, + "loss": 0.1326, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.815188407897949, + "rewards/margins": 4.901597499847412, + "rewards/rejected": -7.716785907745361, + "step": 7932 + }, + { + "epoch": 1.23, + "learning_rate": 8.328950671715338e-06, + "logits/chosen": -2.7525179386138916, + "logits/rejected": -2.9847545623779297, + "logps/chosen": -93.53814697265625, + "logps/rejected": -260.4903869628906, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.156473636627197, + "rewards/margins": 7.0933637619018555, + "rewards/rejected": -11.249837875366211, + "step": 7933 + }, + { + "epoch": 1.23, + "learning_rate": 8.328217231184191e-06, + "logits/chosen": -1.366802453994751, + "logits/rejected": -2.8272416591644287, + "logps/chosen": -191.48330688476562, + "logps/rejected": -462.44091796875, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.112748146057129, + "rewards/margins": 5.357616424560547, + "rewards/rejected": -11.470364570617676, + "step": 7934 + }, + { + "epoch": 1.23, + "learning_rate": 8.327483790653043e-06, + "logits/chosen": -2.9355554580688477, + "logits/rejected": -2.6127243041992188, + "logps/chosen": -243.35647583007812, + "logps/rejected": -389.4407958984375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9715394973754883, + "rewards/margins": 5.962338924407959, + "rewards/rejected": -8.933877944946289, + "step": 7935 + }, + { + "epoch": 1.23, + "learning_rate": 8.326750350121895e-06, + "logits/chosen": -2.2673261165618896, + "logits/rejected": -2.7446510791778564, + "logps/chosen": -248.4854736328125, + "logps/rejected": -340.24578857421875, + "loss": 0.1868, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.45095157623291, + "rewards/margins": 5.18727970123291, + "rewards/rejected": -11.63823127746582, + "step": 7936 + }, + { + "epoch": 1.23, + "learning_rate": 8.326016909590747e-06, + "logits/chosen": -1.026137351989746, + "logits/rejected": -2.445361614227295, + "logps/chosen": -109.31573486328125, + "logps/rejected": -447.802978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.002769470214844, + "rewards/margins": 10.162521362304688, + "rewards/rejected": -14.165290832519531, + "step": 7937 + }, + { + "epoch": 1.23, + "learning_rate": 8.325283469059599e-06, + "logits/chosen": -2.994058609008789, + "logits/rejected": -2.360044002532959, + "logps/chosen": -257.16595458984375, + "logps/rejected": -291.9267578125, + "loss": 0.0964, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5841965675354004, + "rewards/margins": 3.297060489654541, + "rewards/rejected": -6.881257057189941, + "step": 7938 + }, + { + "epoch": 1.23, + "learning_rate": 8.32455002852845e-06, + "logits/chosen": -2.543076515197754, + "logits/rejected": -2.897982120513916, + "logps/chosen": -61.240394592285156, + "logps/rejected": -230.48779296875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1768736839294434, + "rewards/margins": 6.861587047576904, + "rewards/rejected": -10.038460731506348, + "step": 7939 + }, + { + "epoch": 1.23, + "learning_rate": 8.323816587997304e-06, + "logits/chosen": -1.4736829996109009, + "logits/rejected": -2.1292831897735596, + "logps/chosen": -253.2109375, + "logps/rejected": -339.796875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.458548545837402, + "rewards/margins": 6.329834938049316, + "rewards/rejected": -11.788383483886719, + "step": 7940 + }, + { + "epoch": 1.23, + "learning_rate": 8.323083147466156e-06, + "logits/chosen": -2.754159688949585, + "logits/rejected": -2.975497245788574, + "logps/chosen": -264.5966491699219, + "logps/rejected": -257.13232421875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6455893516540527, + "rewards/margins": 7.864040374755859, + "rewards/rejected": -11.50963020324707, + "step": 7941 + }, + { + "epoch": 1.24, + "learning_rate": 8.322349706935008e-06, + "logits/chosen": -1.7849868535995483, + "logits/rejected": -2.752671241760254, + "logps/chosen": -179.69346618652344, + "logps/rejected": -332.208251953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.090672969818115, + "rewards/margins": 9.274601936340332, + "rewards/rejected": -13.365274429321289, + "step": 7942 + }, + { + "epoch": 1.24, + "learning_rate": 8.32161626640386e-06, + "logits/chosen": -2.789182186126709, + "logits/rejected": -2.2155325412750244, + "logps/chosen": -210.61386108398438, + "logps/rejected": -335.1383056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.145346164703369, + "rewards/margins": 10.363260269165039, + "rewards/rejected": -13.508606910705566, + "step": 7943 + }, + { + "epoch": 1.24, + "learning_rate": 8.320882825872712e-06, + "logits/chosen": -2.1858127117156982, + "logits/rejected": -2.8711740970611572, + "logps/chosen": -251.9073486328125, + "logps/rejected": -473.92437744140625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.075366020202637, + "rewards/margins": 6.496217727661133, + "rewards/rejected": -12.57158374786377, + "step": 7944 + }, + { + "epoch": 1.24, + "learning_rate": 8.320149385341564e-06, + "logits/chosen": -3.015004873275757, + "logits/rejected": -2.482684373855591, + "logps/chosen": -143.72726440429688, + "logps/rejected": -176.60171508789062, + "loss": 0.6675, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.473746299743652, + "rewards/margins": 1.4514577388763428, + "rewards/rejected": -7.925204277038574, + "step": 7945 + }, + { + "epoch": 1.24, + "learning_rate": 8.319415944810415e-06, + "logits/chosen": -2.947927713394165, + "logits/rejected": -2.6342592239379883, + "logps/chosen": -317.5647277832031, + "logps/rejected": -501.1739196777344, + "loss": 0.0173, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.185569763183594, + "rewards/margins": 4.735236644744873, + "rewards/rejected": -9.920805931091309, + "step": 7946 + }, + { + "epoch": 1.24, + "learning_rate": 8.318682504279267e-06, + "logits/chosen": -2.5385968685150146, + "logits/rejected": -2.777421474456787, + "logps/chosen": -117.18223571777344, + "logps/rejected": -231.86868286132812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9945976734161377, + "rewards/margins": 8.869039535522461, + "rewards/rejected": -10.863637924194336, + "step": 7947 + }, + { + "epoch": 1.24, + "learning_rate": 8.31794906374812e-06, + "logits/chosen": -3.0452709197998047, + "logits/rejected": -2.8282387256622314, + "logps/chosen": -582.3632202148438, + "logps/rejected": -524.14990234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9228572845458984, + "rewards/margins": 8.619028091430664, + "rewards/rejected": -11.541885375976562, + "step": 7948 + }, + { + "epoch": 1.24, + "learning_rate": 8.317215623216973e-06, + "logits/chosen": -1.8151779174804688, + "logits/rejected": -2.664876699447632, + "logps/chosen": -93.24661254882812, + "logps/rejected": -305.6456298828125, + "loss": 0.0712, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.979260444641113, + "rewards/margins": 5.969573020935059, + "rewards/rejected": -10.948833465576172, + "step": 7949 + }, + { + "epoch": 1.24, + "learning_rate": 8.316482182685825e-06, + "logits/chosen": -2.1925289630889893, + "logits/rejected": -2.9293878078460693, + "logps/chosen": -866.6948852539062, + "logps/rejected": -360.12841796875, + "loss": 0.4738, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.651519298553467, + "rewards/margins": 4.7097954750061035, + "rewards/rejected": -11.36131477355957, + "step": 7950 + }, + { + "epoch": 1.24, + "learning_rate": 8.315748742154677e-06, + "logits/chosen": -2.224100112915039, + "logits/rejected": -2.2707622051239014, + "logps/chosen": -379.80621337890625, + "logps/rejected": -616.1082153320312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.910120964050293, + "rewards/margins": 14.012712478637695, + "rewards/rejected": -17.922834396362305, + "step": 7951 + }, + { + "epoch": 1.24, + "learning_rate": 8.315015301623528e-06, + "logits/chosen": -3.158019542694092, + "logits/rejected": -2.842662811279297, + "logps/chosen": -258.8170166015625, + "logps/rejected": -249.36331176757812, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2182540893554688, + "rewards/margins": 5.654644012451172, + "rewards/rejected": -8.87289810180664, + "step": 7952 + }, + { + "epoch": 1.24, + "learning_rate": 8.31428186109238e-06, + "logits/chosen": -2.494605779647827, + "logits/rejected": -2.763110399246216, + "logps/chosen": -129.83538818359375, + "logps/rejected": -355.05108642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.742038726806641, + "rewards/margins": 11.631442070007324, + "rewards/rejected": -17.37347984313965, + "step": 7953 + }, + { + "epoch": 1.24, + "learning_rate": 8.313548420561232e-06, + "logits/chosen": -1.6864955425262451, + "logits/rejected": -2.910705327987671, + "logps/chosen": -151.19729614257812, + "logps/rejected": -619.7958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.424576759338379, + "rewards/margins": 9.602230072021484, + "rewards/rejected": -14.026806831359863, + "step": 7954 + }, + { + "epoch": 1.24, + "learning_rate": 8.312814980030084e-06, + "logits/chosen": -2.160238742828369, + "logits/rejected": -2.569098711013794, + "logps/chosen": -138.38694763183594, + "logps/rejected": -298.3018798828125, + "loss": 0.0617, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.840180397033691, + "rewards/margins": 3.0965588092803955, + "rewards/rejected": -10.936738967895508, + "step": 7955 + }, + { + "epoch": 1.24, + "learning_rate": 8.312081539498936e-06, + "logits/chosen": -2.4526047706604004, + "logits/rejected": -3.024893045425415, + "logps/chosen": -315.98272705078125, + "logps/rejected": -527.786865234375, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.855278968811035, + "rewards/margins": 3.7763512134552, + "rewards/rejected": -8.631629943847656, + "step": 7956 + }, + { + "epoch": 1.24, + "learning_rate": 8.311348098967788e-06, + "logits/chosen": -2.4681873321533203, + "logits/rejected": -2.689393997192383, + "logps/chosen": -506.8009033203125, + "logps/rejected": -318.20843505859375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.359650611877441, + "rewards/margins": 7.8250322341918945, + "rewards/rejected": -12.184682846069336, + "step": 7957 + }, + { + "epoch": 1.24, + "learning_rate": 8.310614658436641e-06, + "logits/chosen": -2.671250820159912, + "logits/rejected": -1.1912627220153809, + "logps/chosen": -431.0656433105469, + "logps/rejected": -271.10211181640625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3673884868621826, + "rewards/margins": 8.275245666503906, + "rewards/rejected": -8.642633438110352, + "step": 7958 + }, + { + "epoch": 1.24, + "learning_rate": 8.309881217905493e-06, + "logits/chosen": -2.6295645236968994, + "logits/rejected": -1.574347734451294, + "logps/chosen": -367.2493896484375, + "logps/rejected": -308.0235900878906, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7689104080200195, + "rewards/margins": 6.096206188201904, + "rewards/rejected": -11.865116119384766, + "step": 7959 + }, + { + "epoch": 1.24, + "learning_rate": 8.309147777374345e-06, + "logits/chosen": -2.9013962745666504, + "logits/rejected": -2.9214327335357666, + "logps/chosen": -415.1278076171875, + "logps/rejected": -899.7496337890625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.369663715362549, + "rewards/margins": 6.503939628601074, + "rewards/rejected": -11.873603820800781, + "step": 7960 + }, + { + "epoch": 1.24, + "learning_rate": 8.308414336843197e-06, + "logits/chosen": -2.8812196254730225, + "logits/rejected": -2.1207330226898193, + "logps/chosen": -256.8274230957031, + "logps/rejected": -140.75540161132812, + "loss": 0.9132, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.278404235839844, + "rewards/margins": -0.002262115478515625, + "rewards/rejected": -5.276142120361328, + "step": 7961 + }, + { + "epoch": 1.24, + "learning_rate": 8.307680896312049e-06, + "logits/chosen": -2.7954490184783936, + "logits/rejected": -2.998019218444824, + "logps/chosen": -78.751708984375, + "logps/rejected": -281.33209228515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0261242389678955, + "rewards/margins": 8.052108764648438, + "rewards/rejected": -11.078231811523438, + "step": 7962 + }, + { + "epoch": 1.24, + "learning_rate": 8.3069474557809e-06, + "logits/chosen": -2.832416296005249, + "logits/rejected": -3.027350664138794, + "logps/chosen": -59.999122619628906, + "logps/rejected": -217.13253784179688, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.108733654022217, + "rewards/margins": 6.467697620391846, + "rewards/rejected": -10.576431274414062, + "step": 7963 + }, + { + "epoch": 1.24, + "learning_rate": 8.306214015249753e-06, + "logits/chosen": -2.372976303100586, + "logits/rejected": -2.685131549835205, + "logps/chosen": -204.2685089111328, + "logps/rejected": -396.0517272949219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1440978050231934, + "rewards/margins": 10.354503631591797, + "rewards/rejected": -13.498601913452148, + "step": 7964 + }, + { + "epoch": 1.24, + "learning_rate": 8.305480574718605e-06, + "logits/chosen": -2.7970638275146484, + "logits/rejected": -1.4439220428466797, + "logps/chosen": -299.8152160644531, + "logps/rejected": -262.3121032714844, + "loss": 0.0575, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9346976280212402, + "rewards/margins": 4.655283451080322, + "rewards/rejected": -8.589981079101562, + "step": 7965 + }, + { + "epoch": 1.24, + "learning_rate": 8.304747134187456e-06, + "logits/chosen": -2.6159589290618896, + "logits/rejected": -2.5371639728546143, + "logps/chosen": -110.31649017333984, + "logps/rejected": -194.880859375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1255943775177, + "rewards/margins": 7.7569169998168945, + "rewards/rejected": -10.882511138916016, + "step": 7966 + }, + { + "epoch": 1.24, + "learning_rate": 8.30401369365631e-06, + "logits/chosen": -3.156345844268799, + "logits/rejected": -2.978356122970581, + "logps/chosen": -630.5140380859375, + "logps/rejected": -508.29302978515625, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4014970064163208, + "rewards/margins": 5.796929836273193, + "rewards/rejected": -7.198427200317383, + "step": 7967 + }, + { + "epoch": 1.24, + "learning_rate": 8.303280253125162e-06, + "logits/chosen": -2.8303167819976807, + "logits/rejected": -2.728188991546631, + "logps/chosen": -241.65298461914062, + "logps/rejected": -224.98965454101562, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.491637706756592, + "rewards/margins": 5.3905768394470215, + "rewards/rejected": -8.882214546203613, + "step": 7968 + }, + { + "epoch": 1.24, + "learning_rate": 8.302546812594015e-06, + "logits/chosen": -2.7471823692321777, + "logits/rejected": -2.7136011123657227, + "logps/chosen": -95.44044494628906, + "logps/rejected": -293.3562927246094, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.265023708343506, + "rewards/margins": 9.773534774780273, + "rewards/rejected": -12.038558959960938, + "step": 7969 + }, + { + "epoch": 1.24, + "learning_rate": 8.301813372062867e-06, + "logits/chosen": -1.3374797105789185, + "logits/rejected": -2.923584222793579, + "logps/chosen": -189.4661102294922, + "logps/rejected": -437.5016174316406, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.143871307373047, + "rewards/margins": 6.88828182220459, + "rewards/rejected": -11.032154083251953, + "step": 7970 + }, + { + "epoch": 1.24, + "learning_rate": 8.30107993153172e-06, + "logits/chosen": -2.0484132766723633, + "logits/rejected": -2.6635382175445557, + "logps/chosen": -122.88760375976562, + "logps/rejected": -235.9337921142578, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.985889196395874, + "rewards/margins": 7.678556442260742, + "rewards/rejected": -11.664445877075195, + "step": 7971 + }, + { + "epoch": 1.24, + "learning_rate": 8.300346491000571e-06, + "logits/chosen": -2.7956457138061523, + "logits/rejected": -2.7180755138397217, + "logps/chosen": -139.46832275390625, + "logps/rejected": -240.99368286132812, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.743781089782715, + "rewards/margins": 6.983973503112793, + "rewards/rejected": -11.727754592895508, + "step": 7972 + }, + { + "epoch": 1.24, + "learning_rate": 8.299613050469423e-06, + "logits/chosen": -2.7944746017456055, + "logits/rejected": -2.5778770446777344, + "logps/chosen": -197.1895294189453, + "logps/rejected": -164.8681182861328, + "loss": 3.2917, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.959478855133057, + "rewards/margins": 0.1842799186706543, + "rewards/rejected": -7.143758773803711, + "step": 7973 + }, + { + "epoch": 1.24, + "learning_rate": 8.298879609938275e-06, + "logits/chosen": -2.834618091583252, + "logits/rejected": -3.0426125526428223, + "logps/chosen": -208.71282958984375, + "logps/rejected": -303.1595764160156, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5912344455718994, + "rewards/margins": 7.8877949714660645, + "rewards/rejected": -10.479029655456543, + "step": 7974 + }, + { + "epoch": 1.24, + "learning_rate": 8.298146169407127e-06, + "logits/chosen": -3.0457355976104736, + "logits/rejected": -3.1285171508789062, + "logps/chosen": -121.85238647460938, + "logps/rejected": -252.43077087402344, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2835094928741455, + "rewards/margins": 7.594474792480469, + "rewards/rejected": -10.877984046936035, + "step": 7975 + }, + { + "epoch": 1.24, + "learning_rate": 8.29741272887598e-06, + "logits/chosen": -2.5739455223083496, + "logits/rejected": -2.756995677947998, + "logps/chosen": -214.3385467529297, + "logps/rejected": -245.60707092285156, + "loss": 0.0571, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.115525245666504, + "rewards/margins": 5.919375419616699, + "rewards/rejected": -9.034900665283203, + "step": 7976 + }, + { + "epoch": 1.24, + "learning_rate": 8.296679288344832e-06, + "logits/chosen": -2.8748157024383545, + "logits/rejected": -1.8688902854919434, + "logps/chosen": -727.245361328125, + "logps/rejected": -477.01226806640625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6562767028808594, + "rewards/margins": 6.925117492675781, + "rewards/rejected": -10.58139419555664, + "step": 7977 + }, + { + "epoch": 1.24, + "learning_rate": 8.295945847813684e-06, + "logits/chosen": -2.6675353050231934, + "logits/rejected": -3.0905346870422363, + "logps/chosen": -1273.2183837890625, + "logps/rejected": -1106.2415771484375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7065918445587158, + "rewards/margins": 7.865780830383301, + "rewards/rejected": -9.572372436523438, + "step": 7978 + }, + { + "epoch": 1.24, + "learning_rate": 8.295212407282536e-06, + "logits/chosen": -2.631047248840332, + "logits/rejected": -1.5594125986099243, + "logps/chosen": -348.08062744140625, + "logps/rejected": -156.00582885742188, + "loss": 1.1343, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.005817413330078, + "rewards/margins": 0.45603346824645996, + "rewards/rejected": -6.461851119995117, + "step": 7979 + }, + { + "epoch": 1.24, + "learning_rate": 8.294478966751388e-06, + "logits/chosen": -2.0357067584991455, + "logits/rejected": -2.789402723312378, + "logps/chosen": -119.93902587890625, + "logps/rejected": -208.3450469970703, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.627992868423462, + "rewards/margins": 5.606799125671387, + "rewards/rejected": -9.23479175567627, + "step": 7980 + }, + { + "epoch": 1.24, + "learning_rate": 8.29374552622024e-06, + "logits/chosen": -2.502216100692749, + "logits/rejected": -2.837103843688965, + "logps/chosen": -264.2258605957031, + "logps/rejected": -447.86199951171875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.778383255004883, + "rewards/margins": 9.065423965454102, + "rewards/rejected": -11.843807220458984, + "step": 7981 + }, + { + "epoch": 1.24, + "learning_rate": 8.293012085689092e-06, + "logits/chosen": -3.035475969314575, + "logits/rejected": -2.97646427154541, + "logps/chosen": -521.3063354492188, + "logps/rejected": -643.1910400390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0131683349609375, + "rewards/margins": 11.49914836883545, + "rewards/rejected": -13.512317657470703, + "step": 7982 + }, + { + "epoch": 1.24, + "learning_rate": 8.292278645157943e-06, + "logits/chosen": -2.8818905353546143, + "logits/rejected": -2.4141147136688232, + "logps/chosen": -291.87225341796875, + "logps/rejected": -410.91241455078125, + "loss": 0.9464, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.098165988922119, + "rewards/margins": 2.7130050659179688, + "rewards/rejected": -5.811171054840088, + "step": 7983 + }, + { + "epoch": 1.24, + "learning_rate": 8.291545204626795e-06, + "logits/chosen": -1.0154433250427246, + "logits/rejected": -2.802746057510376, + "logps/chosen": -103.11380767822266, + "logps/rejected": -335.58221435546875, + "loss": 0.1786, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.843052387237549, + "rewards/margins": 2.3833024501800537, + "rewards/rejected": -7.226354598999023, + "step": 7984 + }, + { + "epoch": 1.24, + "learning_rate": 8.290811764095649e-06, + "logits/chosen": -1.6192280054092407, + "logits/rejected": -2.7779700756073, + "logps/chosen": -77.03007507324219, + "logps/rejected": -298.48992919921875, + "loss": 0.69, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.404182434082031, + "rewards/margins": 2.72963285446167, + "rewards/rejected": -8.133814811706543, + "step": 7985 + }, + { + "epoch": 1.24, + "learning_rate": 8.2900783235645e-06, + "logits/chosen": -1.9208393096923828, + "logits/rejected": -3.0873262882232666, + "logps/chosen": -108.27950286865234, + "logps/rejected": -427.7285461425781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.088320732116699, + "rewards/margins": 8.112113952636719, + "rewards/rejected": -11.200435638427734, + "step": 7986 + }, + { + "epoch": 1.24, + "learning_rate": 8.289344883033353e-06, + "logits/chosen": -2.910370349884033, + "logits/rejected": -3.0549113750457764, + "logps/chosen": -339.81353759765625, + "logps/rejected": -408.4334716796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3283329010009766, + "rewards/margins": 8.267386436462402, + "rewards/rejected": -11.595719337463379, + "step": 7987 + }, + { + "epoch": 1.24, + "learning_rate": 8.288611442502204e-06, + "logits/chosen": -2.9158859252929688, + "logits/rejected": -2.938608407974243, + "logps/chosen": -109.36561584472656, + "logps/rejected": -302.519287109375, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9888219833374023, + "rewards/margins": 5.143996238708496, + "rewards/rejected": -8.132818222045898, + "step": 7988 + }, + { + "epoch": 1.24, + "learning_rate": 8.287878001971056e-06, + "logits/chosen": -3.0295732021331787, + "logits/rejected": -3.142895221710205, + "logps/chosen": -107.07986450195312, + "logps/rejected": -165.3201446533203, + "loss": 1.2904, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.048335075378418, + "rewards/margins": 0.6528916358947754, + "rewards/rejected": -5.701226234436035, + "step": 7989 + }, + { + "epoch": 1.24, + "learning_rate": 8.287144561439908e-06, + "logits/chosen": -1.7850369215011597, + "logits/rejected": -2.73848819732666, + "logps/chosen": -102.65029907226562, + "logps/rejected": -394.72076416015625, + "loss": 0.1547, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.666895866394043, + "rewards/margins": 4.668288230895996, + "rewards/rejected": -9.335184097290039, + "step": 7990 + }, + { + "epoch": 1.24, + "learning_rate": 8.28641112090876e-06, + "logits/chosen": -3.0670220851898193, + "logits/rejected": -3.144357442855835, + "logps/chosen": -118.58562469482422, + "logps/rejected": -241.62863159179688, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7572720050811768, + "rewards/margins": 4.782466888427734, + "rewards/rejected": -7.539738655090332, + "step": 7991 + }, + { + "epoch": 1.24, + "learning_rate": 8.285677680377612e-06, + "logits/chosen": -1.551194429397583, + "logits/rejected": -2.5021042823791504, + "logps/chosen": -120.64237213134766, + "logps/rejected": -603.6175537109375, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8530592918396, + "rewards/margins": 8.19255256652832, + "rewards/rejected": -13.045612335205078, + "step": 7992 + }, + { + "epoch": 1.24, + "learning_rate": 8.284944239846464e-06, + "logits/chosen": -2.8992326259613037, + "logits/rejected": -2.23783278465271, + "logps/chosen": -148.1380157470703, + "logps/rejected": -165.96932983398438, + "loss": 0.0694, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.937575101852417, + "rewards/margins": 4.953232288360596, + "rewards/rejected": -8.890807151794434, + "step": 7993 + }, + { + "epoch": 1.24, + "learning_rate": 8.284210799315317e-06, + "logits/chosen": -3.0507543087005615, + "logits/rejected": -2.93211030960083, + "logps/chosen": -228.5767822265625, + "logps/rejected": -471.3397521972656, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.001089096069336, + "rewards/margins": 6.7937421798706055, + "rewards/rejected": -10.794830322265625, + "step": 7994 + }, + { + "epoch": 1.24, + "learning_rate": 8.28347735878417e-06, + "logits/chosen": -0.6278631091117859, + "logits/rejected": -2.104031801223755, + "logps/chosen": -123.0793685913086, + "logps/rejected": -366.4725036621094, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3853907585144043, + "rewards/margins": 10.138063430786133, + "rewards/rejected": -13.523454666137695, + "step": 7995 + }, + { + "epoch": 1.24, + "learning_rate": 8.282743918253021e-06, + "logits/chosen": -2.194058656692505, + "logits/rejected": -3.1215553283691406, + "logps/chosen": -184.93881225585938, + "logps/rejected": -391.042724609375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.26015567779541, + "rewards/margins": 6.955733299255371, + "rewards/rejected": -11.215888977050781, + "step": 7996 + }, + { + "epoch": 1.24, + "learning_rate": 8.282010477721873e-06, + "logits/chosen": -2.177429437637329, + "logits/rejected": -2.75011944770813, + "logps/chosen": -147.20632934570312, + "logps/rejected": -442.9565734863281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8931725025177, + "rewards/margins": 10.084413528442383, + "rewards/rejected": -13.977584838867188, + "step": 7997 + }, + { + "epoch": 1.24, + "learning_rate": 8.281277037190725e-06, + "logits/chosen": -1.9304704666137695, + "logits/rejected": -3.1010632514953613, + "logps/chosen": -138.39968872070312, + "logps/rejected": -400.2051696777344, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.136130928993225, + "rewards/margins": 8.413337707519531, + "rewards/rejected": -9.549468040466309, + "step": 7998 + }, + { + "epoch": 1.24, + "learning_rate": 8.280543596659577e-06, + "logits/chosen": -2.921616792678833, + "logits/rejected": -2.56846284866333, + "logps/chosen": -441.07171630859375, + "logps/rejected": -369.14208984375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.148643493652344, + "rewards/margins": 5.990701675415039, + "rewards/rejected": -10.139345169067383, + "step": 7999 + }, + { + "epoch": 1.24, + "learning_rate": 8.279810156128429e-06, + "logits/chosen": -1.924590826034546, + "logits/rejected": -2.8465349674224854, + "logps/chosen": -350.70306396484375, + "logps/rejected": -384.57659912109375, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.527261257171631, + "rewards/margins": 5.496500015258789, + "rewards/rejected": -10.023761749267578, + "step": 8000 + }, + { + "epoch": 1.24, + "learning_rate": 8.279076715597282e-06, + "logits/chosen": -2.8448429107666016, + "logits/rejected": -2.6638360023498535, + "logps/chosen": -673.853515625, + "logps/rejected": -550.6666870117188, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.214580535888672, + "rewards/margins": 8.751496315002441, + "rewards/rejected": -11.966076850891113, + "step": 8001 + }, + { + "epoch": 1.24, + "learning_rate": 8.278343275066134e-06, + "logits/chosen": -2.242629051208496, + "logits/rejected": -2.646667003631592, + "logps/chosen": -291.6127014160156, + "logps/rejected": -322.3408508300781, + "loss": 0.1397, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.068124771118164, + "rewards/margins": 3.714712381362915, + "rewards/rejected": -7.7828369140625, + "step": 8002 + }, + { + "epoch": 1.24, + "learning_rate": 8.277609834534988e-06, + "logits/chosen": -2.3757388591766357, + "logits/rejected": -2.3722646236419678, + "logps/chosen": -95.68707275390625, + "logps/rejected": -177.01858520507812, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.161709308624268, + "rewards/margins": 6.144869804382324, + "rewards/rejected": -10.30657958984375, + "step": 8003 + }, + { + "epoch": 1.24, + "learning_rate": 8.27687639400384e-06, + "logits/chosen": -2.772289514541626, + "logits/rejected": -3.001549243927002, + "logps/chosen": -138.5216827392578, + "logps/rejected": -336.7431640625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9011459350585938, + "rewards/margins": 7.975249290466309, + "rewards/rejected": -11.876396179199219, + "step": 8004 + }, + { + "epoch": 1.24, + "learning_rate": 8.276142953472692e-06, + "logits/chosen": -2.531341791152954, + "logits/rejected": -3.053117275238037, + "logps/chosen": -127.82524871826172, + "logps/rejected": -542.991943359375, + "loss": 0.03, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.251409530639648, + "rewards/margins": 5.078995704650879, + "rewards/rejected": -10.330406188964844, + "step": 8005 + }, + { + "epoch": 1.25, + "learning_rate": 8.275409512941543e-06, + "logits/chosen": -2.3499293327331543, + "logits/rejected": -2.170827627182007, + "logps/chosen": -203.02899169921875, + "logps/rejected": -193.77963256835938, + "loss": 0.616, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.043044090270996, + "rewards/margins": 3.1177520751953125, + "rewards/rejected": -7.160796165466309, + "step": 8006 + }, + { + "epoch": 1.25, + "learning_rate": 8.274676072410395e-06, + "logits/chosen": -2.9817042350769043, + "logits/rejected": -2.930161714553833, + "logps/chosen": -426.2193908691406, + "logps/rejected": -390.4208068847656, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8439631462097168, + "rewards/margins": 8.06295394897461, + "rewards/rejected": -9.906917572021484, + "step": 8007 + }, + { + "epoch": 1.25, + "learning_rate": 8.273942631879247e-06, + "logits/chosen": -2.9726133346557617, + "logits/rejected": -2.8986644744873047, + "logps/chosen": -205.44454956054688, + "logps/rejected": -314.46160888671875, + "loss": 0.3912, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.62296199798584, + "rewards/margins": 4.2002058029174805, + "rewards/rejected": -9.82316780090332, + "step": 8008 + }, + { + "epoch": 1.25, + "learning_rate": 8.273209191348099e-06, + "logits/chosen": -2.128429412841797, + "logits/rejected": -2.3857133388519287, + "logps/chosen": -234.58291625976562, + "logps/rejected": -263.9784240722656, + "loss": 2.5153, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.628340244293213, + "rewards/margins": 1.2689871788024902, + "rewards/rejected": -8.897327423095703, + "step": 8009 + }, + { + "epoch": 1.25, + "learning_rate": 8.272475750816951e-06, + "logits/chosen": -2.87146258354187, + "logits/rejected": -1.9421913623809814, + "logps/chosen": -417.3130187988281, + "logps/rejected": -329.90313720703125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.354633331298828, + "rewards/margins": 8.296927452087402, + "rewards/rejected": -12.651559829711914, + "step": 8010 + }, + { + "epoch": 1.25, + "learning_rate": 8.271742310285803e-06, + "logits/chosen": -2.543567657470703, + "logits/rejected": -2.279327154159546, + "logps/chosen": -150.18511962890625, + "logps/rejected": -316.96856689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5431900024414062, + "rewards/margins": 11.151651382446289, + "rewards/rejected": -13.694841384887695, + "step": 8011 + }, + { + "epoch": 1.25, + "learning_rate": 8.271008869754656e-06, + "logits/chosen": -2.898845672607422, + "logits/rejected": -2.214024066925049, + "logps/chosen": -288.9158020019531, + "logps/rejected": -247.31051635742188, + "loss": 3.0985, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.7466630935668945, + "rewards/margins": -0.13678979873657227, + "rewards/rejected": -7.609873294830322, + "step": 8012 + }, + { + "epoch": 1.25, + "learning_rate": 8.270275429223508e-06, + "logits/chosen": -2.8902506828308105, + "logits/rejected": -2.7749199867248535, + "logps/chosen": -347.0833740234375, + "logps/rejected": -305.0713195800781, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.180043935775757, + "rewards/margins": 8.260235786437988, + "rewards/rejected": -10.440279960632324, + "step": 8013 + }, + { + "epoch": 1.25, + "learning_rate": 8.26954198869236e-06, + "logits/chosen": -1.1387038230895996, + "logits/rejected": -2.222926378250122, + "logps/chosen": -137.71466064453125, + "logps/rejected": -322.1510009765625, + "loss": 1.672, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.612839698791504, + "rewards/margins": 2.8290700912475586, + "rewards/rejected": -10.441909790039062, + "step": 8014 + }, + { + "epoch": 1.25, + "learning_rate": 8.268808548161212e-06, + "logits/chosen": -2.431674003601074, + "logits/rejected": -2.834378957748413, + "logps/chosen": -151.0316162109375, + "logps/rejected": -321.5663757324219, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8708722591400146, + "rewards/margins": 6.851334571838379, + "rewards/rejected": -10.722206115722656, + "step": 8015 + }, + { + "epoch": 1.25, + "learning_rate": 8.268075107630064e-06, + "logits/chosen": -2.2962377071380615, + "logits/rejected": -3.1772332191467285, + "logps/chosen": -236.9141082763672, + "logps/rejected": -519.4039916992188, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.050718307495117, + "rewards/margins": 7.095856666564941, + "rewards/rejected": -9.146574974060059, + "step": 8016 + }, + { + "epoch": 1.25, + "learning_rate": 8.267341667098916e-06, + "logits/chosen": -3.0649077892303467, + "logits/rejected": -2.7911579608917236, + "logps/chosen": -134.57696533203125, + "logps/rejected": -275.8157043457031, + "loss": 0.9733, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7655255794525146, + "rewards/margins": 5.958025932312012, + "rewards/rejected": -9.723551750183105, + "step": 8017 + }, + { + "epoch": 1.25, + "learning_rate": 8.266608226567768e-06, + "logits/chosen": -3.035226821899414, + "logits/rejected": -3.1258363723754883, + "logps/chosen": -189.7894287109375, + "logps/rejected": -401.8825988769531, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.759756565093994, + "rewards/margins": 6.479640483856201, + "rewards/rejected": -10.239397048950195, + "step": 8018 + }, + { + "epoch": 1.25, + "learning_rate": 8.26587478603662e-06, + "logits/chosen": -2.025123357772827, + "logits/rejected": -2.438875198364258, + "logps/chosen": -174.38259887695312, + "logps/rejected": -218.0228271484375, + "loss": 0.7987, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.413274765014648, + "rewards/margins": 1.9345958232879639, + "rewards/rejected": -8.347870826721191, + "step": 8019 + }, + { + "epoch": 1.25, + "learning_rate": 8.265141345505473e-06, + "logits/chosen": -2.921992301940918, + "logits/rejected": -2.1396374702453613, + "logps/chosen": -435.7047424316406, + "logps/rejected": -239.87008666992188, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.00439453125, + "rewards/margins": 6.353097915649414, + "rewards/rejected": -10.357492446899414, + "step": 8020 + }, + { + "epoch": 1.25, + "learning_rate": 8.264407904974325e-06, + "logits/chosen": -2.535172939300537, + "logits/rejected": -2.972316026687622, + "logps/chosen": -135.3845672607422, + "logps/rejected": -252.8314971923828, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.329501628875732, + "rewards/margins": 5.269558906555176, + "rewards/rejected": -9.599061012268066, + "step": 8021 + }, + { + "epoch": 1.25, + "learning_rate": 8.263674464443177e-06, + "logits/chosen": -2.910409450531006, + "logits/rejected": -2.829436779022217, + "logps/chosen": -536.3474731445312, + "logps/rejected": -435.2768859863281, + "loss": 0.9182, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.502699375152588, + "rewards/margins": 2.4722816944122314, + "rewards/rejected": -6.974981307983398, + "step": 8022 + }, + { + "epoch": 1.25, + "learning_rate": 8.262941023912029e-06, + "logits/chosen": -0.7805490493774414, + "logits/rejected": -3.0082509517669678, + "logps/chosen": -71.03228759765625, + "logps/rejected": -466.3730773925781, + "loss": 0.0671, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.015492916107178, + "rewards/margins": 4.678643226623535, + "rewards/rejected": -8.694135665893555, + "step": 8023 + }, + { + "epoch": 1.25, + "learning_rate": 8.26220758338088e-06, + "logits/chosen": -2.9096639156341553, + "logits/rejected": -2.282341957092285, + "logps/chosen": -456.8549499511719, + "logps/rejected": -421.8463134765625, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9273648262023926, + "rewards/margins": 7.4198899269104, + "rewards/rejected": -11.347254753112793, + "step": 8024 + }, + { + "epoch": 1.25, + "learning_rate": 8.261474142849732e-06, + "logits/chosen": -3.2560982704162598, + "logits/rejected": -3.3744730949401855, + "logps/chosen": -75.38398742675781, + "logps/rejected": -134.27651977539062, + "loss": 0.0337, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.039181232452393, + "rewards/margins": 4.474895477294922, + "rewards/rejected": -8.514076232910156, + "step": 8025 + }, + { + "epoch": 1.25, + "learning_rate": 8.260740702318584e-06, + "logits/chosen": -2.983065366744995, + "logits/rejected": -2.3972909450531006, + "logps/chosen": -466.10595703125, + "logps/rejected": -443.04168701171875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.779394507408142, + "rewards/margins": 7.604146480560303, + "rewards/rejected": -9.383541107177734, + "step": 8026 + }, + { + "epoch": 1.25, + "learning_rate": 8.260007261787436e-06, + "logits/chosen": -2.50343918800354, + "logits/rejected": -2.9814717769622803, + "logps/chosen": -274.71746826171875, + "logps/rejected": -256.0605773925781, + "loss": 1.2616, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.569912433624268, + "rewards/margins": 2.095097780227661, + "rewards/rejected": -7.665010452270508, + "step": 8027 + }, + { + "epoch": 1.25, + "learning_rate": 8.259273821256288e-06, + "logits/chosen": -2.8084702491760254, + "logits/rejected": -2.539734363555908, + "logps/chosen": -102.10069274902344, + "logps/rejected": -240.29574584960938, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0023059844970703, + "rewards/margins": 5.499772548675537, + "rewards/rejected": -8.50207805633545, + "step": 8028 + }, + { + "epoch": 1.25, + "learning_rate": 8.258540380725142e-06, + "logits/chosen": -2.2073986530303955, + "logits/rejected": -2.9402337074279785, + "logps/chosen": -449.03369140625, + "logps/rejected": -524.0389404296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.246751308441162, + "rewards/margins": 10.013494491577148, + "rewards/rejected": -12.260245323181152, + "step": 8029 + }, + { + "epoch": 1.25, + "learning_rate": 8.257806940193994e-06, + "logits/chosen": -2.366957664489746, + "logits/rejected": -2.969691038131714, + "logps/chosen": -85.64093780517578, + "logps/rejected": -121.28263854980469, + "loss": 1.2571, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.75581693649292, + "rewards/margins": 0.6663022041320801, + "rewards/rejected": -4.422119140625, + "step": 8030 + }, + { + "epoch": 1.25, + "learning_rate": 8.257073499662845e-06, + "logits/chosen": -2.456451177597046, + "logits/rejected": -2.6236140727996826, + "logps/chosen": -312.59765625, + "logps/rejected": -451.47735595703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.635119676589966, + "rewards/margins": 9.877644538879395, + "rewards/rejected": -12.512763977050781, + "step": 8031 + }, + { + "epoch": 1.25, + "learning_rate": 8.256340059131697e-06, + "logits/chosen": -1.5412553548812866, + "logits/rejected": -2.8538732528686523, + "logps/chosen": -73.58663177490234, + "logps/rejected": -330.32568359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.175705909729004, + "rewards/margins": 10.201421737670898, + "rewards/rejected": -12.377126693725586, + "step": 8032 + }, + { + "epoch": 1.25, + "learning_rate": 8.25560661860055e-06, + "logits/chosen": -1.6906081438064575, + "logits/rejected": -2.781818151473999, + "logps/chosen": -128.17835998535156, + "logps/rejected": -346.49725341796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4556806087493896, + "rewards/margins": 9.01197624206543, + "rewards/rejected": -11.467657089233398, + "step": 8033 + }, + { + "epoch": 1.25, + "learning_rate": 8.254873178069401e-06, + "logits/chosen": -1.8918406963348389, + "logits/rejected": -2.948373556137085, + "logps/chosen": -428.7779846191406, + "logps/rejected": -525.2259521484375, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.563773155212402, + "rewards/margins": 6.2576398849487305, + "rewards/rejected": -11.821413040161133, + "step": 8034 + }, + { + "epoch": 1.25, + "learning_rate": 8.254139737538255e-06, + "logits/chosen": -2.606006145477295, + "logits/rejected": -2.6743838787078857, + "logps/chosen": -416.92535400390625, + "logps/rejected": -381.87274169921875, + "loss": 1.4335, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.371063232421875, + "rewards/margins": 1.351327657699585, + "rewards/rejected": -6.722391128540039, + "step": 8035 + }, + { + "epoch": 1.25, + "learning_rate": 8.253406297007107e-06, + "logits/chosen": -3.085839033126831, + "logits/rejected": -2.625110387802124, + "logps/chosen": -366.21148681640625, + "logps/rejected": -576.9251098632812, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8627541065216064, + "rewards/margins": 6.144464492797852, + "rewards/rejected": -9.007219314575195, + "step": 8036 + }, + { + "epoch": 1.25, + "learning_rate": 8.252672856475958e-06, + "logits/chosen": -2.9990732669830322, + "logits/rejected": -2.9172589778900146, + "logps/chosen": -337.54718017578125, + "logps/rejected": -426.14404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.209125518798828, + "rewards/margins": 12.133792877197266, + "rewards/rejected": -16.342918395996094, + "step": 8037 + }, + { + "epoch": 1.25, + "learning_rate": 8.251939415944812e-06, + "logits/chosen": -1.467301845550537, + "logits/rejected": -3.0778486728668213, + "logps/chosen": -124.29055786132812, + "logps/rejected": -372.52947998046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6056495904922485, + "rewards/margins": 9.05537223815918, + "rewards/rejected": -10.661022186279297, + "step": 8038 + }, + { + "epoch": 1.25, + "learning_rate": 8.251205975413664e-06, + "logits/chosen": -2.703397274017334, + "logits/rejected": -2.719031810760498, + "logps/chosen": -120.92943572998047, + "logps/rejected": -266.0791015625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1128082275390625, + "rewards/margins": 6.85687780380249, + "rewards/rejected": -10.969685554504395, + "step": 8039 + }, + { + "epoch": 1.25, + "learning_rate": 8.250472534882516e-06, + "logits/chosen": -1.8540608882904053, + "logits/rejected": -2.9820480346679688, + "logps/chosen": -124.06809997558594, + "logps/rejected": -248.53675842285156, + "loss": 0.0522, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0789427757263184, + "rewards/margins": 3.023629665374756, + "rewards/rejected": -6.102572441101074, + "step": 8040 + }, + { + "epoch": 1.25, + "learning_rate": 8.249739094351368e-06, + "logits/chosen": -2.824314832687378, + "logits/rejected": -2.327148914337158, + "logps/chosen": -373.0800476074219, + "logps/rejected": -409.7730407714844, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.534253120422363, + "rewards/margins": 7.444718837738037, + "rewards/rejected": -12.978971481323242, + "step": 8041 + }, + { + "epoch": 1.25, + "learning_rate": 8.24900565382022e-06, + "logits/chosen": -2.3134214878082275, + "logits/rejected": -3.023275136947632, + "logps/chosen": -687.1983642578125, + "logps/rejected": -744.59912109375, + "loss": 0.0846, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.407311916351318, + "rewards/margins": 4.425951957702637, + "rewards/rejected": -8.833264350891113, + "step": 8042 + }, + { + "epoch": 1.25, + "learning_rate": 8.248272213289071e-06, + "logits/chosen": -1.815116286277771, + "logits/rejected": -3.0329179763793945, + "logps/chosen": -114.98353576660156, + "logps/rejected": -435.8466491699219, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.383972644805908, + "rewards/margins": 6.667079925537109, + "rewards/rejected": -11.05105209350586, + "step": 8043 + }, + { + "epoch": 1.25, + "learning_rate": 8.247538772757923e-06, + "logits/chosen": -2.7218785285949707, + "logits/rejected": -2.1993167400360107, + "logps/chosen": -156.21368408203125, + "logps/rejected": -371.3811950683594, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.625692844390869, + "rewards/margins": 9.311847686767578, + "rewards/rejected": -12.937540054321289, + "step": 8044 + }, + { + "epoch": 1.25, + "learning_rate": 8.246805332226775e-06, + "logits/chosen": -2.95763897895813, + "logits/rejected": -1.4504793882369995, + "logps/chosen": -330.5054931640625, + "logps/rejected": -129.89877319335938, + "loss": 0.1135, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.949638366699219, + "rewards/margins": 3.8030426502227783, + "rewards/rejected": -8.752681732177734, + "step": 8045 + }, + { + "epoch": 1.25, + "learning_rate": 8.246071891695627e-06, + "logits/chosen": -1.7544821500778198, + "logits/rejected": -2.5616073608398438, + "logps/chosen": -104.52934265136719, + "logps/rejected": -208.68988037109375, + "loss": 0.1452, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.192194938659668, + "rewards/margins": 3.011181592941284, + "rewards/rejected": -8.203376770019531, + "step": 8046 + }, + { + "epoch": 1.25, + "learning_rate": 8.24533845116448e-06, + "logits/chosen": -2.3475654125213623, + "logits/rejected": -2.8254330158233643, + "logps/chosen": -203.83551025390625, + "logps/rejected": -366.73126220703125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.235056400299072, + "rewards/margins": 6.864679336547852, + "rewards/rejected": -11.099736213684082, + "step": 8047 + }, + { + "epoch": 1.25, + "learning_rate": 8.244605010633332e-06, + "logits/chosen": -1.5196629762649536, + "logits/rejected": -2.6525917053222656, + "logps/chosen": -94.89469909667969, + "logps/rejected": -372.02508544921875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.58509635925293, + "rewards/margins": 9.361702919006348, + "rewards/rejected": -13.946798324584961, + "step": 8048 + }, + { + "epoch": 1.25, + "learning_rate": 8.243871570102184e-06, + "logits/chosen": -2.988868474960327, + "logits/rejected": -2.8587265014648438, + "logps/chosen": -174.7331085205078, + "logps/rejected": -432.74652099609375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.091777801513672, + "rewards/margins": 10.798751831054688, + "rewards/rejected": -12.89052963256836, + "step": 8049 + }, + { + "epoch": 1.25, + "learning_rate": 8.243138129571036e-06, + "logits/chosen": -2.883962631225586, + "logits/rejected": -2.193598508834839, + "logps/chosen": -500.09722900390625, + "logps/rejected": -311.8189392089844, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.164778232574463, + "rewards/margins": 7.5755228996276855, + "rewards/rejected": -10.740301132202148, + "step": 8050 + }, + { + "epoch": 1.25, + "learning_rate": 8.242404689039888e-06, + "logits/chosen": -3.0069339275360107, + "logits/rejected": -1.9336888790130615, + "logps/chosen": -265.13238525390625, + "logps/rejected": -183.899658203125, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.038655996322632, + "rewards/margins": 5.696455001831055, + "rewards/rejected": -8.735111236572266, + "step": 8051 + }, + { + "epoch": 1.25, + "learning_rate": 8.24167124850874e-06, + "logits/chosen": -1.3965116739273071, + "logits/rejected": -2.2117831707000732, + "logps/chosen": -165.81753540039062, + "logps/rejected": -462.65447998046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.055976390838623, + "rewards/margins": 9.696971893310547, + "rewards/rejected": -12.752948760986328, + "step": 8052 + }, + { + "epoch": 1.25, + "learning_rate": 8.240937807977592e-06, + "logits/chosen": -2.9207043647766113, + "logits/rejected": -3.028796672821045, + "logps/chosen": -117.3616943359375, + "logps/rejected": -173.1475067138672, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.382235527038574, + "rewards/margins": 5.440161228179932, + "rewards/rejected": -7.822397232055664, + "step": 8053 + }, + { + "epoch": 1.25, + "learning_rate": 8.240204367446444e-06, + "logits/chosen": -2.92065167427063, + "logits/rejected": -2.5533864498138428, + "logps/chosen": -216.54275512695312, + "logps/rejected": -306.70404052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.625727891921997, + "rewards/margins": 11.322033882141113, + "rewards/rejected": -12.947761535644531, + "step": 8054 + }, + { + "epoch": 1.25, + "learning_rate": 8.239470926915296e-06, + "logits/chosen": -3.1565515995025635, + "logits/rejected": -2.6414740085601807, + "logps/chosen": -377.7275695800781, + "logps/rejected": -284.77105712890625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14907526969909668, + "rewards/margins": 5.997622489929199, + "rewards/rejected": -6.146697521209717, + "step": 8055 + }, + { + "epoch": 1.25, + "learning_rate": 8.23873748638415e-06, + "logits/chosen": -3.046077013015747, + "logits/rejected": -2.353578567504883, + "logps/chosen": -291.376953125, + "logps/rejected": -167.81124877929688, + "loss": 0.055, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2890430688858032, + "rewards/margins": 5.402665138244629, + "rewards/rejected": -6.691708564758301, + "step": 8056 + }, + { + "epoch": 1.25, + "learning_rate": 8.238004045853001e-06, + "logits/chosen": -1.882524013519287, + "logits/rejected": -2.4139204025268555, + "logps/chosen": -437.63970947265625, + "logps/rejected": -596.64404296875, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.114189624786377, + "rewards/margins": 9.538480758666992, + "rewards/rejected": -12.652669906616211, + "step": 8057 + }, + { + "epoch": 1.25, + "learning_rate": 8.237270605321853e-06, + "logits/chosen": -2.5535311698913574, + "logits/rejected": -2.9393582344055176, + "logps/chosen": -151.80702209472656, + "logps/rejected": -559.7561645507812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4916061162948608, + "rewards/margins": 10.65906810760498, + "rewards/rejected": -12.150674819946289, + "step": 8058 + }, + { + "epoch": 1.25, + "learning_rate": 8.236537164790705e-06, + "logits/chosen": -2.7624192237854004, + "logits/rejected": -2.386291265487671, + "logps/chosen": -310.74566650390625, + "logps/rejected": -324.73101806640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.575007915496826, + "rewards/margins": 9.028042793273926, + "rewards/rejected": -12.603050231933594, + "step": 8059 + }, + { + "epoch": 1.25, + "learning_rate": 8.235803724259557e-06, + "logits/chosen": -2.7642712593078613, + "logits/rejected": -2.8457252979278564, + "logps/chosen": -156.47238159179688, + "logps/rejected": -276.93096923828125, + "loss": 0.8394, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.346670150756836, + "rewards/margins": 6.797983169555664, + "rewards/rejected": -12.1446533203125, + "step": 8060 + }, + { + "epoch": 1.25, + "learning_rate": 8.235070283728409e-06, + "logits/chosen": -2.748378276824951, + "logits/rejected": -3.041692018508911, + "logps/chosen": -315.164306640625, + "logps/rejected": -393.11761474609375, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8775634765625, + "rewards/margins": 6.67962646484375, + "rewards/rejected": -11.55718994140625, + "step": 8061 + }, + { + "epoch": 1.25, + "learning_rate": 8.23433684319726e-06, + "logits/chosen": -2.8588879108428955, + "logits/rejected": -2.576432943344116, + "logps/chosen": -204.94430541992188, + "logps/rejected": -231.19119262695312, + "loss": 0.2572, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4367823600769043, + "rewards/margins": 4.101055145263672, + "rewards/rejected": -7.537837028503418, + "step": 8062 + }, + { + "epoch": 1.25, + "learning_rate": 8.233603402666112e-06, + "logits/chosen": -2.16229510307312, + "logits/rejected": -2.5998032093048096, + "logps/chosen": -101.62162780761719, + "logps/rejected": -125.36876678466797, + "loss": 0.1862, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.364255905151367, + "rewards/margins": 2.0063867568969727, + "rewards/rejected": -7.37064266204834, + "step": 8063 + }, + { + "epoch": 1.25, + "learning_rate": 8.232869962134964e-06, + "logits/chosen": -2.4441099166870117, + "logits/rejected": -2.778266429901123, + "logps/chosen": -197.13282775878906, + "logps/rejected": -448.2398376464844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7846596240997314, + "rewards/margins": 10.018060684204102, + "rewards/rejected": -11.80272102355957, + "step": 8064 + }, + { + "epoch": 1.25, + "learning_rate": 8.232136521603818e-06, + "logits/chosen": -3.020634174346924, + "logits/rejected": -3.0813350677490234, + "logps/chosen": -160.73776245117188, + "logps/rejected": -247.4971923828125, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.992379903793335, + "rewards/margins": 5.351390838623047, + "rewards/rejected": -9.343770980834961, + "step": 8065 + }, + { + "epoch": 1.25, + "learning_rate": 8.23140308107267e-06, + "logits/chosen": -1.8256282806396484, + "logits/rejected": -2.588118314743042, + "logps/chosen": -127.82207489013672, + "logps/rejected": -287.6927185058594, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8863632678985596, + "rewards/margins": 7.310068130493164, + "rewards/rejected": -11.196431159973145, + "step": 8066 + }, + { + "epoch": 1.25, + "learning_rate": 8.230669640541522e-06, + "logits/chosen": -2.984468698501587, + "logits/rejected": -2.6165833473205566, + "logps/chosen": -235.7776336669922, + "logps/rejected": -247.91403198242188, + "loss": 1.2032, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.026460647583008, + "rewards/margins": 1.1791057586669922, + "rewards/rejected": -6.20556640625, + "step": 8067 + }, + { + "epoch": 1.25, + "learning_rate": 8.229936200010373e-06, + "logits/chosen": -2.872288942337036, + "logits/rejected": -2.6656646728515625, + "logps/chosen": -318.4139404296875, + "logps/rejected": -326.4180603027344, + "loss": 0.052, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.848293304443359, + "rewards/margins": 4.287985801696777, + "rewards/rejected": -9.136279106140137, + "step": 8068 + }, + { + "epoch": 1.25, + "learning_rate": 8.229202759479227e-06, + "logits/chosen": -2.2901861667633057, + "logits/rejected": -0.5605211853981018, + "logps/chosen": -401.2580261230469, + "logps/rejected": -281.66253662109375, + "loss": 0.8071, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.782208442687988, + "rewards/margins": -0.20925915241241455, + "rewards/rejected": -5.572948932647705, + "step": 8069 + }, + { + "epoch": 1.26, + "learning_rate": 8.228469318948079e-06, + "logits/chosen": -2.9683127403259277, + "logits/rejected": -2.8461380004882812, + "logps/chosen": -149.78904724121094, + "logps/rejected": -300.12432861328125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.747039794921875, + "rewards/margins": 7.276454448699951, + "rewards/rejected": -10.023494720458984, + "step": 8070 + }, + { + "epoch": 1.26, + "learning_rate": 8.22773587841693e-06, + "logits/chosen": -2.9513161182403564, + "logits/rejected": -2.295830488204956, + "logps/chosen": -216.2833709716797, + "logps/rejected": -286.79400634765625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1749093532562256, + "rewards/margins": 8.866201400756836, + "rewards/rejected": -11.04111099243164, + "step": 8071 + }, + { + "epoch": 1.26, + "learning_rate": 8.227002437885783e-06, + "logits/chosen": -2.977264881134033, + "logits/rejected": -2.7712910175323486, + "logps/chosen": -387.3753662109375, + "logps/rejected": -275.03057861328125, + "loss": 0.287, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.883404731750488, + "rewards/margins": 2.9789910316467285, + "rewards/rejected": -7.862395763397217, + "step": 8072 + }, + { + "epoch": 1.26, + "learning_rate": 8.226268997354634e-06, + "logits/chosen": -2.33413028717041, + "logits/rejected": -2.5744071006774902, + "logps/chosen": -60.496055603027344, + "logps/rejected": -268.91131591796875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8298916816711426, + "rewards/margins": 7.598513126373291, + "rewards/rejected": -10.428404808044434, + "step": 8073 + }, + { + "epoch": 1.26, + "learning_rate": 8.225535556823488e-06, + "logits/chosen": -2.2086501121520996, + "logits/rejected": -3.0204977989196777, + "logps/chosen": -88.97947692871094, + "logps/rejected": -461.1757507324219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.233255624771118, + "rewards/margins": 9.999039649963379, + "rewards/rejected": -13.232295989990234, + "step": 8074 + }, + { + "epoch": 1.26, + "learning_rate": 8.22480211629234e-06, + "logits/chosen": -1.4178619384765625, + "logits/rejected": -2.7019760608673096, + "logps/chosen": -253.63607788085938, + "logps/rejected": -529.3109741210938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.576371192932129, + "rewards/margins": 9.227771759033203, + "rewards/rejected": -11.804142951965332, + "step": 8075 + }, + { + "epoch": 1.26, + "learning_rate": 8.224068675761192e-06, + "logits/chosen": -2.43704891204834, + "logits/rejected": -2.859698534011841, + "logps/chosen": -139.33299255371094, + "logps/rejected": -213.39837646484375, + "loss": 0.0629, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.315081596374512, + "rewards/margins": 2.8122270107269287, + "rewards/rejected": -7.1273088455200195, + "step": 8076 + }, + { + "epoch": 1.26, + "learning_rate": 8.223335235230044e-06, + "logits/chosen": -2.0867116451263428, + "logits/rejected": -3.149041175842285, + "logps/chosen": -78.40347290039062, + "logps/rejected": -344.2037658691406, + "loss": 0.1158, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7746543884277344, + "rewards/margins": 5.1891560554504395, + "rewards/rejected": -8.963809967041016, + "step": 8077 + }, + { + "epoch": 1.26, + "learning_rate": 8.222601794698896e-06, + "logits/chosen": -2.8212006092071533, + "logits/rejected": -3.166574478149414, + "logps/chosen": -388.4244384765625, + "logps/rejected": -468.86236572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1484575271606445, + "rewards/margins": 11.530447959899902, + "rewards/rejected": -14.678905487060547, + "step": 8078 + }, + { + "epoch": 1.26, + "learning_rate": 8.221868354167747e-06, + "logits/chosen": -1.9000900983810425, + "logits/rejected": -3.0208988189697266, + "logps/chosen": -68.078857421875, + "logps/rejected": -324.2737731933594, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4079294204711914, + "rewards/margins": 7.019013404846191, + "rewards/rejected": -9.426942825317383, + "step": 8079 + }, + { + "epoch": 1.26, + "learning_rate": 8.2211349136366e-06, + "logits/chosen": -3.027859926223755, + "logits/rejected": -2.8740718364715576, + "logps/chosen": -210.865234375, + "logps/rejected": -83.58413696289062, + "loss": 1.6518, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.423786163330078, + "rewards/margins": -1.1139763593673706, + "rewards/rejected": -4.309809684753418, + "step": 8080 + }, + { + "epoch": 1.26, + "learning_rate": 8.220401473105451e-06, + "logits/chosen": -2.5545103549957275, + "logits/rejected": -3.0659821033477783, + "logps/chosen": -438.4991455078125, + "logps/rejected": -272.3392028808594, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32762452960014343, + "rewards/margins": 4.5972394943237305, + "rewards/rejected": -4.924864292144775, + "step": 8081 + }, + { + "epoch": 1.26, + "learning_rate": 8.219668032574303e-06, + "logits/chosen": -1.8666129112243652, + "logits/rejected": -2.9337668418884277, + "logps/chosen": -223.04624938964844, + "logps/rejected": -421.8419494628906, + "loss": 0.0496, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.35960054397583, + "rewards/margins": 7.1064605712890625, + "rewards/rejected": -11.466060638427734, + "step": 8082 + }, + { + "epoch": 1.26, + "learning_rate": 8.218934592043157e-06, + "logits/chosen": -2.8490102291107178, + "logits/rejected": -0.8216174840927124, + "logps/chosen": -276.5040588378906, + "logps/rejected": -122.25202941894531, + "loss": 2.2959, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.933368682861328, + "rewards/margins": 1.3109705448150635, + "rewards/rejected": -8.244338989257812, + "step": 8083 + }, + { + "epoch": 1.26, + "learning_rate": 8.218201151512009e-06, + "logits/chosen": -2.982999086380005, + "logits/rejected": -2.6325042247772217, + "logps/chosen": -573.422607421875, + "logps/rejected": -347.77410888671875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8659043312072754, + "rewards/margins": 5.684077739715576, + "rewards/rejected": -9.549982070922852, + "step": 8084 + }, + { + "epoch": 1.26, + "learning_rate": 8.21746771098086e-06, + "logits/chosen": -3.0110280513763428, + "logits/rejected": -3.094015598297119, + "logps/chosen": -191.9573974609375, + "logps/rejected": -130.70071411132812, + "loss": 0.2615, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.949568748474121, + "rewards/margins": 3.574857234954834, + "rewards/rejected": -6.524425983428955, + "step": 8085 + }, + { + "epoch": 1.26, + "learning_rate": 8.216734270449712e-06, + "logits/chosen": -3.0176870822906494, + "logits/rejected": -2.9345273971557617, + "logps/chosen": -110.52874755859375, + "logps/rejected": -212.53677368164062, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.343658924102783, + "rewards/margins": 7.049840450286865, + "rewards/rejected": -9.393499374389648, + "step": 8086 + }, + { + "epoch": 1.26, + "learning_rate": 8.216000829918564e-06, + "logits/chosen": -2.626394033432007, + "logits/rejected": -3.2069029808044434, + "logps/chosen": -943.2172241210938, + "logps/rejected": -900.4791870117188, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.90185546875, + "rewards/margins": 8.673288345336914, + "rewards/rejected": -10.575143814086914, + "step": 8087 + }, + { + "epoch": 1.26, + "learning_rate": 8.215267389387416e-06, + "logits/chosen": -1.508543848991394, + "logits/rejected": -2.992809295654297, + "logps/chosen": -121.802001953125, + "logps/rejected": -327.2088623046875, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.332509756088257, + "rewards/margins": 5.390953063964844, + "rewards/rejected": -8.72346305847168, + "step": 8088 + }, + { + "epoch": 1.26, + "learning_rate": 8.214533948856268e-06, + "logits/chosen": -2.7837681770324707, + "logits/rejected": -2.1815218925476074, + "logps/chosen": -223.49855041503906, + "logps/rejected": -357.10699462890625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0096168518066406, + "rewards/margins": 8.497818946838379, + "rewards/rejected": -11.50743579864502, + "step": 8089 + }, + { + "epoch": 1.26, + "learning_rate": 8.21380050832512e-06, + "logits/chosen": -0.7919309735298157, + "logits/rejected": -2.2771973609924316, + "logps/chosen": -160.75852966308594, + "logps/rejected": -599.262939453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6908012628555298, + "rewards/margins": 13.834482192993164, + "rewards/rejected": -14.525283813476562, + "step": 8090 + }, + { + "epoch": 1.26, + "learning_rate": 8.213067067793972e-06, + "logits/chosen": -2.8957371711730957, + "logits/rejected": -2.5799906253814697, + "logps/chosen": -348.3621520996094, + "logps/rejected": -224.8367156982422, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.909808397293091, + "rewards/margins": 4.2985076904296875, + "rewards/rejected": -7.208315849304199, + "step": 8091 + }, + { + "epoch": 1.26, + "learning_rate": 8.212333627262825e-06, + "logits/chosen": -2.618180513381958, + "logits/rejected": -3.070823907852173, + "logps/chosen": -171.48178100585938, + "logps/rejected": -306.4631652832031, + "loss": 0.1202, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6723685264587402, + "rewards/margins": 5.608689308166504, + "rewards/rejected": -9.281057357788086, + "step": 8092 + }, + { + "epoch": 1.26, + "learning_rate": 8.211600186731677e-06, + "logits/chosen": -2.5713939666748047, + "logits/rejected": -3.0349228382110596, + "logps/chosen": -298.55926513671875, + "logps/rejected": -456.2146301269531, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4054932594299316, + "rewards/margins": 5.089424133300781, + "rewards/rejected": -8.494916915893555, + "step": 8093 + }, + { + "epoch": 1.26, + "learning_rate": 8.210866746200529e-06, + "logits/chosen": -2.4150876998901367, + "logits/rejected": -2.706221103668213, + "logps/chosen": -226.19363403320312, + "logps/rejected": -188.469482421875, + "loss": 1.478, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.40486478805542, + "rewards/margins": 1.870163917541504, + "rewards/rejected": -7.275028228759766, + "step": 8094 + }, + { + "epoch": 1.26, + "learning_rate": 8.210133305669381e-06, + "logits/chosen": -2.829101085662842, + "logits/rejected": -2.145700693130493, + "logps/chosen": -388.40234375, + "logps/rejected": -216.03225708007812, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.264148712158203, + "rewards/margins": 5.5648193359375, + "rewards/rejected": -8.828968048095703, + "step": 8095 + }, + { + "epoch": 1.26, + "learning_rate": 8.209399865138233e-06, + "logits/chosen": -2.0347678661346436, + "logits/rejected": -2.9792277812957764, + "logps/chosen": -92.97982025146484, + "logps/rejected": -289.5599670410156, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.22192645072937, + "rewards/margins": 6.227453708648682, + "rewards/rejected": -9.449380874633789, + "step": 8096 + }, + { + "epoch": 1.26, + "learning_rate": 8.208666424607085e-06, + "logits/chosen": -3.0843558311462402, + "logits/rejected": -2.799712657928467, + "logps/chosen": -414.03802490234375, + "logps/rejected": -510.5860595703125, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5054199695587158, + "rewards/margins": 6.536521911621094, + "rewards/rejected": -8.04194164276123, + "step": 8097 + }, + { + "epoch": 1.26, + "learning_rate": 8.207932984075937e-06, + "logits/chosen": -2.8559486865997314, + "logits/rejected": -3.0986649990081787, + "logps/chosen": -70.66477966308594, + "logps/rejected": -190.48651123046875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.411576747894287, + "rewards/margins": 7.630328178405762, + "rewards/rejected": -10.04190444946289, + "step": 8098 + }, + { + "epoch": 1.26, + "learning_rate": 8.207199543544788e-06, + "logits/chosen": -1.6054461002349854, + "logits/rejected": -2.58862566947937, + "logps/chosen": -146.30203247070312, + "logps/rejected": -363.105224609375, + "loss": 0.1525, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.04749059677124, + "rewards/margins": 7.423375606536865, + "rewards/rejected": -12.470866203308105, + "step": 8099 + }, + { + "epoch": 1.26, + "learning_rate": 8.20646610301364e-06, + "logits/chosen": -2.9566476345062256, + "logits/rejected": -2.7961747646331787, + "logps/chosen": -122.18959045410156, + "logps/rejected": -231.98281860351562, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8400416374206543, + "rewards/margins": 5.8911333084106445, + "rewards/rejected": -9.731175422668457, + "step": 8100 + }, + { + "epoch": 1.26, + "learning_rate": 8.205732662482494e-06, + "logits/chosen": -3.033604145050049, + "logits/rejected": -3.109325408935547, + "logps/chosen": -104.57211303710938, + "logps/rejected": -190.59326171875, + "loss": 1.3117, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.405774116516113, + "rewards/margins": 3.0028669834136963, + "rewards/rejected": -8.40864086151123, + "step": 8101 + }, + { + "epoch": 1.26, + "learning_rate": 8.204999221951346e-06, + "logits/chosen": -2.0742146968841553, + "logits/rejected": -2.72699236869812, + "logps/chosen": -332.3203125, + "logps/rejected": -329.1334533691406, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.167810916900635, + "rewards/margins": 4.784054756164551, + "rewards/rejected": -8.951865196228027, + "step": 8102 + }, + { + "epoch": 1.26, + "learning_rate": 8.2042657814202e-06, + "logits/chosen": -2.9382169246673584, + "logits/rejected": -2.9327309131622314, + "logps/chosen": -442.26739501953125, + "logps/rejected": -426.6639404296875, + "loss": 0.0426, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6038544178009033, + "rewards/margins": 6.567655086517334, + "rewards/rejected": -10.1715087890625, + "step": 8103 + }, + { + "epoch": 1.26, + "learning_rate": 8.203532340889051e-06, + "logits/chosen": -0.9339452981948853, + "logits/rejected": -2.8078019618988037, + "logps/chosen": -56.745784759521484, + "logps/rejected": -304.25946044921875, + "loss": 0.1388, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.348209857940674, + "rewards/margins": 6.207089424133301, + "rewards/rejected": -9.555298805236816, + "step": 8104 + }, + { + "epoch": 1.26, + "learning_rate": 8.202798900357903e-06, + "logits/chosen": -1.6296677589416504, + "logits/rejected": -2.554108142852783, + "logps/chosen": -100.2903823852539, + "logps/rejected": -335.696044921875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.724614381790161, + "rewards/margins": 7.348593711853027, + "rewards/rejected": -10.07320785522461, + "step": 8105 + }, + { + "epoch": 1.26, + "learning_rate": 8.202065459826755e-06, + "logits/chosen": -2.80930495262146, + "logits/rejected": -2.982133626937866, + "logps/chosen": -65.59496307373047, + "logps/rejected": -292.732177734375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1677618026733398, + "rewards/margins": 8.329792976379395, + "rewards/rejected": -9.497554779052734, + "step": 8106 + }, + { + "epoch": 1.26, + "learning_rate": 8.201332019295607e-06, + "logits/chosen": -2.4313364028930664, + "logits/rejected": -3.1352620124816895, + "logps/chosen": -419.67681884765625, + "logps/rejected": -546.4447021484375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0962648391723633, + "rewards/margins": 7.1075825691223145, + "rewards/rejected": -10.20384693145752, + "step": 8107 + }, + { + "epoch": 1.26, + "learning_rate": 8.200598578764459e-06, + "logits/chosen": -1.0270586013793945, + "logits/rejected": -2.8773910999298096, + "logps/chosen": -149.08413696289062, + "logps/rejected": -393.7120666503906, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3916802406311035, + "rewards/margins": 6.413626670837402, + "rewards/rejected": -10.805306434631348, + "step": 8108 + }, + { + "epoch": 1.26, + "learning_rate": 8.199865138233312e-06, + "logits/chosen": -1.7458211183547974, + "logits/rejected": -2.0751171112060547, + "logps/chosen": -1035.118408203125, + "logps/rejected": -1076.0953369140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4063754081726074, + "rewards/margins": 9.976293563842773, + "rewards/rejected": -13.382669448852539, + "step": 8109 + }, + { + "epoch": 1.26, + "learning_rate": 8.199131697702164e-06, + "logits/chosen": -3.238693952560425, + "logits/rejected": -3.086927652359009, + "logps/chosen": -693.4655151367188, + "logps/rejected": -895.902099609375, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9584779739379883, + "rewards/margins": 5.029284477233887, + "rewards/rejected": -7.987762451171875, + "step": 8110 + }, + { + "epoch": 1.26, + "learning_rate": 8.198398257171016e-06, + "logits/chosen": -2.759763479232788, + "logits/rejected": -2.1897029876708984, + "logps/chosen": -235.38076782226562, + "logps/rejected": -289.6200256347656, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.167604923248291, + "rewards/margins": 7.6959228515625, + "rewards/rejected": -10.86352825164795, + "step": 8111 + }, + { + "epoch": 1.26, + "learning_rate": 8.197664816639868e-06, + "logits/chosen": -2.6663296222686768, + "logits/rejected": -2.8557567596435547, + "logps/chosen": -606.73779296875, + "logps/rejected": -516.1810302734375, + "loss": 0.0407, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.506507396697998, + "rewards/margins": 4.108038902282715, + "rewards/rejected": -7.614546298980713, + "step": 8112 + }, + { + "epoch": 1.26, + "learning_rate": 8.19693137610872e-06, + "logits/chosen": -3.013293743133545, + "logits/rejected": -3.055840015411377, + "logps/chosen": -71.15254211425781, + "logps/rejected": -211.27685546875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.386094570159912, + "rewards/margins": 7.07421875, + "rewards/rejected": -9.46031379699707, + "step": 8113 + }, + { + "epoch": 1.26, + "learning_rate": 8.196197935577572e-06, + "logits/chosen": -2.644922971725464, + "logits/rejected": -2.9158897399902344, + "logps/chosen": -424.6510009765625, + "logps/rejected": -535.9427490234375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7855489253997803, + "rewards/margins": 6.718915939331055, + "rewards/rejected": -10.504465103149414, + "step": 8114 + }, + { + "epoch": 1.26, + "learning_rate": 8.195464495046424e-06, + "logits/chosen": -2.9169435501098633, + "logits/rejected": -2.5888326168060303, + "logps/chosen": -229.6778564453125, + "logps/rejected": -307.8026123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7964035272598267, + "rewards/margins": 10.263681411743164, + "rewards/rejected": -12.06008529663086, + "step": 8115 + }, + { + "epoch": 1.26, + "learning_rate": 8.194731054515275e-06, + "logits/chosen": -1.9041367769241333, + "logits/rejected": -2.77321720123291, + "logps/chosen": -179.79579162597656, + "logps/rejected": -320.3363952636719, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2266268730163574, + "rewards/margins": 7.380674839019775, + "rewards/rejected": -9.607301712036133, + "step": 8116 + }, + { + "epoch": 1.26, + "learning_rate": 8.193997613984127e-06, + "logits/chosen": -2.6076407432556152, + "logits/rejected": -2.881953001022339, + "logps/chosen": -374.70709228515625, + "logps/rejected": -630.6177978515625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.270831108093262, + "rewards/margins": 7.980310440063477, + "rewards/rejected": -13.251141548156738, + "step": 8117 + }, + { + "epoch": 1.26, + "learning_rate": 8.193264173452981e-06, + "logits/chosen": -2.4765145778656006, + "logits/rejected": -2.6352760791778564, + "logps/chosen": -126.63536071777344, + "logps/rejected": -228.12857055664062, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3407883644104, + "rewards/margins": 6.583592891693115, + "rewards/rejected": -11.924381256103516, + "step": 8118 + }, + { + "epoch": 1.26, + "learning_rate": 8.192530732921833e-06, + "logits/chosen": -2.5848147869110107, + "logits/rejected": -3.079423189163208, + "logps/chosen": -48.159908294677734, + "logps/rejected": -202.25518798828125, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7396702766418457, + "rewards/margins": 6.7458600997924805, + "rewards/rejected": -10.485529899597168, + "step": 8119 + }, + { + "epoch": 1.26, + "learning_rate": 8.191797292390685e-06, + "logits/chosen": -1.5099742412567139, + "logits/rejected": -2.785367965698242, + "logps/chosen": -148.66311645507812, + "logps/rejected": -483.9535217285156, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.219959259033203, + "rewards/margins": 9.399385452270508, + "rewards/rejected": -13.619344711303711, + "step": 8120 + }, + { + "epoch": 1.26, + "learning_rate": 8.191063851859536e-06, + "logits/chosen": -1.9182775020599365, + "logits/rejected": -2.6371803283691406, + "logps/chosen": -194.534423828125, + "logps/rejected": -309.280029296875, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.355508327484131, + "rewards/margins": 4.882927894592285, + "rewards/rejected": -9.238435745239258, + "step": 8121 + }, + { + "epoch": 1.26, + "learning_rate": 8.190330411328388e-06, + "logits/chosen": -1.1396489143371582, + "logits/rejected": -1.5731638669967651, + "logps/chosen": -233.5303192138672, + "logps/rejected": -458.1812438964844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9238823652267456, + "rewards/margins": 10.459746360778809, + "rewards/rejected": -11.383628845214844, + "step": 8122 + }, + { + "epoch": 1.26, + "learning_rate": 8.18959697079724e-06, + "logits/chosen": -0.5436400175094604, + "logits/rejected": -1.5785466432571411, + "logps/chosen": -184.32981872558594, + "logps/rejected": -630.6044921875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.067298412322998, + "rewards/margins": 8.833488464355469, + "rewards/rejected": -11.900787353515625, + "step": 8123 + }, + { + "epoch": 1.26, + "learning_rate": 8.188863530266092e-06, + "logits/chosen": -2.463242292404175, + "logits/rejected": -2.846195697784424, + "logps/chosen": -78.68016052246094, + "logps/rejected": -238.15206909179688, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.410275459289551, + "rewards/margins": 7.277131080627441, + "rewards/rejected": -10.687406539916992, + "step": 8124 + }, + { + "epoch": 1.26, + "learning_rate": 8.188130089734944e-06, + "logits/chosen": -1.8128007650375366, + "logits/rejected": -2.5559511184692383, + "logps/chosen": -144.32904052734375, + "logps/rejected": -275.4935302734375, + "loss": 0.0565, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2729034423828125, + "rewards/margins": 6.063309669494629, + "rewards/rejected": -11.336214065551758, + "step": 8125 + }, + { + "epoch": 1.26, + "learning_rate": 8.187396649203796e-06, + "logits/chosen": -1.3779733180999756, + "logits/rejected": -2.8811895847320557, + "logps/chosen": -137.14324951171875, + "logps/rejected": -457.06396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3916218280792236, + "rewards/margins": 10.414225578308105, + "rewards/rejected": -12.80584716796875, + "step": 8126 + }, + { + "epoch": 1.26, + "learning_rate": 8.18666320867265e-06, + "logits/chosen": -2.6806411743164062, + "logits/rejected": -2.789585590362549, + "logps/chosen": -231.4898681640625, + "logps/rejected": -275.4006652832031, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.052046775817871, + "rewards/margins": 5.585910797119141, + "rewards/rejected": -7.637957572937012, + "step": 8127 + }, + { + "epoch": 1.26, + "learning_rate": 8.185929768141501e-06, + "logits/chosen": -2.535923719406128, + "logits/rejected": -2.8561394214630127, + "logps/chosen": -130.86431884765625, + "logps/rejected": -449.37738037109375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3388419151306152, + "rewards/margins": 7.302465438842773, + "rewards/rejected": -10.641307830810547, + "step": 8128 + }, + { + "epoch": 1.26, + "learning_rate": 8.185196327610353e-06, + "logits/chosen": -2.0928955078125, + "logits/rejected": -2.572817087173462, + "logps/chosen": -141.60202026367188, + "logps/rejected": -198.36634826660156, + "loss": 0.3248, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.584249496459961, + "rewards/margins": 4.212922096252441, + "rewards/rejected": -7.797171592712402, + "step": 8129 + }, + { + "epoch": 1.26, + "learning_rate": 8.184462887079205e-06, + "logits/chosen": -1.0493663549423218, + "logits/rejected": -2.451916217803955, + "logps/chosen": -244.30552673339844, + "logps/rejected": -558.7779541015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.178905487060547, + "rewards/margins": 9.099020957946777, + "rewards/rejected": -15.277926445007324, + "step": 8130 + }, + { + "epoch": 1.26, + "learning_rate": 8.183729446548057e-06, + "logits/chosen": -2.839756727218628, + "logits/rejected": -1.9992597103118896, + "logps/chosen": -555.015625, + "logps/rejected": -363.8785400390625, + "loss": 0.2344, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.943742275238037, + "rewards/margins": 4.837698936462402, + "rewards/rejected": -10.781440734863281, + "step": 8131 + }, + { + "epoch": 1.26, + "learning_rate": 8.182996006016909e-06, + "logits/chosen": -2.33392333984375, + "logits/rejected": -3.0306057929992676, + "logps/chosen": -107.50445556640625, + "logps/rejected": -396.9232177734375, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9774408340454102, + "rewards/margins": 6.1663055419921875, + "rewards/rejected": -8.143746376037598, + "step": 8132 + }, + { + "epoch": 1.26, + "learning_rate": 8.18226256548576e-06, + "logits/chosen": -2.602529287338257, + "logits/rejected": -2.8717923164367676, + "logps/chosen": -382.70867919921875, + "logps/rejected": -320.7723388671875, + "loss": 0.8557, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.819638252258301, + "rewards/margins": 3.396242618560791, + "rewards/rejected": -8.215880393981934, + "step": 8133 + }, + { + "epoch": 1.27, + "learning_rate": 8.181529124954613e-06, + "logits/chosen": -2.692617654800415, + "logits/rejected": -1.5203826427459717, + "logps/chosen": -200.4739990234375, + "logps/rejected": -181.9740753173828, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.667666435241699, + "rewards/margins": 6.936176300048828, + "rewards/rejected": -11.603842735290527, + "step": 8134 + }, + { + "epoch": 1.27, + "learning_rate": 8.180795684423466e-06, + "logits/chosen": -3.0195541381835938, + "logits/rejected": -1.7343446016311646, + "logps/chosen": -294.675048828125, + "logps/rejected": -160.1289520263672, + "loss": 0.0346, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0561468601226807, + "rewards/margins": 4.484295845031738, + "rewards/rejected": -7.54044246673584, + "step": 8135 + }, + { + "epoch": 1.27, + "learning_rate": 8.180062243892318e-06, + "logits/chosen": -2.485218048095703, + "logits/rejected": -1.9941960573196411, + "logps/chosen": -334.47552490234375, + "logps/rejected": -287.77325439453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.703029632568359, + "rewards/margins": 8.208574295043945, + "rewards/rejected": -12.911603927612305, + "step": 8136 + }, + { + "epoch": 1.27, + "learning_rate": 8.179328803361172e-06, + "logits/chosen": -2.4475979804992676, + "logits/rejected": -2.641587734222412, + "logps/chosen": -319.5423889160156, + "logps/rejected": -355.54345703125, + "loss": 0.884, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.465664863586426, + "rewards/margins": 3.4793763160705566, + "rewards/rejected": -9.94504165649414, + "step": 8137 + }, + { + "epoch": 1.27, + "learning_rate": 8.178595362830024e-06, + "logits/chosen": -3.031501293182373, + "logits/rejected": -2.440646171569824, + "logps/chosen": -458.4578552246094, + "logps/rejected": -324.3840637207031, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2268166542053223, + "rewards/margins": 6.83957576751709, + "rewards/rejected": -10.06639289855957, + "step": 8138 + }, + { + "epoch": 1.27, + "learning_rate": 8.177861922298875e-06, + "logits/chosen": -1.4516732692718506, + "logits/rejected": -2.4362270832061768, + "logps/chosen": -94.73468017578125, + "logps/rejected": -245.43618774414062, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.222923278808594, + "rewards/margins": 6.587444305419922, + "rewards/rejected": -11.810367584228516, + "step": 8139 + }, + { + "epoch": 1.27, + "learning_rate": 8.177128481767727e-06, + "logits/chosen": -1.2230830192565918, + "logits/rejected": -2.762158155441284, + "logps/chosen": -108.63906860351562, + "logps/rejected": -203.4549560546875, + "loss": 1.1696, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.259646892547607, + "rewards/margins": 2.0483627319335938, + "rewards/rejected": -7.308010101318359, + "step": 8140 + }, + { + "epoch": 1.27, + "learning_rate": 8.176395041236579e-06, + "logits/chosen": -1.0001237392425537, + "logits/rejected": -2.4088473320007324, + "logps/chosen": -205.20504760742188, + "logps/rejected": -421.10662841796875, + "loss": 0.5256, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.67418098449707, + "rewards/margins": 7.435506343841553, + "rewards/rejected": -12.109686851501465, + "step": 8141 + }, + { + "epoch": 1.27, + "learning_rate": 8.175661600705431e-06, + "logits/chosen": -2.855473518371582, + "logits/rejected": -2.314646005630493, + "logps/chosen": -387.4051513671875, + "logps/rejected": -469.642333984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6758129596710205, + "rewards/margins": 7.596111297607422, + "rewards/rejected": -10.271924018859863, + "step": 8142 + }, + { + "epoch": 1.27, + "learning_rate": 8.174928160174283e-06, + "logits/chosen": -2.5406346321105957, + "logits/rejected": -2.6384196281433105, + "logps/chosen": -111.95780944824219, + "logps/rejected": -290.94342041015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4348297119140625, + "rewards/margins": 9.31071662902832, + "rewards/rejected": -11.745546340942383, + "step": 8143 + }, + { + "epoch": 1.27, + "learning_rate": 8.174194719643135e-06, + "logits/chosen": -2.3762874603271484, + "logits/rejected": -3.0088956356048584, + "logps/chosen": -313.2237854003906, + "logps/rejected": -582.8839111328125, + "loss": 0.0439, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3345017433166504, + "rewards/margins": 4.742276191711426, + "rewards/rejected": -8.076777458190918, + "step": 8144 + }, + { + "epoch": 1.27, + "learning_rate": 8.173461279111988e-06, + "logits/chosen": -2.012049674987793, + "logits/rejected": -2.7125985622406006, + "logps/chosen": -121.39797973632812, + "logps/rejected": -167.85829162597656, + "loss": 3.4784, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.110719203948975, + "rewards/margins": -1.4386870861053467, + "rewards/rejected": -4.672031879425049, + "step": 8145 + }, + { + "epoch": 1.27, + "learning_rate": 8.17272783858084e-06, + "logits/chosen": -0.960361659526825, + "logits/rejected": -3.010075807571411, + "logps/chosen": -127.40499114990234, + "logps/rejected": -545.3663330078125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.629299163818359, + "rewards/margins": 8.716068267822266, + "rewards/rejected": -15.345367431640625, + "step": 8146 + }, + { + "epoch": 1.27, + "learning_rate": 8.171994398049692e-06, + "logits/chosen": -2.656756639480591, + "logits/rejected": -2.989412307739258, + "logps/chosen": -45.95635986328125, + "logps/rejected": -136.64439392089844, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8345649242401123, + "rewards/margins": 4.650774002075195, + "rewards/rejected": -7.485339164733887, + "step": 8147 + }, + { + "epoch": 1.27, + "learning_rate": 8.171260957518544e-06, + "logits/chosen": -2.815324544906616, + "logits/rejected": -2.102367639541626, + "logps/chosen": -174.53427124023438, + "logps/rejected": -290.44964599609375, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.095980644226074, + "rewards/margins": 6.045904159545898, + "rewards/rejected": -10.141884803771973, + "step": 8148 + }, + { + "epoch": 1.27, + "learning_rate": 8.170527516987396e-06, + "logits/chosen": -2.7838969230651855, + "logits/rejected": -2.7120180130004883, + "logps/chosen": -149.76202392578125, + "logps/rejected": -139.52142333984375, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7933974266052246, + "rewards/margins": 4.7830400466918945, + "rewards/rejected": -8.576436996459961, + "step": 8149 + }, + { + "epoch": 1.27, + "learning_rate": 8.169794076456248e-06, + "logits/chosen": -2.828920602798462, + "logits/rejected": -2.0244946479797363, + "logps/chosen": -369.126708984375, + "logps/rejected": -237.70706176757812, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.771899223327637, + "rewards/margins": 4.734517574310303, + "rewards/rejected": -9.506417274475098, + "step": 8150 + }, + { + "epoch": 1.27, + "learning_rate": 8.1690606359251e-06, + "logits/chosen": -2.414712429046631, + "logits/rejected": -2.7333507537841797, + "logps/chosen": -226.83392333984375, + "logps/rejected": -426.5771484375, + "loss": 0.4426, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.804262161254883, + "rewards/margins": 4.391781330108643, + "rewards/rejected": -9.196043014526367, + "step": 8151 + }, + { + "epoch": 1.27, + "learning_rate": 8.168327195393952e-06, + "logits/chosen": -2.9456822872161865, + "logits/rejected": -2.3375673294067383, + "logps/chosen": -656.6776123046875, + "logps/rejected": -454.5021667480469, + "loss": 0.5415, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.556485176086426, + "rewards/margins": 2.7584614753723145, + "rewards/rejected": -7.314946174621582, + "step": 8152 + }, + { + "epoch": 1.27, + "learning_rate": 8.167593754862803e-06, + "logits/chosen": -2.0505270957946777, + "logits/rejected": -2.421886682510376, + "logps/chosen": -180.3988037109375, + "logps/rejected": -251.0083465576172, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.201264381408691, + "rewards/margins": 7.2864789962768555, + "rewards/rejected": -11.487743377685547, + "step": 8153 + }, + { + "epoch": 1.27, + "learning_rate": 8.166860314331657e-06, + "logits/chosen": -2.309115409851074, + "logits/rejected": -2.8109211921691895, + "logps/chosen": -78.65092468261719, + "logps/rejected": -199.90570068359375, + "loss": 0.0676, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7831339836120605, + "rewards/margins": 4.614231109619141, + "rewards/rejected": -9.397364616394043, + "step": 8154 + }, + { + "epoch": 1.27, + "learning_rate": 8.166126873800509e-06, + "logits/chosen": -1.9232546091079712, + "logits/rejected": -2.8432247638702393, + "logps/chosen": -173.446044921875, + "logps/rejected": -299.7831115722656, + "loss": 0.5668, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.947620391845703, + "rewards/margins": 3.3534140586853027, + "rewards/rejected": -7.301034927368164, + "step": 8155 + }, + { + "epoch": 1.27, + "learning_rate": 8.16539343326936e-06, + "logits/chosen": -3.0343053340911865, + "logits/rejected": -2.7599737644195557, + "logps/chosen": -293.9452819824219, + "logps/rejected": -275.8616943359375, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.038557052612305, + "rewards/margins": 6.081511497497559, + "rewards/rejected": -10.120068550109863, + "step": 8156 + }, + { + "epoch": 1.27, + "learning_rate": 8.164659992738213e-06, + "logits/chosen": -2.83939790725708, + "logits/rejected": -2.6905860900878906, + "logps/chosen": -121.34515380859375, + "logps/rejected": -351.938720703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.292693138122559, + "rewards/margins": 9.00028133392334, + "rewards/rejected": -13.292974472045898, + "step": 8157 + }, + { + "epoch": 1.27, + "learning_rate": 8.163926552207064e-06, + "logits/chosen": -1.149140477180481, + "logits/rejected": -1.6897692680358887, + "logps/chosen": -67.13467407226562, + "logps/rejected": -239.92745971679688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.895041823387146, + "rewards/margins": 10.107921600341797, + "rewards/rejected": -12.002963066101074, + "step": 8158 + }, + { + "epoch": 1.27, + "learning_rate": 8.163193111675916e-06, + "logits/chosen": -2.0657477378845215, + "logits/rejected": -2.8299758434295654, + "logps/chosen": -238.7515411376953, + "logps/rejected": -322.86065673828125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.593298316001892, + "rewards/margins": 7.199578285217285, + "rewards/rejected": -8.792876243591309, + "step": 8159 + }, + { + "epoch": 1.27, + "learning_rate": 8.162459671144768e-06, + "logits/chosen": -2.8909382820129395, + "logits/rejected": -2.361034870147705, + "logps/chosen": -207.46646118164062, + "logps/rejected": -168.23947143554688, + "loss": 2.4748, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.942335605621338, + "rewards/margins": -0.12627267837524414, + "rewards/rejected": -5.816062927246094, + "step": 8160 + }, + { + "epoch": 1.27, + "learning_rate": 8.16172623061362e-06, + "logits/chosen": -2.7356691360473633, + "logits/rejected": -2.2866690158843994, + "logps/chosen": -568.233154296875, + "logps/rejected": -544.9873046875, + "loss": 0.1627, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2443511486053467, + "rewards/margins": 4.41168212890625, + "rewards/rejected": -7.656033515930176, + "step": 8161 + }, + { + "epoch": 1.27, + "learning_rate": 8.160992790082472e-06, + "logits/chosen": -2.8359546661376953, + "logits/rejected": -2.9401400089263916, + "logps/chosen": -309.2950439453125, + "logps/rejected": -361.0025939941406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6858901977539062, + "rewards/margins": 10.050636291503906, + "rewards/rejected": -13.736526489257812, + "step": 8162 + }, + { + "epoch": 1.27, + "learning_rate": 8.160259349551326e-06, + "logits/chosen": -2.1461727619171143, + "logits/rejected": -2.877131462097168, + "logps/chosen": -105.86026000976562, + "logps/rejected": -305.3912048339844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2986197471618652, + "rewards/margins": 9.313309669494629, + "rewards/rejected": -11.611928939819336, + "step": 8163 + }, + { + "epoch": 1.27, + "learning_rate": 8.159525909020177e-06, + "logits/chosen": -2.1934871673583984, + "logits/rejected": -2.424288272857666, + "logps/chosen": -150.43954467773438, + "logps/rejected": -359.32489013671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6921730041503906, + "rewards/margins": 9.91010570526123, + "rewards/rejected": -13.602278709411621, + "step": 8164 + }, + { + "epoch": 1.27, + "learning_rate": 8.15879246848903e-06, + "logits/chosen": -2.8493518829345703, + "logits/rejected": -3.090183734893799, + "logps/chosen": -44.019439697265625, + "logps/rejected": -290.46905517578125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3851239681243896, + "rewards/margins": 9.255596160888672, + "rewards/rejected": -10.64072036743164, + "step": 8165 + }, + { + "epoch": 1.27, + "learning_rate": 8.158059027957881e-06, + "logits/chosen": -2.2306551933288574, + "logits/rejected": -2.8265647888183594, + "logps/chosen": -122.12039184570312, + "logps/rejected": -357.8156433105469, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5543038845062256, + "rewards/margins": 8.017122268676758, + "rewards/rejected": -11.571426391601562, + "step": 8166 + }, + { + "epoch": 1.27, + "learning_rate": 8.157325587426733e-06, + "logits/chosen": -2.236175298690796, + "logits/rejected": -2.9720027446746826, + "logps/chosen": -256.08642578125, + "logps/rejected": -391.54071044921875, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2196896076202393, + "rewards/margins": 8.551811218261719, + "rewards/rejected": -11.771499633789062, + "step": 8167 + }, + { + "epoch": 1.27, + "learning_rate": 8.156592146895585e-06, + "logits/chosen": -2.196211338043213, + "logits/rejected": -2.5081546306610107, + "logps/chosen": -133.74305725097656, + "logps/rejected": -506.6861572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.090210437774658, + "rewards/margins": 12.425935745239258, + "rewards/rejected": -14.516145706176758, + "step": 8168 + }, + { + "epoch": 1.27, + "learning_rate": 8.155858706364439e-06, + "logits/chosen": -1.7461936473846436, + "logits/rejected": -3.1128721237182617, + "logps/chosen": -167.49191284179688, + "logps/rejected": -388.52392578125, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6872475147247314, + "rewards/margins": 4.614960670471191, + "rewards/rejected": -7.302207946777344, + "step": 8169 + }, + { + "epoch": 1.27, + "learning_rate": 8.15512526583329e-06, + "logits/chosen": -3.15462327003479, + "logits/rejected": -3.199380397796631, + "logps/chosen": -369.28369140625, + "logps/rejected": -485.9950866699219, + "loss": 0.4589, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.228843688964844, + "rewards/margins": 1.6415153741836548, + "rewards/rejected": -6.870359420776367, + "step": 8170 + }, + { + "epoch": 1.27, + "learning_rate": 8.154391825302142e-06, + "logits/chosen": -3.121744155883789, + "logits/rejected": -1.937738299369812, + "logps/chosen": -293.3978576660156, + "logps/rejected": -130.31163024902344, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5062716007232666, + "rewards/margins": 5.856019020080566, + "rewards/rejected": -8.362290382385254, + "step": 8171 + }, + { + "epoch": 1.27, + "learning_rate": 8.153658384770996e-06, + "logits/chosen": -2.6636414527893066, + "logits/rejected": -3.0539846420288086, + "logps/chosen": -123.970703125, + "logps/rejected": -291.9171447753906, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.872286796569824, + "rewards/margins": 8.04202651977539, + "rewards/rejected": -10.914312362670898, + "step": 8172 + }, + { + "epoch": 1.27, + "learning_rate": 8.152924944239848e-06, + "logits/chosen": -2.404177188873291, + "logits/rejected": -3.1789305210113525, + "logps/chosen": -37.47587203979492, + "logps/rejected": -250.42532348632812, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.259709358215332, + "rewards/margins": 7.61799430847168, + "rewards/rejected": -8.877704620361328, + "step": 8173 + }, + { + "epoch": 1.27, + "learning_rate": 8.1521915037087e-06, + "logits/chosen": -2.456172227859497, + "logits/rejected": -3.06256365776062, + "logps/chosen": -71.6162338256836, + "logps/rejected": -340.7099304199219, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.755708932876587, + "rewards/margins": 10.609868049621582, + "rewards/rejected": -14.365577697753906, + "step": 8174 + }, + { + "epoch": 1.27, + "learning_rate": 8.151458063177551e-06, + "logits/chosen": -2.9075231552124023, + "logits/rejected": -2.480731725692749, + "logps/chosen": -152.7678680419922, + "logps/rejected": -212.5506591796875, + "loss": 0.9948, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.322160720825195, + "rewards/margins": 3.466991662979126, + "rewards/rejected": -8.789152145385742, + "step": 8175 + }, + { + "epoch": 1.27, + "learning_rate": 8.150724622646403e-06, + "logits/chosen": -2.5967416763305664, + "logits/rejected": -3.117379665374756, + "logps/chosen": -286.5736083984375, + "logps/rejected": -331.592041015625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4161338806152344, + "rewards/margins": 5.24350643157959, + "rewards/rejected": -8.659640312194824, + "step": 8176 + }, + { + "epoch": 1.27, + "learning_rate": 8.149991182115255e-06, + "logits/chosen": -1.50450599193573, + "logits/rejected": -2.7935261726379395, + "logps/chosen": -104.8003921508789, + "logps/rejected": -366.37548828125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.194305419921875, + "rewards/margins": 9.459939956665039, + "rewards/rejected": -12.654245376586914, + "step": 8177 + }, + { + "epoch": 1.27, + "learning_rate": 8.149257741584107e-06, + "logits/chosen": -2.892155647277832, + "logits/rejected": -2.993849515914917, + "logps/chosen": -52.832611083984375, + "logps/rejected": -314.9921569824219, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0568289756774902, + "rewards/margins": 7.606655120849609, + "rewards/rejected": -9.663484573364258, + "step": 8178 + }, + { + "epoch": 1.27, + "learning_rate": 8.148524301052959e-06, + "logits/chosen": -2.425964117050171, + "logits/rejected": -2.7447025775909424, + "logps/chosen": -120.05870056152344, + "logps/rejected": -377.82537841796875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2641730308532715, + "rewards/margins": 8.577528953552246, + "rewards/rejected": -11.84170150756836, + "step": 8179 + }, + { + "epoch": 1.27, + "learning_rate": 8.147790860521811e-06, + "logits/chosen": -2.8816843032836914, + "logits/rejected": -2.730839967727661, + "logps/chosen": -223.90771484375, + "logps/rejected": -382.08441162109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.023955345153809, + "rewards/margins": 8.254508018493652, + "rewards/rejected": -13.278463363647461, + "step": 8180 + }, + { + "epoch": 1.27, + "learning_rate": 8.147057419990664e-06, + "logits/chosen": -2.702570676803589, + "logits/rejected": -2.6561269760131836, + "logps/chosen": -116.43479919433594, + "logps/rejected": -183.6494140625, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2813453674316406, + "rewards/margins": 5.141204357147217, + "rewards/rejected": -8.422550201416016, + "step": 8181 + }, + { + "epoch": 1.27, + "learning_rate": 8.146323979459516e-06, + "logits/chosen": -1.3749399185180664, + "logits/rejected": -2.7033755779266357, + "logps/chosen": -570.1697998046875, + "logps/rejected": -608.9594116210938, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7315003871917725, + "rewards/margins": 8.983012199401855, + "rewards/rejected": -12.714512825012207, + "step": 8182 + }, + { + "epoch": 1.27, + "learning_rate": 8.145590538928368e-06, + "logits/chosen": -2.204812526702881, + "logits/rejected": -2.8674166202545166, + "logps/chosen": -90.77555847167969, + "logps/rejected": -256.1426696777344, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.286923408508301, + "rewards/margins": 8.525918960571289, + "rewards/rejected": -11.812843322753906, + "step": 8183 + }, + { + "epoch": 1.27, + "learning_rate": 8.14485709839722e-06, + "logits/chosen": -1.1620087623596191, + "logits/rejected": -1.4612679481506348, + "logps/chosen": -73.9713134765625, + "logps/rejected": -297.91693115234375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.364622950553894, + "rewards/margins": 7.719223976135254, + "rewards/rejected": -9.083847045898438, + "step": 8184 + }, + { + "epoch": 1.27, + "learning_rate": 8.144123657866072e-06, + "logits/chosen": -2.3138163089752197, + "logits/rejected": -3.0262534618377686, + "logps/chosen": -183.8919677734375, + "logps/rejected": -296.3399963378906, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5255351066589355, + "rewards/margins": 6.383918762207031, + "rewards/rejected": -10.909454345703125, + "step": 8185 + }, + { + "epoch": 1.27, + "learning_rate": 8.143390217334924e-06, + "logits/chosen": -1.2926849126815796, + "logits/rejected": -2.738088846206665, + "logps/chosen": -133.25804138183594, + "logps/rejected": -318.3634033203125, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6094601154327393, + "rewards/margins": 7.1434221267700195, + "rewards/rejected": -10.75288200378418, + "step": 8186 + }, + { + "epoch": 1.27, + "learning_rate": 8.142656776803776e-06, + "logits/chosen": -3.0284626483917236, + "logits/rejected": -2.841512680053711, + "logps/chosen": -230.65269470214844, + "logps/rejected": -267.6914367675781, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.394494533538818, + "rewards/margins": 7.717336177825928, + "rewards/rejected": -12.111830711364746, + "step": 8187 + }, + { + "epoch": 1.27, + "learning_rate": 8.141923336272628e-06, + "logits/chosen": -3.0254337787628174, + "logits/rejected": -2.982076406478882, + "logps/chosen": -594.9117431640625, + "logps/rejected": -503.1632385253906, + "loss": 0.3965, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.4888558387756348, + "rewards/margins": 3.4494104385375977, + "rewards/rejected": -6.938265800476074, + "step": 8188 + }, + { + "epoch": 1.27, + "learning_rate": 8.14118989574148e-06, + "logits/chosen": -3.074232816696167, + "logits/rejected": -2.1331875324249268, + "logps/chosen": -367.8397216796875, + "logps/rejected": -310.009765625, + "loss": 0.1406, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.960679769515991, + "rewards/margins": 4.761761665344238, + "rewards/rejected": -8.722440719604492, + "step": 8189 + }, + { + "epoch": 1.27, + "learning_rate": 8.140456455210333e-06, + "logits/chosen": -1.0954830646514893, + "logits/rejected": -3.0444693565368652, + "logps/chosen": -131.17620849609375, + "logps/rejected": -589.507080078125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7245423793792725, + "rewards/margins": 7.220823287963867, + "rewards/rejected": -9.945365905761719, + "step": 8190 + }, + { + "epoch": 1.27, + "learning_rate": 8.139723014679185e-06, + "logits/chosen": -2.134586811065674, + "logits/rejected": -2.7670934200286865, + "logps/chosen": -147.78402709960938, + "logps/rejected": -256.86688232421875, + "loss": 3.0061, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.778995037078857, + "rewards/margins": 1.4495251178741455, + "rewards/rejected": -7.228520393371582, + "step": 8191 + }, + { + "epoch": 1.27, + "learning_rate": 8.138989574148037e-06, + "logits/chosen": -3.091050863265991, + "logits/rejected": -3.1972239017486572, + "logps/chosen": -287.05999755859375, + "logps/rejected": -505.9951171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2563254833221436, + "rewards/margins": 7.85809326171875, + "rewards/rejected": -9.114418983459473, + "step": 8192 + }, + { + "epoch": 1.27, + "learning_rate": 8.138256133616889e-06, + "logits/chosen": -1.8809760808944702, + "logits/rejected": -2.787034749984741, + "logps/chosen": -198.60498046875, + "logps/rejected": -450.226806640625, + "loss": 0.0976, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.794605255126953, + "rewards/margins": 4.6824798583984375, + "rewards/rejected": -8.47708511352539, + "step": 8193 + }, + { + "epoch": 1.27, + "learning_rate": 8.13752269308574e-06, + "logits/chosen": -2.234083652496338, + "logits/rejected": -3.09920597076416, + "logps/chosen": -146.7428741455078, + "logps/rejected": -388.558349609375, + "loss": 0.5832, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7253031730651855, + "rewards/margins": 3.9058568477630615, + "rewards/rejected": -7.631159782409668, + "step": 8194 + }, + { + "epoch": 1.27, + "learning_rate": 8.136789252554592e-06, + "logits/chosen": -3.0060720443725586, + "logits/rejected": -2.9553985595703125, + "logps/chosen": -631.0254516601562, + "logps/rejected": -518.8790283203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3167779445648193, + "rewards/margins": 9.7160062789917, + "rewards/rejected": -12.032783508300781, + "step": 8195 + }, + { + "epoch": 1.27, + "learning_rate": 8.136055812023444e-06, + "logits/chosen": -2.987203359603882, + "logits/rejected": -1.6825097799301147, + "logps/chosen": -619.508544921875, + "logps/rejected": -487.6068420410156, + "loss": 0.1286, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.118945121765137, + "rewards/margins": 7.244331359863281, + "rewards/rejected": -11.363276481628418, + "step": 8196 + }, + { + "epoch": 1.27, + "learning_rate": 8.135322371492296e-06, + "logits/chosen": -3.100860357284546, + "logits/rejected": -3.103639602661133, + "logps/chosen": -245.12413024902344, + "logps/rejected": -211.3367919921875, + "loss": 0.9826, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.674796104431152, + "rewards/margins": 2.617265462875366, + "rewards/rejected": -7.292061805725098, + "step": 8197 + }, + { + "epoch": 1.27, + "learning_rate": 8.134588930961148e-06, + "logits/chosen": -2.216238021850586, + "logits/rejected": -3.0507009029388428, + "logps/chosen": -478.71417236328125, + "logps/rejected": -660.5126342773438, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.076080799102783, + "rewards/margins": 8.814713478088379, + "rewards/rejected": -10.890793800354004, + "step": 8198 + }, + { + "epoch": 1.28, + "learning_rate": 8.133855490430002e-06, + "logits/chosen": -2.787306785583496, + "logits/rejected": -2.7037723064422607, + "logps/chosen": -43.3011474609375, + "logps/rejected": -165.16036987304688, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.956529140472412, + "rewards/margins": 9.289902687072754, + "rewards/rejected": -11.246431350708008, + "step": 8199 + }, + { + "epoch": 1.28, + "learning_rate": 8.133122049898854e-06, + "logits/chosen": -2.7272465229034424, + "logits/rejected": -2.2189433574676514, + "logps/chosen": -254.759033203125, + "logps/rejected": -341.36871337890625, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0497779846191406, + "rewards/margins": 6.535435676574707, + "rewards/rejected": -8.585214614868164, + "step": 8200 + }, + { + "epoch": 1.28, + "learning_rate": 8.132388609367705e-06, + "logits/chosen": -1.7056453227996826, + "logits/rejected": -2.814763307571411, + "logps/chosen": -168.62838745117188, + "logps/rejected": -303.14215087890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.641277313232422, + "rewards/margins": 8.67082405090332, + "rewards/rejected": -11.312101364135742, + "step": 8201 + }, + { + "epoch": 1.28, + "learning_rate": 8.131655168836557e-06, + "logits/chosen": -2.491959810256958, + "logits/rejected": -2.956559181213379, + "logps/chosen": -690.884521484375, + "logps/rejected": -683.668212890625, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.713696241378784, + "rewards/margins": 4.894402980804443, + "rewards/rejected": -8.608098983764648, + "step": 8202 + }, + { + "epoch": 1.28, + "learning_rate": 8.13092172830541e-06, + "logits/chosen": -2.614027261734009, + "logits/rejected": -3.026219129562378, + "logps/chosen": -141.12351989746094, + "logps/rejected": -167.37261962890625, + "loss": 0.4856, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.544147968292236, + "rewards/margins": 3.000800609588623, + "rewards/rejected": -7.544948577880859, + "step": 8203 + }, + { + "epoch": 1.28, + "learning_rate": 8.130188287774263e-06, + "logits/chosen": -2.02609920501709, + "logits/rejected": -2.5980355739593506, + "logps/chosen": -197.88426208496094, + "logps/rejected": -317.21258544921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0397932529449463, + "rewards/margins": 8.865815162658691, + "rewards/rejected": -11.905609130859375, + "step": 8204 + }, + { + "epoch": 1.28, + "learning_rate": 8.129454847243115e-06, + "logits/chosen": -3.169057846069336, + "logits/rejected": -1.8828898668289185, + "logps/chosen": -459.4103698730469, + "logps/rejected": -277.9432373046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2437636852264404, + "rewards/margins": 8.621432304382324, + "rewards/rejected": -10.865196228027344, + "step": 8205 + }, + { + "epoch": 1.28, + "learning_rate": 8.128721406711966e-06, + "logits/chosen": -3.0781779289245605, + "logits/rejected": -2.5136964321136475, + "logps/chosen": -338.12799072265625, + "logps/rejected": -184.6288299560547, + "loss": 0.1723, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4063827991485596, + "rewards/margins": 2.5064005851745605, + "rewards/rejected": -5.912783622741699, + "step": 8206 + }, + { + "epoch": 1.28, + "learning_rate": 8.12798796618082e-06, + "logits/chosen": -2.244011163711548, + "logits/rejected": -2.832289695739746, + "logps/chosen": -110.90611267089844, + "logps/rejected": -283.07159423828125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.827471971511841, + "rewards/margins": 7.969269752502441, + "rewards/rejected": -10.796741485595703, + "step": 8207 + }, + { + "epoch": 1.28, + "learning_rate": 8.127254525649672e-06, + "logits/chosen": -3.0112929344177246, + "logits/rejected": -1.641775131225586, + "logps/chosen": -290.5509338378906, + "logps/rejected": -83.89728546142578, + "loss": 4.634, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.544790267944336, + "rewards/margins": -4.566646575927734, + "rewards/rejected": -3.9781436920166016, + "step": 8208 + }, + { + "epoch": 1.28, + "learning_rate": 8.126521085118524e-06, + "logits/chosen": -1.947515606880188, + "logits/rejected": -3.1032769680023193, + "logps/chosen": -48.19664764404297, + "logps/rejected": -227.53750610351562, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.113938331604004, + "rewards/margins": 5.297463417053223, + "rewards/rejected": -8.411401748657227, + "step": 8209 + }, + { + "epoch": 1.28, + "learning_rate": 8.125787644587376e-06, + "logits/chosen": -1.2617647647857666, + "logits/rejected": -2.839576244354248, + "logps/chosen": -147.42919921875, + "logps/rejected": -483.5173645019531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4983017444610596, + "rewards/margins": 12.78654670715332, + "rewards/rejected": -14.284849166870117, + "step": 8210 + }, + { + "epoch": 1.28, + "learning_rate": 8.125054204056228e-06, + "logits/chosen": -2.4050471782684326, + "logits/rejected": -2.3247992992401123, + "logps/chosen": -212.8910369873047, + "logps/rejected": -335.9287109375, + "loss": 0.4868, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.063398361206055, + "rewards/margins": 4.680814743041992, + "rewards/rejected": -8.74421215057373, + "step": 8211 + }, + { + "epoch": 1.28, + "learning_rate": 8.12432076352508e-06, + "logits/chosen": -2.621373176574707, + "logits/rejected": -3.0755937099456787, + "logps/chosen": -357.0797119140625, + "logps/rejected": -410.94781494140625, + "loss": 0.7974, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.530743598937988, + "rewards/margins": 2.3330752849578857, + "rewards/rejected": -8.863819122314453, + "step": 8212 + }, + { + "epoch": 1.28, + "learning_rate": 8.123587322993931e-06, + "logits/chosen": -1.6871594190597534, + "logits/rejected": -2.998082399368286, + "logps/chosen": -150.2626495361328, + "logps/rejected": -260.1587219238281, + "loss": 0.1301, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4885549545288086, + "rewards/margins": 2.7760725021362305, + "rewards/rejected": -6.264627456665039, + "step": 8213 + }, + { + "epoch": 1.28, + "learning_rate": 8.122853882462783e-06, + "logits/chosen": -2.8085713386535645, + "logits/rejected": -3.0432159900665283, + "logps/chosen": -159.97828674316406, + "logps/rejected": -233.03065490722656, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.387491226196289, + "rewards/margins": 6.63754940032959, + "rewards/rejected": -11.025040626525879, + "step": 8214 + }, + { + "epoch": 1.28, + "learning_rate": 8.122120441931635e-06, + "logits/chosen": -3.125430107116699, + "logits/rejected": -2.5495822429656982, + "logps/chosen": -318.4279479980469, + "logps/rejected": -242.11831665039062, + "loss": 0.9742, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.7004892826080322, + "rewards/margins": 4.015900611877441, + "rewards/rejected": -7.7163896560668945, + "step": 8215 + }, + { + "epoch": 1.28, + "learning_rate": 8.121387001400489e-06, + "logits/chosen": -3.079759359359741, + "logits/rejected": -2.491638660430908, + "logps/chosen": -340.3777770996094, + "logps/rejected": -190.41650390625, + "loss": 4.2065, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.058575630187988, + "rewards/margins": 1.5641446113586426, + "rewards/rejected": -7.622719764709473, + "step": 8216 + }, + { + "epoch": 1.28, + "learning_rate": 8.12065356086934e-06, + "logits/chosen": -2.609976053237915, + "logits/rejected": -2.98488712310791, + "logps/chosen": -295.725341796875, + "logps/rejected": -372.053955078125, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3611536026000977, + "rewards/margins": 5.492582321166992, + "rewards/rejected": -8.85373592376709, + "step": 8217 + }, + { + "epoch": 1.28, + "learning_rate": 8.119920120338192e-06, + "logits/chosen": -2.0906355381011963, + "logits/rejected": -3.076049327850342, + "logps/chosen": -391.17877197265625, + "logps/rejected": -543.1358642578125, + "loss": 1.068, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.126236915588379, + "rewards/margins": 2.2895491123199463, + "rewards/rejected": -10.415786743164062, + "step": 8218 + }, + { + "epoch": 1.28, + "learning_rate": 8.119186679807044e-06, + "logits/chosen": -3.0999815464019775, + "logits/rejected": -3.0954625606536865, + "logps/chosen": -138.86679077148438, + "logps/rejected": -252.468505859375, + "loss": 1.5878, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.64127254486084, + "rewards/margins": 2.7498486042022705, + "rewards/rejected": -10.391120910644531, + "step": 8219 + }, + { + "epoch": 1.28, + "learning_rate": 8.118453239275896e-06, + "logits/chosen": -2.2767770290374756, + "logits/rejected": -2.9547295570373535, + "logps/chosen": -122.4462661743164, + "logps/rejected": -378.4342041015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8346734046936035, + "rewards/margins": 9.013668060302734, + "rewards/rejected": -11.84834098815918, + "step": 8220 + }, + { + "epoch": 1.28, + "learning_rate": 8.117719798744748e-06, + "logits/chosen": -3.041027307510376, + "logits/rejected": -2.8405280113220215, + "logps/chosen": -157.21022033691406, + "logps/rejected": -240.10464477539062, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.295337438583374, + "rewards/margins": 6.606168270111084, + "rewards/rejected": -8.901506423950195, + "step": 8221 + }, + { + "epoch": 1.28, + "learning_rate": 8.1169863582136e-06, + "logits/chosen": -2.365673303604126, + "logits/rejected": -2.397672176361084, + "logps/chosen": -551.5159301757812, + "logps/rejected": -460.19775390625, + "loss": 0.1919, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9526774883270264, + "rewards/margins": 2.9520628452301025, + "rewards/rejected": -6.904740333557129, + "step": 8222 + }, + { + "epoch": 1.28, + "learning_rate": 8.116252917682452e-06, + "logits/chosen": -1.5419126749038696, + "logits/rejected": -2.4428093433380127, + "logps/chosen": -252.10775756835938, + "logps/rejected": -375.98876953125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8412094116210938, + "rewards/margins": 6.724309921264648, + "rewards/rejected": -10.565519332885742, + "step": 8223 + }, + { + "epoch": 1.28, + "learning_rate": 8.115519477151304e-06, + "logits/chosen": -2.9692482948303223, + "logits/rejected": -3.17965030670166, + "logps/chosen": -298.08984375, + "logps/rejected": -361.2896728515625, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.633042812347412, + "rewards/margins": 5.081174850463867, + "rewards/rejected": -7.714217662811279, + "step": 8224 + }, + { + "epoch": 1.28, + "learning_rate": 8.114786036620157e-06, + "logits/chosen": -2.0336692333221436, + "logits/rejected": -3.124739408493042, + "logps/chosen": -109.74771118164062, + "logps/rejected": -266.582275390625, + "loss": 1.1427, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.242503643035889, + "rewards/margins": 1.8651670217514038, + "rewards/rejected": -6.107670783996582, + "step": 8225 + }, + { + "epoch": 1.28, + "learning_rate": 8.114052596089009e-06, + "logits/chosen": -2.9127795696258545, + "logits/rejected": -2.8203721046447754, + "logps/chosen": -469.99884033203125, + "logps/rejected": -454.1116027832031, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2176601886749268, + "rewards/margins": 7.4460368156433105, + "rewards/rejected": -8.6636962890625, + "step": 8226 + }, + { + "epoch": 1.28, + "learning_rate": 8.113319155557861e-06, + "logits/chosen": -2.313208818435669, + "logits/rejected": -3.0577259063720703, + "logps/chosen": -127.59080505371094, + "logps/rejected": -246.26596069335938, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.196486473083496, + "rewards/margins": 6.132067680358887, + "rewards/rejected": -10.328554153442383, + "step": 8227 + }, + { + "epoch": 1.28, + "learning_rate": 8.112585715026713e-06, + "logits/chosen": -1.4862843751907349, + "logits/rejected": -2.707855463027954, + "logps/chosen": -155.15866088867188, + "logps/rejected": -409.04608154296875, + "loss": 0.0335, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7591867446899414, + "rewards/margins": 4.305383682250977, + "rewards/rejected": -8.064571380615234, + "step": 8228 + }, + { + "epoch": 1.28, + "learning_rate": 8.111852274495565e-06, + "logits/chosen": -2.456867218017578, + "logits/rejected": -2.9895179271698, + "logps/chosen": -88.800537109375, + "logps/rejected": -235.9591064453125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.745426893234253, + "rewards/margins": 7.278692245483398, + "rewards/rejected": -9.02411937713623, + "step": 8229 + }, + { + "epoch": 1.28, + "learning_rate": 8.111118833964417e-06, + "logits/chosen": -2.6190552711486816, + "logits/rejected": -3.025982618331909, + "logps/chosen": -97.10408782958984, + "logps/rejected": -230.28030395507812, + "loss": 0.0581, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.04973840713501, + "rewards/margins": 3.036846399307251, + "rewards/rejected": -8.08658504486084, + "step": 8230 + }, + { + "epoch": 1.28, + "learning_rate": 8.110385393433269e-06, + "logits/chosen": -2.8009872436523438, + "logits/rejected": -1.9840905666351318, + "logps/chosen": -705.4573364257812, + "logps/rejected": -515.5863647460938, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7795612215995789, + "rewards/margins": 7.756900787353516, + "rewards/rejected": -8.53646183013916, + "step": 8231 + }, + { + "epoch": 1.28, + "learning_rate": 8.10965195290212e-06, + "logits/chosen": -3.0733957290649414, + "logits/rejected": -2.989889621734619, + "logps/chosen": -573.8199462890625, + "logps/rejected": -513.2750244140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.256372094154358, + "rewards/margins": 8.012701988220215, + "rewards/rejected": -9.269073486328125, + "step": 8232 + }, + { + "epoch": 1.28, + "learning_rate": 8.108918512370972e-06, + "logits/chosen": -1.8220019340515137, + "logits/rejected": -2.995110034942627, + "logps/chosen": -75.18788146972656, + "logps/rejected": -429.20684814453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6454086303710938, + "rewards/margins": 9.76323413848877, + "rewards/rejected": -12.408642768859863, + "step": 8233 + }, + { + "epoch": 1.28, + "learning_rate": 8.108185071839826e-06, + "logits/chosen": -2.84491229057312, + "logits/rejected": -3.0887646675109863, + "logps/chosen": -279.22589111328125, + "logps/rejected": -252.86114501953125, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9014511108398438, + "rewards/margins": 4.493709564208984, + "rewards/rejected": -7.395160675048828, + "step": 8234 + }, + { + "epoch": 1.28, + "learning_rate": 8.107451631308678e-06, + "logits/chosen": -3.085946559906006, + "logits/rejected": -2.242018461227417, + "logps/chosen": -239.4904327392578, + "logps/rejected": -206.5516357421875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.321521759033203, + "rewards/margins": 6.935214042663574, + "rewards/rejected": -11.256735801696777, + "step": 8235 + }, + { + "epoch": 1.28, + "learning_rate": 8.10671819077753e-06, + "logits/chosen": -2.626500368118286, + "logits/rejected": -3.0451884269714355, + "logps/chosen": -201.6633758544922, + "logps/rejected": -256.6860046386719, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.829387664794922, + "rewards/margins": 5.623932838439941, + "rewards/rejected": -8.45332145690918, + "step": 8236 + }, + { + "epoch": 1.28, + "learning_rate": 8.105984750246383e-06, + "logits/chosen": -3.174344539642334, + "logits/rejected": -2.754439353942871, + "logps/chosen": -311.5466003417969, + "logps/rejected": -298.7850036621094, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.695195198059082, + "rewards/margins": 5.363308906555176, + "rewards/rejected": -7.058504104614258, + "step": 8237 + }, + { + "epoch": 1.28, + "learning_rate": 8.105251309715235e-06, + "logits/chosen": -2.929464340209961, + "logits/rejected": -2.161098003387451, + "logps/chosen": -226.35162353515625, + "logps/rejected": -104.83258056640625, + "loss": 3.7519, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.607069969177246, + "rewards/margins": -1.0707213878631592, + "rewards/rejected": -6.536348342895508, + "step": 8238 + }, + { + "epoch": 1.28, + "learning_rate": 8.104517869184087e-06, + "logits/chosen": -1.7870734930038452, + "logits/rejected": -2.482285261154175, + "logps/chosen": -85.98328399658203, + "logps/rejected": -280.24761962890625, + "loss": 0.4053, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.859490394592285, + "rewards/margins": 4.961455345153809, + "rewards/rejected": -10.820945739746094, + "step": 8239 + }, + { + "epoch": 1.28, + "learning_rate": 8.103784428652939e-06, + "logits/chosen": -3.0281484127044678, + "logits/rejected": -3.004274606704712, + "logps/chosen": -139.4554901123047, + "logps/rejected": -83.96231079101562, + "loss": 2.3399, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.751359462738037, + "rewards/margins": -0.4968705177307129, + "rewards/rejected": -6.254488945007324, + "step": 8240 + }, + { + "epoch": 1.28, + "learning_rate": 8.10305098812179e-06, + "logits/chosen": -2.746706008911133, + "logits/rejected": -2.9844141006469727, + "logps/chosen": -96.68660736083984, + "logps/rejected": -263.4125061035156, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.144829750061035, + "rewards/margins": 5.431084632873535, + "rewards/rejected": -11.57591438293457, + "step": 8241 + }, + { + "epoch": 1.28, + "learning_rate": 8.102317547590643e-06, + "logits/chosen": -1.5321147441864014, + "logits/rejected": -2.679581880569458, + "logps/chosen": -83.3547592163086, + "logps/rejected": -422.10687255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9376392364501953, + "rewards/margins": 10.387741088867188, + "rewards/rejected": -14.325380325317383, + "step": 8242 + }, + { + "epoch": 1.28, + "learning_rate": 8.101584107059496e-06, + "logits/chosen": -1.826724648475647, + "logits/rejected": -2.638002872467041, + "logps/chosen": -180.08993530273438, + "logps/rejected": -310.75128173828125, + "loss": 0.04, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.784002304077148, + "rewards/margins": 3.203956365585327, + "rewards/rejected": -7.987958908081055, + "step": 8243 + }, + { + "epoch": 1.28, + "learning_rate": 8.100850666528348e-06, + "logits/chosen": -2.8399267196655273, + "logits/rejected": -2.926240921020508, + "logps/chosen": -171.55995178222656, + "logps/rejected": -225.03173828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2887847423553467, + "rewards/margins": 8.30633544921875, + "rewards/rejected": -10.595120429992676, + "step": 8244 + }, + { + "epoch": 1.28, + "learning_rate": 8.1001172259972e-06, + "logits/chosen": -1.5126930475234985, + "logits/rejected": -2.7919199466705322, + "logps/chosen": -221.31103515625, + "logps/rejected": -350.0447082519531, + "loss": 0.0391, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.24056339263916, + "rewards/margins": 3.5148849487304688, + "rewards/rejected": -8.755448341369629, + "step": 8245 + }, + { + "epoch": 1.28, + "learning_rate": 8.099383785466052e-06, + "logits/chosen": -2.7782914638519287, + "logits/rejected": -2.919877052307129, + "logps/chosen": -356.9115295410156, + "logps/rejected": -590.8080444335938, + "loss": 0.0209, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.001943111419678, + "rewards/margins": 6.033459663391113, + "rewards/rejected": -10.03540325164795, + "step": 8246 + }, + { + "epoch": 1.28, + "learning_rate": 8.098650344934904e-06, + "logits/chosen": -2.65519642829895, + "logits/rejected": -1.958905816078186, + "logps/chosen": -225.8072052001953, + "logps/rejected": -161.58514404296875, + "loss": 0.2931, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.169203281402588, + "rewards/margins": 1.7721633911132812, + "rewards/rejected": -5.941366672515869, + "step": 8247 + }, + { + "epoch": 1.28, + "learning_rate": 8.097916904403756e-06, + "logits/chosen": -2.3258109092712402, + "logits/rejected": -3.0592639446258545, + "logps/chosen": -70.156005859375, + "logps/rejected": -230.1796875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.390953063964844, + "rewards/margins": 5.524287223815918, + "rewards/rejected": -9.915240287780762, + "step": 8248 + }, + { + "epoch": 1.28, + "learning_rate": 8.097183463872607e-06, + "logits/chosen": -2.453967571258545, + "logits/rejected": -3.0370421409606934, + "logps/chosen": -114.76734924316406, + "logps/rejected": -286.88726806640625, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4165468215942383, + "rewards/margins": 5.4604949951171875, + "rewards/rejected": -8.877041816711426, + "step": 8249 + }, + { + "epoch": 1.28, + "learning_rate": 8.09645002334146e-06, + "logits/chosen": -2.4368040561676025, + "logits/rejected": -3.0889434814453125, + "logps/chosen": -242.97637939453125, + "logps/rejected": -503.5419921875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.853938579559326, + "rewards/margins": 8.264816284179688, + "rewards/rejected": -13.118755340576172, + "step": 8250 + }, + { + "epoch": 1.28, + "learning_rate": 8.095716582810311e-06, + "logits/chosen": -1.933652639389038, + "logits/rejected": -2.959742307662964, + "logps/chosen": -308.17401123046875, + "logps/rejected": -588.388427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1432106494903564, + "rewards/margins": 9.883736610412598, + "rewards/rejected": -13.026947021484375, + "step": 8251 + }, + { + "epoch": 1.28, + "learning_rate": 8.094983142279165e-06, + "logits/chosen": -2.058668375015259, + "logits/rejected": -2.8353757858276367, + "logps/chosen": -245.28114318847656, + "logps/rejected": -307.2789001464844, + "loss": 0.1263, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.403695106506348, + "rewards/margins": 6.159952163696289, + "rewards/rejected": -11.563647270202637, + "step": 8252 + }, + { + "epoch": 1.28, + "learning_rate": 8.094249701748017e-06, + "logits/chosen": -2.9388556480407715, + "logits/rejected": -2.6340599060058594, + "logps/chosen": -74.21205139160156, + "logps/rejected": -406.651611328125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.490341901779175, + "rewards/margins": 7.57479190826416, + "rewards/rejected": -11.065134048461914, + "step": 8253 + }, + { + "epoch": 1.28, + "learning_rate": 8.093516261216868e-06, + "logits/chosen": -2.830932378768921, + "logits/rejected": -2.9611480236053467, + "logps/chosen": -289.82550048828125, + "logps/rejected": -211.61114501953125, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.631889343261719, + "rewards/margins": 3.692599058151245, + "rewards/rejected": -9.324488639831543, + "step": 8254 + }, + { + "epoch": 1.28, + "learning_rate": 8.09278282068572e-06, + "logits/chosen": -2.871424913406372, + "logits/rejected": -1.894182801246643, + "logps/chosen": -273.4747009277344, + "logps/rejected": -338.3005676269531, + "loss": 0.6599, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.102372169494629, + "rewards/margins": 3.874309539794922, + "rewards/rejected": -7.976681709289551, + "step": 8255 + }, + { + "epoch": 1.28, + "learning_rate": 8.092049380154572e-06, + "logits/chosen": -1.512331485748291, + "logits/rejected": -2.502058744430542, + "logps/chosen": -428.86456298828125, + "logps/rejected": -730.158203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5111846923828125, + "rewards/margins": 10.542669296264648, + "rewards/rejected": -14.053853988647461, + "step": 8256 + }, + { + "epoch": 1.28, + "learning_rate": 8.091315939623424e-06, + "logits/chosen": -1.1597237586975098, + "logits/rejected": -2.8164548873901367, + "logps/chosen": -115.11309814453125, + "logps/rejected": -447.9935302734375, + "loss": 0.0456, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.841903209686279, + "rewards/margins": 4.530362129211426, + "rewards/rejected": -9.372264862060547, + "step": 8257 + }, + { + "epoch": 1.28, + "learning_rate": 8.090582499092276e-06, + "logits/chosen": -1.4931933879852295, + "logits/rejected": -2.9770333766937256, + "logps/chosen": -133.40992736816406, + "logps/rejected": -323.82501220703125, + "loss": 1.1057, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.725053787231445, + "rewards/margins": 2.550959587097168, + "rewards/rejected": -7.276013374328613, + "step": 8258 + }, + { + "epoch": 1.28, + "learning_rate": 8.089849058561128e-06, + "logits/chosen": -2.204617500305176, + "logits/rejected": -2.0271668434143066, + "logps/chosen": -362.0712890625, + "logps/rejected": -293.34429931640625, + "loss": 0.1124, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.181339979171753, + "rewards/margins": 3.585341215133667, + "rewards/rejected": -6.76668119430542, + "step": 8259 + }, + { + "epoch": 1.28, + "learning_rate": 8.08911561802998e-06, + "logits/chosen": -1.765289545059204, + "logits/rejected": -2.785207509994507, + "logps/chosen": -270.9102783203125, + "logps/rejected": -655.4264526367188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.93489146232605, + "rewards/margins": 9.801143646240234, + "rewards/rejected": -13.736035346984863, + "step": 8260 + }, + { + "epoch": 1.28, + "learning_rate": 8.088382177498833e-06, + "logits/chosen": -2.6416332721710205, + "logits/rejected": -2.8442959785461426, + "logps/chosen": -121.06834411621094, + "logps/rejected": -415.4900207519531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2460510730743408, + "rewards/margins": 10.097922325134277, + "rewards/rejected": -11.343973159790039, + "step": 8261 + }, + { + "epoch": 1.28, + "learning_rate": 8.087648736967685e-06, + "logits/chosen": -2.284189462661743, + "logits/rejected": -2.993549346923828, + "logps/chosen": -416.26080322265625, + "logps/rejected": -494.540283203125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9001922607421875, + "rewards/margins": 8.250563621520996, + "rewards/rejected": -13.150755882263184, + "step": 8262 + }, + { + "epoch": 1.29, + "learning_rate": 8.086915296436537e-06, + "logits/chosen": -2.0395898818969727, + "logits/rejected": -3.0555739402770996, + "logps/chosen": -84.06293487548828, + "logps/rejected": -237.33749389648438, + "loss": 0.0532, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.363914728164673, + "rewards/margins": 3.533682346343994, + "rewards/rejected": -6.897597312927246, + "step": 8263 + }, + { + "epoch": 1.29, + "learning_rate": 8.086181855905389e-06, + "logits/chosen": -2.124558448791504, + "logits/rejected": -3.0566797256469727, + "logps/chosen": -134.53936767578125, + "logps/rejected": -210.5751953125, + "loss": 2.0618, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.023674964904785, + "rewards/margins": 2.445924997329712, + "rewards/rejected": -8.469599723815918, + "step": 8264 + }, + { + "epoch": 1.29, + "learning_rate": 8.085448415374241e-06, + "logits/chosen": -2.5026679039001465, + "logits/rejected": -2.817392110824585, + "logps/chosen": -445.9285888671875, + "logps/rejected": -452.6381530761719, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8130524158477783, + "rewards/margins": 7.238160610198975, + "rewards/rejected": -11.051213264465332, + "step": 8265 + }, + { + "epoch": 1.29, + "learning_rate": 8.084714974843093e-06, + "logits/chosen": -2.071901798248291, + "logits/rejected": -3.0227558612823486, + "logps/chosen": -305.82196044921875, + "logps/rejected": -364.6512451171875, + "loss": 1.9588, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.003973007202148, + "rewards/margins": -1.762540578842163, + "rewards/rejected": -3.2414321899414062, + "step": 8266 + }, + { + "epoch": 1.29, + "learning_rate": 8.083981534311945e-06, + "logits/chosen": -2.766906499862671, + "logits/rejected": -2.654271364212036, + "logps/chosen": -261.4310302734375, + "logps/rejected": -304.04345703125, + "loss": 0.4299, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.384214401245117, + "rewards/margins": 2.087941884994507, + "rewards/rejected": -8.472156524658203, + "step": 8267 + }, + { + "epoch": 1.29, + "learning_rate": 8.083248093780796e-06, + "logits/chosen": -3.0593433380126953, + "logits/rejected": -2.0450539588928223, + "logps/chosen": -286.81976318359375, + "logps/rejected": -232.1968231201172, + "loss": 0.3083, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3423492908477783, + "rewards/margins": 2.9163928031921387, + "rewards/rejected": -6.258742332458496, + "step": 8268 + }, + { + "epoch": 1.29, + "learning_rate": 8.08251465324965e-06, + "logits/chosen": -3.006999969482422, + "logits/rejected": -3.0924594402313232, + "logps/chosen": -451.5323181152344, + "logps/rejected": -562.8262939453125, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9611923694610596, + "rewards/margins": 7.495928764343262, + "rewards/rejected": -10.457120895385742, + "step": 8269 + }, + { + "epoch": 1.29, + "learning_rate": 8.081781212718502e-06, + "logits/chosen": -0.894970178604126, + "logits/rejected": -2.8265140056610107, + "logps/chosen": -81.59687805175781, + "logps/rejected": -399.0750427246094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4270386695861816, + "rewards/margins": 8.728131294250488, + "rewards/rejected": -11.155170440673828, + "step": 8270 + }, + { + "epoch": 1.29, + "learning_rate": 8.081047772187356e-06, + "logits/chosen": -1.9363188743591309, + "logits/rejected": -3.1309685707092285, + "logps/chosen": -44.602874755859375, + "logps/rejected": -235.324951171875, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1699540615081787, + "rewards/margins": 5.575875282287598, + "rewards/rejected": -8.745829582214355, + "step": 8271 + }, + { + "epoch": 1.29, + "learning_rate": 8.080314331656207e-06, + "logits/chosen": -2.2483203411102295, + "logits/rejected": -2.8468542098999023, + "logps/chosen": -156.2340850830078, + "logps/rejected": -146.50062561035156, + "loss": 1.003, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9738693237304688, + "rewards/margins": 0.48182082176208496, + "rewards/rejected": -4.455689907073975, + "step": 8272 + }, + { + "epoch": 1.29, + "learning_rate": 8.07958089112506e-06, + "logits/chosen": -1.9901859760284424, + "logits/rejected": -2.790010690689087, + "logps/chosen": -131.1744384765625, + "logps/rejected": -236.5628662109375, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8658792972564697, + "rewards/margins": 5.451888084411621, + "rewards/rejected": -7.31776762008667, + "step": 8273 + }, + { + "epoch": 1.29, + "learning_rate": 8.078847450593911e-06, + "logits/chosen": -2.644387722015381, + "logits/rejected": -1.7574374675750732, + "logps/chosen": -226.14199829101562, + "logps/rejected": -209.3740234375, + "loss": 0.1362, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.08093786239624, + "rewards/margins": 2.4329981803894043, + "rewards/rejected": -6.5139360427856445, + "step": 8274 + }, + { + "epoch": 1.29, + "learning_rate": 8.078114010062763e-06, + "logits/chosen": -2.838193893432617, + "logits/rejected": -3.1219065189361572, + "logps/chosen": -48.26649475097656, + "logps/rejected": -193.07757568359375, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.160106658935547, + "rewards/margins": 5.380671501159668, + "rewards/rejected": -9.540778160095215, + "step": 8275 + }, + { + "epoch": 1.29, + "learning_rate": 8.077380569531615e-06, + "logits/chosen": -2.7402706146240234, + "logits/rejected": -3.059901237487793, + "logps/chosen": -50.309242248535156, + "logps/rejected": -177.962646484375, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8755245208740234, + "rewards/margins": 6.083726406097412, + "rewards/rejected": -9.959251403808594, + "step": 8276 + }, + { + "epoch": 1.29, + "learning_rate": 8.076647129000467e-06, + "logits/chosen": -2.8038992881774902, + "logits/rejected": -2.082920789718628, + "logps/chosen": -758.0650024414062, + "logps/rejected": -617.22802734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07019500434398651, + "rewards/margins": 8.688060760498047, + "rewards/rejected": -8.758255004882812, + "step": 8277 + }, + { + "epoch": 1.29, + "learning_rate": 8.075913688469319e-06, + "logits/chosen": -2.8670833110809326, + "logits/rejected": -2.740934133529663, + "logps/chosen": -455.08624267578125, + "logps/rejected": -359.9493408203125, + "loss": 0.1395, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.993080139160156, + "rewards/margins": 4.17608118057251, + "rewards/rejected": -9.169161796569824, + "step": 8278 + }, + { + "epoch": 1.29, + "learning_rate": 8.075180247938172e-06, + "logits/chosen": -3.0890707969665527, + "logits/rejected": -3.093189239501953, + "logps/chosen": -411.8282470703125, + "logps/rejected": -296.3529052734375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.452323853969574, + "rewards/margins": 7.50653076171875, + "rewards/rejected": -7.958854675292969, + "step": 8279 + }, + { + "epoch": 1.29, + "learning_rate": 8.074446807407024e-06, + "logits/chosen": -2.82517409324646, + "logits/rejected": -2.6299922466278076, + "logps/chosen": -175.10055541992188, + "logps/rejected": -222.20101928710938, + "loss": 0.6335, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.769883155822754, + "rewards/margins": 3.3629424571990967, + "rewards/rejected": -9.13282585144043, + "step": 8280 + }, + { + "epoch": 1.29, + "learning_rate": 8.073713366875876e-06, + "logits/chosen": -1.9776504039764404, + "logits/rejected": -2.871462345123291, + "logps/chosen": -202.48709106445312, + "logps/rejected": -356.6807861328125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.650561571121216, + "rewards/margins": 6.446861267089844, + "rewards/rejected": -9.097423553466797, + "step": 8281 + }, + { + "epoch": 1.29, + "learning_rate": 8.072979926344728e-06, + "logits/chosen": -2.6420812606811523, + "logits/rejected": -2.5735585689544678, + "logps/chosen": -179.76002502441406, + "logps/rejected": -398.3760681152344, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.151750326156616, + "rewards/margins": 8.344198226928711, + "rewards/rejected": -11.495948791503906, + "step": 8282 + }, + { + "epoch": 1.29, + "learning_rate": 8.07224648581358e-06, + "logits/chosen": -2.6721248626708984, + "logits/rejected": -2.8466312885284424, + "logps/chosen": -258.14111328125, + "logps/rejected": -335.6191101074219, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0483808517456055, + "rewards/margins": 6.092600345611572, + "rewards/rejected": -8.140981674194336, + "step": 8283 + }, + { + "epoch": 1.29, + "learning_rate": 8.071513045282432e-06, + "logits/chosen": -2.4913995265960693, + "logits/rejected": -2.819037914276123, + "logps/chosen": -203.94390869140625, + "logps/rejected": -406.4286193847656, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3605222702026367, + "rewards/margins": 8.791471481323242, + "rewards/rejected": -12.151993751525879, + "step": 8284 + }, + { + "epoch": 1.29, + "learning_rate": 8.070779604751283e-06, + "logits/chosen": -2.4086544513702393, + "logits/rejected": -3.111449718475342, + "logps/chosen": -157.50250244140625, + "logps/rejected": -336.085693359375, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.953878879547119, + "rewards/margins": 4.519386291503906, + "rewards/rejected": -8.473264694213867, + "step": 8285 + }, + { + "epoch": 1.29, + "learning_rate": 8.070046164220135e-06, + "logits/chosen": -1.3420343399047852, + "logits/rejected": -2.9577009677886963, + "logps/chosen": -146.42575073242188, + "logps/rejected": -557.56396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.243084669113159, + "rewards/margins": 9.030828475952148, + "rewards/rejected": -11.273914337158203, + "step": 8286 + }, + { + "epoch": 1.29, + "learning_rate": 8.069312723688987e-06, + "logits/chosen": -2.964458465576172, + "logits/rejected": -2.9397382736206055, + "logps/chosen": -595.0712890625, + "logps/rejected": -748.22607421875, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9288742542266846, + "rewards/margins": 5.854036331176758, + "rewards/rejected": -7.782910346984863, + "step": 8287 + }, + { + "epoch": 1.29, + "learning_rate": 8.06857928315784e-06, + "logits/chosen": -2.8146986961364746, + "logits/rejected": -2.600881338119507, + "logps/chosen": -110.99842834472656, + "logps/rejected": -361.94085693359375, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9951324462890625, + "rewards/margins": 6.688074111938477, + "rewards/rejected": -10.683206558227539, + "step": 8288 + }, + { + "epoch": 1.29, + "learning_rate": 8.067845842626693e-06, + "logits/chosen": -2.6871187686920166, + "logits/rejected": -1.8337956666946411, + "logps/chosen": -317.3778076171875, + "logps/rejected": -300.05682373046875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.329720973968506, + "rewards/margins": 6.0718488693237305, + "rewards/rejected": -10.401570320129395, + "step": 8289 + }, + { + "epoch": 1.29, + "learning_rate": 8.067112402095545e-06, + "logits/chosen": -2.9284393787384033, + "logits/rejected": -2.868485689163208, + "logps/chosen": -103.51487731933594, + "logps/rejected": -256.11419677734375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8961448669433594, + "rewards/margins": 6.38297700881958, + "rewards/rejected": -10.279121398925781, + "step": 8290 + }, + { + "epoch": 1.29, + "learning_rate": 8.066378961564396e-06, + "logits/chosen": -1.536741018295288, + "logits/rejected": -2.7632412910461426, + "logps/chosen": -179.1588897705078, + "logps/rejected": -400.6968994140625, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.010980129241943, + "rewards/margins": 5.266280174255371, + "rewards/rejected": -9.277260780334473, + "step": 8291 + }, + { + "epoch": 1.29, + "learning_rate": 8.065645521033248e-06, + "logits/chosen": -2.353656768798828, + "logits/rejected": -2.9937446117401123, + "logps/chosen": -163.39207458496094, + "logps/rejected": -400.06048583984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4786690473556519, + "rewards/margins": 10.00855541229248, + "rewards/rejected": -11.487224578857422, + "step": 8292 + }, + { + "epoch": 1.29, + "learning_rate": 8.0649120805021e-06, + "logits/chosen": -2.900883674621582, + "logits/rejected": -2.912587881088257, + "logps/chosen": -465.2888488769531, + "logps/rejected": -492.8108215332031, + "loss": 0.2427, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5307419300079346, + "rewards/margins": 3.6354691982269287, + "rewards/rejected": -7.166211128234863, + "step": 8293 + }, + { + "epoch": 1.29, + "learning_rate": 8.064178639970952e-06, + "logits/chosen": -2.1355457305908203, + "logits/rejected": -2.58505916595459, + "logps/chosen": -157.5244598388672, + "logps/rejected": -531.2396240234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4516937732696533, + "rewards/margins": 11.750073432922363, + "rewards/rejected": -14.201766967773438, + "step": 8294 + }, + { + "epoch": 1.29, + "learning_rate": 8.063445199439804e-06, + "logits/chosen": -2.4324100017547607, + "logits/rejected": -2.8976526260375977, + "logps/chosen": -486.363037109375, + "logps/rejected": -391.36962890625, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.836625576019287, + "rewards/margins": 3.9950883388519287, + "rewards/rejected": -6.831713676452637, + "step": 8295 + }, + { + "epoch": 1.29, + "learning_rate": 8.062711758908658e-06, + "logits/chosen": -2.1894447803497314, + "logits/rejected": -2.688908576965332, + "logps/chosen": -94.42041015625, + "logps/rejected": -201.55282592773438, + "loss": 0.0475, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.998455047607422, + "rewards/margins": 3.2107810974121094, + "rewards/rejected": -9.209236145019531, + "step": 8296 + }, + { + "epoch": 1.29, + "learning_rate": 8.06197831837751e-06, + "logits/chosen": -2.52670955657959, + "logits/rejected": -2.8333141803741455, + "logps/chosen": -479.41009521484375, + "logps/rejected": -461.44647216796875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.987170696258545, + "rewards/margins": 7.5806565284729, + "rewards/rejected": -10.567827224731445, + "step": 8297 + }, + { + "epoch": 1.29, + "learning_rate": 8.061244877846361e-06, + "logits/chosen": -3.1017301082611084, + "logits/rejected": -2.0373165607452393, + "logps/chosen": -352.16522216796875, + "logps/rejected": -203.8899383544922, + "loss": 1.7293, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.895068168640137, + "rewards/margins": 0.8150432109832764, + "rewards/rejected": -5.710111618041992, + "step": 8298 + }, + { + "epoch": 1.29, + "learning_rate": 8.060511437315213e-06, + "logits/chosen": -2.1244394779205322, + "logits/rejected": -3.073197841644287, + "logps/chosen": -139.48483276367188, + "logps/rejected": -228.58164978027344, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.266749858856201, + "rewards/margins": 3.236423969268799, + "rewards/rejected": -6.503173828125, + "step": 8299 + }, + { + "epoch": 1.29, + "learning_rate": 8.059777996784065e-06, + "logits/chosen": -2.698624610900879, + "logits/rejected": -3.162583351135254, + "logps/chosen": -727.4818115234375, + "logps/rejected": -808.06298828125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.256674289703369, + "rewards/margins": 7.807583808898926, + "rewards/rejected": -11.064258575439453, + "step": 8300 + }, + { + "epoch": 1.29, + "learning_rate": 8.059044556252917e-06, + "logits/chosen": -1.3978400230407715, + "logits/rejected": -2.905571699142456, + "logps/chosen": -44.32526397705078, + "logps/rejected": -369.76812744140625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.064230442047119, + "rewards/margins": 5.588320255279541, + "rewards/rejected": -8.65255069732666, + "step": 8301 + }, + { + "epoch": 1.29, + "learning_rate": 8.058311115721769e-06, + "logits/chosen": -2.9044835567474365, + "logits/rejected": -2.6950550079345703, + "logps/chosen": -150.24209594726562, + "logps/rejected": -164.15301513671875, + "loss": 0.0353, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8107006549835205, + "rewards/margins": 4.47797155380249, + "rewards/rejected": -7.28867244720459, + "step": 8302 + }, + { + "epoch": 1.29, + "learning_rate": 8.057577675190622e-06, + "logits/chosen": -0.7830522060394287, + "logits/rejected": -2.8376126289367676, + "logps/chosen": -109.08357238769531, + "logps/rejected": -473.69732666015625, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.599924564361572, + "rewards/margins": 4.819733619689941, + "rewards/rejected": -9.419658660888672, + "step": 8303 + }, + { + "epoch": 1.29, + "learning_rate": 8.056844234659474e-06, + "logits/chosen": -1.907575011253357, + "logits/rejected": -2.7854323387145996, + "logps/chosen": -185.9118194580078, + "logps/rejected": -393.973876953125, + "loss": 1.0895, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.714715003967285, + "rewards/margins": 1.823087215423584, + "rewards/rejected": -8.537801742553711, + "step": 8304 + }, + { + "epoch": 1.29, + "learning_rate": 8.056110794128328e-06, + "logits/chosen": -3.119551658630371, + "logits/rejected": -2.6674365997314453, + "logps/chosen": -626.087890625, + "logps/rejected": -442.05029296875, + "loss": 0.5093, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.266364097595215, + "rewards/margins": 3.328221082687378, + "rewards/rejected": -8.594585418701172, + "step": 8305 + }, + { + "epoch": 1.29, + "learning_rate": 8.05537735359718e-06, + "logits/chosen": -1.5160659551620483, + "logits/rejected": -2.9650564193725586, + "logps/chosen": -265.29058837890625, + "logps/rejected": -403.7440490722656, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4057717323303223, + "rewards/margins": 7.178837776184082, + "rewards/rejected": -9.584609985351562, + "step": 8306 + }, + { + "epoch": 1.29, + "learning_rate": 8.054643913066032e-06, + "logits/chosen": -2.894474506378174, + "logits/rejected": -2.2270541191101074, + "logps/chosen": -285.5364990234375, + "logps/rejected": -349.7771301269531, + "loss": 0.2241, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2664551734924316, + "rewards/margins": 2.9589900970458984, + "rewards/rejected": -6.22544527053833, + "step": 8307 + }, + { + "epoch": 1.29, + "learning_rate": 8.053910472534883e-06, + "logits/chosen": -2.7801084518432617, + "logits/rejected": -2.448859930038452, + "logps/chosen": -261.8013000488281, + "logps/rejected": -165.25802612304688, + "loss": 0.2975, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.498426914215088, + "rewards/margins": 4.579140663146973, + "rewards/rejected": -9.077568054199219, + "step": 8308 + }, + { + "epoch": 1.29, + "learning_rate": 8.053177032003735e-06, + "logits/chosen": -1.7145674228668213, + "logits/rejected": -2.3106529712677, + "logps/chosen": -84.41590881347656, + "logps/rejected": -254.06414794921875, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.740839958190918, + "rewards/margins": 5.010139465332031, + "rewards/rejected": -8.75097942352295, + "step": 8309 + }, + { + "epoch": 1.29, + "learning_rate": 8.052443591472587e-06, + "logits/chosen": -2.4821884632110596, + "logits/rejected": -2.6971263885498047, + "logps/chosen": -108.97079467773438, + "logps/rejected": -327.26959228515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.686492919921875, + "rewards/margins": 9.43266487121582, + "rewards/rejected": -12.119157791137695, + "step": 8310 + }, + { + "epoch": 1.29, + "learning_rate": 8.051710150941439e-06, + "logits/chosen": -1.1218990087509155, + "logits/rejected": -2.5583484172821045, + "logps/chosen": -203.85711669921875, + "logps/rejected": -543.0645751953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8157472610473633, + "rewards/margins": 8.534573554992676, + "rewards/rejected": -12.350320816040039, + "step": 8311 + }, + { + "epoch": 1.29, + "learning_rate": 8.050976710410291e-06, + "logits/chosen": -2.5451154708862305, + "logits/rejected": -3.051530122756958, + "logps/chosen": -479.72454833984375, + "logps/rejected": -787.9754638671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9012105464935303, + "rewards/margins": 10.999670028686523, + "rewards/rejected": -13.900880813598633, + "step": 8312 + }, + { + "epoch": 1.29, + "learning_rate": 8.050243269879143e-06, + "logits/chosen": -1.061215877532959, + "logits/rejected": -2.7788772583007812, + "logps/chosen": -67.89476776123047, + "logps/rejected": -315.8341064453125, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5398452281951904, + "rewards/margins": 6.9025959968566895, + "rewards/rejected": -10.4424409866333, + "step": 8313 + }, + { + "epoch": 1.29, + "learning_rate": 8.049509829347996e-06, + "logits/chosen": -3.0967190265655518, + "logits/rejected": -2.644878387451172, + "logps/chosen": -263.22723388671875, + "logps/rejected": -151.4410858154297, + "loss": 0.0815, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.690659523010254, + "rewards/margins": 4.1904096603393555, + "rewards/rejected": -7.881069183349609, + "step": 8314 + }, + { + "epoch": 1.29, + "learning_rate": 8.048776388816848e-06, + "logits/chosen": -2.940499782562256, + "logits/rejected": -2.434002161026001, + "logps/chosen": -276.28558349609375, + "logps/rejected": -290.37896728515625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9631857872009277, + "rewards/margins": 7.741129398345947, + "rewards/rejected": -11.704315185546875, + "step": 8315 + }, + { + "epoch": 1.29, + "learning_rate": 8.0480429482857e-06, + "logits/chosen": -2.5336570739746094, + "logits/rejected": -2.855875253677368, + "logps/chosen": -116.82440185546875, + "logps/rejected": -203.33804321289062, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4542322158813477, + "rewards/margins": 5.6686248779296875, + "rewards/rejected": -8.122857093811035, + "step": 8316 + }, + { + "epoch": 1.29, + "learning_rate": 8.047309507754552e-06, + "logits/chosen": -2.9998350143432617, + "logits/rejected": -1.9682027101516724, + "logps/chosen": -360.12689208984375, + "logps/rejected": -262.5423278808594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1462838649749756, + "rewards/margins": 7.831092834472656, + "rewards/rejected": -10.977376937866211, + "step": 8317 + }, + { + "epoch": 1.29, + "learning_rate": 8.046576067223404e-06, + "logits/chosen": -2.351903200149536, + "logits/rejected": -2.7651960849761963, + "logps/chosen": -106.3740234375, + "logps/rejected": -200.88938903808594, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.641880989074707, + "rewards/margins": 4.481034755706787, + "rewards/rejected": -8.122915267944336, + "step": 8318 + }, + { + "epoch": 1.29, + "learning_rate": 8.045842626692256e-06, + "logits/chosen": -2.697361469268799, + "logits/rejected": -1.5818428993225098, + "logps/chosen": -230.62290954589844, + "logps/rejected": -207.1298828125, + "loss": 0.8114, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.966370105743408, + "rewards/margins": 0.8185291290283203, + "rewards/rejected": -4.7848992347717285, + "step": 8319 + }, + { + "epoch": 1.29, + "learning_rate": 8.045109186161108e-06, + "logits/chosen": -2.105888605117798, + "logits/rejected": -2.7556650638580322, + "logps/chosen": -209.6853485107422, + "logps/rejected": -433.99420166015625, + "loss": 0.1175, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0386178493499756, + "rewards/margins": 7.17540979385376, + "rewards/rejected": -10.214027404785156, + "step": 8320 + }, + { + "epoch": 1.29, + "learning_rate": 8.04437574562996e-06, + "logits/chosen": -2.833360195159912, + "logits/rejected": -2.9684526920318604, + "logps/chosen": -374.3335266113281, + "logps/rejected": -225.79087829589844, + "loss": 0.0683, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5891590118408203, + "rewards/margins": 4.794498443603516, + "rewards/rejected": -8.383657455444336, + "step": 8321 + }, + { + "epoch": 1.29, + "learning_rate": 8.043642305098811e-06, + "logits/chosen": -2.122919797897339, + "logits/rejected": -2.9885895252227783, + "logps/chosen": -243.45680236816406, + "logps/rejected": -563.45556640625, + "loss": 0.236, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6204705238342285, + "rewards/margins": 3.0959038734436035, + "rewards/rejected": -5.716374397277832, + "step": 8322 + }, + { + "epoch": 1.29, + "learning_rate": 8.042908864567665e-06, + "logits/chosen": -2.017940044403076, + "logits/rejected": -3.125863552093506, + "logps/chosen": -256.1613464355469, + "logps/rejected": -325.6927795410156, + "loss": 2.1184, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.254267692565918, + "rewards/margins": 1.0104200839996338, + "rewards/rejected": -7.264688014984131, + "step": 8323 + }, + { + "epoch": 1.29, + "learning_rate": 8.042175424036517e-06, + "logits/chosen": -2.754547595977783, + "logits/rejected": -3.0424892902374268, + "logps/chosen": -104.3725814819336, + "logps/rejected": -334.644287109375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0098092555999756, + "rewards/margins": 8.52933406829834, + "rewards/rejected": -11.539142608642578, + "step": 8324 + }, + { + "epoch": 1.29, + "learning_rate": 8.041441983505369e-06, + "logits/chosen": -2.311310291290283, + "logits/rejected": -2.787631034851074, + "logps/chosen": -77.00466918945312, + "logps/rejected": -108.34455871582031, + "loss": 0.658, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.8821773529052734, + "rewards/margins": 2.985419750213623, + "rewards/rejected": -5.8675971031188965, + "step": 8325 + }, + { + "epoch": 1.29, + "learning_rate": 8.04070854297422e-06, + "logits/chosen": -1.6886471509933472, + "logits/rejected": -2.7288382053375244, + "logps/chosen": -86.45781707763672, + "logps/rejected": -222.88427734375, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.891617774963379, + "rewards/margins": 4.135367393493652, + "rewards/rejected": -8.026985168457031, + "step": 8326 + }, + { + "epoch": 1.3, + "learning_rate": 8.039975102443073e-06, + "logits/chosen": -1.7092068195343018, + "logits/rejected": -2.6005468368530273, + "logps/chosen": -201.58438110351562, + "logps/rejected": -451.5550537109375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3064255714416504, + "rewards/margins": 8.042963027954102, + "rewards/rejected": -11.349388122558594, + "step": 8327 + }, + { + "epoch": 1.3, + "learning_rate": 8.039241661911924e-06, + "logits/chosen": -1.7186468839645386, + "logits/rejected": -2.8238513469696045, + "logps/chosen": -132.98519897460938, + "logps/rejected": -360.2328796386719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.496959686279297, + "rewards/margins": 8.954828262329102, + "rewards/rejected": -12.451787948608398, + "step": 8328 + }, + { + "epoch": 1.3, + "learning_rate": 8.038508221380776e-06, + "logits/chosen": -2.192281484603882, + "logits/rejected": -3.1026973724365234, + "logps/chosen": -348.47991943359375, + "logps/rejected": -424.2056884765625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.193595051765442, + "rewards/margins": 9.381891250610352, + "rewards/rejected": -10.575486183166504, + "step": 8329 + }, + { + "epoch": 1.3, + "learning_rate": 8.037774780849628e-06, + "logits/chosen": -3.140002727508545, + "logits/rejected": -2.7374684810638428, + "logps/chosen": -161.0048065185547, + "logps/rejected": -102.96102905273438, + "loss": 0.367, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5244555473327637, + "rewards/margins": 0.8131619691848755, + "rewards/rejected": -4.337617874145508, + "step": 8330 + }, + { + "epoch": 1.3, + "learning_rate": 8.03704134031848e-06, + "logits/chosen": -1.7829750776290894, + "logits/rejected": -2.9856138229370117, + "logps/chosen": -60.819908142089844, + "logps/rejected": -315.8404846191406, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6073451042175293, + "rewards/margins": 6.906545639038086, + "rewards/rejected": -8.513891220092773, + "step": 8331 + }, + { + "epoch": 1.3, + "learning_rate": 8.036307899787334e-06, + "logits/chosen": -1.9662741422653198, + "logits/rejected": -2.8790090084075928, + "logps/chosen": -189.28662109375, + "logps/rejected": -550.6271362304688, + "loss": 1.0218, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.79102897644043, + "rewards/margins": 2.3074965476989746, + "rewards/rejected": -9.098526000976562, + "step": 8332 + }, + { + "epoch": 1.3, + "learning_rate": 8.035574459256186e-06, + "logits/chosen": -2.8609423637390137, + "logits/rejected": -3.063007116317749, + "logps/chosen": -228.052490234375, + "logps/rejected": -322.4495849609375, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9452831745147705, + "rewards/margins": 7.748968601226807, + "rewards/rejected": -10.694252014160156, + "step": 8333 + }, + { + "epoch": 1.3, + "learning_rate": 8.034841018725037e-06, + "logits/chosen": -2.2939183712005615, + "logits/rejected": -3.0032498836517334, + "logps/chosen": -361.7070617675781, + "logps/rejected": -458.37158203125, + "loss": 0.1173, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.102896213531494, + "rewards/margins": 4.170922756195068, + "rewards/rejected": -9.273818969726562, + "step": 8334 + }, + { + "epoch": 1.3, + "learning_rate": 8.03410757819389e-06, + "logits/chosen": -2.7419092655181885, + "logits/rejected": -3.0271623134613037, + "logps/chosen": -86.79299926757812, + "logps/rejected": -176.06837463378906, + "loss": 1.0235, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.195279121398926, + "rewards/margins": 3.7098939418792725, + "rewards/rejected": -8.905173301696777, + "step": 8335 + }, + { + "epoch": 1.3, + "learning_rate": 8.033374137662741e-06, + "logits/chosen": -2.454814910888672, + "logits/rejected": -2.5830254554748535, + "logps/chosen": -198.9864501953125, + "logps/rejected": -223.9124298095703, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.758951663970947, + "rewards/margins": 4.810540676116943, + "rewards/rejected": -9.56949234008789, + "step": 8336 + }, + { + "epoch": 1.3, + "learning_rate": 8.032640697131593e-06, + "logits/chosen": -1.7314705848693848, + "logits/rejected": -2.990565061569214, + "logps/chosen": -172.45895385742188, + "logps/rejected": -414.2536315917969, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7943553924560547, + "rewards/margins": 4.936312198638916, + "rewards/rejected": -7.7306671142578125, + "step": 8337 + }, + { + "epoch": 1.3, + "learning_rate": 8.031907256600447e-06, + "logits/chosen": -2.865821361541748, + "logits/rejected": -3.0335566997528076, + "logps/chosen": -223.12234497070312, + "logps/rejected": -263.9989929199219, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5511726140975952, + "rewards/margins": 6.257607460021973, + "rewards/rejected": -7.808779716491699, + "step": 8338 + }, + { + "epoch": 1.3, + "learning_rate": 8.031173816069298e-06, + "logits/chosen": -2.8763041496276855, + "logits/rejected": -2.3765432834625244, + "logps/chosen": -316.3038635253906, + "logps/rejected": -303.0483703613281, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0710437297821045, + "rewards/margins": 7.278402328491211, + "rewards/rejected": -10.349446296691895, + "step": 8339 + }, + { + "epoch": 1.3, + "learning_rate": 8.03044037553815e-06, + "logits/chosen": -2.681422472000122, + "logits/rejected": -3.0635221004486084, + "logps/chosen": -33.360389709472656, + "logps/rejected": -245.10812377929688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.930582880973816, + "rewards/margins": 10.785269737243652, + "rewards/rejected": -12.715852737426758, + "step": 8340 + }, + { + "epoch": 1.3, + "learning_rate": 8.029706935007004e-06, + "logits/chosen": -3.0822739601135254, + "logits/rejected": -3.2861390113830566, + "logps/chosen": -81.49420928955078, + "logps/rejected": -177.82537841796875, + "loss": 0.0467, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0785646438598633, + "rewards/margins": 3.5191493034362793, + "rewards/rejected": -5.597713947296143, + "step": 8341 + }, + { + "epoch": 1.3, + "learning_rate": 8.028973494475856e-06, + "logits/chosen": -2.780449390411377, + "logits/rejected": -2.469681978225708, + "logps/chosen": -311.25347900390625, + "logps/rejected": -360.8059997558594, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7701492309570312, + "rewards/margins": 8.731064796447754, + "rewards/rejected": -11.501214027404785, + "step": 8342 + }, + { + "epoch": 1.3, + "learning_rate": 8.028240053944708e-06, + "logits/chosen": -2.592897415161133, + "logits/rejected": -3.1215384006500244, + "logps/chosen": -137.8972625732422, + "logps/rejected": -281.7861328125, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2049198150634766, + "rewards/margins": 5.621588706970215, + "rewards/rejected": -8.826508522033691, + "step": 8343 + }, + { + "epoch": 1.3, + "learning_rate": 8.02750661341356e-06, + "logits/chosen": -2.994302272796631, + "logits/rejected": -3.0213406085968018, + "logps/chosen": -103.40087890625, + "logps/rejected": -226.40525817871094, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5261406898498535, + "rewards/margins": 6.4739227294921875, + "rewards/rejected": -10.0000638961792, + "step": 8344 + }, + { + "epoch": 1.3, + "learning_rate": 8.026773172882411e-06, + "logits/chosen": -2.9122397899627686, + "logits/rejected": -1.3360947370529175, + "logps/chosen": -557.776611328125, + "logps/rejected": -387.7024230957031, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.045741081237793, + "rewards/margins": 4.934724807739258, + "rewards/rejected": -10.98046588897705, + "step": 8345 + }, + { + "epoch": 1.3, + "learning_rate": 8.026039732351263e-06, + "logits/chosen": -3.0137157440185547, + "logits/rejected": -2.3861935138702393, + "logps/chosen": -626.49365234375, + "logps/rejected": -435.5504455566406, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8677200078964233, + "rewards/margins": 7.2483978271484375, + "rewards/rejected": -9.116117477416992, + "step": 8346 + }, + { + "epoch": 1.3, + "learning_rate": 8.025306291820115e-06, + "logits/chosen": -3.0138425827026367, + "logits/rejected": -2.5474839210510254, + "logps/chosen": -379.44049072265625, + "logps/rejected": -344.4837646484375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3626251220703125, + "rewards/margins": 9.288997650146484, + "rewards/rejected": -9.651622772216797, + "step": 8347 + }, + { + "epoch": 1.3, + "learning_rate": 8.024572851288967e-06, + "logits/chosen": -0.9264441728591919, + "logits/rejected": -3.0404365062713623, + "logps/chosen": -178.49172973632812, + "logps/rejected": -397.5157470703125, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.32236385345459, + "rewards/margins": 9.689369201660156, + "rewards/rejected": -14.011733055114746, + "step": 8348 + }, + { + "epoch": 1.3, + "learning_rate": 8.023839410757819e-06, + "logits/chosen": -2.7274932861328125, + "logits/rejected": -2.7297866344451904, + "logps/chosen": -286.6533508300781, + "logps/rejected": -270.15338134765625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.051487445831299, + "rewards/margins": 6.242259979248047, + "rewards/rejected": -10.293746948242188, + "step": 8349 + }, + { + "epoch": 1.3, + "learning_rate": 8.023105970226673e-06, + "logits/chosen": -2.1265029907226562, + "logits/rejected": -2.950228691101074, + "logps/chosen": -77.31210327148438, + "logps/rejected": -274.2887878417969, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.807344436645508, + "rewards/margins": 4.91270637512207, + "rewards/rejected": -10.720050811767578, + "step": 8350 + }, + { + "epoch": 1.3, + "learning_rate": 8.022372529695524e-06, + "logits/chosen": -3.128649950027466, + "logits/rejected": -2.908849000930786, + "logps/chosen": -221.90011596679688, + "logps/rejected": -239.8324432373047, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2578177452087402, + "rewards/margins": 4.936823844909668, + "rewards/rejected": -7.19464111328125, + "step": 8351 + }, + { + "epoch": 1.3, + "learning_rate": 8.021639089164376e-06, + "logits/chosen": -2.8049631118774414, + "logits/rejected": -2.585763454437256, + "logps/chosen": -118.23841094970703, + "logps/rejected": -141.4017333984375, + "loss": 0.2545, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1664767265319824, + "rewards/margins": 2.8948299884796143, + "rewards/rejected": -6.061306953430176, + "step": 8352 + }, + { + "epoch": 1.3, + "learning_rate": 8.020905648633228e-06, + "logits/chosen": -1.8703739643096924, + "logits/rejected": -2.69887638092041, + "logps/chosen": -306.77935791015625, + "logps/rejected": -544.0881958007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.399269104003906, + "rewards/margins": 13.310287475585938, + "rewards/rejected": -17.709556579589844, + "step": 8353 + }, + { + "epoch": 1.3, + "learning_rate": 8.02017220810208e-06, + "logits/chosen": -0.7820043563842773, + "logits/rejected": -2.0706334114074707, + "logps/chosen": -116.33631896972656, + "logps/rejected": -584.6724853515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.744056701660156, + "rewards/margins": 9.90874195098877, + "rewards/rejected": -14.652798652648926, + "step": 8354 + }, + { + "epoch": 1.3, + "learning_rate": 8.019438767570932e-06, + "logits/chosen": -3.0415356159210205, + "logits/rejected": -1.4988751411437988, + "logps/chosen": -328.0998840332031, + "logps/rejected": -171.503173828125, + "loss": 0.0373, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6376862525939941, + "rewards/margins": 4.686927795410156, + "rewards/rejected": -6.32461404800415, + "step": 8355 + }, + { + "epoch": 1.3, + "learning_rate": 8.018705327039784e-06, + "logits/chosen": -2.7243292331695557, + "logits/rejected": -2.9975383281707764, + "logps/chosen": -79.82106018066406, + "logps/rejected": -256.3877258300781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.394599437713623, + "rewards/margins": 8.760464668273926, + "rewards/rejected": -11.15506362915039, + "step": 8356 + }, + { + "epoch": 1.3, + "learning_rate": 8.017971886508636e-06, + "logits/chosen": -2.9998624324798584, + "logits/rejected": -2.5712392330169678, + "logps/chosen": -311.1274719238281, + "logps/rejected": -507.9871520996094, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5930252075195312, + "rewards/margins": 6.072211265563965, + "rewards/rejected": -8.665236473083496, + "step": 8357 + }, + { + "epoch": 1.3, + "learning_rate": 8.017238445977488e-06, + "logits/chosen": -2.6231613159179688, + "logits/rejected": -3.0927553176879883, + "logps/chosen": -54.75584411621094, + "logps/rejected": -356.5102844238281, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9777464866638184, + "rewards/margins": 8.09765338897705, + "rewards/rejected": -11.075399398803711, + "step": 8358 + }, + { + "epoch": 1.3, + "learning_rate": 8.016505005446341e-06, + "logits/chosen": -2.6681227684020996, + "logits/rejected": -2.7515361309051514, + "logps/chosen": -158.2938995361328, + "logps/rejected": -386.739990234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.344029426574707, + "rewards/margins": 9.752541542053223, + "rewards/rejected": -13.09657096862793, + "step": 8359 + }, + { + "epoch": 1.3, + "learning_rate": 8.015771564915193e-06, + "logits/chosen": -3.0710930824279785, + "logits/rejected": -2.9733223915100098, + "logps/chosen": -629.4828491210938, + "logps/rejected": -330.302490234375, + "loss": 0.2229, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5812926292419434, + "rewards/margins": 2.855987071990967, + "rewards/rejected": -6.43727970123291, + "step": 8360 + }, + { + "epoch": 1.3, + "learning_rate": 8.015038124384045e-06, + "logits/chosen": -2.8132026195526123, + "logits/rejected": -2.7331039905548096, + "logps/chosen": -361.29443359375, + "logps/rejected": -401.89007568359375, + "loss": 2.5023, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.528193473815918, + "rewards/margins": 0.9173569679260254, + "rewards/rejected": -8.445550918579102, + "step": 8361 + }, + { + "epoch": 1.3, + "learning_rate": 8.014304683852897e-06, + "logits/chosen": -1.4026838541030884, + "logits/rejected": -2.9299113750457764, + "logps/chosen": -110.21438598632812, + "logps/rejected": -303.2994079589844, + "loss": 0.9531, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.721951961517334, + "rewards/margins": 1.7979907989501953, + "rewards/rejected": -6.519942283630371, + "step": 8362 + }, + { + "epoch": 1.3, + "learning_rate": 8.013571243321749e-06, + "logits/chosen": -1.2251739501953125, + "logits/rejected": -2.8425986766815186, + "logps/chosen": -263.1800842285156, + "logps/rejected": -249.7373046875, + "loss": 1.4093, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.28725004196167, + "rewards/margins": 1.7990834712982178, + "rewards/rejected": -7.086333274841309, + "step": 8363 + }, + { + "epoch": 1.3, + "learning_rate": 8.0128378027906e-06, + "logits/chosen": -2.5333409309387207, + "logits/rejected": -3.0729904174804688, + "logps/chosen": -147.17251586914062, + "logps/rejected": -187.4785614013672, + "loss": 0.0728, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9626622200012207, + "rewards/margins": 2.5843665599823, + "rewards/rejected": -6.547028541564941, + "step": 8364 + }, + { + "epoch": 1.3, + "learning_rate": 8.012104362259452e-06, + "logits/chosen": -1.8954706192016602, + "logits/rejected": -2.835696220397949, + "logps/chosen": -355.18292236328125, + "logps/rejected": -560.3179321289062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.094667911529541, + "rewards/margins": 9.304535865783691, + "rewards/rejected": -11.39920425415039, + "step": 8365 + }, + { + "epoch": 1.3, + "learning_rate": 8.011370921728304e-06, + "logits/chosen": -2.669807195663452, + "logits/rejected": -1.5978552103042603, + "logps/chosen": -391.4227294921875, + "logps/rejected": -189.5394287109375, + "loss": 1.0702, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.1168413162231445, + "rewards/margins": 2.177279233932495, + "rewards/rejected": -7.2941203117370605, + "step": 8366 + }, + { + "epoch": 1.3, + "learning_rate": 8.010637481197156e-06, + "logits/chosen": -2.453057050704956, + "logits/rejected": -3.0356357097625732, + "logps/chosen": -580.7333374023438, + "logps/rejected": -346.9313049316406, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.313509941101074, + "rewards/margins": 7.154000282287598, + "rewards/rejected": -11.467510223388672, + "step": 8367 + }, + { + "epoch": 1.3, + "learning_rate": 8.00990404066601e-06, + "logits/chosen": -2.9309592247009277, + "logits/rejected": -2.6604552268981934, + "logps/chosen": -162.81842041015625, + "logps/rejected": -301.4276428222656, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.287277698516846, + "rewards/margins": 9.694730758666992, + "rewards/rejected": -13.98200798034668, + "step": 8368 + }, + { + "epoch": 1.3, + "learning_rate": 8.009170600134862e-06, + "logits/chosen": -2.908468723297119, + "logits/rejected": -1.4876952171325684, + "logps/chosen": -469.0626220703125, + "logps/rejected": -124.96699523925781, + "loss": 0.0899, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.572108507156372, + "rewards/margins": 2.709108829498291, + "rewards/rejected": -6.281217575073242, + "step": 8369 + }, + { + "epoch": 1.3, + "learning_rate": 8.008437159603713e-06, + "logits/chosen": -1.8373914957046509, + "logits/rejected": -2.8968398571014404, + "logps/chosen": -266.6398620605469, + "logps/rejected": -382.47625732421875, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.094964027404785, + "rewards/margins": 4.559332847595215, + "rewards/rejected": -9.654296875, + "step": 8370 + }, + { + "epoch": 1.3, + "learning_rate": 8.007703719072565e-06, + "logits/chosen": -1.2979271411895752, + "logits/rejected": -2.713984251022339, + "logps/chosen": -121.84555053710938, + "logps/rejected": -537.6746215820312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.969219923019409, + "rewards/margins": 14.561573028564453, + "rewards/rejected": -18.530792236328125, + "step": 8371 + }, + { + "epoch": 1.3, + "learning_rate": 8.006970278541419e-06, + "logits/chosen": -2.950039863586426, + "logits/rejected": -2.7956199645996094, + "logps/chosen": -514.3140869140625, + "logps/rejected": -454.382080078125, + "loss": 0.0796, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.016418933868408, + "rewards/margins": 3.303334951400757, + "rewards/rejected": -8.319753646850586, + "step": 8372 + }, + { + "epoch": 1.3, + "learning_rate": 8.00623683801027e-06, + "logits/chosen": -2.1435604095458984, + "logits/rejected": -2.4164350032806396, + "logps/chosen": -198.97682189941406, + "logps/rejected": -326.8861389160156, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.674666404724121, + "rewards/margins": 6.102210998535156, + "rewards/rejected": -8.776877403259277, + "step": 8373 + }, + { + "epoch": 1.3, + "learning_rate": 8.005503397479123e-06, + "logits/chosen": -3.111785888671875, + "logits/rejected": -3.103374481201172, + "logps/chosen": -451.0423583984375, + "logps/rejected": -555.6302490234375, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2859058380126953, + "rewards/margins": 6.694207668304443, + "rewards/rejected": -8.98011302947998, + "step": 8374 + }, + { + "epoch": 1.3, + "learning_rate": 8.004769956947975e-06, + "logits/chosen": -2.816105842590332, + "logits/rejected": -3.0362820625305176, + "logps/chosen": -482.4585876464844, + "logps/rejected": -685.7310791015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.554042100906372, + "rewards/margins": 8.68603229522705, + "rewards/rejected": -11.240074157714844, + "step": 8375 + }, + { + "epoch": 1.3, + "learning_rate": 8.004036516416826e-06, + "logits/chosen": -2.5274879932403564, + "logits/rejected": -2.99019193649292, + "logps/chosen": -148.533203125, + "logps/rejected": -154.342041015625, + "loss": 0.2233, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0643696784973145, + "rewards/margins": 2.013503313064575, + "rewards/rejected": -9.077873229980469, + "step": 8376 + }, + { + "epoch": 1.3, + "learning_rate": 8.00330307588568e-06, + "logits/chosen": -1.11715829372406, + "logits/rejected": -2.194115400314331, + "logps/chosen": -191.48733520507812, + "logps/rejected": -717.8956909179688, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4895105361938477, + "rewards/margins": 12.05966567993164, + "rewards/rejected": -15.549176216125488, + "step": 8377 + }, + { + "epoch": 1.3, + "learning_rate": 8.002569635354532e-06, + "logits/chosen": -1.641232967376709, + "logits/rejected": -2.9643943309783936, + "logps/chosen": -150.23863220214844, + "logps/rejected": -430.4482727050781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.304503917694092, + "rewards/margins": 9.857730865478516, + "rewards/rejected": -13.162235260009766, + "step": 8378 + }, + { + "epoch": 1.3, + "learning_rate": 8.001836194823384e-06, + "logits/chosen": -3.073474168777466, + "logits/rejected": -2.599749803543091, + "logps/chosen": -116.24440002441406, + "logps/rejected": -152.9532928466797, + "loss": 0.0669, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9693918228149414, + "rewards/margins": 3.1031594276428223, + "rewards/rejected": -7.072551727294922, + "step": 8379 + }, + { + "epoch": 1.3, + "learning_rate": 8.001102754292236e-06, + "logits/chosen": -3.1284167766571045, + "logits/rejected": -2.805683135986328, + "logps/chosen": -385.0731201171875, + "logps/rejected": -256.14959716796875, + "loss": 0.0647, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.636289119720459, + "rewards/margins": 4.286444187164307, + "rewards/rejected": -7.922733306884766, + "step": 8380 + }, + { + "epoch": 1.3, + "learning_rate": 8.000369313761088e-06, + "logits/chosen": -2.026000738143921, + "logits/rejected": -3.1002590656280518, + "logps/chosen": -154.32144165039062, + "logps/rejected": -454.03265380859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7131705284118652, + "rewards/margins": 10.992511749267578, + "rewards/rejected": -13.705682754516602, + "step": 8381 + }, + { + "epoch": 1.3, + "learning_rate": 7.99963587322994e-06, + "logits/chosen": -2.9165163040161133, + "logits/rejected": -2.4945266246795654, + "logps/chosen": -72.24760437011719, + "logps/rejected": -173.25067138671875, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4045259952545166, + "rewards/margins": 6.180198669433594, + "rewards/rejected": -9.584724426269531, + "step": 8382 + }, + { + "epoch": 1.3, + "learning_rate": 7.998902432698791e-06, + "logits/chosen": -2.954444408416748, + "logits/rejected": -3.201911687850952, + "logps/chosen": -102.09169006347656, + "logps/rejected": -151.18838500976562, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.45029354095459, + "rewards/margins": 6.955684661865234, + "rewards/rejected": -9.405977249145508, + "step": 8383 + }, + { + "epoch": 1.3, + "learning_rate": 7.998168992167643e-06, + "logits/chosen": -2.5984315872192383, + "logits/rejected": -3.1372151374816895, + "logps/chosen": -134.90487670898438, + "logps/rejected": -357.84893798828125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.648428916931152, + "rewards/margins": 6.080443859100342, + "rewards/rejected": -11.728872299194336, + "step": 8384 + }, + { + "epoch": 1.3, + "learning_rate": 7.997435551636495e-06, + "logits/chosen": -3.094174385070801, + "logits/rejected": -2.5577356815338135, + "logps/chosen": -249.81488037109375, + "logps/rejected": -177.90081787109375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.098500967025757, + "rewards/margins": 7.5250163078308105, + "rewards/rejected": -10.623517036437988, + "step": 8385 + }, + { + "epoch": 1.3, + "learning_rate": 7.996702111105349e-06, + "logits/chosen": -2.2792139053344727, + "logits/rejected": -2.953929901123047, + "logps/chosen": -245.4381866455078, + "logps/rejected": -369.2232666015625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07082521915435791, + "rewards/margins": 11.004008293151855, + "rewards/rejected": -10.933183670043945, + "step": 8386 + }, + { + "epoch": 1.3, + "learning_rate": 7.9959686705742e-06, + "logits/chosen": -2.7975213527679443, + "logits/rejected": -2.3487541675567627, + "logps/chosen": -238.58457946777344, + "logps/rejected": -460.85491943359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.869009017944336, + "rewards/margins": 9.613572120666504, + "rewards/rejected": -12.482580184936523, + "step": 8387 + }, + { + "epoch": 1.3, + "learning_rate": 7.995235230043052e-06, + "logits/chosen": -2.6136693954467773, + "logits/rejected": -2.92061448097229, + "logps/chosen": -214.79672241210938, + "logps/rejected": -355.1260986328125, + "loss": 0.1573, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6048660278320312, + "rewards/margins": 3.3826589584350586, + "rewards/rejected": -6.98752498626709, + "step": 8388 + }, + { + "epoch": 1.3, + "learning_rate": 7.994501789511904e-06, + "logits/chosen": -1.7515569925308228, + "logits/rejected": -3.064918041229248, + "logps/chosen": -171.015380859375, + "logps/rejected": -504.223388671875, + "loss": 0.0955, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.828176975250244, + "rewards/margins": 6.239620208740234, + "rewards/rejected": -11.067797660827637, + "step": 8389 + }, + { + "epoch": 1.3, + "learning_rate": 7.993768348980756e-06, + "logits/chosen": -2.3498404026031494, + "logits/rejected": -2.8144643306732178, + "logps/chosen": -576.9591064453125, + "logps/rejected": -552.640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.56645131111145, + "rewards/margins": 8.577892303466797, + "rewards/rejected": -12.144344329833984, + "step": 8390 + }, + { + "epoch": 1.3, + "learning_rate": 7.993034908449608e-06, + "logits/chosen": -2.964444160461426, + "logits/rejected": -1.3470702171325684, + "logps/chosen": -200.12074279785156, + "logps/rejected": -200.89572143554688, + "loss": 0.0366, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4019358158111572, + "rewards/margins": 5.011960029602051, + "rewards/rejected": -7.413895606994629, + "step": 8391 + }, + { + "epoch": 1.31, + "learning_rate": 7.99230146791846e-06, + "logits/chosen": -1.9715654850006104, + "logits/rejected": -2.8097667694091797, + "logps/chosen": -344.80804443359375, + "logps/rejected": -657.9437255859375, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.580272197723389, + "rewards/margins": 6.082293510437012, + "rewards/rejected": -11.662565231323242, + "step": 8392 + }, + { + "epoch": 1.31, + "learning_rate": 7.991568027387312e-06, + "logits/chosen": -1.470538854598999, + "logits/rejected": -2.261465311050415, + "logps/chosen": -128.0124053955078, + "logps/rejected": -183.6887969970703, + "loss": 0.1371, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.030847549438477, + "rewards/margins": 3.203145742416382, + "rewards/rejected": -7.2339935302734375, + "step": 8393 + }, + { + "epoch": 1.31, + "learning_rate": 7.990834586856165e-06, + "logits/chosen": -3.2109899520874023, + "logits/rejected": -2.780510425567627, + "logps/chosen": -134.68280029296875, + "logps/rejected": -305.29486083984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6217894554138184, + "rewards/margins": 8.4094820022583, + "rewards/rejected": -11.031270980834961, + "step": 8394 + }, + { + "epoch": 1.31, + "learning_rate": 7.990101146325017e-06, + "logits/chosen": -2.1065921783447266, + "logits/rejected": -2.833186626434326, + "logps/chosen": -129.6699981689453, + "logps/rejected": -327.0357971191406, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.507946968078613, + "rewards/margins": 10.19581413269043, + "rewards/rejected": -16.70376205444336, + "step": 8395 + }, + { + "epoch": 1.31, + "learning_rate": 7.989367705793869e-06, + "logits/chosen": -1.4635745286941528, + "logits/rejected": -2.7935264110565186, + "logps/chosen": -106.65725708007812, + "logps/rejected": -289.14984130859375, + "loss": 0.0738, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.835298538208008, + "rewards/margins": 6.249719619750977, + "rewards/rejected": -13.085018157958984, + "step": 8396 + }, + { + "epoch": 1.31, + "learning_rate": 7.988634265262721e-06, + "logits/chosen": -2.5005922317504883, + "logits/rejected": -3.0930333137512207, + "logps/chosen": -669.5703125, + "logps/rejected": -564.6868896484375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7218003273010254, + "rewards/margins": 7.517006874084473, + "rewards/rejected": -11.238807678222656, + "step": 8397 + }, + { + "epoch": 1.31, + "learning_rate": 7.987900824731573e-06, + "logits/chosen": -2.191511392593384, + "logits/rejected": -2.865731716156006, + "logps/chosen": -128.2104034423828, + "logps/rejected": -323.0870666503906, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.658571720123291, + "rewards/margins": 7.294745445251465, + "rewards/rejected": -9.953317642211914, + "step": 8398 + }, + { + "epoch": 1.31, + "learning_rate": 7.987167384200425e-06, + "logits/chosen": -2.316563606262207, + "logits/rejected": -3.0849249362945557, + "logps/chosen": -123.97723388671875, + "logps/rejected": -433.0884704589844, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8761954307556152, + "rewards/margins": 7.0268964767456055, + "rewards/rejected": -10.903091430664062, + "step": 8399 + }, + { + "epoch": 1.31, + "learning_rate": 7.986433943669277e-06, + "logits/chosen": -3.180832862854004, + "logits/rejected": -3.1272222995758057, + "logps/chosen": -426.5241394042969, + "logps/rejected": -431.84576416015625, + "loss": 0.0879, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6321640014648438, + "rewards/margins": 4.314459800720215, + "rewards/rejected": -7.946624279022217, + "step": 8400 + }, + { + "epoch": 1.31, + "learning_rate": 7.985700503138128e-06, + "logits/chosen": -2.528578281402588, + "logits/rejected": -2.775668144226074, + "logps/chosen": -347.36358642578125, + "logps/rejected": -406.97808837890625, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.681466817855835, + "rewards/margins": 6.249898910522461, + "rewards/rejected": -8.931365966796875, + "step": 8401 + }, + { + "epoch": 1.31, + "learning_rate": 7.98496706260698e-06, + "logits/chosen": -2.0832715034484863, + "logits/rejected": -2.6000568866729736, + "logps/chosen": -91.08403015136719, + "logps/rejected": -277.57427978515625, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.223629951477051, + "rewards/margins": 7.437200546264648, + "rewards/rejected": -11.6608304977417, + "step": 8402 + }, + { + "epoch": 1.31, + "learning_rate": 7.984233622075834e-06, + "logits/chosen": -3.067537307739258, + "logits/rejected": -2.9205734729766846, + "logps/chosen": -110.13339233398438, + "logps/rejected": -108.67311096191406, + "loss": 0.5203, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.308861255645752, + "rewards/margins": 2.932708740234375, + "rewards/rejected": -8.241569519042969, + "step": 8403 + }, + { + "epoch": 1.31, + "learning_rate": 7.983500181544686e-06, + "logits/chosen": -2.705869436264038, + "logits/rejected": -2.1637654304504395, + "logps/chosen": -181.66307067871094, + "logps/rejected": -183.01593017578125, + "loss": 0.5681, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.153831958770752, + "rewards/margins": 3.3466789722442627, + "rewards/rejected": -9.500511169433594, + "step": 8404 + }, + { + "epoch": 1.31, + "learning_rate": 7.982766741013538e-06, + "logits/chosen": -3.0268492698669434, + "logits/rejected": -2.8252577781677246, + "logps/chosen": -82.6753158569336, + "logps/rejected": -111.4781723022461, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4569215774536133, + "rewards/margins": 4.573397636413574, + "rewards/rejected": -8.030319213867188, + "step": 8405 + }, + { + "epoch": 1.31, + "learning_rate": 7.982033300482391e-06, + "logits/chosen": -3.118934154510498, + "logits/rejected": -2.948282480239868, + "logps/chosen": -293.7309875488281, + "logps/rejected": -235.58226013183594, + "loss": 1.2361, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3476288318634033, + "rewards/margins": 2.0462379455566406, + "rewards/rejected": -5.393866539001465, + "step": 8406 + }, + { + "epoch": 1.31, + "learning_rate": 7.981299859951243e-06, + "logits/chosen": -1.743201494216919, + "logits/rejected": -2.7873144149780273, + "logps/chosen": -210.94210815429688, + "logps/rejected": -339.92962646484375, + "loss": 0.7449, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.039892196655273, + "rewards/margins": 1.6007864475250244, + "rewards/rejected": -6.640678405761719, + "step": 8407 + }, + { + "epoch": 1.31, + "learning_rate": 7.980566419420095e-06, + "logits/chosen": -2.1485960483551025, + "logits/rejected": -2.7295620441436768, + "logps/chosen": -459.2991943359375, + "logps/rejected": -525.69775390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8866550922393799, + "rewards/margins": 8.952044486999512, + "rewards/rejected": -9.838699340820312, + "step": 8408 + }, + { + "epoch": 1.31, + "learning_rate": 7.979832978888947e-06, + "logits/chosen": -1.6610174179077148, + "logits/rejected": -2.644094228744507, + "logps/chosen": -184.07249450683594, + "logps/rejected": -237.52867126464844, + "loss": 0.7571, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.419271945953369, + "rewards/margins": 3.2710866928100586, + "rewards/rejected": -8.690359115600586, + "step": 8409 + }, + { + "epoch": 1.31, + "learning_rate": 7.979099538357799e-06, + "logits/chosen": -2.442654848098755, + "logits/rejected": -2.978944778442383, + "logps/chosen": -76.51841735839844, + "logps/rejected": -309.97235107421875, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.292574882507324, + "rewards/margins": 6.44127082824707, + "rewards/rejected": -10.733845710754395, + "step": 8410 + }, + { + "epoch": 1.31, + "learning_rate": 7.97836609782665e-06, + "logits/chosen": -2.857377290725708, + "logits/rejected": -2.7153663635253906, + "logps/chosen": -252.04786682128906, + "logps/rejected": -402.6194763183594, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4866318106651306, + "rewards/margins": 8.592411994934082, + "rewards/rejected": -9.0790433883667, + "step": 8411 + }, + { + "epoch": 1.31, + "learning_rate": 7.977632657295504e-06, + "logits/chosen": -2.558203935623169, + "logits/rejected": -2.338103771209717, + "logps/chosen": -201.68621826171875, + "logps/rejected": -233.6217803955078, + "loss": 0.7158, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.6399712562561035, + "rewards/margins": 1.9468414783477783, + "rewards/rejected": -7.586812973022461, + "step": 8412 + }, + { + "epoch": 1.31, + "learning_rate": 7.976899216764356e-06, + "logits/chosen": -3.0403892993927, + "logits/rejected": -2.717735528945923, + "logps/chosen": -271.0605163574219, + "logps/rejected": -316.3297119140625, + "loss": 0.7203, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.984818458557129, + "rewards/margins": 4.618342876434326, + "rewards/rejected": -8.603160858154297, + "step": 8413 + }, + { + "epoch": 1.31, + "learning_rate": 7.976165776233208e-06, + "logits/chosen": -2.0810964107513428, + "logits/rejected": -2.709726095199585, + "logps/chosen": -193.93035888671875, + "logps/rejected": -501.24285888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3918068408966064, + "rewards/margins": 14.177972793579102, + "rewards/rejected": -17.569780349731445, + "step": 8414 + }, + { + "epoch": 1.31, + "learning_rate": 7.97543233570206e-06, + "logits/chosen": -1.6156129837036133, + "logits/rejected": -2.838346481323242, + "logps/chosen": -193.01052856445312, + "logps/rejected": -370.58099365234375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3664493560791016, + "rewards/margins": 8.368727684020996, + "rewards/rejected": -10.735177040100098, + "step": 8415 + }, + { + "epoch": 1.31, + "learning_rate": 7.974698895170912e-06, + "logits/chosen": -2.9363062381744385, + "logits/rejected": -3.061826229095459, + "logps/chosen": -178.74359130859375, + "logps/rejected": -188.5283660888672, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3680720925331116, + "rewards/margins": 6.289253234863281, + "rewards/rejected": -6.657325267791748, + "step": 8416 + }, + { + "epoch": 1.31, + "learning_rate": 7.973965454639764e-06, + "logits/chosen": -2.9599533081054688, + "logits/rejected": -2.207008123397827, + "logps/chosen": -367.554931640625, + "logps/rejected": -173.4596710205078, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.089526653289795, + "rewards/margins": 4.197641849517822, + "rewards/rejected": -8.287168502807617, + "step": 8417 + }, + { + "epoch": 1.31, + "learning_rate": 7.973232014108615e-06, + "logits/chosen": -2.5197439193725586, + "logits/rejected": -2.8564765453338623, + "logps/chosen": -164.49810791015625, + "logps/rejected": -210.84320068359375, + "loss": 0.763, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.270910739898682, + "rewards/margins": 1.761784553527832, + "rewards/rejected": -6.032695293426514, + "step": 8418 + }, + { + "epoch": 1.31, + "learning_rate": 7.972498573577467e-06, + "logits/chosen": -2.6603927612304688, + "logits/rejected": -3.0723555088043213, + "logps/chosen": -157.87252807617188, + "logps/rejected": -271.728759765625, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9918012619018555, + "rewards/margins": 5.125454902648926, + "rewards/rejected": -9.117256164550781, + "step": 8419 + }, + { + "epoch": 1.31, + "learning_rate": 7.97176513304632e-06, + "logits/chosen": -1.9635789394378662, + "logits/rejected": -2.8396973609924316, + "logps/chosen": -234.75210571289062, + "logps/rejected": -363.1564025878906, + "loss": 0.0354, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.302610397338867, + "rewards/margins": 5.162209510803223, + "rewards/rejected": -9.464818954467773, + "step": 8420 + }, + { + "epoch": 1.31, + "learning_rate": 7.971031692515173e-06, + "logits/chosen": -2.9813127517700195, + "logits/rejected": -3.0297539234161377, + "logps/chosen": -182.2721710205078, + "logps/rejected": -462.4057312011719, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.03823184967041, + "rewards/margins": 8.309163093566895, + "rewards/rejected": -11.347394943237305, + "step": 8421 + }, + { + "epoch": 1.31, + "learning_rate": 7.970298251984025e-06, + "logits/chosen": -2.687152624130249, + "logits/rejected": -2.8708674907684326, + "logps/chosen": -317.9715576171875, + "logps/rejected": -261.07659912109375, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4564926624298096, + "rewards/margins": 6.419987201690674, + "rewards/rejected": -9.876480102539062, + "step": 8422 + }, + { + "epoch": 1.31, + "learning_rate": 7.969564811452877e-06, + "logits/chosen": -2.771865129470825, + "logits/rejected": -3.181605100631714, + "logps/chosen": -80.0339584350586, + "logps/rejected": -309.07928466796875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7439746856689453, + "rewards/margins": 8.661006927490234, + "rewards/rejected": -12.40498161315918, + "step": 8423 + }, + { + "epoch": 1.31, + "learning_rate": 7.968831370921728e-06, + "logits/chosen": -3.0157883167266846, + "logits/rejected": -2.88641619682312, + "logps/chosen": -208.78648376464844, + "logps/rejected": -222.2073211669922, + "loss": 0.0993, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.546277046203613, + "rewards/margins": 2.2601571083068848, + "rewards/rejected": -7.806434154510498, + "step": 8424 + }, + { + "epoch": 1.31, + "learning_rate": 7.96809793039058e-06, + "logits/chosen": -2.095611810684204, + "logits/rejected": -3.047647476196289, + "logps/chosen": -226.76284790039062, + "logps/rejected": -338.2115478515625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8852462768554688, + "rewards/margins": 7.0294342041015625, + "rewards/rejected": -9.914680480957031, + "step": 8425 + }, + { + "epoch": 1.31, + "learning_rate": 7.967364489859432e-06, + "logits/chosen": -1.4947792291641235, + "logits/rejected": -2.911105155944824, + "logps/chosen": -101.41786193847656, + "logps/rejected": -259.3760681152344, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.34879732131958, + "rewards/margins": 6.897433757781982, + "rewards/rejected": -10.246231079101562, + "step": 8426 + }, + { + "epoch": 1.31, + "learning_rate": 7.966631049328284e-06, + "logits/chosen": -2.752833366394043, + "logits/rejected": -3.180518865585327, + "logps/chosen": -75.47344970703125, + "logps/rejected": -281.4437255859375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6776018142700195, + "rewards/margins": 7.335873126983643, + "rewards/rejected": -11.01347541809082, + "step": 8427 + }, + { + "epoch": 1.31, + "learning_rate": 7.965897608797136e-06, + "logits/chosen": -2.3028221130371094, + "logits/rejected": -3.0145480632781982, + "logps/chosen": -335.52001953125, + "logps/rejected": -340.20440673828125, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3558349609375, + "rewards/margins": 6.332832336425781, + "rewards/rejected": -9.688667297363281, + "step": 8428 + }, + { + "epoch": 1.31, + "learning_rate": 7.965164168265988e-06, + "logits/chosen": -2.6832306385040283, + "logits/rejected": -3.0073800086975098, + "logps/chosen": -85.98896789550781, + "logps/rejected": -175.33975219726562, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.601760387420654, + "rewards/margins": 4.392455101013184, + "rewards/rejected": -8.99421501159668, + "step": 8429 + }, + { + "epoch": 1.31, + "learning_rate": 7.964430727734841e-06, + "logits/chosen": -2.6610589027404785, + "logits/rejected": -2.9968390464782715, + "logps/chosen": -76.13712310791016, + "logps/rejected": -330.875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3399949073791504, + "rewards/margins": 8.860689163208008, + "rewards/rejected": -11.200684547424316, + "step": 8430 + }, + { + "epoch": 1.31, + "learning_rate": 7.963697287203693e-06, + "logits/chosen": -2.9864351749420166, + "logits/rejected": -2.5325734615325928, + "logps/chosen": -414.8525695800781, + "logps/rejected": -551.8795166015625, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.477052688598633, + "rewards/margins": 5.27470588684082, + "rewards/rejected": -10.751758575439453, + "step": 8431 + }, + { + "epoch": 1.31, + "learning_rate": 7.962963846672545e-06, + "logits/chosen": -1.5041955709457397, + "logits/rejected": -2.739502429962158, + "logps/chosen": -87.10987854003906, + "logps/rejected": -354.99871826171875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3164725303649902, + "rewards/margins": 7.164488792419434, + "rewards/rejected": -10.480961799621582, + "step": 8432 + }, + { + "epoch": 1.31, + "learning_rate": 7.962230406141397e-06, + "logits/chosen": -1.6381736993789673, + "logits/rejected": -2.636136531829834, + "logps/chosen": -389.83477783203125, + "logps/rejected": -445.32568359375, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.16746711730957, + "rewards/margins": 6.1608476638793945, + "rewards/rejected": -10.328313827514648, + "step": 8433 + }, + { + "epoch": 1.31, + "learning_rate": 7.961496965610249e-06, + "logits/chosen": -2.409421682357788, + "logits/rejected": -2.8017687797546387, + "logps/chosen": -160.91482543945312, + "logps/rejected": -199.12863159179688, + "loss": 0.1394, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9620299339294434, + "rewards/margins": 4.495279312133789, + "rewards/rejected": -8.457308769226074, + "step": 8434 + }, + { + "epoch": 1.31, + "learning_rate": 7.9607635250791e-06, + "logits/chosen": -2.2904610633850098, + "logits/rejected": -2.750718116760254, + "logps/chosen": -291.74957275390625, + "logps/rejected": -396.00518798828125, + "loss": 0.0304, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.526508092880249, + "rewards/margins": 5.701812744140625, + "rewards/rejected": -9.228321075439453, + "step": 8435 + }, + { + "epoch": 1.31, + "learning_rate": 7.960030084547953e-06, + "logits/chosen": -2.063976764678955, + "logits/rejected": -2.7145936489105225, + "logps/chosen": -101.99429321289062, + "logps/rejected": -280.5946044921875, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.201535940170288, + "rewards/margins": 5.465319633483887, + "rewards/rejected": -8.666855812072754, + "step": 8436 + }, + { + "epoch": 1.31, + "learning_rate": 7.959296644016805e-06, + "logits/chosen": -1.914813756942749, + "logits/rejected": -2.4308724403381348, + "logps/chosen": -151.251708984375, + "logps/rejected": -216.46759033203125, + "loss": 1.6041, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.749050140380859, + "rewards/margins": -0.25246119499206543, + "rewards/rejected": -6.496588706970215, + "step": 8437 + }, + { + "epoch": 1.31, + "learning_rate": 7.958563203485658e-06, + "logits/chosen": -2.4805095195770264, + "logits/rejected": -2.942516803741455, + "logps/chosen": -456.1465759277344, + "logps/rejected": -635.5748291015625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0383377075195312, + "rewards/margins": 6.996065139770508, + "rewards/rejected": -10.034402847290039, + "step": 8438 + }, + { + "epoch": 1.31, + "learning_rate": 7.95782976295451e-06, + "logits/chosen": -2.472080707550049, + "logits/rejected": -3.0579934120178223, + "logps/chosen": -525.0712890625, + "logps/rejected": -638.5250244140625, + "loss": 0.0678, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.092120409011841, + "rewards/margins": 4.717144966125488, + "rewards/rejected": -7.80926513671875, + "step": 8439 + }, + { + "epoch": 1.31, + "learning_rate": 7.957096322423364e-06, + "logits/chosen": -2.5766055583953857, + "logits/rejected": -3.0815954208374023, + "logps/chosen": -185.69091796875, + "logps/rejected": -150.10617065429688, + "loss": 1.7321, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.148830413818359, + "rewards/margins": -0.1932530403137207, + "rewards/rejected": -5.9555768966674805, + "step": 8440 + }, + { + "epoch": 1.31, + "learning_rate": 7.956362881892215e-06, + "logits/chosen": -3.0389106273651123, + "logits/rejected": -3.023585319519043, + "logps/chosen": -729.71142578125, + "logps/rejected": -495.1580810546875, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.946131706237793, + "rewards/margins": 7.381082534790039, + "rewards/rejected": -10.327214241027832, + "step": 8441 + }, + { + "epoch": 1.31, + "learning_rate": 7.955629441361067e-06, + "logits/chosen": -2.6221017837524414, + "logits/rejected": -2.8326144218444824, + "logps/chosen": -139.55206298828125, + "logps/rejected": -259.8505554199219, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9812631607055664, + "rewards/margins": 5.807016372680664, + "rewards/rejected": -9.78827953338623, + "step": 8442 + }, + { + "epoch": 1.31, + "learning_rate": 7.95489600082992e-06, + "logits/chosen": -1.5810259580612183, + "logits/rejected": -2.7021889686584473, + "logps/chosen": -208.7126922607422, + "logps/rejected": -500.8087463378906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5435593128204346, + "rewards/margins": 9.483964920043945, + "rewards/rejected": -13.0275239944458, + "step": 8443 + }, + { + "epoch": 1.31, + "learning_rate": 7.954162560298771e-06, + "logits/chosen": -2.660336971282959, + "logits/rejected": -3.0223209857940674, + "logps/chosen": -223.55958557128906, + "logps/rejected": -329.3841247558594, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4336905479431152, + "rewards/margins": 7.182635307312012, + "rewards/rejected": -10.616325378417969, + "step": 8444 + }, + { + "epoch": 1.31, + "learning_rate": 7.953429119767623e-06, + "logits/chosen": -2.949171781539917, + "logits/rejected": -3.008139133453369, + "logps/chosen": -99.42752075195312, + "logps/rejected": -207.197998046875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.344221830368042, + "rewards/margins": 7.639230728149414, + "rewards/rejected": -8.983451843261719, + "step": 8445 + }, + { + "epoch": 1.31, + "learning_rate": 7.952695679236475e-06, + "logits/chosen": -1.7437553405761719, + "logits/rejected": -2.907762289047241, + "logps/chosen": -170.57498168945312, + "logps/rejected": -318.0874328613281, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9940773248672485, + "rewards/margins": 7.502409934997559, + "rewards/rejected": -8.496487617492676, + "step": 8446 + }, + { + "epoch": 1.31, + "learning_rate": 7.951962238705327e-06, + "logits/chosen": -2.7356789112091064, + "logits/rejected": -2.258115291595459, + "logps/chosen": -109.94111633300781, + "logps/rejected": -121.10528564453125, + "loss": 0.4581, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.188046455383301, + "rewards/margins": 0.9534451961517334, + "rewards/rejected": -7.141491413116455, + "step": 8447 + }, + { + "epoch": 1.31, + "learning_rate": 7.95122879817418e-06, + "logits/chosen": -2.970398426055908, + "logits/rejected": -2.0875585079193115, + "logps/chosen": -363.5982360839844, + "logps/rejected": -313.87689208984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.384217619895935, + "rewards/margins": 8.3031005859375, + "rewards/rejected": -9.687318801879883, + "step": 8448 + }, + { + "epoch": 1.31, + "learning_rate": 7.950495357643032e-06, + "logits/chosen": -3.0738110542297363, + "logits/rejected": -2.390927791595459, + "logps/chosen": -549.8146362304688, + "logps/rejected": -325.839599609375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.192323207855225, + "rewards/margins": 6.007992744445801, + "rewards/rejected": -11.200315475463867, + "step": 8449 + }, + { + "epoch": 1.31, + "learning_rate": 7.949761917111884e-06, + "logits/chosen": -1.7499067783355713, + "logits/rejected": -2.966721773147583, + "logps/chosen": -245.895263671875, + "logps/rejected": -642.5404052734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.382876396179199, + "rewards/margins": 10.324719429016113, + "rewards/rejected": -14.707595825195312, + "step": 8450 + }, + { + "epoch": 1.31, + "learning_rate": 7.949028476580736e-06, + "logits/chosen": -2.3049259185791016, + "logits/rejected": -2.9676730632781982, + "logps/chosen": -538.094970703125, + "logps/rejected": -299.5273742675781, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4839999675750732, + "rewards/margins": 5.0234222412109375, + "rewards/rejected": -7.50742244720459, + "step": 8451 + }, + { + "epoch": 1.31, + "learning_rate": 7.948295036049588e-06, + "logits/chosen": -3.048240900039673, + "logits/rejected": -3.114245653152466, + "logps/chosen": -372.6722412109375, + "logps/rejected": -388.9625244140625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.526486396789551, + "rewards/margins": 7.4563679695129395, + "rewards/rejected": -10.982854843139648, + "step": 8452 + }, + { + "epoch": 1.31, + "learning_rate": 7.94756159551844e-06, + "logits/chosen": -2.728548765182495, + "logits/rejected": -2.9445648193359375, + "logps/chosen": -299.62530517578125, + "logps/rejected": -463.8438415527344, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6611328125, + "rewards/margins": 6.026267051696777, + "rewards/rejected": -10.687399864196777, + "step": 8453 + }, + { + "epoch": 1.31, + "learning_rate": 7.946828154987292e-06, + "logits/chosen": -1.7964248657226562, + "logits/rejected": -2.7674660682678223, + "logps/chosen": -256.08612060546875, + "logps/rejected": -648.9476318359375, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.538941383361816, + "rewards/margins": 7.72125244140625, + "rewards/rejected": -13.260194778442383, + "step": 8454 + }, + { + "epoch": 1.31, + "learning_rate": 7.946094714456143e-06, + "logits/chosen": -1.836432695388794, + "logits/rejected": -2.8522491455078125, + "logps/chosen": -168.90072631835938, + "logps/rejected": -286.16595458984375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.956956148147583, + "rewards/margins": 8.95932388305664, + "rewards/rejected": -12.916279792785645, + "step": 8455 + }, + { + "epoch": 1.32, + "learning_rate": 7.945361273924995e-06, + "logits/chosen": -3.0766196250915527, + "logits/rejected": -1.8732225894927979, + "logps/chosen": -606.4342041015625, + "logps/rejected": -318.75946044921875, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1211671829223633, + "rewards/margins": 8.225600242614746, + "rewards/rejected": -10.34676742553711, + "step": 8456 + }, + { + "epoch": 1.32, + "learning_rate": 7.944627833393849e-06, + "logits/chosen": -1.6142584085464478, + "logits/rejected": -1.5794146060943604, + "logps/chosen": -162.54861450195312, + "logps/rejected": -240.11932373046875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4759414196014404, + "rewards/margins": 7.20249080657959, + "rewards/rejected": -9.67843246459961, + "step": 8457 + }, + { + "epoch": 1.32, + "learning_rate": 7.9438943928627e-06, + "logits/chosen": -3.006795644760132, + "logits/rejected": -2.756824493408203, + "logps/chosen": -219.83059692382812, + "logps/rejected": -395.48382568359375, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.068871259689331, + "rewards/margins": 8.574178695678711, + "rewards/rejected": -11.643050193786621, + "step": 8458 + }, + { + "epoch": 1.32, + "learning_rate": 7.943160952331553e-06, + "logits/chosen": -2.0021703243255615, + "logits/rejected": -2.442776918411255, + "logps/chosen": -55.52519989013672, + "logps/rejected": -248.3787841796875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8983237743377686, + "rewards/margins": 7.503121852874756, + "rewards/rejected": -11.401445388793945, + "step": 8459 + }, + { + "epoch": 1.32, + "learning_rate": 7.942427511800405e-06, + "logits/chosen": -2.01865291595459, + "logits/rejected": -2.9090850353240967, + "logps/chosen": -216.9654541015625, + "logps/rejected": -500.70721435546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8584887981414795, + "rewards/margins": 9.793001174926758, + "rewards/rejected": -12.651491165161133, + "step": 8460 + }, + { + "epoch": 1.32, + "learning_rate": 7.941694071269256e-06, + "logits/chosen": -2.2999351024627686, + "logits/rejected": -2.6479523181915283, + "logps/chosen": -65.75308227539062, + "logps/rejected": -327.28094482421875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7771544456481934, + "rewards/margins": 7.360933303833008, + "rewards/rejected": -10.13808822631836, + "step": 8461 + }, + { + "epoch": 1.32, + "learning_rate": 7.940960630738108e-06, + "logits/chosen": -1.239349603652954, + "logits/rejected": -2.6395089626312256, + "logps/chosen": -221.5457763671875, + "logps/rejected": -387.4339599609375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.649724006652832, + "rewards/margins": 7.729331970214844, + "rewards/rejected": -11.379055976867676, + "step": 8462 + }, + { + "epoch": 1.32, + "learning_rate": 7.94022719020696e-06, + "logits/chosen": -2.709251642227173, + "logits/rejected": -1.8505685329437256, + "logps/chosen": -262.7757873535156, + "logps/rejected": -260.419189453125, + "loss": 0.12, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.261435031890869, + "rewards/margins": 4.021760940551758, + "rewards/rejected": -8.283195495605469, + "step": 8463 + }, + { + "epoch": 1.32, + "learning_rate": 7.939493749675812e-06, + "logits/chosen": -2.3070828914642334, + "logits/rejected": -2.7452468872070312, + "logps/chosen": -187.9940643310547, + "logps/rejected": -265.64410400390625, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.645050287246704, + "rewards/margins": 5.318262100219727, + "rewards/rejected": -8.963312149047852, + "step": 8464 + }, + { + "epoch": 1.32, + "learning_rate": 7.938760309144664e-06, + "logits/chosen": -2.4748454093933105, + "logits/rejected": -1.6538161039352417, + "logps/chosen": -271.4767761230469, + "logps/rejected": -284.3633728027344, + "loss": 1.3688, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.167498826980591, + "rewards/margins": 2.4882397651672363, + "rewards/rejected": -5.655738830566406, + "step": 8465 + }, + { + "epoch": 1.32, + "learning_rate": 7.938026868613518e-06, + "logits/chosen": -1.9643800258636475, + "logits/rejected": -2.9676060676574707, + "logps/chosen": -197.10452270507812, + "logps/rejected": -565.4266357421875, + "loss": 2.2565, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.914968490600586, + "rewards/margins": 5.744387626647949, + "rewards/rejected": -13.659356117248535, + "step": 8466 + }, + { + "epoch": 1.32, + "learning_rate": 7.93729342808237e-06, + "logits/chosen": -3.085167646408081, + "logits/rejected": -2.5926506519317627, + "logps/chosen": -132.71104431152344, + "logps/rejected": -124.78243255615234, + "loss": 1.7921, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.778256893157959, + "rewards/margins": 1.3976627588272095, + "rewards/rejected": -6.175919532775879, + "step": 8467 + }, + { + "epoch": 1.32, + "learning_rate": 7.936559987551221e-06, + "logits/chosen": -1.8151700496673584, + "logits/rejected": -3.117197275161743, + "logps/chosen": -128.84231567382812, + "logps/rejected": -322.7283020019531, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.712583303451538, + "rewards/margins": 7.624454021453857, + "rewards/rejected": -10.337038040161133, + "step": 8468 + }, + { + "epoch": 1.32, + "learning_rate": 7.935826547020073e-06, + "logits/chosen": -1.732445478439331, + "logits/rejected": -2.8227319717407227, + "logps/chosen": -66.05184936523438, + "logps/rejected": -411.7205810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8787894248962402, + "rewards/margins": 11.535569190979004, + "rewards/rejected": -14.414358139038086, + "step": 8469 + }, + { + "epoch": 1.32, + "learning_rate": 7.935093106488925e-06, + "logits/chosen": -2.491799831390381, + "logits/rejected": -2.6973154544830322, + "logps/chosen": -357.43463134765625, + "logps/rejected": -359.61895751953125, + "loss": 4.1697, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.519313812255859, + "rewards/margins": -0.04654979705810547, + "rewards/rejected": -6.472764015197754, + "step": 8470 + }, + { + "epoch": 1.32, + "learning_rate": 7.934359665957777e-06, + "logits/chosen": -2.2637338638305664, + "logits/rejected": -2.8302600383758545, + "logps/chosen": -203.6662139892578, + "logps/rejected": -445.217529296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.753847599029541, + "rewards/margins": 9.912717819213867, + "rewards/rejected": -14.66656494140625, + "step": 8471 + }, + { + "epoch": 1.32, + "learning_rate": 7.93362622542663e-06, + "logits/chosen": -2.9045445919036865, + "logits/rejected": -2.0829508304595947, + "logps/chosen": -601.1155395507812, + "logps/rejected": -380.10595703125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.630296468734741, + "rewards/margins": 8.593843460083008, + "rewards/rejected": -12.224140167236328, + "step": 8472 + }, + { + "epoch": 1.32, + "learning_rate": 7.932892784895482e-06, + "logits/chosen": -3.0257396697998047, + "logits/rejected": -1.2356399297714233, + "logps/chosen": -866.5089111328125, + "logps/rejected": -353.07293701171875, + "loss": 1.8396, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.455474853515625, + "rewards/margins": 1.6608400344848633, + "rewards/rejected": -6.116314888000488, + "step": 8473 + }, + { + "epoch": 1.32, + "learning_rate": 7.932159344364334e-06, + "logits/chosen": -2.6963913440704346, + "logits/rejected": -3.091259002685547, + "logps/chosen": -144.96519470214844, + "logps/rejected": -256.4394836425781, + "loss": 3.5187, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.195671081542969, + "rewards/margins": -3.44581937789917, + "rewards/rejected": -5.749852180480957, + "step": 8474 + }, + { + "epoch": 1.32, + "learning_rate": 7.931425903833188e-06, + "logits/chosen": -2.9937710762023926, + "logits/rejected": -2.652630090713501, + "logps/chosen": -175.29864501953125, + "logps/rejected": -241.4915771484375, + "loss": 1.2429, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.835692405700684, + "rewards/margins": 3.859276056289673, + "rewards/rejected": -8.694969177246094, + "step": 8475 + }, + { + "epoch": 1.32, + "learning_rate": 7.93069246330204e-06, + "logits/chosen": -2.7734265327453613, + "logits/rejected": -2.813023567199707, + "logps/chosen": -303.94915771484375, + "logps/rejected": -321.75054931640625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.393254280090332, + "rewards/margins": 7.303249359130859, + "rewards/rejected": -11.696502685546875, + "step": 8476 + }, + { + "epoch": 1.32, + "learning_rate": 7.929959022770892e-06, + "logits/chosen": -2.464045763015747, + "logits/rejected": -2.414213180541992, + "logps/chosen": -188.97674560546875, + "logps/rejected": -426.77850341796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2731943130493164, + "rewards/margins": 11.72214126586914, + "rewards/rejected": -13.995335578918457, + "step": 8477 + }, + { + "epoch": 1.32, + "learning_rate": 7.929225582239743e-06, + "logits/chosen": -1.4278582334518433, + "logits/rejected": -2.454775810241699, + "logps/chosen": -466.185546875, + "logps/rejected": -476.223876953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.035437107086182, + "rewards/margins": 9.9208402633667, + "rewards/rejected": -14.956277847290039, + "step": 8478 + }, + { + "epoch": 1.32, + "learning_rate": 7.928492141708595e-06, + "logits/chosen": -2.567469835281372, + "logits/rejected": -2.861208438873291, + "logps/chosen": -71.68675994873047, + "logps/rejected": -220.17105102539062, + "loss": 0.1842, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.08107852935791, + "rewards/margins": 4.801082611083984, + "rewards/rejected": -7.882160663604736, + "step": 8479 + }, + { + "epoch": 1.32, + "learning_rate": 7.927758701177447e-06, + "logits/chosen": -1.5725607872009277, + "logits/rejected": -2.2816998958587646, + "logps/chosen": -482.7049865722656, + "logps/rejected": -542.7928466796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.579573154449463, + "rewards/margins": 7.9676713943481445, + "rewards/rejected": -11.547245025634766, + "step": 8480 + }, + { + "epoch": 1.32, + "learning_rate": 7.927025260646299e-06, + "logits/chosen": -2.325423240661621, + "logits/rejected": -3.0590808391571045, + "logps/chosen": -107.71796417236328, + "logps/rejected": -220.11767578125, + "loss": 0.0418, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.540987491607666, + "rewards/margins": 3.1565918922424316, + "rewards/rejected": -7.697579383850098, + "step": 8481 + }, + { + "epoch": 1.32, + "learning_rate": 7.926291820115151e-06, + "logits/chosen": -2.29664945602417, + "logits/rejected": -2.8602774143218994, + "logps/chosen": -122.88847351074219, + "logps/rejected": -297.6947937011719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2516987323760986, + "rewards/margins": 10.672181129455566, + "rewards/rejected": -11.923879623413086, + "step": 8482 + }, + { + "epoch": 1.32, + "learning_rate": 7.925558379584005e-06, + "logits/chosen": -1.6328932046890259, + "logits/rejected": -2.6865146160125732, + "logps/chosen": -74.97325897216797, + "logps/rejected": -317.8560485839844, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.433227062225342, + "rewards/margins": 7.405917167663574, + "rewards/rejected": -10.839144706726074, + "step": 8483 + }, + { + "epoch": 1.32, + "learning_rate": 7.924824939052856e-06, + "logits/chosen": -1.444690465927124, + "logits/rejected": -2.6663029193878174, + "logps/chosen": -94.57084655761719, + "logps/rejected": -557.6207275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1686851978302, + "rewards/margins": 15.284505844116211, + "rewards/rejected": -17.453189849853516, + "step": 8484 + }, + { + "epoch": 1.32, + "learning_rate": 7.924091498521708e-06, + "logits/chosen": -2.921856641769409, + "logits/rejected": -3.0802125930786133, + "logps/chosen": -38.87893295288086, + "logps/rejected": -307.2182312011719, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4889936447143555, + "rewards/margins": 6.8651957511901855, + "rewards/rejected": -9.354188919067383, + "step": 8485 + }, + { + "epoch": 1.32, + "learning_rate": 7.92335805799056e-06, + "logits/chosen": -2.3566460609436035, + "logits/rejected": -2.6941633224487305, + "logps/chosen": -117.89253997802734, + "logps/rejected": -361.15313720703125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4914512634277344, + "rewards/margins": 6.9469523429870605, + "rewards/rejected": -10.438404083251953, + "step": 8486 + }, + { + "epoch": 1.32, + "learning_rate": 7.922624617459412e-06, + "logits/chosen": -1.7148523330688477, + "logits/rejected": -2.6898326873779297, + "logps/chosen": -89.77401733398438, + "logps/rejected": -248.87939453125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8653767108917236, + "rewards/margins": 7.480159759521484, + "rewards/rejected": -9.345537185668945, + "step": 8487 + }, + { + "epoch": 1.32, + "learning_rate": 7.921891176928264e-06, + "logits/chosen": -2.807263135910034, + "logits/rejected": -1.9809534549713135, + "logps/chosen": -367.70245361328125, + "logps/rejected": -341.3157958984375, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7598876953125, + "rewards/margins": 6.315980911254883, + "rewards/rejected": -11.075868606567383, + "step": 8488 + }, + { + "epoch": 1.32, + "learning_rate": 7.921157736397116e-06, + "logits/chosen": -2.844881057739258, + "logits/rejected": -2.815863609313965, + "logps/chosen": -544.3410034179688, + "logps/rejected": -561.5088500976562, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.632575035095215, + "rewards/margins": 6.760363578796387, + "rewards/rejected": -11.392938613891602, + "step": 8489 + }, + { + "epoch": 1.32, + "learning_rate": 7.920424295865968e-06, + "logits/chosen": -2.8785266876220703, + "logits/rejected": -1.3965327739715576, + "logps/chosen": -529.728759765625, + "logps/rejected": -169.99578857421875, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.547106981277466, + "rewards/margins": 5.2034711837768555, + "rewards/rejected": -8.750577926635742, + "step": 8490 + }, + { + "epoch": 1.32, + "learning_rate": 7.91969085533482e-06, + "logits/chosen": -2.844205141067505, + "logits/rejected": -2.9446561336517334, + "logps/chosen": -147.93353271484375, + "logps/rejected": -205.97096252441406, + "loss": 0.0429, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7455506324768066, + "rewards/margins": 4.063920974731445, + "rewards/rejected": -6.809471607208252, + "step": 8491 + }, + { + "epoch": 1.32, + "learning_rate": 7.918957414803673e-06, + "logits/chosen": -2.081928253173828, + "logits/rejected": -2.717872381210327, + "logps/chosen": -153.79180908203125, + "logps/rejected": -312.69097900390625, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4122674465179443, + "rewards/margins": 5.340066909790039, + "rewards/rejected": -8.752334594726562, + "step": 8492 + }, + { + "epoch": 1.32, + "learning_rate": 7.918223974272525e-06, + "logits/chosen": -1.457251787185669, + "logits/rejected": -2.8418359756469727, + "logps/chosen": -105.078125, + "logps/rejected": -289.19744873046875, + "loss": 0.0674, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.220792770385742, + "rewards/margins": 4.648514747619629, + "rewards/rejected": -8.869306564331055, + "step": 8493 + }, + { + "epoch": 1.32, + "learning_rate": 7.917490533741377e-06, + "logits/chosen": -2.828796625137329, + "logits/rejected": -3.1302285194396973, + "logps/chosen": -161.69439697265625, + "logps/rejected": -260.0052490234375, + "loss": 0.2515, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.353078842163086, + "rewards/margins": 5.763383388519287, + "rewards/rejected": -8.116462707519531, + "step": 8494 + }, + { + "epoch": 1.32, + "learning_rate": 7.916757093210229e-06, + "logits/chosen": -1.533033847808838, + "logits/rejected": -2.773416519165039, + "logps/chosen": -84.39509582519531, + "logps/rejected": -248.57022094726562, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6129403114318848, + "rewards/margins": 7.086256980895996, + "rewards/rejected": -8.699196815490723, + "step": 8495 + }, + { + "epoch": 1.32, + "learning_rate": 7.91602365267908e-06, + "logits/chosen": -3.001521110534668, + "logits/rejected": -3.0193965435028076, + "logps/chosen": -213.60569763183594, + "logps/rejected": -167.51739501953125, + "loss": 0.2098, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8050942420959473, + "rewards/margins": 2.344602108001709, + "rewards/rejected": -6.149696350097656, + "step": 8496 + }, + { + "epoch": 1.32, + "learning_rate": 7.915290212147933e-06, + "logits/chosen": -1.2644007205963135, + "logits/rejected": -2.52441143989563, + "logps/chosen": -122.39540100097656, + "logps/rejected": -337.18389892578125, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.041608810424805, + "rewards/margins": 7.867339611053467, + "rewards/rejected": -11.90894889831543, + "step": 8497 + }, + { + "epoch": 1.32, + "learning_rate": 7.914556771616784e-06, + "logits/chosen": -2.384948492050171, + "logits/rejected": -2.9599685668945312, + "logps/chosen": -407.21734619140625, + "logps/rejected": -511.3369140625, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7208404541015625, + "rewards/margins": 7.733180046081543, + "rewards/rejected": -11.454021453857422, + "step": 8498 + }, + { + "epoch": 1.32, + "learning_rate": 7.913823331085636e-06, + "logits/chosen": -2.9706759452819824, + "logits/rejected": -2.48024320602417, + "logps/chosen": -157.88499450683594, + "logps/rejected": -291.0948486328125, + "loss": 0.259, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.487569808959961, + "rewards/margins": 6.216780662536621, + "rewards/rejected": -10.704350471496582, + "step": 8499 + }, + { + "epoch": 1.32, + "learning_rate": 7.913089890554488e-06, + "logits/chosen": -2.7359492778778076, + "logits/rejected": -2.4933104515075684, + "logps/chosen": -312.8779296875, + "logps/rejected": -169.81863403320312, + "loss": 4.0142, + "rewards/accuracies": 0.0, + "rewards/chosen": -10.763486862182617, + "rewards/margins": -3.9958789348602295, + "rewards/rejected": -6.767608642578125, + "step": 8500 + }, + { + "epoch": 1.32, + "learning_rate": 7.912356450023342e-06, + "logits/chosen": -2.124427080154419, + "logits/rejected": -2.8753416538238525, + "logps/chosen": -178.05328369140625, + "logps/rejected": -531.7583618164062, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5004730224609375, + "rewards/margins": 6.811221122741699, + "rewards/rejected": -11.311695098876953, + "step": 8501 + }, + { + "epoch": 1.32, + "learning_rate": 7.911623009492194e-06, + "logits/chosen": -2.848148822784424, + "logits/rejected": -2.063809394836426, + "logps/chosen": -132.47120666503906, + "logps/rejected": -150.19094848632812, + "loss": 0.941, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.193025588989258, + "rewards/margins": 3.065779209136963, + "rewards/rejected": -6.258804798126221, + "step": 8502 + }, + { + "epoch": 1.32, + "learning_rate": 7.910889568961045e-06, + "logits/chosen": -2.9681050777435303, + "logits/rejected": -2.018095016479492, + "logps/chosen": -232.53433227539062, + "logps/rejected": -234.59506225585938, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.691852569580078, + "rewards/margins": 7.339328765869141, + "rewards/rejected": -11.031181335449219, + "step": 8503 + }, + { + "epoch": 1.32, + "learning_rate": 7.910156128429897e-06, + "logits/chosen": -1.620678424835205, + "logits/rejected": -2.852759838104248, + "logps/chosen": -131.00320434570312, + "logps/rejected": -375.5777587890625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.697322130203247, + "rewards/margins": 7.603729248046875, + "rewards/rejected": -9.301051139831543, + "step": 8504 + }, + { + "epoch": 1.32, + "learning_rate": 7.90942268789875e-06, + "logits/chosen": -2.287400245666504, + "logits/rejected": -1.974615216255188, + "logps/chosen": -697.37890625, + "logps/rejected": -593.135498046875, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.212830543518066, + "rewards/margins": 7.178365707397461, + "rewards/rejected": -12.391196250915527, + "step": 8505 + }, + { + "epoch": 1.32, + "learning_rate": 7.908689247367603e-06, + "logits/chosen": -2.713688373565674, + "logits/rejected": -2.7022533416748047, + "logps/chosen": -146.92596435546875, + "logps/rejected": -342.4302673339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3654656410217285, + "rewards/margins": 11.194890022277832, + "rewards/rejected": -14.560356140136719, + "step": 8506 + }, + { + "epoch": 1.32, + "learning_rate": 7.907955806836455e-06, + "logits/chosen": -2.800379514694214, + "logits/rejected": -3.026390790939331, + "logps/chosen": -80.90199279785156, + "logps/rejected": -265.3772277832031, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8148794174194336, + "rewards/margins": 6.443122863769531, + "rewards/rejected": -10.258001327514648, + "step": 8507 + }, + { + "epoch": 1.32, + "learning_rate": 7.907222366305307e-06, + "logits/chosen": -2.7353806495666504, + "logits/rejected": -2.3732306957244873, + "logps/chosen": -95.19366455078125, + "logps/rejected": -233.77272033691406, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.095212459564209, + "rewards/margins": 7.2538557052612305, + "rewards/rejected": -9.349067687988281, + "step": 8508 + }, + { + "epoch": 1.32, + "learning_rate": 7.906488925774158e-06, + "logits/chosen": -2.6073877811431885, + "logits/rejected": -2.751035690307617, + "logps/chosen": -261.68157958984375, + "logps/rejected": -421.0655822753906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4555001258850098, + "rewards/margins": 13.56832218170166, + "rewards/rejected": -16.023822784423828, + "step": 8509 + }, + { + "epoch": 1.32, + "learning_rate": 7.905755485243012e-06, + "logits/chosen": -2.709655284881592, + "logits/rejected": -2.5642013549804688, + "logps/chosen": -311.8802795410156, + "logps/rejected": -255.60960388183594, + "loss": 2.236, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.032874584197998, + "rewards/margins": 1.928718090057373, + "rewards/rejected": -7.961592674255371, + "step": 8510 + }, + { + "epoch": 1.32, + "learning_rate": 7.905022044711864e-06, + "logits/chosen": -2.604825496673584, + "logits/rejected": -1.0372257232666016, + "logps/chosen": -544.7355346679688, + "logps/rejected": -96.73348999023438, + "loss": 4.1319, + "rewards/accuracies": 0.0, + "rewards/chosen": -9.226476669311523, + "rewards/margins": -4.115683555603027, + "rewards/rejected": -5.1107940673828125, + "step": 8511 + }, + { + "epoch": 1.32, + "learning_rate": 7.904288604180716e-06, + "logits/chosen": -2.61875057220459, + "logits/rejected": -2.877959966659546, + "logps/chosen": -61.62870788574219, + "logps/rejected": -254.03900146484375, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0452847480773926, + "rewards/margins": 6.035950660705566, + "rewards/rejected": -9.0812349319458, + "step": 8512 + }, + { + "epoch": 1.32, + "learning_rate": 7.903555163649568e-06, + "logits/chosen": -2.9845802783966064, + "logits/rejected": -2.215296745300293, + "logps/chosen": -506.63812255859375, + "logps/rejected": -423.2687683105469, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8651511669158936, + "rewards/margins": 9.802448272705078, + "rewards/rejected": -11.66759967803955, + "step": 8513 + }, + { + "epoch": 1.32, + "learning_rate": 7.90282172311842e-06, + "logits/chosen": -2.2605721950531006, + "logits/rejected": -2.6661531925201416, + "logps/chosen": -134.66738891601562, + "logps/rejected": -244.30967712402344, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.810645341873169, + "rewards/margins": 6.66838264465332, + "rewards/rejected": -9.479028701782227, + "step": 8514 + }, + { + "epoch": 1.32, + "learning_rate": 7.902088282587271e-06, + "logits/chosen": -2.5256409645080566, + "logits/rejected": -2.9445433616638184, + "logps/chosen": -334.9704895019531, + "logps/rejected": -680.782470703125, + "loss": 0.0565, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.175114631652832, + "rewards/margins": 5.682426452636719, + "rewards/rejected": -9.85754108428955, + "step": 8515 + }, + { + "epoch": 1.32, + "learning_rate": 7.901354842056123e-06, + "logits/chosen": -1.8278777599334717, + "logits/rejected": -2.3704097270965576, + "logps/chosen": -75.83222961425781, + "logps/rejected": -351.68426513671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.864947557449341, + "rewards/margins": 10.378578186035156, + "rewards/rejected": -13.243526458740234, + "step": 8516 + }, + { + "epoch": 1.32, + "learning_rate": 7.900621401524975e-06, + "logits/chosen": -2.8972766399383545, + "logits/rejected": -1.6471575498580933, + "logps/chosen": -535.371337890625, + "logps/rejected": -233.61843872070312, + "loss": 0.2294, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.150606632232666, + "rewards/margins": 1.6957169771194458, + "rewards/rejected": -5.846323490142822, + "step": 8517 + }, + { + "epoch": 1.32, + "learning_rate": 7.899887960993827e-06, + "logits/chosen": -2.2678627967834473, + "logits/rejected": -2.875683069229126, + "logps/chosen": -343.7794494628906, + "logps/rejected": -531.0472412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5574195981025696, + "rewards/margins": 9.258092880249023, + "rewards/rejected": -9.815511703491211, + "step": 8518 + }, + { + "epoch": 1.32, + "learning_rate": 7.89915452046268e-06, + "logits/chosen": -2.0529212951660156, + "logits/rejected": -3.001019239425659, + "logps/chosen": -411.1961975097656, + "logps/rejected": -553.78466796875, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.227616786956787, + "rewards/margins": 5.228792190551758, + "rewards/rejected": -8.456409454345703, + "step": 8519 + }, + { + "epoch": 1.33, + "learning_rate": 7.898421079931532e-06, + "logits/chosen": -1.4204360246658325, + "logits/rejected": -2.8252005577087402, + "logps/chosen": -226.63235473632812, + "logps/rejected": -600.6177978515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0846314430236816, + "rewards/margins": 10.067317962646484, + "rewards/rejected": -13.151948928833008, + "step": 8520 + }, + { + "epoch": 1.33, + "learning_rate": 7.897687639400384e-06, + "logits/chosen": -1.1806061267852783, + "logits/rejected": -2.8319098949432373, + "logps/chosen": -232.5550079345703, + "logps/rejected": -485.44464111328125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6985976696014404, + "rewards/margins": 6.32757568359375, + "rewards/rejected": -9.02617359161377, + "step": 8521 + }, + { + "epoch": 1.33, + "learning_rate": 7.896954198869236e-06, + "logits/chosen": -3.074632406234741, + "logits/rejected": -2.629296064376831, + "logps/chosen": -198.2332763671875, + "logps/rejected": -215.36669921875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5307226181030273, + "rewards/margins": 6.627150058746338, + "rewards/rejected": -9.157873153686523, + "step": 8522 + }, + { + "epoch": 1.33, + "learning_rate": 7.896220758338088e-06, + "logits/chosen": -2.4467241764068604, + "logits/rejected": -3.014979124069214, + "logps/chosen": -643.2880249023438, + "logps/rejected": -478.86572265625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.731604814529419, + "rewards/margins": 7.538288593292236, + "rewards/rejected": -10.269893646240234, + "step": 8523 + }, + { + "epoch": 1.33, + "learning_rate": 7.89548731780694e-06, + "logits/chosen": -3.077758550643921, + "logits/rejected": -2.898320436477661, + "logps/chosen": -163.4720001220703, + "logps/rejected": -137.98977661132812, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36130768060684204, + "rewards/margins": 5.823474884033203, + "rewards/rejected": -6.184782981872559, + "step": 8524 + }, + { + "epoch": 1.33, + "learning_rate": 7.894753877275792e-06, + "logits/chosen": -2.9561715126037598, + "logits/rejected": -2.276681900024414, + "logps/chosen": -207.28485107421875, + "logps/rejected": -222.4566192626953, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.411505937576294, + "rewards/margins": 7.533747673034668, + "rewards/rejected": -10.945253372192383, + "step": 8525 + }, + { + "epoch": 1.33, + "learning_rate": 7.894020436744644e-06, + "logits/chosen": -2.4826529026031494, + "logits/rejected": -2.9290578365325928, + "logps/chosen": -626.5250244140625, + "logps/rejected": -589.15869140625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.266826152801514, + "rewards/margins": 5.231959819793701, + "rewards/rejected": -10.498785972595215, + "step": 8526 + }, + { + "epoch": 1.33, + "learning_rate": 7.893286996213496e-06, + "logits/chosen": -2.402662515640259, + "logits/rejected": -2.9369149208068848, + "logps/chosen": -376.25872802734375, + "logps/rejected": -568.90234375, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5250444412231445, + "rewards/margins": 5.203937530517578, + "rewards/rejected": -8.728981971740723, + "step": 8527 + }, + { + "epoch": 1.33, + "learning_rate": 7.89255355568235e-06, + "logits/chosen": -2.697861671447754, + "logits/rejected": -2.3921167850494385, + "logps/chosen": -179.8075714111328, + "logps/rejected": -314.7110595703125, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.485908508300781, + "rewards/margins": 6.437322616577148, + "rewards/rejected": -10.92323112487793, + "step": 8528 + }, + { + "epoch": 1.33, + "learning_rate": 7.891820115151201e-06, + "logits/chosen": -2.7271528244018555, + "logits/rejected": -2.5359387397766113, + "logps/chosen": -383.35699462890625, + "logps/rejected": -244.0849609375, + "loss": 0.3399, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.922399044036865, + "rewards/margins": 2.584088087081909, + "rewards/rejected": -8.506486892700195, + "step": 8529 + }, + { + "epoch": 1.33, + "learning_rate": 7.891086674620053e-06, + "logits/chosen": -1.8684132099151611, + "logits/rejected": -2.901327133178711, + "logps/chosen": -92.57220458984375, + "logps/rejected": -250.07752990722656, + "loss": 0.0973, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8756513595581055, + "rewards/margins": 3.3925838470458984, + "rewards/rejected": -8.268235206604004, + "step": 8530 + }, + { + "epoch": 1.33, + "learning_rate": 7.890353234088905e-06, + "logits/chosen": -3.0339713096618652, + "logits/rejected": -2.877168655395508, + "logps/chosen": -399.5029602050781, + "logps/rejected": -426.23046875, + "loss": 0.4837, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.262457370758057, + "rewards/margins": 3.531155824661255, + "rewards/rejected": -8.79361343383789, + "step": 8531 + }, + { + "epoch": 1.33, + "learning_rate": 7.889619793557757e-06, + "logits/chosen": -2.7522852420806885, + "logits/rejected": -0.7923092842102051, + "logps/chosen": -606.3236694335938, + "logps/rejected": -176.4637451171875, + "loss": 0.3953, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.211158752441406, + "rewards/margins": 3.2386016845703125, + "rewards/rejected": -9.449760437011719, + "step": 8532 + }, + { + "epoch": 1.33, + "learning_rate": 7.888886353026609e-06, + "logits/chosen": -2.647578239440918, + "logits/rejected": -3.012407064437866, + "logps/chosen": -185.80899047851562, + "logps/rejected": -382.59423828125, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8582425117492676, + "rewards/margins": 7.368607044219971, + "rewards/rejected": -11.226849555969238, + "step": 8533 + }, + { + "epoch": 1.33, + "learning_rate": 7.88815291249546e-06, + "logits/chosen": -2.5657477378845215, + "logits/rejected": -2.9208791255950928, + "logps/chosen": -159.8771209716797, + "logps/rejected": -318.67950439453125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.550893783569336, + "rewards/margins": 8.719277381896973, + "rewards/rejected": -13.270172119140625, + "step": 8534 + }, + { + "epoch": 1.33, + "learning_rate": 7.887419471964312e-06, + "logits/chosen": -2.53309965133667, + "logits/rejected": -2.905470371246338, + "logps/chosen": -434.45477294921875, + "logps/rejected": -640.5057373046875, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.955021858215332, + "rewards/margins": 6.373217582702637, + "rewards/rejected": -11.328239440917969, + "step": 8535 + }, + { + "epoch": 1.33, + "learning_rate": 7.886686031433164e-06, + "logits/chosen": -2.9879727363586426, + "logits/rejected": -1.9059377908706665, + "logps/chosen": -861.3154296875, + "logps/rejected": -427.1670837402344, + "loss": 0.6133, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.6922712326049805, + "rewards/margins": 4.12407112121582, + "rewards/rejected": -9.8163423538208, + "step": 8536 + }, + { + "epoch": 1.33, + "learning_rate": 7.885952590902018e-06, + "logits/chosen": -2.359801769256592, + "logits/rejected": -2.933124542236328, + "logps/chosen": -97.58517456054688, + "logps/rejected": -203.8524169921875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0875227451324463, + "rewards/margins": 6.798375129699707, + "rewards/rejected": -7.885897636413574, + "step": 8537 + }, + { + "epoch": 1.33, + "learning_rate": 7.88521915037087e-06, + "logits/chosen": -2.792201042175293, + "logits/rejected": -2.926804542541504, + "logps/chosen": -139.6487579345703, + "logps/rejected": -166.02964782714844, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8503246307373047, + "rewards/margins": 5.310178279876709, + "rewards/rejected": -9.160503387451172, + "step": 8538 + }, + { + "epoch": 1.33, + "learning_rate": 7.884485709839722e-06, + "logits/chosen": -3.020601272583008, + "logits/rejected": -2.3791394233703613, + "logps/chosen": -559.8349609375, + "logps/rejected": -407.8614501953125, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.79989767074585, + "rewards/margins": 5.210972785949707, + "rewards/rejected": -10.010869979858398, + "step": 8539 + }, + { + "epoch": 1.33, + "learning_rate": 7.883752269308575e-06, + "logits/chosen": -2.5019702911376953, + "logits/rejected": -2.8516297340393066, + "logps/chosen": -1039.141357421875, + "logps/rejected": -1384.513427734375, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.273499965667725, + "rewards/margins": 12.499327659606934, + "rewards/rejected": -16.7728271484375, + "step": 8540 + }, + { + "epoch": 1.33, + "learning_rate": 7.883018828777427e-06, + "logits/chosen": -2.798093557357788, + "logits/rejected": -2.58412766456604, + "logps/chosen": -182.41217041015625, + "logps/rejected": -219.2449951171875, + "loss": 0.4432, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7667410373687744, + "rewards/margins": 3.40576171875, + "rewards/rejected": -6.1725029945373535, + "step": 8541 + }, + { + "epoch": 1.33, + "learning_rate": 7.882285388246279e-06, + "logits/chosen": -2.758303642272949, + "logits/rejected": -1.9962728023529053, + "logps/chosen": -266.3642272949219, + "logps/rejected": -461.8634948730469, + "loss": 0.2018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.062656402587891, + "rewards/margins": 4.250392913818359, + "rewards/rejected": -9.31304931640625, + "step": 8542 + }, + { + "epoch": 1.33, + "learning_rate": 7.88155194771513e-06, + "logits/chosen": -2.7136738300323486, + "logits/rejected": -3.049922466278076, + "logps/chosen": -77.5928955078125, + "logps/rejected": -227.8582000732422, + "loss": 0.0809, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.817502498626709, + "rewards/margins": 4.69113826751709, + "rewards/rejected": -9.50864028930664, + "step": 8543 + }, + { + "epoch": 1.33, + "learning_rate": 7.880818507183983e-06, + "logits/chosen": -1.5884883403778076, + "logits/rejected": -2.723407030105591, + "logps/chosen": -184.47647094726562, + "logps/rejected": -313.7887878417969, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.760758399963379, + "rewards/margins": 6.205577850341797, + "rewards/rejected": -10.966336250305176, + "step": 8544 + }, + { + "epoch": 1.33, + "learning_rate": 7.880085066652835e-06, + "logits/chosen": -2.410973072052002, + "logits/rejected": -2.9413087368011475, + "logps/chosen": -529.6887817382812, + "logps/rejected": -662.141845703125, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.808198928833008, + "rewards/margins": 5.59597110748291, + "rewards/rejected": -10.404170036315918, + "step": 8545 + }, + { + "epoch": 1.33, + "learning_rate": 7.879351626121688e-06, + "logits/chosen": -2.3596317768096924, + "logits/rejected": -3.004340887069702, + "logps/chosen": -50.10125732421875, + "logps/rejected": -348.78033447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1792054176330566, + "rewards/margins": 10.72996711730957, + "rewards/rejected": -13.909172058105469, + "step": 8546 + }, + { + "epoch": 1.33, + "learning_rate": 7.87861818559054e-06, + "logits/chosen": -2.445446252822876, + "logits/rejected": -2.807016372680664, + "logps/chosen": -107.07862091064453, + "logps/rejected": -240.97573852539062, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.704403877258301, + "rewards/margins": 4.502504348754883, + "rewards/rejected": -12.206908226013184, + "step": 8547 + }, + { + "epoch": 1.33, + "learning_rate": 7.877884745059392e-06, + "logits/chosen": -2.34519100189209, + "logits/rejected": -2.867326498031616, + "logps/chosen": -325.58575439453125, + "logps/rejected": -426.0826110839844, + "loss": 0.0333, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.009131908416748, + "rewards/margins": 6.576658248901367, + "rewards/rejected": -11.585790634155273, + "step": 8548 + }, + { + "epoch": 1.33, + "learning_rate": 7.877151304528244e-06, + "logits/chosen": -2.9577879905700684, + "logits/rejected": -1.1018693447113037, + "logps/chosen": -474.19207763671875, + "logps/rejected": -433.539306640625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6807007789611816, + "rewards/margins": 6.520613670349121, + "rewards/rejected": -8.201313972473145, + "step": 8549 + }, + { + "epoch": 1.33, + "learning_rate": 7.876417863997096e-06, + "logits/chosen": -3.0277652740478516, + "logits/rejected": -1.7010290622711182, + "logps/chosen": -336.95501708984375, + "logps/rejected": -282.1441955566406, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.966316282749176, + "rewards/margins": 7.702921390533447, + "rewards/rejected": -8.669238090515137, + "step": 8550 + }, + { + "epoch": 1.33, + "learning_rate": 7.875684423465947e-06, + "logits/chosen": -2.3984100818634033, + "logits/rejected": -2.6297574043273926, + "logps/chosen": -186.104736328125, + "logps/rejected": -318.7925720214844, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.180030345916748, + "rewards/margins": 7.437936782836914, + "rewards/rejected": -9.61796760559082, + "step": 8551 + }, + { + "epoch": 1.33, + "learning_rate": 7.8749509829348e-06, + "logits/chosen": -2.027600049972534, + "logits/rejected": -2.7136571407318115, + "logps/chosen": -101.58302307128906, + "logps/rejected": -274.522216796875, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.910879135131836, + "rewards/margins": 6.044712066650391, + "rewards/rejected": -9.955591201782227, + "step": 8552 + }, + { + "epoch": 1.33, + "learning_rate": 7.874217542403651e-06, + "logits/chosen": -1.765464186668396, + "logits/rejected": -2.944582462310791, + "logps/chosen": -67.38702392578125, + "logps/rejected": -200.49551391601562, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4335546493530273, + "rewards/margins": 6.076326370239258, + "rewards/rejected": -8.509881019592285, + "step": 8553 + }, + { + "epoch": 1.33, + "learning_rate": 7.873484101872503e-06, + "logits/chosen": -1.5428686141967773, + "logits/rejected": -2.718278408050537, + "logps/chosen": -96.46056365966797, + "logps/rejected": -247.8350830078125, + "loss": 0.0421, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.258963584899902, + "rewards/margins": 3.177475929260254, + "rewards/rejected": -7.436439514160156, + "step": 8554 + }, + { + "epoch": 1.33, + "learning_rate": 7.872750661341357e-06, + "logits/chosen": -2.8829097747802734, + "logits/rejected": -2.1059978008270264, + "logps/chosen": -291.3929443359375, + "logps/rejected": -289.7672119140625, + "loss": 1.9771, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.955778121948242, + "rewards/margins": 2.435540199279785, + "rewards/rejected": -11.391319274902344, + "step": 8555 + }, + { + "epoch": 1.33, + "learning_rate": 7.872017220810209e-06, + "logits/chosen": -2.811732292175293, + "logits/rejected": -3.055868625640869, + "logps/chosen": -345.86895751953125, + "logps/rejected": -412.6804504394531, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7478852272033691, + "rewards/margins": 6.028848171234131, + "rewards/rejected": -7.7767333984375, + "step": 8556 + }, + { + "epoch": 1.33, + "learning_rate": 7.87128378027906e-06, + "logits/chosen": -2.455699920654297, + "logits/rejected": -3.062835931777954, + "logps/chosen": -88.42059326171875, + "logps/rejected": -199.21298217773438, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.941588878631592, + "rewards/margins": 5.860681056976318, + "rewards/rejected": -9.80226993560791, + "step": 8557 + }, + { + "epoch": 1.33, + "learning_rate": 7.870550339747912e-06, + "logits/chosen": -3.0341813564300537, + "logits/rejected": -2.528982162475586, + "logps/chosen": -113.77989959716797, + "logps/rejected": -359.9794616699219, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3802590370178223, + "rewards/margins": 7.781369209289551, + "rewards/rejected": -10.161628723144531, + "step": 8558 + }, + { + "epoch": 1.33, + "learning_rate": 7.869816899216764e-06, + "logits/chosen": -2.645545482635498, + "logits/rejected": -2.791036367416382, + "logps/chosen": -119.85629272460938, + "logps/rejected": -279.9442443847656, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6949028968811035, + "rewards/margins": 6.112823009490967, + "rewards/rejected": -9.80772590637207, + "step": 8559 + }, + { + "epoch": 1.33, + "learning_rate": 7.869083458685616e-06, + "logits/chosen": -2.859299898147583, + "logits/rejected": -2.9137160778045654, + "logps/chosen": -240.6669921875, + "logps/rejected": -332.1784362792969, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.617193222045898, + "rewards/margins": 6.867209434509277, + "rewards/rejected": -11.48440170288086, + "step": 8560 + }, + { + "epoch": 1.33, + "learning_rate": 7.868350018154468e-06, + "logits/chosen": -0.3336210250854492, + "logits/rejected": -2.0650618076324463, + "logps/chosen": -160.95187377929688, + "logps/rejected": -580.6668701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.476944923400879, + "rewards/margins": 17.627460479736328, + "rewards/rejected": -22.10440444946289, + "step": 8561 + }, + { + "epoch": 1.33, + "learning_rate": 7.86761657762332e-06, + "logits/chosen": -1.8774783611297607, + "logits/rejected": -2.888157606124878, + "logps/chosen": -109.11289978027344, + "logps/rejected": -402.67742919921875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6435487270355225, + "rewards/margins": 7.905391693115234, + "rewards/rejected": -11.548940658569336, + "step": 8562 + }, + { + "epoch": 1.33, + "learning_rate": 7.866883137092172e-06, + "logits/chosen": -2.3946330547332764, + "logits/rejected": -3.0954666137695312, + "logps/chosen": -107.21342468261719, + "logps/rejected": -267.87176513671875, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.017084121704102, + "rewards/margins": 5.621294021606445, + "rewards/rejected": -11.638378143310547, + "step": 8563 + }, + { + "epoch": 1.33, + "learning_rate": 7.866149696561025e-06, + "logits/chosen": -2.6372861862182617, + "logits/rejected": -2.599548101425171, + "logps/chosen": -549.0506591796875, + "logps/rejected": -394.74822998046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.173992872238159, + "rewards/margins": 7.971803665161133, + "rewards/rejected": -11.145795822143555, + "step": 8564 + }, + { + "epoch": 1.33, + "learning_rate": 7.865416256029877e-06, + "logits/chosen": -2.1675424575805664, + "logits/rejected": -2.9224436283111572, + "logps/chosen": -137.83462524414062, + "logps/rejected": -347.3025817871094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5667476654052734, + "rewards/margins": 9.034677505493164, + "rewards/rejected": -11.601425170898438, + "step": 8565 + }, + { + "epoch": 1.33, + "learning_rate": 7.864682815498729e-06, + "logits/chosen": -2.551103353500366, + "logits/rejected": -0.979660153388977, + "logps/chosen": -198.69622802734375, + "logps/rejected": -187.1768798828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7258408069610596, + "rewards/margins": 9.604684829711914, + "rewards/rejected": -13.330526351928711, + "step": 8566 + }, + { + "epoch": 1.33, + "learning_rate": 7.863949374967581e-06, + "logits/chosen": -2.3750698566436768, + "logits/rejected": -2.0476717948913574, + "logps/chosen": -178.45155334472656, + "logps/rejected": -277.52099609375, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3962225914001465, + "rewards/margins": 5.235696792602539, + "rewards/rejected": -8.631918907165527, + "step": 8567 + }, + { + "epoch": 1.33, + "learning_rate": 7.863215934436433e-06, + "logits/chosen": -1.8379509449005127, + "logits/rejected": -2.482752799987793, + "logps/chosen": -169.05386352539062, + "logps/rejected": -466.0715637207031, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.214632034301758, + "rewards/margins": 9.54210090637207, + "rewards/rejected": -13.756732940673828, + "step": 8568 + }, + { + "epoch": 1.33, + "learning_rate": 7.862482493905285e-06, + "logits/chosen": -2.916966199874878, + "logits/rejected": -2.7997500896453857, + "logps/chosen": -824.1444702148438, + "logps/rejected": -604.546142578125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.557939052581787, + "rewards/margins": 6.886102199554443, + "rewards/rejected": -10.44404125213623, + "step": 8569 + }, + { + "epoch": 1.33, + "learning_rate": 7.861749053374137e-06, + "logits/chosen": -2.6622836589813232, + "logits/rejected": -2.973417043685913, + "logps/chosen": -248.29473876953125, + "logps/rejected": -303.4510498046875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1604065895080566, + "rewards/margins": 6.378864288330078, + "rewards/rejected": -9.539270401000977, + "step": 8570 + }, + { + "epoch": 1.33, + "learning_rate": 7.861015612842988e-06, + "logits/chosen": -2.820460081100464, + "logits/rejected": -1.406353235244751, + "logps/chosen": -389.67236328125, + "logps/rejected": -217.73272705078125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.155060768127441, + "rewards/margins": 8.033662796020508, + "rewards/rejected": -13.188722610473633, + "step": 8571 + }, + { + "epoch": 1.33, + "learning_rate": 7.860282172311842e-06, + "logits/chosen": -2.986440658569336, + "logits/rejected": -2.8674471378326416, + "logps/chosen": -113.10505676269531, + "logps/rejected": -343.03045654296875, + "loss": 0.1351, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.924753189086914, + "rewards/margins": 5.2155914306640625, + "rewards/rejected": -8.140344619750977, + "step": 8572 + }, + { + "epoch": 1.33, + "learning_rate": 7.859548731780694e-06, + "logits/chosen": -2.7258520126342773, + "logits/rejected": -2.598823070526123, + "logps/chosen": -153.22320556640625, + "logps/rejected": -208.49139404296875, + "loss": 0.1521, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.291685104370117, + "rewards/margins": 4.557433605194092, + "rewards/rejected": -8.84911823272705, + "step": 8573 + }, + { + "epoch": 1.33, + "learning_rate": 7.858815291249547e-06, + "logits/chosen": -2.259537935256958, + "logits/rejected": -2.6387107372283936, + "logps/chosen": -168.53623962402344, + "logps/rejected": -187.30062866210938, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9588022232055664, + "rewards/margins": 5.013448715209961, + "rewards/rejected": -8.972251892089844, + "step": 8574 + }, + { + "epoch": 1.33, + "learning_rate": 7.8580818507184e-06, + "logits/chosen": -2.959916353225708, + "logits/rejected": -3.041707754135132, + "logps/chosen": -269.50946044921875, + "logps/rejected": -303.15924072265625, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8068710565567017, + "rewards/margins": 7.246511459350586, + "rewards/rejected": -8.053382873535156, + "step": 8575 + }, + { + "epoch": 1.33, + "learning_rate": 7.857348410187251e-06, + "logits/chosen": -1.8995798826217651, + "logits/rejected": -3.043328046798706, + "logps/chosen": -635.21923828125, + "logps/rejected": -665.3424682617188, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.526135444641113, + "rewards/margins": 5.693799018859863, + "rewards/rejected": -10.219934463500977, + "step": 8576 + }, + { + "epoch": 1.33, + "learning_rate": 7.856614969656103e-06, + "logits/chosen": -2.230853796005249, + "logits/rejected": -2.8010993003845215, + "logps/chosen": -59.43896484375, + "logps/rejected": -225.7285919189453, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5463829040527344, + "rewards/margins": 6.027050018310547, + "rewards/rejected": -9.573432922363281, + "step": 8577 + }, + { + "epoch": 1.33, + "learning_rate": 7.855881529124955e-06, + "logits/chosen": -2.8701281547546387, + "logits/rejected": -1.8640655279159546, + "logps/chosen": -205.09579467773438, + "logps/rejected": -183.62051391601562, + "loss": 0.6051, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.700876235961914, + "rewards/margins": 1.7959098815917969, + "rewards/rejected": -10.496786117553711, + "step": 8578 + }, + { + "epoch": 1.33, + "learning_rate": 7.855148088593807e-06, + "logits/chosen": -3.0706796646118164, + "logits/rejected": -2.1621921062469482, + "logps/chosen": -210.63565063476562, + "logps/rejected": -161.10482788085938, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1613070964813232, + "rewards/margins": 7.808976650238037, + "rewards/rejected": -8.970283508300781, + "step": 8579 + }, + { + "epoch": 1.33, + "learning_rate": 7.854414648062659e-06, + "logits/chosen": -2.8046493530273438, + "logits/rejected": -2.4135451316833496, + "logps/chosen": -292.2598571777344, + "logps/rejected": -465.1332092285156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8464765548706055, + "rewards/margins": 11.151585578918457, + "rewards/rejected": -13.998062133789062, + "step": 8580 + }, + { + "epoch": 1.33, + "learning_rate": 7.853681207531512e-06, + "logits/chosen": -2.434537887573242, + "logits/rejected": -3.0577304363250732, + "logps/chosen": -180.15798950195312, + "logps/rejected": -373.63861083984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8476204872131348, + "rewards/margins": 9.90565299987793, + "rewards/rejected": -12.753273010253906, + "step": 8581 + }, + { + "epoch": 1.33, + "learning_rate": 7.852947767000364e-06, + "logits/chosen": -2.3878095149993896, + "logits/rejected": -2.952332019805908, + "logps/chosen": -106.48297119140625, + "logps/rejected": -249.77162170410156, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.714613437652588, + "rewards/margins": 5.305751800537109, + "rewards/rejected": -10.020364761352539, + "step": 8582 + }, + { + "epoch": 1.33, + "learning_rate": 7.852214326469216e-06, + "logits/chosen": -2.9951059818267822, + "logits/rejected": -2.2899460792541504, + "logps/chosen": -860.5166015625, + "logps/rejected": -516.4540405273438, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.513284683227539, + "rewards/margins": 6.903359413146973, + "rewards/rejected": -9.416644096374512, + "step": 8583 + }, + { + "epoch": 1.33, + "learning_rate": 7.851480885938068e-06, + "logits/chosen": -2.598435163497925, + "logits/rejected": -2.935288429260254, + "logps/chosen": -434.83087158203125, + "logps/rejected": -200.08297729492188, + "loss": 0.0353, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.97450065612793, + "rewards/margins": 3.8441643714904785, + "rewards/rejected": -9.81866455078125, + "step": 8584 + }, + { + "epoch": 1.34, + "learning_rate": 7.85074744540692e-06, + "logits/chosen": -1.78520667552948, + "logits/rejected": -2.37178897857666, + "logps/chosen": -280.97686767578125, + "logps/rejected": -482.743408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.496858596801758, + "rewards/margins": 10.87568473815918, + "rewards/rejected": -17.372543334960938, + "step": 8585 + }, + { + "epoch": 1.34, + "learning_rate": 7.850014004875772e-06, + "logits/chosen": -2.6546967029571533, + "logits/rejected": -2.9951012134552, + "logps/chosen": -190.411865234375, + "logps/rejected": -212.14825439453125, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.633889198303223, + "rewards/margins": 5.831794738769531, + "rewards/rejected": -10.465683937072754, + "step": 8586 + }, + { + "epoch": 1.34, + "learning_rate": 7.849280564344624e-06, + "logits/chosen": -2.8502860069274902, + "logits/rejected": -2.858292818069458, + "logps/chosen": -444.77410888671875, + "logps/rejected": -387.16290283203125, + "loss": 0.1228, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9791688919067383, + "rewards/margins": 5.0839667320251465, + "rewards/rejected": -8.063135147094727, + "step": 8587 + }, + { + "epoch": 1.34, + "learning_rate": 7.848547123813475e-06, + "logits/chosen": -2.3414664268493652, + "logits/rejected": -2.936797857284546, + "logps/chosen": -393.3756408691406, + "logps/rejected": -500.096435546875, + "loss": 1.1529, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.480335235595703, + "rewards/margins": 3.26607608795166, + "rewards/rejected": -9.74641227722168, + "step": 8588 + }, + { + "epoch": 1.34, + "learning_rate": 7.847813683282327e-06, + "logits/chosen": -2.7748069763183594, + "logits/rejected": -2.820333957672119, + "logps/chosen": -213.60789489746094, + "logps/rejected": -239.13873291015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.020754814147949, + "rewards/margins": 8.150550842285156, + "rewards/rejected": -13.171305656433105, + "step": 8589 + }, + { + "epoch": 1.34, + "learning_rate": 7.847080242751181e-06, + "logits/chosen": -0.7687280774116516, + "logits/rejected": -2.183907985687256, + "logps/chosen": -119.9074935913086, + "logps/rejected": -428.5535888671875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0535759925842285, + "rewards/margins": 9.460809707641602, + "rewards/rejected": -12.514386177062988, + "step": 8590 + }, + { + "epoch": 1.34, + "learning_rate": 7.846346802220033e-06, + "logits/chosen": -3.102289915084839, + "logits/rejected": -2.602153778076172, + "logps/chosen": -490.7544860839844, + "logps/rejected": -429.514404296875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.463897705078125, + "rewards/margins": 8.180951118469238, + "rewards/rejected": -11.644848823547363, + "step": 8591 + }, + { + "epoch": 1.34, + "learning_rate": 7.845613361688885e-06, + "logits/chosen": -1.9464560747146606, + "logits/rejected": -2.3713462352752686, + "logps/chosen": -671.4649658203125, + "logps/rejected": -766.285888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4362380504608154, + "rewards/margins": 13.395320892333984, + "rewards/rejected": -15.831560134887695, + "step": 8592 + }, + { + "epoch": 1.34, + "learning_rate": 7.844879921157737e-06, + "logits/chosen": -2.6260452270507812, + "logits/rejected": -2.7395808696746826, + "logps/chosen": -385.91558837890625, + "logps/rejected": -425.28857421875, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7504692077636719, + "rewards/margins": 8.48628044128418, + "rewards/rejected": -9.236749649047852, + "step": 8593 + }, + { + "epoch": 1.34, + "learning_rate": 7.844146480626588e-06, + "logits/chosen": -2.579925537109375, + "logits/rejected": -2.9102771282196045, + "logps/chosen": -602.1025390625, + "logps/rejected": -720.8839111328125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9728591442108154, + "rewards/margins": 8.17746353149414, + "rewards/rejected": -10.150323867797852, + "step": 8594 + }, + { + "epoch": 1.34, + "learning_rate": 7.84341304009544e-06, + "logits/chosen": -2.300736427307129, + "logits/rejected": -2.645047187805176, + "logps/chosen": -233.5516357421875, + "logps/rejected": -342.3805236816406, + "loss": 0.5545, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.238439083099365, + "rewards/margins": 4.598908424377441, + "rewards/rejected": -8.837347030639648, + "step": 8595 + }, + { + "epoch": 1.34, + "learning_rate": 7.842679599564292e-06, + "logits/chosen": -2.2357282638549805, + "logits/rejected": -2.8466579914093018, + "logps/chosen": -664.3505249023438, + "logps/rejected": -653.922119140625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.328639268875122, + "rewards/margins": 6.786103248596191, + "rewards/rejected": -10.114742279052734, + "step": 8596 + }, + { + "epoch": 1.34, + "learning_rate": 7.841946159033144e-06, + "logits/chosen": -1.3876816034317017, + "logits/rejected": -2.9057841300964355, + "logps/chosen": -365.5563659667969, + "logps/rejected": -469.4875793457031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.560459613800049, + "rewards/margins": 9.241495132446289, + "rewards/rejected": -11.80195426940918, + "step": 8597 + }, + { + "epoch": 1.34, + "learning_rate": 7.841212718501996e-06, + "logits/chosen": -2.2677812576293945, + "logits/rejected": -1.1465810537338257, + "logps/chosen": -178.848388671875, + "logps/rejected": -352.18896484375, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.289438247680664, + "rewards/margins": 4.537744522094727, + "rewards/rejected": -9.82718276977539, + "step": 8598 + }, + { + "epoch": 1.34, + "learning_rate": 7.84047927797085e-06, + "logits/chosen": -3.0135440826416016, + "logits/rejected": -2.3206164836883545, + "logps/chosen": -301.8538818359375, + "logps/rejected": -203.8196563720703, + "loss": 0.1005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6718811988830566, + "rewards/margins": 6.814908027648926, + "rewards/rejected": -10.486788749694824, + "step": 8599 + }, + { + "epoch": 1.34, + "learning_rate": 7.839745837439701e-06, + "logits/chosen": -3.0290286540985107, + "logits/rejected": -2.6248726844787598, + "logps/chosen": -643.7407836914062, + "logps/rejected": -489.42547607421875, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8269190788269043, + "rewards/margins": 5.7117085456848145, + "rewards/rejected": -9.538627624511719, + "step": 8600 + }, + { + "epoch": 1.34, + "learning_rate": 7.839012396908553e-06, + "logits/chosen": -2.957347869873047, + "logits/rejected": -2.714581251144409, + "logps/chosen": -472.64178466796875, + "logps/rejected": -628.5887451171875, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.697258949279785, + "rewards/margins": 10.01203727722168, + "rewards/rejected": -15.709295272827148, + "step": 8601 + }, + { + "epoch": 1.34, + "learning_rate": 7.838278956377405e-06, + "logits/chosen": -1.913511872291565, + "logits/rejected": -2.880218505859375, + "logps/chosen": -113.64016723632812, + "logps/rejected": -308.8603515625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.479997396469116, + "rewards/margins": 6.509926795959473, + "rewards/rejected": -9.989924430847168, + "step": 8602 + }, + { + "epoch": 1.34, + "learning_rate": 7.837545515846257e-06, + "logits/chosen": -1.7190871238708496, + "logits/rejected": -2.7470946311950684, + "logps/chosen": -94.38906860351562, + "logps/rejected": -417.549072265625, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.204322814941406, + "rewards/margins": 5.212388038635254, + "rewards/rejected": -9.41671085357666, + "step": 8603 + }, + { + "epoch": 1.34, + "learning_rate": 7.836812075315109e-06, + "logits/chosen": -1.540034532546997, + "logits/rejected": -2.6232821941375732, + "logps/chosen": -211.42471313476562, + "logps/rejected": -385.56927490234375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7521634101867676, + "rewards/margins": 7.184732437133789, + "rewards/rejected": -9.936895370483398, + "step": 8604 + }, + { + "epoch": 1.34, + "learning_rate": 7.83607863478396e-06, + "logits/chosen": -2.013652801513672, + "logits/rejected": -2.6972625255584717, + "logps/chosen": -92.75252532958984, + "logps/rejected": -235.88455200195312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.534959077835083, + "rewards/margins": 8.67338752746582, + "rewards/rejected": -10.20834732055664, + "step": 8605 + }, + { + "epoch": 1.34, + "learning_rate": 7.835345194252814e-06, + "logits/chosen": -2.742950677871704, + "logits/rejected": -2.993940830230713, + "logps/chosen": -231.10577392578125, + "logps/rejected": -399.1861267089844, + "loss": 0.6662, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.727766036987305, + "rewards/margins": 3.04729962348938, + "rewards/rejected": -11.775066375732422, + "step": 8606 + }, + { + "epoch": 1.34, + "learning_rate": 7.834611753721666e-06, + "logits/chosen": -2.331244707107544, + "logits/rejected": -3.050767183303833, + "logps/chosen": -188.23703002929688, + "logps/rejected": -445.85546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8108043670654297, + "rewards/margins": 9.453231811523438, + "rewards/rejected": -13.264036178588867, + "step": 8607 + }, + { + "epoch": 1.34, + "learning_rate": 7.83387831319052e-06, + "logits/chosen": -2.401202440261841, + "logits/rejected": -2.753032684326172, + "logps/chosen": -310.556884765625, + "logps/rejected": -282.92352294921875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.642911434173584, + "rewards/margins": 6.59526252746582, + "rewards/rejected": -10.238174438476562, + "step": 8608 + }, + { + "epoch": 1.34, + "learning_rate": 7.833144872659372e-06, + "logits/chosen": -2.4589202404022217, + "logits/rejected": -3.02260684967041, + "logps/chosen": -262.06793212890625, + "logps/rejected": -432.0553283691406, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.901185512542725, + "rewards/margins": 3.911539077758789, + "rewards/rejected": -9.812724113464355, + "step": 8609 + }, + { + "epoch": 1.34, + "learning_rate": 7.832411432128224e-06, + "logits/chosen": -1.6040139198303223, + "logits/rejected": -2.1983273029327393, + "logps/chosen": -124.753173828125, + "logps/rejected": -305.2005920410156, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.235628604888916, + "rewards/margins": 8.048103332519531, + "rewards/rejected": -12.283732414245605, + "step": 8610 + }, + { + "epoch": 1.34, + "learning_rate": 7.831677991597075e-06, + "logits/chosen": -2.844132661819458, + "logits/rejected": -2.6552295684814453, + "logps/chosen": -114.07089233398438, + "logps/rejected": -161.68243408203125, + "loss": 2.6139, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.71791934967041, + "rewards/margins": 1.1112308502197266, + "rewards/rejected": -7.829150199890137, + "step": 8611 + }, + { + "epoch": 1.34, + "learning_rate": 7.830944551065927e-06, + "logits/chosen": -3.007735013961792, + "logits/rejected": -2.5747976303100586, + "logps/chosen": -177.51028442382812, + "logps/rejected": -286.7116394042969, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6219229698181152, + "rewards/margins": 3.8268823623657227, + "rewards/rejected": -7.448805332183838, + "step": 8612 + }, + { + "epoch": 1.34, + "learning_rate": 7.83021111053478e-06, + "logits/chosen": -2.9818766117095947, + "logits/rejected": -2.3142213821411133, + "logps/chosen": -406.2001953125, + "logps/rejected": -466.6976013183594, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.213964462280273, + "rewards/margins": 7.0312652587890625, + "rewards/rejected": -11.245229721069336, + "step": 8613 + }, + { + "epoch": 1.34, + "learning_rate": 7.829477670003631e-06, + "logits/chosen": -2.699523448944092, + "logits/rejected": -1.8725603818893433, + "logps/chosen": -585.96142578125, + "logps/rejected": -269.09716796875, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4349617958068848, + "rewards/margins": 7.045539855957031, + "rewards/rejected": -10.480501174926758, + "step": 8614 + }, + { + "epoch": 1.34, + "learning_rate": 7.828744229472483e-06, + "logits/chosen": -2.8229572772979736, + "logits/rejected": -2.9793922901153564, + "logps/chosen": -118.36293029785156, + "logps/rejected": -319.0693664550781, + "loss": 0.0506, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.169169902801514, + "rewards/margins": 6.115121841430664, + "rewards/rejected": -10.284292221069336, + "step": 8615 + }, + { + "epoch": 1.34, + "learning_rate": 7.828010788941335e-06, + "logits/chosen": -2.764350175857544, + "logits/rejected": -2.490178346633911, + "logps/chosen": -646.7269897460938, + "logps/rejected": -478.9625549316406, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.752058744430542, + "rewards/margins": 5.817165374755859, + "rewards/rejected": -9.569223403930664, + "step": 8616 + }, + { + "epoch": 1.34, + "learning_rate": 7.827277348410188e-06, + "logits/chosen": -2.514392375946045, + "logits/rejected": -2.936715841293335, + "logps/chosen": -107.64222717285156, + "logps/rejected": -201.00210571289062, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1025309562683105, + "rewards/margins": 5.236073970794678, + "rewards/rejected": -8.338604927062988, + "step": 8617 + }, + { + "epoch": 1.34, + "learning_rate": 7.82654390787904e-06, + "logits/chosen": -2.3228018283843994, + "logits/rejected": -2.7048566341400146, + "logps/chosen": -232.37168884277344, + "logps/rejected": -238.1573028564453, + "loss": 0.5587, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.510618686676025, + "rewards/margins": 2.3783810138702393, + "rewards/rejected": -7.888999938964844, + "step": 8618 + }, + { + "epoch": 1.34, + "learning_rate": 7.825810467347892e-06, + "logits/chosen": -2.425447463989258, + "logits/rejected": -2.6380839347839355, + "logps/chosen": -216.89065551757812, + "logps/rejected": -520.5581665039062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.684271335601807, + "rewards/margins": 9.701118469238281, + "rewards/rejected": -15.38538932800293, + "step": 8619 + }, + { + "epoch": 1.34, + "learning_rate": 7.825077026816744e-06, + "logits/chosen": -2.4900472164154053, + "logits/rejected": -2.925339937210083, + "logps/chosen": -360.3829650878906, + "logps/rejected": -440.9305725097656, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0838117599487305, + "rewards/margins": 5.93953800201416, + "rewards/rejected": -10.02334976196289, + "step": 8620 + }, + { + "epoch": 1.34, + "learning_rate": 7.824343586285596e-06, + "logits/chosen": -0.9955558776855469, + "logits/rejected": -2.743497610092163, + "logps/chosen": -150.2631378173828, + "logps/rejected": -647.19189453125, + "loss": 0.9937, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.424408912658691, + "rewards/margins": 10.441786766052246, + "rewards/rejected": -14.866195678710938, + "step": 8621 + }, + { + "epoch": 1.34, + "learning_rate": 7.823610145754448e-06, + "logits/chosen": -2.3461294174194336, + "logits/rejected": -3.0121920108795166, + "logps/chosen": -218.53955078125, + "logps/rejected": -288.5408020019531, + "loss": 2.5674, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.802569389343262, + "rewards/margins": 2.1690773963928223, + "rewards/rejected": -7.971646785736084, + "step": 8622 + }, + { + "epoch": 1.34, + "learning_rate": 7.8228767052233e-06, + "logits/chosen": -1.7041486501693726, + "logits/rejected": -2.7152814865112305, + "logps/chosen": -246.43621826171875, + "logps/rejected": -298.6697998046875, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.960155487060547, + "rewards/margins": 6.139657020568848, + "rewards/rejected": -10.099812507629395, + "step": 8623 + }, + { + "epoch": 1.34, + "learning_rate": 7.822143264692152e-06, + "logits/chosen": -2.4109995365142822, + "logits/rejected": -2.998258590698242, + "logps/chosen": -209.91421508789062, + "logps/rejected": -547.32568359375, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0017898082733154, + "rewards/margins": 6.590970993041992, + "rewards/rejected": -8.592761039733887, + "step": 8624 + }, + { + "epoch": 1.34, + "learning_rate": 7.821409824161003e-06, + "logits/chosen": -2.034559488296509, + "logits/rejected": -2.6646311283111572, + "logps/chosen": -113.95909118652344, + "logps/rejected": -495.34942626953125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.479799747467041, + "rewards/margins": 11.802318572998047, + "rewards/rejected": -16.28211784362793, + "step": 8625 + }, + { + "epoch": 1.34, + "learning_rate": 7.820676383629857e-06, + "logits/chosen": -0.2632571756839752, + "logits/rejected": -2.5191919803619385, + "logps/chosen": -119.57725524902344, + "logps/rejected": -928.5963134765625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.256469249725342, + "rewards/margins": 11.242830276489258, + "rewards/rejected": -16.499300003051758, + "step": 8626 + }, + { + "epoch": 1.34, + "learning_rate": 7.819942943098709e-06, + "logits/chosen": -1.8362985849380493, + "logits/rejected": -2.7775821685791016, + "logps/chosen": -244.62347412109375, + "logps/rejected": -538.9483642578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.53076171875, + "rewards/margins": 10.233419418334961, + "rewards/rejected": -15.764181137084961, + "step": 8627 + }, + { + "epoch": 1.34, + "learning_rate": 7.81920950256756e-06, + "logits/chosen": -1.205922245979309, + "logits/rejected": -2.686422348022461, + "logps/chosen": -229.07015991210938, + "logps/rejected": -556.9595947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7336976528167725, + "rewards/margins": 14.347155570983887, + "rewards/rejected": -18.080852508544922, + "step": 8628 + }, + { + "epoch": 1.34, + "learning_rate": 7.818476062036413e-06, + "logits/chosen": -2.509373664855957, + "logits/rejected": -3.02374529838562, + "logps/chosen": -90.85902404785156, + "logps/rejected": -207.74545288085938, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.535585641860962, + "rewards/margins": 5.956421852111816, + "rewards/rejected": -8.4920072555542, + "step": 8629 + }, + { + "epoch": 1.34, + "learning_rate": 7.817742621505265e-06, + "logits/chosen": -3.102431058883667, + "logits/rejected": -3.0685694217681885, + "logps/chosen": -166.89443969726562, + "logps/rejected": -236.78477478027344, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5791335105895996, + "rewards/margins": 8.231119155883789, + "rewards/rejected": -10.81025218963623, + "step": 8630 + }, + { + "epoch": 1.34, + "learning_rate": 7.817009180974116e-06, + "logits/chosen": -1.7604650259017944, + "logits/rejected": -2.744252920150757, + "logps/chosen": -145.03565979003906, + "logps/rejected": -493.9000549316406, + "loss": 0.0968, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.515714645385742, + "rewards/margins": 6.975467681884766, + "rewards/rejected": -13.491182327270508, + "step": 8631 + }, + { + "epoch": 1.34, + "learning_rate": 7.816275740442968e-06, + "logits/chosen": -2.6423027515411377, + "logits/rejected": -3.0884554386138916, + "logps/chosen": -436.66705322265625, + "logps/rejected": -479.95648193359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.96760892868042, + "rewards/margins": 8.487958908081055, + "rewards/rejected": -11.455568313598633, + "step": 8632 + }, + { + "epoch": 1.34, + "learning_rate": 7.81554229991182e-06, + "logits/chosen": -2.475297451019287, + "logits/rejected": -3.070037364959717, + "logps/chosen": -152.13038635253906, + "logps/rejected": -480.9154052734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1514639854431152, + "rewards/margins": 11.643436431884766, + "rewards/rejected": -14.794900894165039, + "step": 8633 + }, + { + "epoch": 1.34, + "learning_rate": 7.814808859380672e-06, + "logits/chosen": -1.7713425159454346, + "logits/rejected": -2.5787241458892822, + "logps/chosen": -200.72268676757812, + "logps/rejected": -185.75164794921875, + "loss": 0.4594, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.431890964508057, + "rewards/margins": 5.047905921936035, + "rewards/rejected": -9.47979736328125, + "step": 8634 + }, + { + "epoch": 1.34, + "learning_rate": 7.814075418849526e-06, + "logits/chosen": -2.8160719871520996, + "logits/rejected": -2.037274122238159, + "logps/chosen": -196.53988647460938, + "logps/rejected": -113.06829833984375, + "loss": 2.3797, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.890664100646973, + "rewards/margins": 0.8761072158813477, + "rewards/rejected": -6.76677131652832, + "step": 8635 + }, + { + "epoch": 1.34, + "learning_rate": 7.813341978318377e-06, + "logits/chosen": -2.160635232925415, + "logits/rejected": -2.8539352416992188, + "logps/chosen": -128.354736328125, + "logps/rejected": -379.47589111328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.33366060256958, + "rewards/margins": 9.287454605102539, + "rewards/rejected": -14.621115684509277, + "step": 8636 + }, + { + "epoch": 1.34, + "learning_rate": 7.81260853778723e-06, + "logits/chosen": -1.6199605464935303, + "logits/rejected": -2.3986563682556152, + "logps/chosen": -252.48822021484375, + "logps/rejected": -339.0542907714844, + "loss": 2.9519, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.820224761962891, + "rewards/margins": 4.653870582580566, + "rewards/rejected": -12.474096298217773, + "step": 8637 + }, + { + "epoch": 1.34, + "learning_rate": 7.811875097256081e-06, + "logits/chosen": -2.9131734371185303, + "logits/rejected": -2.9706742763519287, + "logps/chosen": -91.54924011230469, + "logps/rejected": -214.22357177734375, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1782689094543457, + "rewards/margins": 4.373606204986572, + "rewards/rejected": -7.551875114440918, + "step": 8638 + }, + { + "epoch": 1.34, + "learning_rate": 7.811141656724933e-06, + "logits/chosen": -2.671304941177368, + "logits/rejected": -2.278550386428833, + "logps/chosen": -189.8211669921875, + "logps/rejected": -172.92398071289062, + "loss": 0.1047, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7481658458709717, + "rewards/margins": 6.049413681030273, + "rewards/rejected": -9.797579765319824, + "step": 8639 + }, + { + "epoch": 1.34, + "learning_rate": 7.810408216193787e-06, + "logits/chosen": -2.708648681640625, + "logits/rejected": -3.0911054611206055, + "logps/chosen": -977.3656616210938, + "logps/rejected": -873.7828979492188, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.130882263183594, + "rewards/margins": 4.4857072830200195, + "rewards/rejected": -9.616589546203613, + "step": 8640 + }, + { + "epoch": 1.34, + "learning_rate": 7.809674775662639e-06, + "logits/chosen": -2.9603354930877686, + "logits/rejected": -2.936870813369751, + "logps/chosen": -467.791259765625, + "logps/rejected": -452.8093566894531, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1996917724609375, + "rewards/margins": 9.255783081054688, + "rewards/rejected": -12.455474853515625, + "step": 8641 + }, + { + "epoch": 1.34, + "learning_rate": 7.80894133513149e-06, + "logits/chosen": -2.945732831954956, + "logits/rejected": -1.7800804376602173, + "logps/chosen": -389.2736511230469, + "logps/rejected": -238.346435546875, + "loss": 1.9542, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.521069526672363, + "rewards/margins": 0.05086839199066162, + "rewards/rejected": -5.5719380378723145, + "step": 8642 + }, + { + "epoch": 1.34, + "learning_rate": 7.808207894600342e-06, + "logits/chosen": -3.0894064903259277, + "logits/rejected": -2.4020705223083496, + "logps/chosen": -369.18243408203125, + "logps/rejected": -256.93280029296875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3234119415283203, + "rewards/margins": 6.096311569213867, + "rewards/rejected": -9.419723510742188, + "step": 8643 + }, + { + "epoch": 1.34, + "learning_rate": 7.807474454069196e-06, + "logits/chosen": -2.9771618843078613, + "logits/rejected": -2.492440700531006, + "logps/chosen": -535.791015625, + "logps/rejected": -589.6754150390625, + "loss": 2.3325, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.721782684326172, + "rewards/margins": -1.2323837280273438, + "rewards/rejected": -6.489398956298828, + "step": 8644 + }, + { + "epoch": 1.34, + "learning_rate": 7.806741013538048e-06, + "logits/chosen": -2.9312901496887207, + "logits/rejected": -3.0952484607696533, + "logps/chosen": -130.64349365234375, + "logps/rejected": -285.6607666015625, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6697956323623657, + "rewards/margins": 7.17656135559082, + "rewards/rejected": -8.846357345581055, + "step": 8645 + }, + { + "epoch": 1.34, + "learning_rate": 7.8060075730069e-06, + "logits/chosen": -2.4017832279205322, + "logits/rejected": -2.404259204864502, + "logps/chosen": -121.13419342041016, + "logps/rejected": -218.19082641601562, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.063332557678223, + "rewards/margins": 4.781986236572266, + "rewards/rejected": -8.845318794250488, + "step": 8646 + }, + { + "epoch": 1.34, + "learning_rate": 7.805274132475752e-06, + "logits/chosen": -3.0139877796173096, + "logits/rejected": -1.7513223886489868, + "logps/chosen": -356.0431823730469, + "logps/rejected": -246.64459228515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7783887386322021, + "rewards/margins": 7.665338516235352, + "rewards/rejected": -9.443727493286133, + "step": 8647 + }, + { + "epoch": 1.34, + "learning_rate": 7.804540691944603e-06, + "logits/chosen": -2.0518388748168945, + "logits/rejected": -2.760262966156006, + "logps/chosen": -53.16615676879883, + "logps/rejected": -328.89227294921875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.559786558151245, + "rewards/margins": 10.389148712158203, + "rewards/rejected": -12.948935508728027, + "step": 8648 + }, + { + "epoch": 1.35, + "learning_rate": 7.803807251413455e-06, + "logits/chosen": -1.777343988418579, + "logits/rejected": -2.875339984893799, + "logps/chosen": -257.4753112792969, + "logps/rejected": -602.842529296875, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3999080657958984, + "rewards/margins": 5.553592681884766, + "rewards/rejected": -8.953500747680664, + "step": 8649 + }, + { + "epoch": 1.35, + "learning_rate": 7.803073810882307e-06, + "logits/chosen": -2.8751237392425537, + "logits/rejected": -2.082537889480591, + "logps/chosen": -505.3682556152344, + "logps/rejected": -255.17147827148438, + "loss": 0.2004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8903732299804688, + "rewards/margins": 5.948721408843994, + "rewards/rejected": -8.839094161987305, + "step": 8650 + }, + { + "epoch": 1.35, + "learning_rate": 7.802340370351159e-06, + "logits/chosen": -1.8261950016021729, + "logits/rejected": -2.4726250171661377, + "logps/chosen": -119.77725219726562, + "logps/rejected": -437.992919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3243346214294434, + "rewards/margins": 11.40553092956543, + "rewards/rejected": -13.729866027832031, + "step": 8651 + }, + { + "epoch": 1.35, + "learning_rate": 7.801606929820011e-06, + "logits/chosen": -2.7706756591796875, + "logits/rejected": -2.773725986480713, + "logps/chosen": -652.718017578125, + "logps/rejected": -542.569091796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.354459524154663, + "rewards/margins": 8.207489013671875, + "rewards/rejected": -11.561948776245117, + "step": 8652 + }, + { + "epoch": 1.35, + "learning_rate": 7.800873489288864e-06, + "logits/chosen": -2.446359157562256, + "logits/rejected": -2.752121925354004, + "logps/chosen": -136.5255126953125, + "logps/rejected": -370.1032409667969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.326894760131836, + "rewards/margins": 10.395174980163574, + "rewards/rejected": -14.722070693969727, + "step": 8653 + }, + { + "epoch": 1.35, + "learning_rate": 7.800140048757716e-06, + "logits/chosen": -2.7329509258270264, + "logits/rejected": -2.0611655712127686, + "logps/chosen": -179.099853515625, + "logps/rejected": -342.92926025390625, + "loss": 0.1731, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.686715126037598, + "rewards/margins": 8.125100135803223, + "rewards/rejected": -12.81181526184082, + "step": 8654 + }, + { + "epoch": 1.35, + "learning_rate": 7.799406608226568e-06, + "logits/chosen": -1.5585843324661255, + "logits/rejected": -2.854194402694702, + "logps/chosen": -130.6778564453125, + "logps/rejected": -476.08306884765625, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8448076248168945, + "rewards/margins": 8.696435928344727, + "rewards/rejected": -13.541244506835938, + "step": 8655 + }, + { + "epoch": 1.35, + "learning_rate": 7.79867316769542e-06, + "logits/chosen": -2.8126232624053955, + "logits/rejected": -2.8166487216949463, + "logps/chosen": -120.52391815185547, + "logps/rejected": -248.87786865234375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5919013023376465, + "rewards/margins": 7.353160381317139, + "rewards/rejected": -11.945061683654785, + "step": 8656 + }, + { + "epoch": 1.35, + "learning_rate": 7.797939727164272e-06, + "logits/chosen": -1.306693196296692, + "logits/rejected": -2.7531747817993164, + "logps/chosen": -106.38789367675781, + "logps/rejected": -509.18878173828125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7344069480895996, + "rewards/margins": 9.378135681152344, + "rewards/rejected": -13.112543106079102, + "step": 8657 + }, + { + "epoch": 1.35, + "learning_rate": 7.797206286633124e-06, + "logits/chosen": -2.6311757564544678, + "logits/rejected": -3.045623302459717, + "logps/chosen": -305.79736328125, + "logps/rejected": -361.6697998046875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.040510654449463, + "rewards/margins": 8.918328285217285, + "rewards/rejected": -10.958839416503906, + "step": 8658 + }, + { + "epoch": 1.35, + "learning_rate": 7.796472846101976e-06, + "logits/chosen": -2.0325846672058105, + "logits/rejected": -2.7765953540802, + "logps/chosen": -205.63124084472656, + "logps/rejected": -485.1609802246094, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2995405197143555, + "rewards/margins": 11.43111515045166, + "rewards/rejected": -16.730655670166016, + "step": 8659 + }, + { + "epoch": 1.35, + "learning_rate": 7.795739405570828e-06, + "logits/chosen": -2.092167377471924, + "logits/rejected": -3.0522632598876953, + "logps/chosen": -182.73797607421875, + "logps/rejected": -301.1628723144531, + "loss": 0.5938, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.361536502838135, + "rewards/margins": 1.4933772087097168, + "rewards/rejected": -6.854913711547852, + "step": 8660 + }, + { + "epoch": 1.35, + "learning_rate": 7.79500596503968e-06, + "logits/chosen": -2.6593191623687744, + "logits/rejected": -2.4252963066101074, + "logps/chosen": -695.294189453125, + "logps/rejected": -541.800537109375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8582992553710938, + "rewards/margins": 6.7516326904296875, + "rewards/rejected": -9.609931945800781, + "step": 8661 + }, + { + "epoch": 1.35, + "learning_rate": 7.794272524508533e-06, + "logits/chosen": -1.6909210681915283, + "logits/rejected": -2.9605822563171387, + "logps/chosen": -173.25811767578125, + "logps/rejected": -409.870361328125, + "loss": 0.9356, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.320451736450195, + "rewards/margins": 3.9351508617401123, + "rewards/rejected": -10.255602836608887, + "step": 8662 + }, + { + "epoch": 1.35, + "learning_rate": 7.793539083977385e-06, + "logits/chosen": -2.037696599960327, + "logits/rejected": -2.5937254428863525, + "logps/chosen": -82.119384765625, + "logps/rejected": -205.76889038085938, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.685181617736816, + "rewards/margins": 6.681397438049316, + "rewards/rejected": -12.366579055786133, + "step": 8663 + }, + { + "epoch": 1.35, + "learning_rate": 7.792805643446237e-06, + "logits/chosen": -2.8170061111450195, + "logits/rejected": -2.3828630447387695, + "logps/chosen": -1018.9791259765625, + "logps/rejected": -814.7950439453125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.292189121246338, + "rewards/margins": 8.539458274841309, + "rewards/rejected": -12.831646919250488, + "step": 8664 + }, + { + "epoch": 1.35, + "learning_rate": 7.792072202915089e-06, + "logits/chosen": -2.727619171142578, + "logits/rejected": -2.9050753116607666, + "logps/chosen": -46.219261169433594, + "logps/rejected": -139.63906860351562, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8582544326782227, + "rewards/margins": 5.490294456481934, + "rewards/rejected": -8.348548889160156, + "step": 8665 + }, + { + "epoch": 1.35, + "learning_rate": 7.79133876238394e-06, + "logits/chosen": -2.9969747066497803, + "logits/rejected": -2.251985549926758, + "logps/chosen": -153.49435424804688, + "logps/rejected": -242.42970275878906, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.118250846862793, + "rewards/margins": 5.270801067352295, + "rewards/rejected": -11.38905143737793, + "step": 8666 + }, + { + "epoch": 1.35, + "learning_rate": 7.790605321852792e-06, + "logits/chosen": -2.3186962604522705, + "logits/rejected": -3.00315523147583, + "logps/chosen": -294.67364501953125, + "logps/rejected": -376.7001953125, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.377973556518555, + "rewards/margins": 3.8940417766571045, + "rewards/rejected": -9.272014617919922, + "step": 8667 + }, + { + "epoch": 1.35, + "learning_rate": 7.789871881321644e-06, + "logits/chosen": -1.5535537004470825, + "logits/rejected": -2.728289842605591, + "logps/chosen": -131.42001342773438, + "logps/rejected": -287.843505859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.278254508972168, + "rewards/margins": 8.008171081542969, + "rewards/rejected": -12.286426544189453, + "step": 8668 + }, + { + "epoch": 1.35, + "learning_rate": 7.789138440790496e-06, + "logits/chosen": -2.335817337036133, + "logits/rejected": -1.3879282474517822, + "logps/chosen": -203.6748504638672, + "logps/rejected": -142.35452270507812, + "loss": 2.0806, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.3775482177734375, + "rewards/margins": -1.741750955581665, + "rewards/rejected": -5.635797500610352, + "step": 8669 + }, + { + "epoch": 1.35, + "learning_rate": 7.78840500025935e-06, + "logits/chosen": -3.0365307331085205, + "logits/rejected": -2.9149906635284424, + "logps/chosen": -103.00540161132812, + "logps/rejected": -229.217041015625, + "loss": 1.1037, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.3730878829956055, + "rewards/margins": 2.5912556648254395, + "rewards/rejected": -9.964343070983887, + "step": 8670 + }, + { + "epoch": 1.35, + "learning_rate": 7.787671559728202e-06, + "logits/chosen": -2.6383676528930664, + "logits/rejected": -2.9684641361236572, + "logps/chosen": -679.3933715820312, + "logps/rejected": -702.1748657226562, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.523012161254883, + "rewards/margins": 8.581465721130371, + "rewards/rejected": -15.104476928710938, + "step": 8671 + }, + { + "epoch": 1.35, + "learning_rate": 7.786938119197054e-06, + "logits/chosen": -2.72383189201355, + "logits/rejected": -3.0873844623565674, + "logps/chosen": -69.02574157714844, + "logps/rejected": -181.6340789794922, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.247607469558716, + "rewards/margins": 5.172947883605957, + "rewards/rejected": -8.420555114746094, + "step": 8672 + }, + { + "epoch": 1.35, + "learning_rate": 7.786204678665905e-06, + "logits/chosen": -1.5307291746139526, + "logits/rejected": -2.1794748306274414, + "logps/chosen": -183.25457763671875, + "logps/rejected": -131.47665405273438, + "loss": 0.8379, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.684624671936035, + "rewards/margins": 2.9363837242126465, + "rewards/rejected": -7.621007919311523, + "step": 8673 + }, + { + "epoch": 1.35, + "learning_rate": 7.785471238134759e-06, + "logits/chosen": -2.171440362930298, + "logits/rejected": -2.816749095916748, + "logps/chosen": -200.3579864501953, + "logps/rejected": -234.96951293945312, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.086216926574707, + "rewards/margins": 5.835268974304199, + "rewards/rejected": -9.921485900878906, + "step": 8674 + }, + { + "epoch": 1.35, + "learning_rate": 7.784737797603611e-06, + "logits/chosen": -1.1981821060180664, + "logits/rejected": -2.928030252456665, + "logps/chosen": -161.17752075195312, + "logps/rejected": -539.3297119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.684515953063965, + "rewards/margins": 12.466273307800293, + "rewards/rejected": -15.150789260864258, + "step": 8675 + }, + { + "epoch": 1.35, + "learning_rate": 7.784004357072463e-06, + "logits/chosen": -2.8797411918640137, + "logits/rejected": -2.5579395294189453, + "logps/chosen": -342.2745056152344, + "logps/rejected": -543.5172729492188, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5991103649139404, + "rewards/margins": 8.832560539245605, + "rewards/rejected": -12.431671142578125, + "step": 8676 + }, + { + "epoch": 1.35, + "learning_rate": 7.783270916541315e-06, + "logits/chosen": -2.7496702671051025, + "logits/rejected": -2.666693687438965, + "logps/chosen": -193.05392456054688, + "logps/rejected": -259.75177001953125, + "loss": 0.1867, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.269350528717041, + "rewards/margins": 2.38551926612854, + "rewards/rejected": -8.65487003326416, + "step": 8677 + }, + { + "epoch": 1.35, + "learning_rate": 7.782537476010167e-06, + "logits/chosen": -1.8891043663024902, + "logits/rejected": -2.887169361114502, + "logps/chosen": -126.51174926757812, + "logps/rejected": -377.7144775390625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5878994464874268, + "rewards/margins": 8.310321807861328, + "rewards/rejected": -11.898221015930176, + "step": 8678 + }, + { + "epoch": 1.35, + "learning_rate": 7.78180403547902e-06, + "logits/chosen": -2.4010133743286133, + "logits/rejected": -3.1338634490966797, + "logps/chosen": -121.44783020019531, + "logps/rejected": -352.201171875, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.016376495361328, + "rewards/margins": 4.252897262573242, + "rewards/rejected": -9.26927375793457, + "step": 8679 + }, + { + "epoch": 1.35, + "learning_rate": 7.781070594947872e-06, + "logits/chosen": -2.426973581314087, + "logits/rejected": -2.863705635070801, + "logps/chosen": -219.03652954101562, + "logps/rejected": -408.1739807128906, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.806903839111328, + "rewards/margins": 9.855632781982422, + "rewards/rejected": -13.66253662109375, + "step": 8680 + }, + { + "epoch": 1.35, + "learning_rate": 7.780337154416724e-06, + "logits/chosen": -2.602463483810425, + "logits/rejected": -2.7271203994750977, + "logps/chosen": -378.3634948730469, + "logps/rejected": -424.22930908203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.083646535873413, + "rewards/margins": 9.812484741210938, + "rewards/rejected": -11.89613151550293, + "step": 8681 + }, + { + "epoch": 1.35, + "learning_rate": 7.779603713885576e-06, + "logits/chosen": -2.7951500415802, + "logits/rejected": -2.8502800464630127, + "logps/chosen": -127.88545227050781, + "logps/rejected": -131.20758056640625, + "loss": 0.0887, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.781550407409668, + "rewards/margins": 2.3812685012817383, + "rewards/rejected": -8.162818908691406, + "step": 8682 + }, + { + "epoch": 1.35, + "learning_rate": 7.778870273354428e-06, + "logits/chosen": -2.8443119525909424, + "logits/rejected": -2.2051873207092285, + "logps/chosen": -357.24139404296875, + "logps/rejected": -311.1656494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1420608758926392, + "rewards/margins": 10.988571166992188, + "rewards/rejected": -12.130632400512695, + "step": 8683 + }, + { + "epoch": 1.35, + "learning_rate": 7.77813683282328e-06, + "logits/chosen": -1.7729394435882568, + "logits/rejected": -2.6455442905426025, + "logps/chosen": -120.44686889648438, + "logps/rejected": -472.104736328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0809268951416016, + "rewards/margins": 9.726944923400879, + "rewards/rejected": -12.807870864868164, + "step": 8684 + }, + { + "epoch": 1.35, + "learning_rate": 7.777403392292131e-06, + "logits/chosen": -2.9334328174591064, + "logits/rejected": -2.2999446392059326, + "logps/chosen": -537.3038330078125, + "logps/rejected": -381.657470703125, + "loss": 0.1333, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.254995584487915, + "rewards/margins": 7.047331809997559, + "rewards/rejected": -10.302327156066895, + "step": 8685 + }, + { + "epoch": 1.35, + "learning_rate": 7.776669951760983e-06, + "logits/chosen": -2.941254138946533, + "logits/rejected": -3.0034284591674805, + "logps/chosen": -215.53445434570312, + "logps/rejected": -177.88803100585938, + "loss": 0.6835, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.607391357421875, + "rewards/margins": 4.555469512939453, + "rewards/rejected": -9.162860870361328, + "step": 8686 + }, + { + "epoch": 1.35, + "learning_rate": 7.775936511229835e-06, + "logits/chosen": -2.80845046043396, + "logits/rejected": -2.4466981887817383, + "logps/chosen": -164.23831176757812, + "logps/rejected": -192.84376525878906, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.504278659820557, + "rewards/margins": 5.007816314697266, + "rewards/rejected": -10.512094497680664, + "step": 8687 + }, + { + "epoch": 1.35, + "learning_rate": 7.775203070698689e-06, + "logits/chosen": -2.8738744258880615, + "logits/rejected": -2.9430556297302246, + "logps/chosen": -260.2278137207031, + "logps/rejected": -432.9696044921875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7547035217285156, + "rewards/margins": 8.265120506286621, + "rewards/rejected": -12.019824028015137, + "step": 8688 + }, + { + "epoch": 1.35, + "learning_rate": 7.77446963016754e-06, + "logits/chosen": -3.026110887527466, + "logits/rejected": -3.1276938915252686, + "logps/chosen": -141.6697540283203, + "logps/rejected": -258.5918273925781, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.459089756011963, + "rewards/margins": 5.69866418838501, + "rewards/rejected": -9.157753944396973, + "step": 8689 + }, + { + "epoch": 1.35, + "learning_rate": 7.773736189636392e-06, + "logits/chosen": -2.921964645385742, + "logits/rejected": -3.068063259124756, + "logps/chosen": -554.374267578125, + "logps/rejected": -527.33203125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.744152069091797, + "rewards/margins": 7.822669506072998, + "rewards/rejected": -11.566822052001953, + "step": 8690 + }, + { + "epoch": 1.35, + "learning_rate": 7.773002749105244e-06, + "logits/chosen": -3.048518657684326, + "logits/rejected": -3.0842361450195312, + "logps/chosen": -341.1252136230469, + "logps/rejected": -325.48492431640625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.77606201171875, + "rewards/margins": 6.756110191345215, + "rewards/rejected": -10.532172203063965, + "step": 8691 + }, + { + "epoch": 1.35, + "learning_rate": 7.772269308574096e-06, + "logits/chosen": -2.725879430770874, + "logits/rejected": -2.9136743545532227, + "logps/chosen": -78.46560668945312, + "logps/rejected": -275.98260498046875, + "loss": 0.028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.023495674133301, + "rewards/margins": 4.81638240814209, + "rewards/rejected": -8.83987808227539, + "step": 8692 + }, + { + "epoch": 1.35, + "learning_rate": 7.771535868042948e-06, + "logits/chosen": -1.7901358604431152, + "logits/rejected": -3.011927604675293, + "logps/chosen": -139.81198120117188, + "logps/rejected": -481.7078857421875, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.416672706604004, + "rewards/margins": 5.580348491668701, + "rewards/rejected": -9.997020721435547, + "step": 8693 + }, + { + "epoch": 1.35, + "learning_rate": 7.7708024275118e-06, + "logits/chosen": -2.7081336975097656, + "logits/rejected": -3.157454252243042, + "logps/chosen": -158.44717407226562, + "logps/rejected": -336.6944885253906, + "loss": 0.0371, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.013378143310547, + "rewards/margins": 3.2774603366851807, + "rewards/rejected": -8.290838241577148, + "step": 8694 + }, + { + "epoch": 1.35, + "learning_rate": 7.770068986980652e-06, + "logits/chosen": -1.6197407245635986, + "logits/rejected": -3.021488904953003, + "logps/chosen": -144.81387329101562, + "logps/rejected": -337.78521728515625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6283385753631592, + "rewards/margins": 7.0659990310668945, + "rewards/rejected": -8.694337844848633, + "step": 8695 + }, + { + "epoch": 1.35, + "learning_rate": 7.769335546449504e-06, + "logits/chosen": -1.796665906906128, + "logits/rejected": -2.55538010597229, + "logps/chosen": -148.01686096191406, + "logps/rejected": -277.51812744140625, + "loss": 0.2644, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.555824279785156, + "rewards/margins": 5.060468673706055, + "rewards/rejected": -12.616292953491211, + "step": 8696 + }, + { + "epoch": 1.35, + "learning_rate": 7.768602105918357e-06, + "logits/chosen": -2.7420222759246826, + "logits/rejected": -3.0086417198181152, + "logps/chosen": -74.51813507080078, + "logps/rejected": -349.4943542480469, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.393735408782959, + "rewards/margins": 7.810490608215332, + "rewards/rejected": -12.20422649383545, + "step": 8697 + }, + { + "epoch": 1.35, + "learning_rate": 7.76786866538721e-06, + "logits/chosen": -2.4949541091918945, + "logits/rejected": -2.9729812145233154, + "logps/chosen": -127.01903533935547, + "logps/rejected": -230.06411743164062, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2250263690948486, + "rewards/margins": 3.5794622898101807, + "rewards/rejected": -6.804488658905029, + "step": 8698 + }, + { + "epoch": 1.35, + "learning_rate": 7.767135224856061e-06, + "logits/chosen": -2.8383424282073975, + "logits/rejected": -1.9493681192398071, + "logps/chosen": -392.9227294921875, + "logps/rejected": -284.67755126953125, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.919320821762085, + "rewards/margins": 8.37251091003418, + "rewards/rejected": -12.291831970214844, + "step": 8699 + }, + { + "epoch": 1.35, + "learning_rate": 7.766401784324913e-06, + "logits/chosen": -2.99643874168396, + "logits/rejected": -2.8893964290618896, + "logps/chosen": -340.33990478515625, + "logps/rejected": -333.78662109375, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.099903106689453, + "rewards/margins": 5.029404640197754, + "rewards/rejected": -11.12930679321289, + "step": 8700 + }, + { + "epoch": 1.35, + "learning_rate": 7.765668343793765e-06, + "logits/chosen": -1.9482567310333252, + "logits/rejected": -2.578704595565796, + "logps/chosen": -143.0135498046875, + "logps/rejected": -320.97613525390625, + "loss": 0.0374, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.723080635070801, + "rewards/margins": 3.6409401893615723, + "rewards/rejected": -8.364021301269531, + "step": 8701 + }, + { + "epoch": 1.35, + "learning_rate": 7.764934903262617e-06, + "logits/chosen": -1.9390846490859985, + "logits/rejected": -2.710326671600342, + "logps/chosen": -430.1864318847656, + "logps/rejected": -734.141357421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.82351541519165, + "rewards/margins": 8.489802360534668, + "rewards/rejected": -13.313318252563477, + "step": 8702 + }, + { + "epoch": 1.35, + "learning_rate": 7.764201462731469e-06, + "logits/chosen": -2.626616954803467, + "logits/rejected": -3.0619988441467285, + "logps/chosen": -694.9404296875, + "logps/rejected": -331.48785400390625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3637895584106445, + "rewards/margins": 7.010350227355957, + "rewards/rejected": -10.374139785766602, + "step": 8703 + }, + { + "epoch": 1.35, + "learning_rate": 7.76346802220032e-06, + "logits/chosen": -2.621932029724121, + "logits/rejected": -2.983375310897827, + "logps/chosen": -133.0828094482422, + "logps/rejected": -201.85568237304688, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2929229736328125, + "rewards/margins": 7.032071113586426, + "rewards/rejected": -9.324994087219238, + "step": 8704 + }, + { + "epoch": 1.35, + "learning_rate": 7.762734581669172e-06, + "logits/chosen": -2.4304492473602295, + "logits/rejected": -2.901555061340332, + "logps/chosen": -74.80130004882812, + "logps/rejected": -344.8248596191406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9295830726623535, + "rewards/margins": 9.930378913879395, + "rewards/rejected": -12.859962463378906, + "step": 8705 + }, + { + "epoch": 1.35, + "learning_rate": 7.762001141138026e-06, + "logits/chosen": -2.594895362854004, + "logits/rejected": -2.9437448978424072, + "logps/chosen": -337.5153503417969, + "logps/rejected": -416.3975830078125, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.346874713897705, + "rewards/margins": 5.273435115814209, + "rewards/rejected": -10.620309829711914, + "step": 8706 + }, + { + "epoch": 1.35, + "learning_rate": 7.761267700606878e-06, + "logits/chosen": -2.608910322189331, + "logits/rejected": -2.9561073780059814, + "logps/chosen": -69.90339660644531, + "logps/rejected": -191.8800811767578, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6936211585998535, + "rewards/margins": 7.783149719238281, + "rewards/rejected": -13.476770401000977, + "step": 8707 + }, + { + "epoch": 1.35, + "learning_rate": 7.760534260075731e-06, + "logits/chosen": -3.104459762573242, + "logits/rejected": -3.078176736831665, + "logps/chosen": -236.4930877685547, + "logps/rejected": -208.06314086914062, + "loss": 0.2153, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.011409759521484, + "rewards/margins": 7.556674003601074, + "rewards/rejected": -11.568083763122559, + "step": 8708 + }, + { + "epoch": 1.35, + "learning_rate": 7.759800819544583e-06, + "logits/chosen": -1.0947380065917969, + "logits/rejected": -2.4669501781463623, + "logps/chosen": -692.212158203125, + "logps/rejected": -335.232666015625, + "loss": 3.5542, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.402275085449219, + "rewards/margins": -0.10382390022277832, + "rewards/rejected": -9.298450469970703, + "step": 8709 + }, + { + "epoch": 1.35, + "learning_rate": 7.759067379013435e-06, + "logits/chosen": -2.8811604976654053, + "logits/rejected": -2.3688817024230957, + "logps/chosen": -233.28872680664062, + "logps/rejected": -247.84848022460938, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.746769905090332, + "rewards/margins": 7.264976501464844, + "rewards/rejected": -10.011746406555176, + "step": 8710 + }, + { + "epoch": 1.35, + "learning_rate": 7.758333938482287e-06, + "logits/chosen": -1.3393161296844482, + "logits/rejected": -2.4955363273620605, + "logps/chosen": -150.7029266357422, + "logps/rejected": -589.88818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.660114288330078, + "rewards/margins": 11.740686416625977, + "rewards/rejected": -16.400800704956055, + "step": 8711 + }, + { + "epoch": 1.35, + "learning_rate": 7.757600497951139e-06, + "logits/chosen": -1.847654104232788, + "logits/rejected": -3.043299436569214, + "logps/chosen": -325.2091369628906, + "logps/rejected": -677.9862060546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.617166042327881, + "rewards/margins": 10.742146492004395, + "rewards/rejected": -16.359312057495117, + "step": 8712 + }, + { + "epoch": 1.36, + "learning_rate": 7.75686705741999e-06, + "logits/chosen": -1.2793081998825073, + "logits/rejected": -2.745805025100708, + "logps/chosen": -200.98046875, + "logps/rejected": -505.6366271972656, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.330148220062256, + "rewards/margins": 9.657691955566406, + "rewards/rejected": -12.98784065246582, + "step": 8713 + }, + { + "epoch": 1.36, + "learning_rate": 7.756133616888843e-06, + "logits/chosen": -2.536038637161255, + "logits/rejected": -2.860478639602661, + "logps/chosen": -235.99771118164062, + "logps/rejected": -313.4110412597656, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.753342628479004, + "rewards/margins": 5.5193047523498535, + "rewards/rejected": -8.272647857666016, + "step": 8714 + }, + { + "epoch": 1.36, + "learning_rate": 7.755400176357696e-06, + "logits/chosen": -2.5691821575164795, + "logits/rejected": -2.0278306007385254, + "logps/chosen": -344.1722106933594, + "logps/rejected": -244.11032104492188, + "loss": 0.9465, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.842129707336426, + "rewards/margins": 0.5874531269073486, + "rewards/rejected": -10.429582595825195, + "step": 8715 + }, + { + "epoch": 1.36, + "learning_rate": 7.754666735826548e-06, + "logits/chosen": -2.2093372344970703, + "logits/rejected": -2.7655575275421143, + "logps/chosen": -373.315673828125, + "logps/rejected": -327.93743896484375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.777280807495117, + "rewards/margins": 7.3870344161987305, + "rewards/rejected": -13.164315223693848, + "step": 8716 + }, + { + "epoch": 1.36, + "learning_rate": 7.7539332952954e-06, + "logits/chosen": -1.5930185317993164, + "logits/rejected": -2.6836435794830322, + "logps/chosen": -81.60381317138672, + "logps/rejected": -138.0203857421875, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.827193737030029, + "rewards/margins": 3.7098464965820312, + "rewards/rejected": -9.537040710449219, + "step": 8717 + }, + { + "epoch": 1.36, + "learning_rate": 7.753199854764252e-06, + "logits/chosen": -2.034780740737915, + "logits/rejected": -2.6969566345214844, + "logps/chosen": -128.3357391357422, + "logps/rejected": -296.9068603515625, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4317688941955566, + "rewards/margins": 9.14013671875, + "rewards/rejected": -12.571905136108398, + "step": 8718 + }, + { + "epoch": 1.36, + "learning_rate": 7.752466414233104e-06, + "logits/chosen": -1.8993690013885498, + "logits/rejected": -2.7500412464141846, + "logps/chosen": -144.69223022460938, + "logps/rejected": -346.3612060546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.009415149688721, + "rewards/margins": 9.299732208251953, + "rewards/rejected": -13.309146881103516, + "step": 8719 + }, + { + "epoch": 1.36, + "learning_rate": 7.751732973701956e-06, + "logits/chosen": -3.0320982933044434, + "logits/rejected": -2.600043296813965, + "logps/chosen": -333.50750732421875, + "logps/rejected": -249.23492431640625, + "loss": 0.2448, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.308046340942383, + "rewards/margins": 3.5509374141693115, + "rewards/rejected": -9.858983993530273, + "step": 8720 + }, + { + "epoch": 1.36, + "learning_rate": 7.750999533170807e-06, + "logits/chosen": -1.8482521772384644, + "logits/rejected": -2.5321574211120605, + "logps/chosen": -173.81826782226562, + "logps/rejected": -344.2281188964844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4977145195007324, + "rewards/margins": 13.227571487426758, + "rewards/rejected": -16.725284576416016, + "step": 8721 + }, + { + "epoch": 1.36, + "learning_rate": 7.75026609263966e-06, + "logits/chosen": -2.2931103706359863, + "logits/rejected": -2.657846450805664, + "logps/chosen": -124.29528045654297, + "logps/rejected": -288.08807373046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.346717357635498, + "rewards/margins": 9.648865699768066, + "rewards/rejected": -12.995583534240723, + "step": 8722 + }, + { + "epoch": 1.36, + "learning_rate": 7.749532652108511e-06, + "logits/chosen": -2.9190175533294678, + "logits/rejected": -1.3631603717803955, + "logps/chosen": -581.088134765625, + "logps/rejected": -457.68133544921875, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.323583126068115, + "rewards/margins": 6.523717403411865, + "rewards/rejected": -11.84730052947998, + "step": 8723 + }, + { + "epoch": 1.36, + "learning_rate": 7.748799211577365e-06, + "logits/chosen": -2.2066657543182373, + "logits/rejected": -2.562253475189209, + "logps/chosen": -146.26644897460938, + "logps/rejected": -437.3193359375, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3391358852386475, + "rewards/margins": 4.894048690795898, + "rewards/rejected": -8.233184814453125, + "step": 8724 + }, + { + "epoch": 1.36, + "learning_rate": 7.748065771046217e-06, + "logits/chosen": -2.003108263015747, + "logits/rejected": -2.930332660675049, + "logps/chosen": -637.4594116210938, + "logps/rejected": -473.24871826171875, + "loss": 0.3044, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.414486885070801, + "rewards/margins": 3.933690071105957, + "rewards/rejected": -9.348176956176758, + "step": 8725 + }, + { + "epoch": 1.36, + "learning_rate": 7.747332330515069e-06, + "logits/chosen": -1.6777952909469604, + "logits/rejected": -2.677600622177124, + "logps/chosen": -118.61912536621094, + "logps/rejected": -389.1909484863281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.283469200134277, + "rewards/margins": 9.693055152893066, + "rewards/rejected": -14.976524353027344, + "step": 8726 + }, + { + "epoch": 1.36, + "learning_rate": 7.74659888998392e-06, + "logits/chosen": -1.7990868091583252, + "logits/rejected": -3.063170909881592, + "logps/chosen": -212.9472198486328, + "logps/rejected": -433.75823974609375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.688089370727539, + "rewards/margins": 7.850886344909668, + "rewards/rejected": -10.538975715637207, + "step": 8727 + }, + { + "epoch": 1.36, + "learning_rate": 7.745865449452772e-06, + "logits/chosen": -2.830080509185791, + "logits/rejected": -2.2077925205230713, + "logps/chosen": -616.1402587890625, + "logps/rejected": -481.9866638183594, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.557711601257324, + "rewards/margins": 4.203038215637207, + "rewards/rejected": -10.760749816894531, + "step": 8728 + }, + { + "epoch": 1.36, + "learning_rate": 7.745132008921624e-06, + "logits/chosen": -2.644566297531128, + "logits/rejected": -2.4237990379333496, + "logps/chosen": -254.57815551757812, + "logps/rejected": -225.65611267089844, + "loss": 2.2775, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.330410957336426, + "rewards/margins": 2.521718978881836, + "rewards/rejected": -6.852129936218262, + "step": 8729 + }, + { + "epoch": 1.36, + "learning_rate": 7.744398568390476e-06, + "logits/chosen": -2.5281982421875, + "logits/rejected": -2.6799559593200684, + "logps/chosen": -422.5744934082031, + "logps/rejected": -397.356201171875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.650659084320068, + "rewards/margins": 10.613287925720215, + "rewards/rejected": -15.263946533203125, + "step": 8730 + }, + { + "epoch": 1.36, + "learning_rate": 7.743665127859328e-06, + "logits/chosen": -1.8047250509262085, + "logits/rejected": -2.9862520694732666, + "logps/chosen": -220.81643676757812, + "logps/rejected": -379.8291931152344, + "loss": 0.049, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.81343936920166, + "rewards/margins": 4.32962703704834, + "rewards/rejected": -10.14306640625, + "step": 8731 + }, + { + "epoch": 1.36, + "learning_rate": 7.74293168732818e-06, + "logits/chosen": -2.798560619354248, + "logits/rejected": -2.9164845943450928, + "logps/chosen": -149.76724243164062, + "logps/rejected": -163.97145080566406, + "loss": 0.0668, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.334657192230225, + "rewards/margins": 2.955796957015991, + "rewards/rejected": -9.290453910827637, + "step": 8732 + }, + { + "epoch": 1.36, + "learning_rate": 7.742198246797033e-06, + "logits/chosen": -2.6417038440704346, + "logits/rejected": -2.9858126640319824, + "logps/chosen": -423.85614013671875, + "logps/rejected": -392.8739013671875, + "loss": 0.2561, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.150582313537598, + "rewards/margins": 3.966998338699341, + "rewards/rejected": -9.11758041381836, + "step": 8733 + }, + { + "epoch": 1.36, + "learning_rate": 7.741464806265885e-06, + "logits/chosen": -2.4035983085632324, + "logits/rejected": -2.7569820880889893, + "logps/chosen": -158.00616455078125, + "logps/rejected": -377.26666259765625, + "loss": 0.0588, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3711304664611816, + "rewards/margins": 4.854607105255127, + "rewards/rejected": -8.225737571716309, + "step": 8734 + }, + { + "epoch": 1.36, + "learning_rate": 7.740731365734737e-06, + "logits/chosen": -3.0177347660064697, + "logits/rejected": -3.042945623397827, + "logps/chosen": -422.6136169433594, + "logps/rejected": -630.1981201171875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.19818115234375, + "rewards/margins": 7.082815647125244, + "rewards/rejected": -11.280996322631836, + "step": 8735 + }, + { + "epoch": 1.36, + "learning_rate": 7.739997925203589e-06, + "logits/chosen": -1.794250726699829, + "logits/rejected": -2.855194568634033, + "logps/chosen": -277.3629150390625, + "logps/rejected": -366.8535461425781, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.683077812194824, + "rewards/margins": 5.190877914428711, + "rewards/rejected": -9.873956680297852, + "step": 8736 + }, + { + "epoch": 1.36, + "learning_rate": 7.739264484672441e-06, + "logits/chosen": -1.796531319618225, + "logits/rejected": -2.228330373764038, + "logps/chosen": -104.07669830322266, + "logps/rejected": -437.3934326171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1680426597595215, + "rewards/margins": 8.829096794128418, + "rewards/rejected": -13.997138977050781, + "step": 8737 + }, + { + "epoch": 1.36, + "learning_rate": 7.738531044141293e-06, + "logits/chosen": -0.7093656659126282, + "logits/rejected": -2.209916830062866, + "logps/chosen": -189.55157470703125, + "logps/rejected": -505.390380859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.703009605407715, + "rewards/margins": 10.862648010253906, + "rewards/rejected": -13.565656661987305, + "step": 8738 + }, + { + "epoch": 1.36, + "learning_rate": 7.737797603610145e-06, + "logits/chosen": -2.7380824089050293, + "logits/rejected": -2.6107614040374756, + "logps/chosen": -116.3590087890625, + "logps/rejected": -281.94366455078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.210569381713867, + "rewards/margins": 9.959566116333008, + "rewards/rejected": -13.170135498046875, + "step": 8739 + }, + { + "epoch": 1.36, + "learning_rate": 7.737064163078997e-06, + "logits/chosen": -2.071662664413452, + "logits/rejected": -2.4999866485595703, + "logps/chosen": -144.7358856201172, + "logps/rejected": -208.5977783203125, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.998778343200684, + "rewards/margins": 6.555215835571289, + "rewards/rejected": -12.553994178771973, + "step": 8740 + }, + { + "epoch": 1.36, + "learning_rate": 7.73633072254785e-06, + "logits/chosen": -2.008249282836914, + "logits/rejected": -3.007495641708374, + "logps/chosen": -236.289794921875, + "logps/rejected": -483.1280517578125, + "loss": 0.2873, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.607903957366943, + "rewards/margins": 5.786494731903076, + "rewards/rejected": -11.39439868927002, + "step": 8741 + }, + { + "epoch": 1.36, + "learning_rate": 7.735597282016702e-06, + "logits/chosen": -1.9946309328079224, + "logits/rejected": -3.1115498542785645, + "logps/chosen": -146.8089599609375, + "logps/rejected": -352.8467712402344, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9616451263427734, + "rewards/margins": 7.711899757385254, + "rewards/rejected": -11.673544883728027, + "step": 8742 + }, + { + "epoch": 1.36, + "learning_rate": 7.734863841485556e-06, + "logits/chosen": -2.5762956142425537, + "logits/rejected": -2.705820322036743, + "logps/chosen": -363.34503173828125, + "logps/rejected": -333.426513671875, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.561209201812744, + "rewards/margins": 5.1033549308776855, + "rewards/rejected": -12.66456413269043, + "step": 8743 + }, + { + "epoch": 1.36, + "learning_rate": 7.734130400954407e-06, + "logits/chosen": -2.7119193077087402, + "logits/rejected": -3.0496878623962402, + "logps/chosen": -49.20069885253906, + "logps/rejected": -301.95989990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7889907360076904, + "rewards/margins": 10.121347427368164, + "rewards/rejected": -12.91033935546875, + "step": 8744 + }, + { + "epoch": 1.36, + "learning_rate": 7.73339696042326e-06, + "logits/chosen": -0.8523827195167542, + "logits/rejected": -2.5372908115386963, + "logps/chosen": -153.83059692382812, + "logps/rejected": -546.3151245117188, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8226760625839233, + "rewards/margins": 13.647249221801758, + "rewards/rejected": -15.469924926757812, + "step": 8745 + }, + { + "epoch": 1.36, + "learning_rate": 7.732663519892111e-06, + "logits/chosen": -1.8989115953445435, + "logits/rejected": -2.6080193519592285, + "logps/chosen": -158.31048583984375, + "logps/rejected": -414.38916015625, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8340277671813965, + "rewards/margins": 9.69216537475586, + "rewards/rejected": -12.526193618774414, + "step": 8746 + }, + { + "epoch": 1.36, + "learning_rate": 7.731930079360963e-06, + "logits/chosen": -1.7994873523712158, + "logits/rejected": -2.8591487407684326, + "logps/chosen": -546.0654296875, + "logps/rejected": -671.6529541015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.32786750793457, + "rewards/margins": 9.844573020935059, + "rewards/rejected": -14.172439575195312, + "step": 8747 + }, + { + "epoch": 1.36, + "learning_rate": 7.731196638829815e-06, + "logits/chosen": -2.953941822052002, + "logits/rejected": -2.176426649093628, + "logps/chosen": -226.03811645507812, + "logps/rejected": -168.41468811035156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0586752891540527, + "rewards/margins": 8.364415168762207, + "rewards/rejected": -11.423089981079102, + "step": 8748 + }, + { + "epoch": 1.36, + "learning_rate": 7.730463198298667e-06, + "logits/chosen": -2.6987102031707764, + "logits/rejected": -3.049611806869507, + "logps/chosen": -112.89762878417969, + "logps/rejected": -349.1343994140625, + "loss": 0.2658, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9823410511016846, + "rewards/margins": 2.1520707607269287, + "rewards/rejected": -6.134411811828613, + "step": 8749 + }, + { + "epoch": 1.36, + "learning_rate": 7.729729757767519e-06, + "logits/chosen": -2.6617417335510254, + "logits/rejected": -3.0419321060180664, + "logps/chosen": -75.50650787353516, + "logps/rejected": -156.85760498046875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.537806749343872, + "rewards/margins": 6.74777889251709, + "rewards/rejected": -10.285585403442383, + "step": 8750 + }, + { + "epoch": 1.36, + "learning_rate": 7.728996317236372e-06, + "logits/chosen": -2.1004843711853027, + "logits/rejected": -2.9495930671691895, + "logps/chosen": -83.86078643798828, + "logps/rejected": -329.938232421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0621485710144043, + "rewards/margins": 8.882389068603516, + "rewards/rejected": -11.944538116455078, + "step": 8751 + }, + { + "epoch": 1.36, + "learning_rate": 7.728262876705224e-06, + "logits/chosen": -2.643350601196289, + "logits/rejected": -2.851902961730957, + "logps/chosen": -227.24192810058594, + "logps/rejected": -278.876953125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2113542556762695, + "rewards/margins": 7.561098098754883, + "rewards/rejected": -12.772453308105469, + "step": 8752 + }, + { + "epoch": 1.36, + "learning_rate": 7.727529436174076e-06, + "logits/chosen": -2.109035015106201, + "logits/rejected": -2.7190704345703125, + "logps/chosen": -397.6732177734375, + "logps/rejected": -549.0986328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0412979125976562, + "rewards/margins": 10.04582405090332, + "rewards/rejected": -13.087121963500977, + "step": 8753 + }, + { + "epoch": 1.36, + "learning_rate": 7.726795995642928e-06, + "logits/chosen": -3.1025280952453613, + "logits/rejected": -2.9534404277801514, + "logps/chosen": -556.4031982421875, + "logps/rejected": -356.704345703125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2495226860046387, + "rewards/margins": 8.439508438110352, + "rewards/rejected": -10.689031600952148, + "step": 8754 + }, + { + "epoch": 1.36, + "learning_rate": 7.72606255511178e-06, + "logits/chosen": -2.4131088256835938, + "logits/rejected": -2.878777265548706, + "logps/chosen": -110.50096130371094, + "logps/rejected": -537.4691162109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.974391460418701, + "rewards/margins": 10.054591178894043, + "rewards/rejected": -14.028982162475586, + "step": 8755 + }, + { + "epoch": 1.36, + "learning_rate": 7.725329114580632e-06, + "logits/chosen": -2.4893178939819336, + "logits/rejected": -2.947939395904541, + "logps/chosen": -170.52047729492188, + "logps/rejected": -424.06317138671875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.396513938903809, + "rewards/margins": 12.955801010131836, + "rewards/rejected": -18.35231590270996, + "step": 8756 + }, + { + "epoch": 1.36, + "learning_rate": 7.724595674049484e-06, + "logits/chosen": -2.9172255992889404, + "logits/rejected": -2.950472593307495, + "logps/chosen": -102.9574966430664, + "logps/rejected": -170.51197814941406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.358749866485596, + "rewards/margins": 8.258444786071777, + "rewards/rejected": -12.617195129394531, + "step": 8757 + }, + { + "epoch": 1.36, + "learning_rate": 7.723862233518335e-06, + "logits/chosen": -1.7403219938278198, + "logits/rejected": -2.32470965385437, + "logps/chosen": -97.46863555908203, + "logps/rejected": -400.9937744140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7375152111053467, + "rewards/margins": 12.454005241394043, + "rewards/rejected": -15.191520690917969, + "step": 8758 + }, + { + "epoch": 1.36, + "learning_rate": 7.723128792987187e-06, + "logits/chosen": -2.7254562377929688, + "logits/rejected": -3.045050621032715, + "logps/chosen": -714.093017578125, + "logps/rejected": -668.8483276367188, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.459996700286865, + "rewards/margins": 6.990657806396484, + "rewards/rejected": -11.450654029846191, + "step": 8759 + }, + { + "epoch": 1.36, + "learning_rate": 7.722395352456041e-06, + "logits/chosen": -2.7071053981781006, + "logits/rejected": -3.1124398708343506, + "logps/chosen": -72.33658599853516, + "logps/rejected": -227.95571899414062, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8752012252807617, + "rewards/margins": 7.720427989959717, + "rewards/rejected": -10.59562873840332, + "step": 8760 + }, + { + "epoch": 1.36, + "learning_rate": 7.721661911924893e-06, + "logits/chosen": -1.793499231338501, + "logits/rejected": -2.7895846366882324, + "logps/chosen": -236.71937561035156, + "logps/rejected": -300.867919921875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.118806838989258, + "rewards/margins": 7.705817222595215, + "rewards/rejected": -11.824624061584473, + "step": 8761 + }, + { + "epoch": 1.36, + "learning_rate": 7.720928471393745e-06, + "logits/chosen": -2.2070095539093018, + "logits/rejected": -2.6240394115448, + "logps/chosen": -146.20654296875, + "logps/rejected": -207.72132873535156, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.545600891113281, + "rewards/margins": 6.389656066894531, + "rewards/rejected": -12.935256958007812, + "step": 8762 + }, + { + "epoch": 1.36, + "learning_rate": 7.720195030862597e-06, + "logits/chosen": -1.4462003707885742, + "logits/rejected": -2.9879236221313477, + "logps/chosen": -108.5496597290039, + "logps/rejected": -422.3075256347656, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.042100429534912, + "rewards/margins": 9.05999755859375, + "rewards/rejected": -12.102097511291504, + "step": 8763 + }, + { + "epoch": 1.36, + "learning_rate": 7.719461590331448e-06, + "logits/chosen": -2.3666985034942627, + "logits/rejected": -2.3258862495422363, + "logps/chosen": -128.24964904785156, + "logps/rejected": -220.38821411132812, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.78972864151001, + "rewards/margins": 4.223147392272949, + "rewards/rejected": -12.012876510620117, + "step": 8764 + }, + { + "epoch": 1.36, + "learning_rate": 7.7187281498003e-06, + "logits/chosen": -1.8015726804733276, + "logits/rejected": -2.779808521270752, + "logps/chosen": -83.86961364746094, + "logps/rejected": -418.1034240722656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.707520008087158, + "rewards/margins": 11.493938446044922, + "rewards/rejected": -16.201457977294922, + "step": 8765 + }, + { + "epoch": 1.36, + "learning_rate": 7.717994709269152e-06, + "logits/chosen": -2.2947838306427, + "logits/rejected": -2.753932237625122, + "logps/chosen": -308.5281982421875, + "logps/rejected": -363.6275634765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.554059028625488, + "rewards/margins": 8.047799110412598, + "rewards/rejected": -12.601858139038086, + "step": 8766 + }, + { + "epoch": 1.36, + "learning_rate": 7.717261268738004e-06, + "logits/chosen": -2.537332773208618, + "logits/rejected": -2.757882833480835, + "logps/chosen": -270.12591552734375, + "logps/rejected": -385.28546142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.726283311843872, + "rewards/margins": 11.859950065612793, + "rewards/rejected": -14.586233139038086, + "step": 8767 + }, + { + "epoch": 1.36, + "learning_rate": 7.716527828206858e-06, + "logits/chosen": -2.812671661376953, + "logits/rejected": -3.0567362308502197, + "logps/chosen": -537.614501953125, + "logps/rejected": -524.6749267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.877316951751709, + "rewards/margins": 11.696365356445312, + "rewards/rejected": -16.57368278503418, + "step": 8768 + }, + { + "epoch": 1.36, + "learning_rate": 7.71579438767571e-06, + "logits/chosen": -1.6109009981155396, + "logits/rejected": -2.3668344020843506, + "logps/chosen": -169.76409912109375, + "logps/rejected": -457.869873046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.197138786315918, + "rewards/margins": 10.186491966247559, + "rewards/rejected": -15.383630752563477, + "step": 8769 + }, + { + "epoch": 1.36, + "learning_rate": 7.715060947144561e-06, + "logits/chosen": -2.954822063446045, + "logits/rejected": -3.0094211101531982, + "logps/chosen": -121.98179626464844, + "logps/rejected": -224.3794708251953, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.209591865539551, + "rewards/margins": 5.996700286865234, + "rewards/rejected": -10.206293106079102, + "step": 8770 + }, + { + "epoch": 1.36, + "learning_rate": 7.714327506613413e-06, + "logits/chosen": -2.0629308223724365, + "logits/rejected": -2.690357208251953, + "logps/chosen": -141.16934204101562, + "logps/rejected": -354.6162109375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.573503017425537, + "rewards/margins": 7.9452805519104, + "rewards/rejected": -13.518783569335938, + "step": 8771 + }, + { + "epoch": 1.36, + "learning_rate": 7.713594066082265e-06, + "logits/chosen": -1.9402896165847778, + "logits/rejected": -3.0261542797088623, + "logps/chosen": -199.61683654785156, + "logps/rejected": -680.4697265625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.578490734100342, + "rewards/margins": 10.446128845214844, + "rewards/rejected": -14.024619102478027, + "step": 8772 + }, + { + "epoch": 1.36, + "learning_rate": 7.712860625551117e-06, + "logits/chosen": -2.506618022918701, + "logits/rejected": -2.9120543003082275, + "logps/chosen": -308.30206298828125, + "logps/rejected": -414.030517578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4890644550323486, + "rewards/margins": 9.546285629272461, + "rewards/rejected": -13.035350799560547, + "step": 8773 + }, + { + "epoch": 1.36, + "learning_rate": 7.712127185019969e-06, + "logits/chosen": -2.050617218017578, + "logits/rejected": -2.9889352321624756, + "logps/chosen": -131.9178924560547, + "logps/rejected": -330.29132080078125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.318198204040527, + "rewards/margins": 6.96681022644043, + "rewards/rejected": -12.285008430480957, + "step": 8774 + }, + { + "epoch": 1.36, + "learning_rate": 7.711393744488822e-06, + "logits/chosen": -2.978084087371826, + "logits/rejected": -2.2490179538726807, + "logps/chosen": -252.14697265625, + "logps/rejected": -254.10354614257812, + "loss": 0.2586, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.642569541931152, + "rewards/margins": 4.415493965148926, + "rewards/rejected": -11.058063507080078, + "step": 8775 + }, + { + "epoch": 1.36, + "learning_rate": 7.710660303957674e-06, + "logits/chosen": -2.4046285152435303, + "logits/rejected": -2.701559543609619, + "logps/chosen": -211.88442993164062, + "logps/rejected": -269.81170654296875, + "loss": 3.2432, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.400107383728027, + "rewards/margins": 0.3713340759277344, + "rewards/rejected": -10.771441459655762, + "step": 8776 + }, + { + "epoch": 1.37, + "learning_rate": 7.709926863426528e-06, + "logits/chosen": -1.842239260673523, + "logits/rejected": -2.4932990074157715, + "logps/chosen": -116.33486938476562, + "logps/rejected": -365.3031005859375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.373069763183594, + "rewards/margins": 10.114826202392578, + "rewards/rejected": -14.487895965576172, + "step": 8777 + }, + { + "epoch": 1.37, + "learning_rate": 7.70919342289538e-06, + "logits/chosen": -2.0912022590637207, + "logits/rejected": -2.590158700942993, + "logps/chosen": -189.9530792236328, + "logps/rejected": -399.4523620605469, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.40113639831543, + "rewards/margins": 6.569994926452637, + "rewards/rejected": -10.971132278442383, + "step": 8778 + }, + { + "epoch": 1.37, + "learning_rate": 7.708459982364232e-06, + "logits/chosen": -2.8246448040008545, + "logits/rejected": -1.4126840829849243, + "logps/chosen": -353.760986328125, + "logps/rejected": -184.82041931152344, + "loss": 0.4865, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.439102172851562, + "rewards/margins": 4.487384796142578, + "rewards/rejected": -12.92648696899414, + "step": 8779 + }, + { + "epoch": 1.37, + "learning_rate": 7.707726541833084e-06, + "logits/chosen": -2.3921549320220947, + "logits/rejected": -2.8861889839172363, + "logps/chosen": -130.8563690185547, + "logps/rejected": -181.43389892578125, + "loss": 0.5378, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.150989532470703, + "rewards/margins": 2.3061206340789795, + "rewards/rejected": -10.457109451293945, + "step": 8780 + }, + { + "epoch": 1.37, + "learning_rate": 7.706993101301935e-06, + "logits/chosen": -1.6987764835357666, + "logits/rejected": -2.8056674003601074, + "logps/chosen": -122.49610900878906, + "logps/rejected": -351.844970703125, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.423694610595703, + "rewards/margins": 5.159736156463623, + "rewards/rejected": -10.583430290222168, + "step": 8781 + }, + { + "epoch": 1.37, + "learning_rate": 7.706259660770787e-06, + "logits/chosen": -2.777029275894165, + "logits/rejected": -2.1618943214416504, + "logps/chosen": -220.05760192871094, + "logps/rejected": -231.7732696533203, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.945639133453369, + "rewards/margins": 7.033108711242676, + "rewards/rejected": -9.978747367858887, + "step": 8782 + }, + { + "epoch": 1.37, + "learning_rate": 7.70552622023964e-06, + "logits/chosen": -2.825204849243164, + "logits/rejected": -2.0765268802642822, + "logps/chosen": -221.564453125, + "logps/rejected": -151.46774291992188, + "loss": 1.422, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.75722599029541, + "rewards/margins": 2.521085739135742, + "rewards/rejected": -6.278311729431152, + "step": 8783 + }, + { + "epoch": 1.37, + "learning_rate": 7.704792779708491e-06, + "logits/chosen": -2.822589159011841, + "logits/rejected": -2.796973943710327, + "logps/chosen": -295.6912841796875, + "logps/rejected": -350.7232666015625, + "loss": 0.1977, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.095109939575195, + "rewards/margins": 6.664358139038086, + "rewards/rejected": -14.759468078613281, + "step": 8784 + }, + { + "epoch": 1.37, + "learning_rate": 7.704059339177343e-06, + "logits/chosen": -2.7851173877716064, + "logits/rejected": -2.7417776584625244, + "logps/chosen": -229.39163208007812, + "logps/rejected": -302.1689758300781, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.890723466873169, + "rewards/margins": 6.8662567138671875, + "rewards/rejected": -10.756980895996094, + "step": 8785 + }, + { + "epoch": 1.37, + "learning_rate": 7.703325898646196e-06, + "logits/chosen": -2.8317527770996094, + "logits/rejected": -2.911297559738159, + "logps/chosen": -141.94430541992188, + "logps/rejected": -396.81036376953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3328094482421875, + "rewards/margins": 10.606857299804688, + "rewards/rejected": -16.939666748046875, + "step": 8786 + }, + { + "epoch": 1.37, + "learning_rate": 7.702592458115048e-06, + "logits/chosen": -2.104149341583252, + "logits/rejected": -2.503706693649292, + "logps/chosen": -111.82119750976562, + "logps/rejected": -246.93190002441406, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.69601583480835, + "rewards/margins": 7.194058895111084, + "rewards/rejected": -12.890074729919434, + "step": 8787 + }, + { + "epoch": 1.37, + "learning_rate": 7.7018590175839e-06, + "logits/chosen": -1.4639390707015991, + "logits/rejected": -2.880038022994995, + "logps/chosen": -148.83673095703125, + "logps/rejected": -381.725341796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2306976318359375, + "rewards/margins": 12.146549224853516, + "rewards/rejected": -17.377246856689453, + "step": 8788 + }, + { + "epoch": 1.37, + "learning_rate": 7.701125577052752e-06, + "logits/chosen": -2.552419900894165, + "logits/rejected": -2.873399257659912, + "logps/chosen": -135.9541778564453, + "logps/rejected": -349.3255615234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.96318244934082, + "rewards/margins": 9.37230396270752, + "rewards/rejected": -14.33548641204834, + "step": 8789 + }, + { + "epoch": 1.37, + "learning_rate": 7.700392136521604e-06, + "logits/chosen": -2.7593297958374023, + "logits/rejected": -2.526909351348877, + "logps/chosen": -326.52496337890625, + "logps/rejected": -257.72216796875, + "loss": 0.9442, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.026655197143555, + "rewards/margins": 2.6109442710876465, + "rewards/rejected": -9.637598991394043, + "step": 8790 + }, + { + "epoch": 1.37, + "learning_rate": 7.699658695990456e-06, + "logits/chosen": -3.0416388511657715, + "logits/rejected": -3.033151149749756, + "logps/chosen": -290.6951904296875, + "logps/rejected": -307.44744873046875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.307468891143799, + "rewards/margins": 7.568220138549805, + "rewards/rejected": -9.875688552856445, + "step": 8791 + }, + { + "epoch": 1.37, + "learning_rate": 7.698925255459308e-06, + "logits/chosen": -2.5525856018066406, + "logits/rejected": -2.8961596488952637, + "logps/chosen": -249.79150390625, + "logps/rejected": -369.86419677734375, + "loss": 0.6472, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.8188347816467285, + "rewards/margins": 4.541576385498047, + "rewards/rejected": -10.360411643981934, + "step": 8792 + }, + { + "epoch": 1.37, + "learning_rate": 7.69819181492816e-06, + "logits/chosen": -1.101639986038208, + "logits/rejected": -2.7484683990478516, + "logps/chosen": -123.90680694580078, + "logps/rejected": -232.34619140625, + "loss": 2.1993, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.616474151611328, + "rewards/margins": 2.9746618270874023, + "rewards/rejected": -10.59113597869873, + "step": 8793 + }, + { + "epoch": 1.37, + "learning_rate": 7.697458374397012e-06, + "logits/chosen": -2.2715072631835938, + "logits/rejected": -2.853943347930908, + "logps/chosen": -246.66900634765625, + "logps/rejected": -258.564208984375, + "loss": 0.9675, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.6039533615112305, + "rewards/margins": 4.529360771179199, + "rewards/rejected": -11.13331413269043, + "step": 8794 + }, + { + "epoch": 1.37, + "learning_rate": 7.696724933865865e-06, + "logits/chosen": -2.8547966480255127, + "logits/rejected": -2.908867359161377, + "logps/chosen": -126.55633544921875, + "logps/rejected": -469.7547912597656, + "loss": 0.4857, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.976198196411133, + "rewards/margins": 8.74708366394043, + "rewards/rejected": -14.723281860351562, + "step": 8795 + }, + { + "epoch": 1.37, + "learning_rate": 7.695991493334717e-06, + "logits/chosen": -1.4310100078582764, + "logits/rejected": -2.6590240001678467, + "logps/chosen": -165.8777313232422, + "logps/rejected": -577.6876220703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7409019470214844, + "rewards/margins": 13.195722579956055, + "rewards/rejected": -16.93662452697754, + "step": 8796 + }, + { + "epoch": 1.37, + "learning_rate": 7.695258052803569e-06, + "logits/chosen": -1.9534739255905151, + "logits/rejected": -2.8712656497955322, + "logps/chosen": -433.68804931640625, + "logps/rejected": -470.00286865234375, + "loss": 1.0614, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.706083297729492, + "rewards/margins": 2.3827953338623047, + "rewards/rejected": -10.088878631591797, + "step": 8797 + }, + { + "epoch": 1.37, + "learning_rate": 7.69452461227242e-06, + "logits/chosen": -2.7206122875213623, + "logits/rejected": -2.357731819152832, + "logps/chosen": -118.583984375, + "logps/rejected": -227.85162353515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.654146671295166, + "rewards/margins": 8.883211135864258, + "rewards/rejected": -14.537357330322266, + "step": 8798 + }, + { + "epoch": 1.37, + "learning_rate": 7.693791171741273e-06, + "logits/chosen": -2.8605895042419434, + "logits/rejected": -2.1849822998046875, + "logps/chosen": -433.1819763183594, + "logps/rejected": -370.5819396972656, + "loss": 0.9957, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.955710411071777, + "rewards/margins": 3.1243534088134766, + "rewards/rejected": -12.080063819885254, + "step": 8799 + }, + { + "epoch": 1.37, + "learning_rate": 7.693057731210124e-06, + "logits/chosen": -2.955601692199707, + "logits/rejected": -2.544267177581787, + "logps/chosen": -232.5213623046875, + "logps/rejected": -250.56838989257812, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9273147583007812, + "rewards/margins": 6.596458911895752, + "rewards/rejected": -10.523773193359375, + "step": 8800 + }, + { + "epoch": 1.37, + "learning_rate": 7.692324290678976e-06, + "logits/chosen": -1.8326956033706665, + "logits/rejected": -2.6751227378845215, + "logps/chosen": -195.564208984375, + "logps/rejected": -226.63278198242188, + "loss": 0.7173, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.384540557861328, + "rewards/margins": 5.494431495666504, + "rewards/rejected": -11.878972053527832, + "step": 8801 + }, + { + "epoch": 1.37, + "learning_rate": 7.691590850147828e-06, + "logits/chosen": -2.338670015335083, + "logits/rejected": -2.8382394313812256, + "logps/chosen": -202.6412353515625, + "logps/rejected": -354.2163391113281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7097290754318237, + "rewards/margins": 12.531898498535156, + "rewards/rejected": -14.241626739501953, + "step": 8802 + }, + { + "epoch": 1.37, + "learning_rate": 7.69085740961668e-06, + "logits/chosen": -2.835402011871338, + "logits/rejected": -1.9614746570587158, + "logps/chosen": -171.12022399902344, + "logps/rejected": -183.90951538085938, + "loss": 1.1301, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.698755264282227, + "rewards/margins": 1.2027499675750732, + "rewards/rejected": -7.901505470275879, + "step": 8803 + }, + { + "epoch": 1.37, + "learning_rate": 7.690123969085534e-06, + "logits/chosen": -2.781498670578003, + "logits/rejected": -2.838815212249756, + "logps/chosen": -162.0931396484375, + "logps/rejected": -220.38804626464844, + "loss": 2.3814, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.985793590545654, + "rewards/margins": 2.7646806240081787, + "rewards/rejected": -8.750473976135254, + "step": 8804 + }, + { + "epoch": 1.37, + "learning_rate": 7.689390528554386e-06, + "logits/chosen": -0.4970172941684723, + "logits/rejected": -2.127512216567993, + "logps/chosen": -78.17984008789062, + "logps/rejected": -458.5888671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0061798095703125, + "rewards/margins": 9.175952911376953, + "rewards/rejected": -14.182132720947266, + "step": 8805 + }, + { + "epoch": 1.37, + "learning_rate": 7.688657088023237e-06, + "logits/chosen": -1.694420337677002, + "logits/rejected": -2.507819175720215, + "logps/chosen": -59.16114807128906, + "logps/rejected": -267.1837158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2170608043670654, + "rewards/margins": 10.044875144958496, + "rewards/rejected": -12.26193618774414, + "step": 8806 + }, + { + "epoch": 1.37, + "learning_rate": 7.68792364749209e-06, + "logits/chosen": -2.9952473640441895, + "logits/rejected": -2.2488300800323486, + "logps/chosen": -388.7596740722656, + "logps/rejected": -482.5606689453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4708688259124756, + "rewards/margins": 8.386356353759766, + "rewards/rejected": -11.85722541809082, + "step": 8807 + }, + { + "epoch": 1.37, + "learning_rate": 7.687190206960941e-06, + "logits/chosen": -2.6748528480529785, + "logits/rejected": -2.9118387699127197, + "logps/chosen": -230.20916748046875, + "logps/rejected": -297.6580505371094, + "loss": 0.9724, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.408390998840332, + "rewards/margins": 0.7442152500152588, + "rewards/rejected": -8.152606010437012, + "step": 8808 + }, + { + "epoch": 1.37, + "learning_rate": 7.686456766429795e-06, + "logits/chosen": -2.890284776687622, + "logits/rejected": -2.2932629585266113, + "logps/chosen": -307.7029724121094, + "logps/rejected": -226.7687225341797, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.971884250640869, + "rewards/margins": 5.4822797775268555, + "rewards/rejected": -9.454164505004883, + "step": 8809 + }, + { + "epoch": 1.37, + "learning_rate": 7.685723325898647e-06, + "logits/chosen": -2.719886541366577, + "logits/rejected": -2.293820381164551, + "logps/chosen": -307.90887451171875, + "logps/rejected": -342.0853271484375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.944729804992676, + "rewards/margins": 6.667106628417969, + "rewards/rejected": -11.611836433410645, + "step": 8810 + }, + { + "epoch": 1.37, + "learning_rate": 7.684989885367499e-06, + "logits/chosen": -2.136927843093872, + "logits/rejected": -2.8992538452148438, + "logps/chosen": -165.14865112304688, + "logps/rejected": -540.2128295898438, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7894911766052246, + "rewards/margins": 7.6867523193359375, + "rewards/rejected": -11.47624397277832, + "step": 8811 + }, + { + "epoch": 1.37, + "learning_rate": 7.68425644483635e-06, + "logits/chosen": -2.477519989013672, + "logits/rejected": -2.907379150390625, + "logps/chosen": -244.706787109375, + "logps/rejected": -406.0748291015625, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.01656436920166, + "rewards/margins": 7.356207370758057, + "rewards/rejected": -11.372772216796875, + "step": 8812 + }, + { + "epoch": 1.37, + "learning_rate": 7.683523004305204e-06, + "logits/chosen": -2.7333574295043945, + "logits/rejected": -2.8583245277404785, + "logps/chosen": -528.6573486328125, + "logps/rejected": -423.11883544921875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.067093849182129, + "rewards/margins": 9.575113296508789, + "rewards/rejected": -13.642208099365234, + "step": 8813 + }, + { + "epoch": 1.37, + "learning_rate": 7.682789563774056e-06, + "logits/chosen": -1.933510184288025, + "logits/rejected": -3.0735199451446533, + "logps/chosen": -339.35223388671875, + "logps/rejected": -335.0920104980469, + "loss": 3.4738, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.828920364379883, + "rewards/margins": -0.742687463760376, + "rewards/rejected": -8.086233139038086, + "step": 8814 + }, + { + "epoch": 1.37, + "learning_rate": 7.682056123242908e-06, + "logits/chosen": -2.5738182067871094, + "logits/rejected": -2.9619507789611816, + "logps/chosen": -56.50608825683594, + "logps/rejected": -316.3940124511719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.511663913726807, + "rewards/margins": 10.362716674804688, + "rewards/rejected": -14.874380111694336, + "step": 8815 + }, + { + "epoch": 1.37, + "learning_rate": 7.68132268271176e-06, + "logits/chosen": -1.9376102685928345, + "logits/rejected": -2.755394220352173, + "logps/chosen": -65.31053161621094, + "logps/rejected": -219.1624755859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.778546094894409, + "rewards/margins": 8.070632934570312, + "rewards/rejected": -10.8491792678833, + "step": 8816 + }, + { + "epoch": 1.37, + "learning_rate": 7.680589242180611e-06, + "logits/chosen": -2.6949024200439453, + "logits/rejected": -2.316704034805298, + "logps/chosen": -347.9443359375, + "logps/rejected": -243.26901245117188, + "loss": 1.2417, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.743387222290039, + "rewards/margins": 1.7289981842041016, + "rewards/rejected": -8.47238540649414, + "step": 8817 + }, + { + "epoch": 1.37, + "learning_rate": 7.679855801649463e-06, + "logits/chosen": -1.9736769199371338, + "logits/rejected": -2.917994260787964, + "logps/chosen": -127.98564147949219, + "logps/rejected": -412.2506103515625, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.888291358947754, + "rewards/margins": 8.864548683166504, + "rewards/rejected": -16.752840042114258, + "step": 8818 + }, + { + "epoch": 1.37, + "learning_rate": 7.679122361118315e-06, + "logits/chosen": -2.626107692718506, + "logits/rejected": -2.891004800796509, + "logps/chosen": -380.8541564941406, + "logps/rejected": -438.3822937011719, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.451375961303711, + "rewards/margins": 4.836159706115723, + "rewards/rejected": -10.287534713745117, + "step": 8819 + }, + { + "epoch": 1.37, + "learning_rate": 7.678388920587167e-06, + "logits/chosen": -2.9647269248962402, + "logits/rejected": -2.2968811988830566, + "logps/chosen": -295.8232727050781, + "logps/rejected": -448.9609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.544538497924805, + "rewards/margins": 11.95536994934082, + "rewards/rejected": -17.499908447265625, + "step": 8820 + }, + { + "epoch": 1.37, + "learning_rate": 7.677655480056019e-06, + "logits/chosen": -2.1980788707733154, + "logits/rejected": -2.988675117492676, + "logps/chosen": -135.91995239257812, + "logps/rejected": -376.7984924316406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5531487464904785, + "rewards/margins": 8.456528663635254, + "rewards/rejected": -13.00967788696289, + "step": 8821 + }, + { + "epoch": 1.37, + "learning_rate": 7.676922039524873e-06, + "logits/chosen": -0.9421169757843018, + "logits/rejected": -2.8423357009887695, + "logps/chosen": -198.19580078125, + "logps/rejected": -549.3040771484375, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.381490707397461, + "rewards/margins": 5.353974342346191, + "rewards/rejected": -10.735464096069336, + "step": 8822 + }, + { + "epoch": 1.37, + "learning_rate": 7.676188598993724e-06, + "logits/chosen": -1.6798278093338013, + "logits/rejected": -2.6655099391937256, + "logps/chosen": -139.18740844726562, + "logps/rejected": -306.33721923828125, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.269272804260254, + "rewards/margins": 5.489343643188477, + "rewards/rejected": -12.758617401123047, + "step": 8823 + }, + { + "epoch": 1.37, + "learning_rate": 7.675455158462576e-06, + "logits/chosen": -2.731999397277832, + "logits/rejected": -2.125795364379883, + "logps/chosen": -161.28778076171875, + "logps/rejected": -119.08311462402344, + "loss": 2.6063, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.936713218688965, + "rewards/margins": 0.5665485858917236, + "rewards/rejected": -7.503261566162109, + "step": 8824 + }, + { + "epoch": 1.37, + "learning_rate": 7.674721717931428e-06, + "logits/chosen": -2.7711267471313477, + "logits/rejected": -2.143345594406128, + "logps/chosen": -405.55548095703125, + "logps/rejected": -303.77069091796875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.831455230712891, + "rewards/margins": 7.932711601257324, + "rewards/rejected": -12.764167785644531, + "step": 8825 + }, + { + "epoch": 1.37, + "learning_rate": 7.67398827740028e-06, + "logits/chosen": -2.797478675842285, + "logits/rejected": -2.746354341506958, + "logps/chosen": -295.3211975097656, + "logps/rejected": -285.55242919921875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.956883907318115, + "rewards/margins": 6.3205180168151855, + "rewards/rejected": -12.2774019241333, + "step": 8826 + }, + { + "epoch": 1.37, + "learning_rate": 7.673254836869132e-06, + "logits/chosen": -2.0719053745269775, + "logits/rejected": -2.856722116470337, + "logps/chosen": -77.31845092773438, + "logps/rejected": -350.51995849609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.689855575561523, + "rewards/margins": 10.074406623840332, + "rewards/rejected": -14.764262199401855, + "step": 8827 + }, + { + "epoch": 1.37, + "learning_rate": 7.672521396337984e-06, + "logits/chosen": -2.5385031700134277, + "logits/rejected": -2.8572611808776855, + "logps/chosen": -172.89044189453125, + "logps/rejected": -322.563720703125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.394126892089844, + "rewards/margins": 7.629877090454102, + "rewards/rejected": -13.024003982543945, + "step": 8828 + }, + { + "epoch": 1.37, + "learning_rate": 7.671787955806836e-06, + "logits/chosen": -1.4041856527328491, + "logits/rejected": -2.9705281257629395, + "logps/chosen": -114.26834106445312, + "logps/rejected": -564.5208740234375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.516294479370117, + "rewards/margins": 8.483495712280273, + "rewards/rejected": -15.99979019165039, + "step": 8829 + }, + { + "epoch": 1.37, + "learning_rate": 7.671054515275688e-06, + "logits/chosen": -2.6632883548736572, + "logits/rejected": -2.2041175365448, + "logps/chosen": -380.79632568359375, + "logps/rejected": -372.7689208984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.078155517578125, + "rewards/margins": 11.484712600708008, + "rewards/rejected": -14.56286907196045, + "step": 8830 + }, + { + "epoch": 1.37, + "learning_rate": 7.670321074744541e-06, + "logits/chosen": -2.8275272846221924, + "logits/rejected": -2.8407235145568848, + "logps/chosen": -161.28924560546875, + "logps/rejected": -196.1838836669922, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7072854042053223, + "rewards/margins": 6.608278274536133, + "rewards/rejected": -10.315563201904297, + "step": 8831 + }, + { + "epoch": 1.37, + "learning_rate": 7.669587634213393e-06, + "logits/chosen": -2.2343215942382812, + "logits/rejected": -2.8360846042633057, + "logps/chosen": -290.9170837402344, + "logps/rejected": -216.7270050048828, + "loss": 4.0229, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.607450485229492, + "rewards/margins": -1.7371203899383545, + "rewards/rejected": -9.870329856872559, + "step": 8832 + }, + { + "epoch": 1.37, + "learning_rate": 7.668854193682245e-06, + "logits/chosen": -2.4548656940460205, + "logits/rejected": -2.9588277339935303, + "logps/chosen": -141.061767578125, + "logps/rejected": -363.7083435058594, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.667158126831055, + "rewards/margins": 6.561496734619141, + "rewards/rejected": -13.228654861450195, + "step": 8833 + }, + { + "epoch": 1.37, + "learning_rate": 7.668120753151097e-06, + "logits/chosen": -1.5081266164779663, + "logits/rejected": -2.7621941566467285, + "logps/chosen": -117.69312286376953, + "logps/rejected": -337.9503479003906, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4524989128112793, + "rewards/margins": 7.622645378112793, + "rewards/rejected": -10.075143814086914, + "step": 8834 + }, + { + "epoch": 1.37, + "learning_rate": 7.667387312619949e-06, + "logits/chosen": -2.543365955352783, + "logits/rejected": -2.7088210582733154, + "logps/chosen": -167.71275329589844, + "logps/rejected": -244.0606231689453, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.986052989959717, + "rewards/margins": 5.778017997741699, + "rewards/rejected": -9.764071464538574, + "step": 8835 + }, + { + "epoch": 1.37, + "learning_rate": 7.6666538720888e-06, + "logits/chosen": -2.45333194732666, + "logits/rejected": -2.8437318801879883, + "logps/chosen": -249.16775512695312, + "logps/rejected": -242.62686157226562, + "loss": 1.1985, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.54277229309082, + "rewards/margins": 2.1635899543762207, + "rewards/rejected": -7.706361770629883, + "step": 8836 + }, + { + "epoch": 1.37, + "learning_rate": 7.665920431557652e-06, + "logits/chosen": -3.107102155685425, + "logits/rejected": -2.8735971450805664, + "logps/chosen": -212.52783203125, + "logps/rejected": -208.8759765625, + "loss": 0.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.943350076675415, + "rewards/margins": 6.1051740646362305, + "rewards/rejected": -9.048523902893066, + "step": 8837 + }, + { + "epoch": 1.37, + "learning_rate": 7.665186991026504e-06, + "logits/chosen": -2.979278087615967, + "logits/rejected": -2.130831241607666, + "logps/chosen": -264.0096740722656, + "logps/rejected": -246.393310546875, + "loss": 0.8151, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.689043045043945, + "rewards/margins": 5.027472972869873, + "rewards/rejected": -9.716516494750977, + "step": 8838 + }, + { + "epoch": 1.37, + "learning_rate": 7.664453550495356e-06, + "logits/chosen": -2.484485626220703, + "logits/rejected": -3.087665319442749, + "logps/chosen": -110.25027465820312, + "logps/rejected": -282.2847900390625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2440714836120605, + "rewards/margins": 7.527625560760498, + "rewards/rejected": -10.771697044372559, + "step": 8839 + }, + { + "epoch": 1.37, + "learning_rate": 7.66372010996421e-06, + "logits/chosen": -2.588524341583252, + "logits/rejected": -2.87888240814209, + "logps/chosen": -73.9130630493164, + "logps/rejected": -268.37237548828125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6721320152282715, + "rewards/margins": 9.164344787597656, + "rewards/rejected": -12.83647632598877, + "step": 8840 + }, + { + "epoch": 1.37, + "learning_rate": 7.662986669433062e-06, + "logits/chosen": -1.3963004350662231, + "logits/rejected": -2.6767585277557373, + "logps/chosen": -167.5594482421875, + "logps/rejected": -342.63287353515625, + "loss": 0.0795, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4821624755859375, + "rewards/margins": 2.548487901687622, + "rewards/rejected": -8.030651092529297, + "step": 8841 + }, + { + "epoch": 1.38, + "learning_rate": 7.662253228901914e-06, + "logits/chosen": -1.5608532428741455, + "logits/rejected": -2.839916229248047, + "logps/chosen": -192.90625, + "logps/rejected": -416.168701171875, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.250266075134277, + "rewards/margins": 6.69951057434082, + "rewards/rejected": -10.949776649475098, + "step": 8842 + }, + { + "epoch": 1.38, + "learning_rate": 7.661519788370767e-06, + "logits/chosen": -2.1710665225982666, + "logits/rejected": -3.0687243938446045, + "logps/chosen": -146.97720336914062, + "logps/rejected": -443.0823974609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.300464153289795, + "rewards/margins": 8.466036796569824, + "rewards/rejected": -11.766500473022461, + "step": 8843 + }, + { + "epoch": 1.38, + "learning_rate": 7.660786347839619e-06, + "logits/chosen": -1.9409887790679932, + "logits/rejected": -2.7596988677978516, + "logps/chosen": -248.71725463867188, + "logps/rejected": -347.23101806640625, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.039253234863281, + "rewards/margins": 4.8872270584106445, + "rewards/rejected": -9.926480293273926, + "step": 8844 + }, + { + "epoch": 1.38, + "learning_rate": 7.660052907308471e-06, + "logits/chosen": -2.9686179161071777, + "logits/rejected": -3.227560043334961, + "logps/chosen": -237.24310302734375, + "logps/rejected": -381.9903259277344, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2326064109802246, + "rewards/margins": 6.095590591430664, + "rewards/rejected": -9.328197479248047, + "step": 8845 + }, + { + "epoch": 1.38, + "learning_rate": 7.659319466777323e-06, + "logits/chosen": -3.0036184787750244, + "logits/rejected": -2.500117540359497, + "logps/chosen": -363.3055419921875, + "logps/rejected": -340.02105712890625, + "loss": 0.596, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.826786994934082, + "rewards/margins": 5.777327060699463, + "rewards/rejected": -12.604114532470703, + "step": 8846 + }, + { + "epoch": 1.38, + "learning_rate": 7.658586026246175e-06, + "logits/chosen": -1.7538589239120483, + "logits/rejected": -2.291609764099121, + "logps/chosen": -140.6787872314453, + "logps/rejected": -460.1805725097656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9312829971313477, + "rewards/margins": 8.834606170654297, + "rewards/rejected": -12.765889167785645, + "step": 8847 + }, + { + "epoch": 1.38, + "learning_rate": 7.657852585715027e-06, + "logits/chosen": -2.5443317890167236, + "logits/rejected": -3.1130211353302, + "logps/chosen": -47.165733337402344, + "logps/rejected": -326.38360595703125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2464497089385986, + "rewards/margins": 6.970932960510254, + "rewards/rejected": -10.217382431030273, + "step": 8848 + }, + { + "epoch": 1.38, + "learning_rate": 7.65711914518388e-06, + "logits/chosen": -2.93559193611145, + "logits/rejected": -2.7797117233276367, + "logps/chosen": -443.3590393066406, + "logps/rejected": -436.34222412109375, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.074525356292725, + "rewards/margins": 5.058442115783691, + "rewards/rejected": -10.132967948913574, + "step": 8849 + }, + { + "epoch": 1.38, + "learning_rate": 7.656385704652732e-06, + "logits/chosen": -2.767477512359619, + "logits/rejected": -2.6547348499298096, + "logps/chosen": -326.69451904296875, + "logps/rejected": -295.98687744140625, + "loss": 0.9727, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.865748405456543, + "rewards/margins": -0.497774600982666, + "rewards/rejected": -7.367974281311035, + "step": 8850 + }, + { + "epoch": 1.38, + "learning_rate": 7.655652264121584e-06, + "logits/chosen": -1.3965972661972046, + "logits/rejected": -2.6763339042663574, + "logps/chosen": -122.45587158203125, + "logps/rejected": -319.020263671875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.425368309020996, + "rewards/margins": 7.629194736480713, + "rewards/rejected": -14.05456256866455, + "step": 8851 + }, + { + "epoch": 1.38, + "learning_rate": 7.654918823590436e-06, + "logits/chosen": -2.1155407428741455, + "logits/rejected": -2.75028920173645, + "logps/chosen": -74.49996948242188, + "logps/rejected": -327.24627685546875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.244571208953857, + "rewards/margins": 10.286794662475586, + "rewards/rejected": -14.531366348266602, + "step": 8852 + }, + { + "epoch": 1.38, + "learning_rate": 7.654185383059288e-06, + "logits/chosen": -2.2483277320861816, + "logits/rejected": -2.4788014888763428, + "logps/chosen": -243.22427368164062, + "logps/rejected": -372.6031494140625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.708444595336914, + "rewards/margins": 7.364534854888916, + "rewards/rejected": -14.072978973388672, + "step": 8853 + }, + { + "epoch": 1.38, + "learning_rate": 7.65345194252814e-06, + "logits/chosen": -2.650317668914795, + "logits/rejected": -3.1604297161102295, + "logps/chosen": -69.64863586425781, + "logps/rejected": -202.71084594726562, + "loss": 0.3378, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.629876136779785, + "rewards/margins": 2.1651346683502197, + "rewards/rejected": -6.795010566711426, + "step": 8854 + }, + { + "epoch": 1.38, + "learning_rate": 7.652718501996991e-06, + "logits/chosen": -1.368828535079956, + "logits/rejected": -2.198624610900879, + "logps/chosen": -133.629638671875, + "logps/rejected": -378.8785095214844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.360908508300781, + "rewards/margins": 8.748981475830078, + "rewards/rejected": -14.10988998413086, + "step": 8855 + }, + { + "epoch": 1.38, + "learning_rate": 7.651985061465843e-06, + "logits/chosen": -3.0544116497039795, + "logits/rejected": -2.758105993270874, + "logps/chosen": -633.8840942382812, + "logps/rejected": -465.291015625, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4709930419921875, + "rewards/margins": 7.163474082946777, + "rewards/rejected": -10.634467124938965, + "step": 8856 + }, + { + "epoch": 1.38, + "learning_rate": 7.651251620934697e-06, + "logits/chosen": -2.9159328937530518, + "logits/rejected": -2.778125524520874, + "logps/chosen": -573.867919921875, + "logps/rejected": -563.5142822265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6732308864593506, + "rewards/margins": 8.83951187133789, + "rewards/rejected": -10.51274299621582, + "step": 8857 + }, + { + "epoch": 1.38, + "learning_rate": 7.650518180403549e-06, + "logits/chosen": -2.7620692253112793, + "logits/rejected": -2.5920376777648926, + "logps/chosen": -179.6842803955078, + "logps/rejected": -236.4753875732422, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.320426940917969, + "rewards/margins": 4.966695308685303, + "rewards/rejected": -9.287121772766113, + "step": 8858 + }, + { + "epoch": 1.38, + "learning_rate": 7.6497847398724e-06, + "logits/chosen": -2.8994805812835693, + "logits/rejected": -2.158073663711548, + "logps/chosen": -397.9002685546875, + "logps/rejected": -357.4693298339844, + "loss": 0.4405, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.522188186645508, + "rewards/margins": 1.0438323020935059, + "rewards/rejected": -7.5660200119018555, + "step": 8859 + }, + { + "epoch": 1.38, + "learning_rate": 7.649051299341252e-06, + "logits/chosen": -2.4417343139648438, + "logits/rejected": -2.584075689315796, + "logps/chosen": -340.356689453125, + "logps/rejected": -532.8131713867188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9830949306488037, + "rewards/margins": 9.753194808959961, + "rewards/rejected": -13.736289024353027, + "step": 8860 + }, + { + "epoch": 1.38, + "learning_rate": 7.648317858810104e-06, + "logits/chosen": -2.7424230575561523, + "logits/rejected": -2.8618781566619873, + "logps/chosen": -563.8684692382812, + "logps/rejected": -442.04742431640625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7755682468414307, + "rewards/margins": 8.110248565673828, + "rewards/rejected": -11.885817527770996, + "step": 8861 + }, + { + "epoch": 1.38, + "learning_rate": 7.647584418278956e-06, + "logits/chosen": -2.748246669769287, + "logits/rejected": -1.78773832321167, + "logps/chosen": -408.4574279785156, + "logps/rejected": -398.17041015625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.846950054168701, + "rewards/margins": 8.906606674194336, + "rewards/rejected": -12.753557205200195, + "step": 8862 + }, + { + "epoch": 1.38, + "learning_rate": 7.646850977747808e-06, + "logits/chosen": -2.9002275466918945, + "logits/rejected": -3.1192269325256348, + "logps/chosen": -178.249267578125, + "logps/rejected": -212.64129638671875, + "loss": 0.6055, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6033618450164795, + "rewards/margins": 4.007905006408691, + "rewards/rejected": -7.61126708984375, + "step": 8863 + }, + { + "epoch": 1.38, + "learning_rate": 7.64611753721666e-06, + "logits/chosen": -2.8705344200134277, + "logits/rejected": -2.9704031944274902, + "logps/chosen": -73.66453552246094, + "logps/rejected": -214.52008056640625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.801055431365967, + "rewards/margins": 6.875164985656738, + "rewards/rejected": -10.676219940185547, + "step": 8864 + }, + { + "epoch": 1.38, + "learning_rate": 7.645384096685512e-06, + "logits/chosen": -2.537230968475342, + "logits/rejected": -2.7012455463409424, + "logps/chosen": -169.92312622070312, + "logps/rejected": -213.37875366210938, + "loss": 0.0287, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.722315311431885, + "rewards/margins": 5.860964775085449, + "rewards/rejected": -11.583280563354492, + "step": 8865 + }, + { + "epoch": 1.38, + "learning_rate": 7.644650656154365e-06, + "logits/chosen": -2.766733169555664, + "logits/rejected": -2.7122225761413574, + "logps/chosen": -321.14483642578125, + "logps/rejected": -495.6284484863281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5641701221466064, + "rewards/margins": 8.849090576171875, + "rewards/rejected": -12.413261413574219, + "step": 8866 + }, + { + "epoch": 1.38, + "learning_rate": 7.643917215623217e-06, + "logits/chosen": -1.6974411010742188, + "logits/rejected": -2.7537267208099365, + "logps/chosen": -254.89932250976562, + "logps/rejected": -371.7209777832031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8650102615356445, + "rewards/margins": 9.451355934143066, + "rewards/rejected": -13.316366195678711, + "step": 8867 + }, + { + "epoch": 1.38, + "learning_rate": 7.643183775092069e-06, + "logits/chosen": -2.824169397354126, + "logits/rejected": -2.920832633972168, + "logps/chosen": -134.5105438232422, + "logps/rejected": -249.17617797851562, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.808217525482178, + "rewards/margins": 6.411531448364258, + "rewards/rejected": -11.219749450683594, + "step": 8868 + }, + { + "epoch": 1.38, + "learning_rate": 7.642450334560921e-06, + "logits/chosen": -2.1830742359161377, + "logits/rejected": -2.6767008304595947, + "logps/chosen": -279.72735595703125, + "logps/rejected": -345.51702880859375, + "loss": 0.0429, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2855706214904785, + "rewards/margins": 6.005764484405518, + "rewards/rejected": -10.291335105895996, + "step": 8869 + }, + { + "epoch": 1.38, + "learning_rate": 7.641716894029773e-06, + "logits/chosen": -2.8588693141937256, + "logits/rejected": -2.8316776752471924, + "logps/chosen": -262.1410827636719, + "logps/rejected": -340.59210205078125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.473542213439941, + "rewards/margins": 8.680635452270508, + "rewards/rejected": -14.15417766571045, + "step": 8870 + }, + { + "epoch": 1.38, + "learning_rate": 7.640983453498625e-06, + "logits/chosen": -0.942462146282196, + "logits/rejected": -2.187161684036255, + "logps/chosen": -164.77523803710938, + "logps/rejected": -259.3971252441406, + "loss": 0.2334, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.707648277282715, + "rewards/margins": 3.6108009815216064, + "rewards/rejected": -8.318449020385742, + "step": 8871 + }, + { + "epoch": 1.38, + "learning_rate": 7.640250012967477e-06, + "logits/chosen": -2.1076040267944336, + "logits/rejected": -2.925124168395996, + "logps/chosen": -119.72380065917969, + "logps/rejected": -319.9781494140625, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.22475528717041, + "rewards/margins": 6.340579032897949, + "rewards/rejected": -10.56533432006836, + "step": 8872 + }, + { + "epoch": 1.38, + "learning_rate": 7.639516572436329e-06, + "logits/chosen": -2.741408586502075, + "logits/rejected": -3.0192477703094482, + "logps/chosen": -126.82713317871094, + "logps/rejected": -251.99363708496094, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.383435249328613, + "rewards/margins": 3.8885879516601562, + "rewards/rejected": -8.27202320098877, + "step": 8873 + }, + { + "epoch": 1.38, + "learning_rate": 7.63878313190518e-06, + "logits/chosen": -2.34977388381958, + "logits/rejected": -2.9554190635681152, + "logps/chosen": -174.23516845703125, + "logps/rejected": -308.7723083496094, + "loss": 1.246, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.861956596374512, + "rewards/margins": 4.074689865112305, + "rewards/rejected": -8.936646461486816, + "step": 8874 + }, + { + "epoch": 1.38, + "learning_rate": 7.638049691374034e-06, + "logits/chosen": -2.793247938156128, + "logits/rejected": -2.9113831520080566, + "logps/chosen": -119.86996459960938, + "logps/rejected": -264.0654602050781, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7697105407714844, + "rewards/margins": 7.008676528930664, + "rewards/rejected": -10.778387069702148, + "step": 8875 + }, + { + "epoch": 1.38, + "learning_rate": 7.637316250842886e-06, + "logits/chosen": -3.001368761062622, + "logits/rejected": -2.7358062267303467, + "logps/chosen": -161.0531005859375, + "logps/rejected": -189.22836303710938, + "loss": 0.3354, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.946686267852783, + "rewards/margins": 3.85298228263855, + "rewards/rejected": -7.799668312072754, + "step": 8876 + }, + { + "epoch": 1.38, + "learning_rate": 7.63658281031174e-06, + "logits/chosen": -2.138024091720581, + "logits/rejected": -2.950730562210083, + "logps/chosen": -516.322509765625, + "logps/rejected": -624.863037109375, + "loss": 1.8459, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.52469539642334, + "rewards/margins": 3.012331247329712, + "rewards/rejected": -9.537026405334473, + "step": 8877 + }, + { + "epoch": 1.38, + "learning_rate": 7.635849369780591e-06, + "logits/chosen": -2.013301134109497, + "logits/rejected": -2.583380699157715, + "logps/chosen": -236.86749267578125, + "logps/rejected": -456.94140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.007136583328247, + "rewards/margins": 11.029346466064453, + "rewards/rejected": -12.036481857299805, + "step": 8878 + }, + { + "epoch": 1.38, + "learning_rate": 7.635115929249443e-06, + "logits/chosen": -1.6203690767288208, + "logits/rejected": -2.7586171627044678, + "logps/chosen": -119.50349426269531, + "logps/rejected": -373.4617919921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.264904022216797, + "rewards/margins": 9.066011428833008, + "rewards/rejected": -12.330915451049805, + "step": 8879 + }, + { + "epoch": 1.38, + "learning_rate": 7.634382488718295e-06, + "logits/chosen": -2.317795753479004, + "logits/rejected": -2.627277135848999, + "logps/chosen": -144.14019775390625, + "logps/rejected": -210.5752716064453, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.337108135223389, + "rewards/margins": 5.352877616882324, + "rewards/rejected": -9.689985275268555, + "step": 8880 + }, + { + "epoch": 1.38, + "learning_rate": 7.633649048187147e-06, + "logits/chosen": -2.3539462089538574, + "logits/rejected": -2.8690311908721924, + "logps/chosen": -132.31149291992188, + "logps/rejected": -396.8336181640625, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1890735626220703, + "rewards/margins": 7.213497638702393, + "rewards/rejected": -10.402571678161621, + "step": 8881 + }, + { + "epoch": 1.38, + "learning_rate": 7.632915607655999e-06, + "logits/chosen": -2.855757713317871, + "logits/rejected": -2.218810558319092, + "logps/chosen": -535.7113037109375, + "logps/rejected": -538.6646118164062, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.76735258102417, + "rewards/margins": 8.027128219604492, + "rewards/rejected": -10.79448127746582, + "step": 8882 + }, + { + "epoch": 1.38, + "learning_rate": 7.63218216712485e-06, + "logits/chosen": -2.0371644496917725, + "logits/rejected": -2.982492208480835, + "logps/chosen": -66.94477844238281, + "logps/rejected": -343.11199951171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2283060550689697, + "rewards/margins": 9.51788330078125, + "rewards/rejected": -11.74618911743164, + "step": 8883 + }, + { + "epoch": 1.38, + "learning_rate": 7.631448726593704e-06, + "logits/chosen": -2.8501076698303223, + "logits/rejected": -3.0689213275909424, + "logps/chosen": -204.78268432617188, + "logps/rejected": -392.00994873046875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8826537132263184, + "rewards/margins": 7.123687267303467, + "rewards/rejected": -10.006340980529785, + "step": 8884 + }, + { + "epoch": 1.38, + "learning_rate": 7.630715286062556e-06, + "logits/chosen": -2.8650245666503906, + "logits/rejected": -1.8856143951416016, + "logps/chosen": -310.837890625, + "logps/rejected": -552.45458984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.181000709533691, + "rewards/margins": 8.771007537841797, + "rewards/rejected": -12.952008247375488, + "step": 8885 + }, + { + "epoch": 1.38, + "learning_rate": 7.629981845531408e-06, + "logits/chosen": -2.4627039432525635, + "logits/rejected": -2.9547836780548096, + "logps/chosen": -110.82167053222656, + "logps/rejected": -232.45542907714844, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0583460330963135, + "rewards/margins": 6.462263107299805, + "rewards/rejected": -8.520608901977539, + "step": 8886 + }, + { + "epoch": 1.38, + "learning_rate": 7.62924840500026e-06, + "logits/chosen": -1.9062467813491821, + "logits/rejected": -2.571631669998169, + "logps/chosen": -98.18531799316406, + "logps/rejected": -306.08941650390625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.452886581420898, + "rewards/margins": 7.356720447540283, + "rewards/rejected": -12.809606552124023, + "step": 8887 + }, + { + "epoch": 1.38, + "learning_rate": 7.628514964469112e-06, + "logits/chosen": -1.3593584299087524, + "logits/rejected": -2.5952656269073486, + "logps/chosen": -108.00588989257812, + "logps/rejected": -270.0457763671875, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.696477174758911, + "rewards/margins": 5.65205717086792, + "rewards/rejected": -9.34853458404541, + "step": 8888 + }, + { + "epoch": 1.38, + "learning_rate": 7.627781523937964e-06, + "logits/chosen": -3.10825252532959, + "logits/rejected": -3.091822624206543, + "logps/chosen": -424.3337707519531, + "logps/rejected": -373.5855712890625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.755425930023193, + "rewards/margins": 6.8304362297058105, + "rewards/rejected": -11.585862159729004, + "step": 8889 + }, + { + "epoch": 1.38, + "learning_rate": 7.6270480834068156e-06, + "logits/chosen": -2.3672807216644287, + "logits/rejected": -2.882697343826294, + "logps/chosen": -785.5590209960938, + "logps/rejected": -610.5596313476562, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.144372940063477, + "rewards/margins": 5.674374580383301, + "rewards/rejected": -11.818748474121094, + "step": 8890 + }, + { + "epoch": 1.38, + "learning_rate": 7.6263146428756674e-06, + "logits/chosen": -2.9511704444885254, + "logits/rejected": -2.5944559574127197, + "logps/chosen": -252.87132263183594, + "logps/rejected": -270.9210510253906, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.002339243888855, + "rewards/margins": 10.173907279968262, + "rewards/rejected": -11.176246643066406, + "step": 8891 + }, + { + "epoch": 1.38, + "learning_rate": 7.625581202344519e-06, + "logits/chosen": -2.368612766265869, + "logits/rejected": -2.5796597003936768, + "logps/chosen": -211.2190704345703, + "logps/rejected": -265.9401550292969, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.813198089599609, + "rewards/margins": 4.950333595275879, + "rewards/rejected": -10.763531684875488, + "step": 8892 + }, + { + "epoch": 1.38, + "learning_rate": 7.624847761813373e-06, + "logits/chosen": -1.7031629085540771, + "logits/rejected": -2.7970666885375977, + "logps/chosen": -77.8616943359375, + "logps/rejected": -350.9427795410156, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.969722747802734, + "rewards/margins": 7.747440338134766, + "rewards/rejected": -13.7171630859375, + "step": 8893 + }, + { + "epoch": 1.38, + "learning_rate": 7.624114321282225e-06, + "logits/chosen": -2.851954698562622, + "logits/rejected": -2.613459825515747, + "logps/chosen": -170.82366943359375, + "logps/rejected": -217.80799865722656, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.177568197250366, + "rewards/margins": 7.535699844360352, + "rewards/rejected": -9.713268280029297, + "step": 8894 + }, + { + "epoch": 1.38, + "learning_rate": 7.623380880751077e-06, + "logits/chosen": -2.116302490234375, + "logits/rejected": -3.0004594326019287, + "logps/chosen": -89.1248779296875, + "logps/rejected": -290.81365966796875, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6810715198516846, + "rewards/margins": 7.150897979736328, + "rewards/rejected": -9.83197021484375, + "step": 8895 + }, + { + "epoch": 1.38, + "learning_rate": 7.6226474402199285e-06, + "logits/chosen": -2.5497653484344482, + "logits/rejected": -2.17402982711792, + "logps/chosen": -310.61175537109375, + "logps/rejected": -119.47361755371094, + "loss": 3.3907, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.220708847045898, + "rewards/margins": -2.791663885116577, + "rewards/rejected": -6.429044723510742, + "step": 8896 + }, + { + "epoch": 1.38, + "learning_rate": 7.62191399968878e-06, + "logits/chosen": -1.1196670532226562, + "logits/rejected": -2.4969863891601562, + "logps/chosen": -148.49105834960938, + "logps/rejected": -606.3167114257812, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2685906887054443, + "rewards/margins": 10.715192794799805, + "rewards/rejected": -13.983784675598145, + "step": 8897 + }, + { + "epoch": 1.38, + "learning_rate": 7.621180559157632e-06, + "logits/chosen": -1.894506812095642, + "logits/rejected": -2.707502841949463, + "logps/chosen": -183.1103515625, + "logps/rejected": -340.08856201171875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5448007583618164, + "rewards/margins": 8.3668851852417, + "rewards/rejected": -11.911685943603516, + "step": 8898 + }, + { + "epoch": 1.38, + "learning_rate": 7.620447118626484e-06, + "logits/chosen": -1.9880634546279907, + "logits/rejected": -2.7814040184020996, + "logps/chosen": -273.488525390625, + "logps/rejected": -317.23211669921875, + "loss": 1.5069, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.232941627502441, + "rewards/margins": -0.45374250411987305, + "rewards/rejected": -6.779199123382568, + "step": 8899 + }, + { + "epoch": 1.38, + "learning_rate": 7.619713678095336e-06, + "logits/chosen": -1.6985422372817993, + "logits/rejected": -2.9249889850616455, + "logps/chosen": -135.47799682617188, + "logps/rejected": -453.68072509765625, + "loss": 0.2275, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.583569049835205, + "rewards/margins": 3.291821241378784, + "rewards/rejected": -7.87539005279541, + "step": 8900 + }, + { + "epoch": 1.38, + "learning_rate": 7.618980237564189e-06, + "logits/chosen": -2.8249738216400146, + "logits/rejected": -2.87325119972229, + "logps/chosen": -104.25189971923828, + "logps/rejected": -359.87347412109375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6337718963623047, + "rewards/margins": 7.959371566772461, + "rewards/rejected": -10.593143463134766, + "step": 8901 + }, + { + "epoch": 1.38, + "learning_rate": 7.618246797033042e-06, + "logits/chosen": -2.7962894439697266, + "logits/rejected": -2.7928240299224854, + "logps/chosen": -355.8975830078125, + "logps/rejected": -316.40966796875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0057976245880127, + "rewards/margins": 6.145355224609375, + "rewards/rejected": -8.151152610778809, + "step": 8902 + }, + { + "epoch": 1.38, + "learning_rate": 7.617513356501894e-06, + "logits/chosen": -2.829129934310913, + "logits/rejected": -2.5955393314361572, + "logps/chosen": -342.3722839355469, + "logps/rejected": -278.7760314941406, + "loss": 0.4529, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.5471694469451904, + "rewards/margins": 5.285248756408691, + "rewards/rejected": -8.832418441772461, + "step": 8903 + }, + { + "epoch": 1.38, + "learning_rate": 7.616779915970746e-06, + "logits/chosen": -1.0730897188186646, + "logits/rejected": -2.5669798851013184, + "logps/chosen": -65.80149841308594, + "logps/rejected": -350.9600830078125, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.844103813171387, + "rewards/margins": 7.253361701965332, + "rewards/rejected": -12.097465515136719, + "step": 8904 + }, + { + "epoch": 1.38, + "learning_rate": 7.616046475439598e-06, + "logits/chosen": -2.297805070877075, + "logits/rejected": -2.9132282733917236, + "logps/chosen": -131.71176147460938, + "logps/rejected": -246.3616943359375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2518296241760254, + "rewards/margins": 8.240377426147461, + "rewards/rejected": -10.492206573486328, + "step": 8905 + }, + { + "epoch": 1.39, + "learning_rate": 7.61531303490845e-06, + "logits/chosen": -2.6685056686401367, + "logits/rejected": -2.8585331439971924, + "logps/chosen": -78.41813659667969, + "logps/rejected": -185.6802520751953, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6107146739959717, + "rewards/margins": 4.71937370300293, + "rewards/rejected": -7.330088138580322, + "step": 8906 + }, + { + "epoch": 1.39, + "learning_rate": 7.614579594377302e-06, + "logits/chosen": -2.464163064956665, + "logits/rejected": -3.0410444736480713, + "logps/chosen": -50.080936431884766, + "logps/rejected": -289.0984802246094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.098439693450928, + "rewards/margins": 10.329850196838379, + "rewards/rejected": -14.428289413452148, + "step": 8907 + }, + { + "epoch": 1.39, + "learning_rate": 7.613846153846154e-06, + "logits/chosen": -2.634000778198242, + "logits/rejected": -1.7171818017959595, + "logps/chosen": -389.50030517578125, + "logps/rejected": -286.6863708496094, + "loss": 0.052, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1854782104492188, + "rewards/margins": 8.51948356628418, + "rewards/rejected": -11.704961776733398, + "step": 8908 + }, + { + "epoch": 1.39, + "learning_rate": 7.6131127133150055e-06, + "logits/chosen": -2.5796313285827637, + "logits/rejected": -2.4268229007720947, + "logps/chosen": -222.72439575195312, + "logps/rejected": -385.121826171875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.277177810668945, + "rewards/margins": 7.248810768127441, + "rewards/rejected": -11.525989532470703, + "step": 8909 + }, + { + "epoch": 1.39, + "learning_rate": 7.612379272783857e-06, + "logits/chosen": -2.5923709869384766, + "logits/rejected": -2.5757429599761963, + "logps/chosen": -438.71978759765625, + "logps/rejected": -371.3994140625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.596142292022705, + "rewards/margins": 7.226527214050293, + "rewards/rejected": -11.822669982910156, + "step": 8910 + }, + { + "epoch": 1.39, + "learning_rate": 7.611645832252711e-06, + "logits/chosen": -2.4053614139556885, + "logits/rejected": -2.9154255390167236, + "logps/chosen": -85.10946655273438, + "logps/rejected": -285.09515380859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9810619354248047, + "rewards/margins": 8.45067310333252, + "rewards/rejected": -11.431735038757324, + "step": 8911 + }, + { + "epoch": 1.39, + "learning_rate": 7.610912391721563e-06, + "logits/chosen": -1.8205795288085938, + "logits/rejected": -2.7409825325012207, + "logps/chosen": -201.6982879638672, + "logps/rejected": -308.4765930175781, + "loss": 1.2703, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.514807939529419, + "rewards/margins": 4.980799198150635, + "rewards/rejected": -8.495607376098633, + "step": 8912 + }, + { + "epoch": 1.39, + "learning_rate": 7.610178951190415e-06, + "logits/chosen": -2.0784125328063965, + "logits/rejected": -2.904059886932373, + "logps/chosen": -85.65792846679688, + "logps/rejected": -340.17626953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1663753986358643, + "rewards/margins": 7.908328533172607, + "rewards/rejected": -10.07470417022705, + "step": 8913 + }, + { + "epoch": 1.39, + "learning_rate": 7.6094455106592666e-06, + "logits/chosen": -1.579677939414978, + "logits/rejected": -2.5791850090026855, + "logps/chosen": -136.8966827392578, + "logps/rejected": -411.5605773925781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.781277656555176, + "rewards/margins": 8.600743293762207, + "rewards/rejected": -13.382020950317383, + "step": 8914 + }, + { + "epoch": 1.39, + "learning_rate": 7.6087120701281184e-06, + "logits/chosen": -2.8853209018707275, + "logits/rejected": -2.26719069480896, + "logps/chosen": -519.910888671875, + "logps/rejected": -434.11126708984375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.484391689300537, + "rewards/margins": 7.730478286743164, + "rewards/rejected": -13.21487045288086, + "step": 8915 + }, + { + "epoch": 1.39, + "learning_rate": 7.60797862959697e-06, + "logits/chosen": -2.852543592453003, + "logits/rejected": -2.673823595046997, + "logps/chosen": -473.3292541503906, + "logps/rejected": -577.5225830078125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.372982025146484, + "rewards/margins": 8.810656547546387, + "rewards/rejected": -13.183637619018555, + "step": 8916 + }, + { + "epoch": 1.39, + "learning_rate": 7.607245189065822e-06, + "logits/chosen": -2.210557222366333, + "logits/rejected": -2.7862343788146973, + "logps/chosen": -323.1241149902344, + "logps/rejected": -516.4297485351562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8811140060424805, + "rewards/margins": 9.532172203063965, + "rewards/rejected": -12.413286209106445, + "step": 8917 + }, + { + "epoch": 1.39, + "learning_rate": 7.606511748534675e-06, + "logits/chosen": -1.4758151769638062, + "logits/rejected": -2.8237216472625732, + "logps/chosen": -175.07211303710938, + "logps/rejected": -327.441650390625, + "loss": 1.5905, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.701402187347412, + "rewards/margins": 5.467540740966797, + "rewards/rejected": -9.168943405151367, + "step": 8918 + }, + { + "epoch": 1.39, + "learning_rate": 7.605778308003527e-06, + "logits/chosen": -3.0272982120513916, + "logits/rejected": -3.0060248374938965, + "logps/chosen": -416.53466796875, + "logps/rejected": -394.37225341796875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.601518154144287, + "rewards/margins": 8.40462875366211, + "rewards/rejected": -12.006146430969238, + "step": 8919 + }, + { + "epoch": 1.39, + "learning_rate": 7.60504486747238e-06, + "logits/chosen": -1.849076509475708, + "logits/rejected": -2.641988515853882, + "logps/chosen": -212.0665283203125, + "logps/rejected": -451.34912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1604413986206055, + "rewards/margins": 10.225154876708984, + "rewards/rejected": -13.385595321655273, + "step": 8920 + }, + { + "epoch": 1.39, + "learning_rate": 7.604311426941232e-06, + "logits/chosen": -2.8310585021972656, + "logits/rejected": -2.86051869392395, + "logps/chosen": -98.76557922363281, + "logps/rejected": -209.30638122558594, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.312694549560547, + "rewards/margins": 3.8476216793060303, + "rewards/rejected": -9.160316467285156, + "step": 8921 + }, + { + "epoch": 1.39, + "learning_rate": 7.603577986410084e-06, + "logits/chosen": -2.584780216217041, + "logits/rejected": -2.4889867305755615, + "logps/chosen": -82.94605255126953, + "logps/rejected": -336.4490966796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5696197748184204, + "rewards/margins": 9.324728965759277, + "rewards/rejected": -10.89434814453125, + "step": 8922 + }, + { + "epoch": 1.39, + "learning_rate": 7.602844545878936e-06, + "logits/chosen": -2.7496848106384277, + "logits/rejected": -2.4434940814971924, + "logps/chosen": -102.08000946044922, + "logps/rejected": -290.844482421875, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2058260440826416, + "rewards/margins": 5.89924430847168, + "rewards/rejected": -8.105070114135742, + "step": 8923 + }, + { + "epoch": 1.39, + "learning_rate": 7.602111105347788e-06, + "logits/chosen": -1.6084507703781128, + "logits/rejected": -2.8442494869232178, + "logps/chosen": -182.1416778564453, + "logps/rejected": -376.8543701171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7113983631134033, + "rewards/margins": 9.326560974121094, + "rewards/rejected": -10.037960052490234, + "step": 8924 + }, + { + "epoch": 1.39, + "learning_rate": 7.60137766481664e-06, + "logits/chosen": -1.797642469406128, + "logits/rejected": -2.7071306705474854, + "logps/chosen": -231.640869140625, + "logps/rejected": -426.95166015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4840588569641113, + "rewards/margins": 8.42691707611084, + "rewards/rejected": -10.91097640991211, + "step": 8925 + }, + { + "epoch": 1.39, + "learning_rate": 7.600644224285492e-06, + "logits/chosen": -2.459883451461792, + "logits/rejected": -2.922687530517578, + "logps/chosen": -121.03419494628906, + "logps/rejected": -366.6637878417969, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.104210376739502, + "rewards/margins": 6.959373474121094, + "rewards/rejected": -12.063583374023438, + "step": 8926 + }, + { + "epoch": 1.39, + "learning_rate": 7.5999107837543435e-06, + "logits/chosen": -2.282615900039673, + "logits/rejected": -2.7361629009246826, + "logps/chosen": -140.88377380371094, + "logps/rejected": -293.7710266113281, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9419748783111572, + "rewards/margins": 7.162412643432617, + "rewards/rejected": -11.104387283325195, + "step": 8927 + }, + { + "epoch": 1.39, + "learning_rate": 7.599177343223195e-06, + "logits/chosen": -2.2391841411590576, + "logits/rejected": -2.782623291015625, + "logps/chosen": -566.7988891601562, + "logps/rejected": -628.6643676757812, + "loss": 0.0251, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.835339546203613, + "rewards/margins": 5.859748840332031, + "rewards/rejected": -11.695088386535645, + "step": 8928 + }, + { + "epoch": 1.39, + "learning_rate": 7.598443902692049e-06, + "logits/chosen": -3.1185522079467773, + "logits/rejected": -2.7846274375915527, + "logps/chosen": -195.52481079101562, + "logps/rejected": -152.38951110839844, + "loss": 0.5785, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.570208549499512, + "rewards/margins": 3.705544948577881, + "rewards/rejected": -9.27575397491455, + "step": 8929 + }, + { + "epoch": 1.39, + "learning_rate": 7.597710462160901e-06, + "logits/chosen": -2.4165170192718506, + "logits/rejected": -2.991428852081299, + "logps/chosen": -290.4678955078125, + "logps/rejected": -360.87255859375, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.212329864501953, + "rewards/margins": 6.704680919647217, + "rewards/rejected": -8.917011260986328, + "step": 8930 + }, + { + "epoch": 1.39, + "learning_rate": 7.596977021629753e-06, + "logits/chosen": -2.8163013458251953, + "logits/rejected": -2.9651939868927, + "logps/chosen": -106.01736450195312, + "logps/rejected": -248.32513427734375, + "loss": 0.0698, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.565169334411621, + "rewards/margins": 3.337156057357788, + "rewards/rejected": -9.902325630187988, + "step": 8931 + }, + { + "epoch": 1.39, + "learning_rate": 7.596243581098605e-06, + "logits/chosen": -2.9610979557037354, + "logits/rejected": -2.844703197479248, + "logps/chosen": -614.9482421875, + "logps/rejected": -983.3179931640625, + "loss": 0.0275, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.53566312789917, + "rewards/margins": 7.268090724945068, + "rewards/rejected": -13.803753852844238, + "step": 8932 + }, + { + "epoch": 1.39, + "learning_rate": 7.5955101405674565e-06, + "logits/chosen": -1.7400739192962646, + "logits/rejected": -2.4786088466644287, + "logps/chosen": -237.45587158203125, + "logps/rejected": -353.85595703125, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4714646339416504, + "rewards/margins": 7.510469913482666, + "rewards/rejected": -10.981934547424316, + "step": 8933 + }, + { + "epoch": 1.39, + "learning_rate": 7.594776700036308e-06, + "logits/chosen": -2.2031116485595703, + "logits/rejected": -2.672039747238159, + "logps/chosen": -251.9764404296875, + "logps/rejected": -419.8147277832031, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3197460174560547, + "rewards/margins": 7.215205192565918, + "rewards/rejected": -8.534952163696289, + "step": 8934 + }, + { + "epoch": 1.39, + "learning_rate": 7.594043259505161e-06, + "logits/chosen": -2.5506277084350586, + "logits/rejected": -2.0941162109375, + "logps/chosen": -518.9423828125, + "logps/rejected": -132.6392364501953, + "loss": 2.9974, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.6464128494262695, + "rewards/margins": -1.9062623977661133, + "rewards/rejected": -5.740150451660156, + "step": 8935 + }, + { + "epoch": 1.39, + "learning_rate": 7.593309818974013e-06, + "logits/chosen": -3.0646026134490967, + "logits/rejected": -2.5021371841430664, + "logps/chosen": -315.91339111328125, + "logps/rejected": -166.60159301757812, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.056208848953247, + "rewards/margins": 4.813958168029785, + "rewards/rejected": -6.870166778564453, + "step": 8936 + }, + { + "epoch": 1.39, + "learning_rate": 7.592576378442865e-06, + "logits/chosen": -2.794816017150879, + "logits/rejected": -2.8784494400024414, + "logps/chosen": -129.2760009765625, + "logps/rejected": -224.4322509765625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.475917339324951, + "rewards/margins": 5.611490726470947, + "rewards/rejected": -10.087408065795898, + "step": 8937 + }, + { + "epoch": 1.39, + "learning_rate": 7.591842937911718e-06, + "logits/chosen": -3.0005645751953125, + "logits/rejected": -2.9842662811279297, + "logps/chosen": -126.47634887695312, + "logps/rejected": -116.97227478027344, + "loss": 0.4403, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.360502243041992, + "rewards/margins": 2.3044354915618896, + "rewards/rejected": -8.664937973022461, + "step": 8938 + }, + { + "epoch": 1.39, + "learning_rate": 7.59110949738057e-06, + "logits/chosen": -1.8823118209838867, + "logits/rejected": -3.06984543800354, + "logps/chosen": -219.44680786132812, + "logps/rejected": -568.5344848632812, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.362359046936035, + "rewards/margins": 8.317712783813477, + "rewards/rejected": -13.680072784423828, + "step": 8939 + }, + { + "epoch": 1.39, + "learning_rate": 7.590376056849422e-06, + "logits/chosen": -1.8271652460098267, + "logits/rejected": -2.586198329925537, + "logps/chosen": -106.2608871459961, + "logps/rejected": -320.8490905761719, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.848854064941406, + "rewards/margins": 6.043356895446777, + "rewards/rejected": -11.892210960388184, + "step": 8940 + }, + { + "epoch": 1.39, + "learning_rate": 7.589642616318274e-06, + "logits/chosen": -3.047337055206299, + "logits/rejected": -2.953890323638916, + "logps/chosen": -106.33767700195312, + "logps/rejected": -197.22848510742188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5143234729766846, + "rewards/margins": 8.686466217041016, + "rewards/rejected": -11.200789451599121, + "step": 8941 + }, + { + "epoch": 1.39, + "learning_rate": 7.588909175787126e-06, + "logits/chosen": -0.3865576982498169, + "logits/rejected": -2.8497564792633057, + "logps/chosen": -113.81137084960938, + "logps/rejected": -657.6258544921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.154690265655518, + "rewards/margins": 8.952564239501953, + "rewards/rejected": -13.107254028320312, + "step": 8942 + }, + { + "epoch": 1.39, + "learning_rate": 7.588175735255978e-06, + "logits/chosen": -2.478609085083008, + "logits/rejected": -2.9506139755249023, + "logps/chosen": -240.6424560546875, + "logps/rejected": -344.2518005371094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0532751083374023, + "rewards/margins": 11.096648216247559, + "rewards/rejected": -14.149923324584961, + "step": 8943 + }, + { + "epoch": 1.39, + "learning_rate": 7.58744229472483e-06, + "logits/chosen": -2.52905011177063, + "logits/rejected": -3.0379040241241455, + "logps/chosen": -138.96853637695312, + "logps/rejected": -489.95745849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5386404991149902, + "rewards/margins": 12.57042121887207, + "rewards/rejected": -16.10906219482422, + "step": 8944 + }, + { + "epoch": 1.39, + "learning_rate": 7.5867088541936816e-06, + "logits/chosen": -1.6222200393676758, + "logits/rejected": -2.929875135421753, + "logps/chosen": -185.1265869140625, + "logps/rejected": -819.8807373046875, + "loss": 0.861, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.289817810058594, + "rewards/margins": 2.2182540893554688, + "rewards/rejected": -8.508071899414062, + "step": 8945 + }, + { + "epoch": 1.39, + "learning_rate": 7.5859754136625334e-06, + "logits/chosen": -2.6870205402374268, + "logits/rejected": -2.0631580352783203, + "logps/chosen": -213.72520446777344, + "logps/rejected": -209.43060302734375, + "loss": 0.4178, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.8322649002075195, + "rewards/margins": 4.252056121826172, + "rewards/rejected": -10.084321022033691, + "step": 8946 + }, + { + "epoch": 1.39, + "learning_rate": 7.585241973131387e-06, + "logits/chosen": -2.232741594314575, + "logits/rejected": -2.691176176071167, + "logps/chosen": -86.85688781738281, + "logps/rejected": -288.196533203125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.656984806060791, + "rewards/margins": 6.11651086807251, + "rewards/rejected": -8.7734956741333, + "step": 8947 + }, + { + "epoch": 1.39, + "learning_rate": 7.584508532600239e-06, + "logits/chosen": -2.8109652996063232, + "logits/rejected": -2.521331310272217, + "logps/chosen": -176.36647033691406, + "logps/rejected": -244.2820587158203, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0507252216339111, + "rewards/margins": 8.000648498535156, + "rewards/rejected": -9.051374435424805, + "step": 8948 + }, + { + "epoch": 1.39, + "learning_rate": 7.583775092069091e-06, + "logits/chosen": -1.9902002811431885, + "logits/rejected": -2.5938756465911865, + "logps/chosen": -199.22344970703125, + "logps/rejected": -336.17315673828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5545554161071777, + "rewards/margins": 10.19526481628418, + "rewards/rejected": -13.749820709228516, + "step": 8949 + }, + { + "epoch": 1.39, + "learning_rate": 7.583041651537943e-06, + "logits/chosen": -1.827479362487793, + "logits/rejected": -2.4687914848327637, + "logps/chosen": -67.49685668945312, + "logps/rejected": -236.35000610351562, + "loss": 0.1318, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.275579452514648, + "rewards/margins": 4.99072790145874, + "rewards/rejected": -10.266307830810547, + "step": 8950 + }, + { + "epoch": 1.39, + "learning_rate": 7.5823082110067945e-06, + "logits/chosen": -2.570124864578247, + "logits/rejected": -2.4714338779449463, + "logps/chosen": -147.63221740722656, + "logps/rejected": -188.3490753173828, + "loss": 0.7383, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.12900972366333, + "rewards/margins": 1.9125926494598389, + "rewards/rejected": -7.04160213470459, + "step": 8951 + }, + { + "epoch": 1.39, + "learning_rate": 7.581574770475647e-06, + "logits/chosen": -2.2115073204040527, + "logits/rejected": -2.7362046241760254, + "logps/chosen": -178.6808319091797, + "logps/rejected": -305.37579345703125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.420798301696777, + "rewards/margins": 6.585872650146484, + "rewards/rejected": -11.006669998168945, + "step": 8952 + }, + { + "epoch": 1.39, + "learning_rate": 7.580841329944499e-06, + "logits/chosen": -2.9421257972717285, + "logits/rejected": -2.7792012691497803, + "logps/chosen": -252.2214813232422, + "logps/rejected": -364.93865966796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9199166297912598, + "rewards/margins": 9.554239273071289, + "rewards/rejected": -13.47415542602539, + "step": 8953 + }, + { + "epoch": 1.39, + "learning_rate": 7.580107889413351e-06, + "logits/chosen": -2.644136905670166, + "logits/rejected": -1.7472647428512573, + "logps/chosen": -495.47967529296875, + "logps/rejected": -354.05670166015625, + "loss": 0.5821, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.119393825531006, + "rewards/margins": 5.427767753601074, + "rewards/rejected": -10.547161102294922, + "step": 8954 + }, + { + "epoch": 1.39, + "learning_rate": 7.579374448882205e-06, + "logits/chosen": -2.717306137084961, + "logits/rejected": -3.0061631202697754, + "logps/chosen": -84.81092071533203, + "logps/rejected": -298.58184814453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.061507225036621, + "rewards/margins": 9.15778923034668, + "rewards/rejected": -13.219297409057617, + "step": 8955 + }, + { + "epoch": 1.39, + "learning_rate": 7.5786410083510565e-06, + "logits/chosen": -1.7829684019088745, + "logits/rejected": -2.972099781036377, + "logps/chosen": -285.46942138671875, + "logps/rejected": -587.8739013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2776236534118652, + "rewards/margins": 9.17734146118164, + "rewards/rejected": -12.454965591430664, + "step": 8956 + }, + { + "epoch": 1.39, + "learning_rate": 7.577907567819908e-06, + "logits/chosen": -1.530439019203186, + "logits/rejected": -2.347559928894043, + "logps/chosen": -93.74752807617188, + "logps/rejected": -480.7554626464844, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.377148151397705, + "rewards/margins": 8.35448169708252, + "rewards/rejected": -12.731630325317383, + "step": 8957 + }, + { + "epoch": 1.39, + "learning_rate": 7.57717412728876e-06, + "logits/chosen": -2.3195741176605225, + "logits/rejected": -3.107377052307129, + "logps/chosen": -163.3306427001953, + "logps/rejected": -422.59417724609375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7534451484680176, + "rewards/margins": 7.722095489501953, + "rewards/rejected": -11.475540161132812, + "step": 8958 + }, + { + "epoch": 1.39, + "learning_rate": 7.576440686757612e-06, + "logits/chosen": -1.6917511224746704, + "logits/rejected": -2.799919843673706, + "logps/chosen": -219.3684844970703, + "logps/rejected": -393.4269714355469, + "loss": 0.0877, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.771294593811035, + "rewards/margins": 2.6133270263671875, + "rewards/rejected": -10.384621620178223, + "step": 8959 + }, + { + "epoch": 1.39, + "learning_rate": 7.575707246226464e-06, + "logits/chosen": -2.853444814682007, + "logits/rejected": -2.8084168434143066, + "logps/chosen": -424.7002258300781, + "logps/rejected": -517.2215576171875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.271131992340088, + "rewards/margins": 6.977797031402588, + "rewards/rejected": -12.248929023742676, + "step": 8960 + }, + { + "epoch": 1.39, + "learning_rate": 7.574973805695316e-06, + "logits/chosen": -1.3640609979629517, + "logits/rejected": -2.79984450340271, + "logps/chosen": -82.14970397949219, + "logps/rejected": -456.0611572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.850383758544922, + "rewards/margins": 10.78357219696045, + "rewards/rejected": -15.633955001831055, + "step": 8961 + }, + { + "epoch": 1.39, + "learning_rate": 7.574240365164168e-06, + "logits/chosen": -2.7167911529541016, + "logits/rejected": -1.8341330289840698, + "logps/chosen": -290.0853576660156, + "logps/rejected": -272.4009704589844, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.037304401397705, + "rewards/margins": 5.3089728355407715, + "rewards/rejected": -8.346277236938477, + "step": 8962 + }, + { + "epoch": 1.39, + "learning_rate": 7.57350692463302e-06, + "logits/chosen": -2.856534242630005, + "logits/rejected": -2.7227795124053955, + "logps/chosen": -331.3448486328125, + "logps/rejected": -270.781982421875, + "loss": 0.1073, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.273036241531372, + "rewards/margins": 4.027247428894043, + "rewards/rejected": -7.300283432006836, + "step": 8963 + }, + { + "epoch": 1.39, + "learning_rate": 7.572773484101873e-06, + "logits/chosen": -2.9098775386810303, + "logits/rejected": -2.793649911880493, + "logps/chosen": -126.41217803955078, + "logps/rejected": -196.69561767578125, + "loss": 1.5945, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.852298736572266, + "rewards/margins": 1.1722521781921387, + "rewards/rejected": -9.024551391601562, + "step": 8964 + }, + { + "epoch": 1.39, + "learning_rate": 7.572040043570725e-06, + "logits/chosen": -3.073183298110962, + "logits/rejected": -2.5779919624328613, + "logps/chosen": -217.8380584716797, + "logps/rejected": -267.5987854003906, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.274069309234619, + "rewards/margins": 6.66461181640625, + "rewards/rejected": -9.938681602478027, + "step": 8965 + }, + { + "epoch": 1.39, + "learning_rate": 7.571306603039577e-06, + "logits/chosen": -2.9822545051574707, + "logits/rejected": -2.7594432830810547, + "logps/chosen": -693.2642822265625, + "logps/rejected": -456.0888671875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2364776134490967, + "rewards/margins": 7.402284622192383, + "rewards/rejected": -8.638761520385742, + "step": 8966 + }, + { + "epoch": 1.39, + "learning_rate": 7.570573162508429e-06, + "logits/chosen": -1.1888585090637207, + "logits/rejected": -2.7153444290161133, + "logps/chosen": -176.4065399169922, + "logps/rejected": -613.1527709960938, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8688783645629883, + "rewards/margins": 8.611401557922363, + "rewards/rejected": -12.480279922485352, + "step": 8967 + }, + { + "epoch": 1.39, + "learning_rate": 7.569839721977281e-06, + "logits/chosen": -1.6945546865463257, + "logits/rejected": -2.536754846572876, + "logps/chosen": -132.6295928955078, + "logps/rejected": -376.9541015625, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6853623390197754, + "rewards/margins": 6.712531089782715, + "rewards/rejected": -10.397893905639648, + "step": 8968 + }, + { + "epoch": 1.39, + "learning_rate": 7.569106281446133e-06, + "logits/chosen": -1.7918773889541626, + "logits/rejected": -2.980083465576172, + "logps/chosen": -82.62606811523438, + "logps/rejected": -300.35430908203125, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.388697624206543, + "rewards/margins": 6.041693687438965, + "rewards/rejected": -11.430391311645508, + "step": 8969 + }, + { + "epoch": 1.4, + "learning_rate": 7.568372840914985e-06, + "logits/chosen": -2.9508888721466064, + "logits/rejected": -1.7868884801864624, + "logps/chosen": -446.744384765625, + "logps/rejected": -429.5662536621094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7670700550079346, + "rewards/margins": 11.988048553466797, + "rewards/rejected": -14.755119323730469, + "step": 8970 + }, + { + "epoch": 1.4, + "learning_rate": 7.567639400383837e-06, + "logits/chosen": -2.670969247817993, + "logits/rejected": -2.770193576812744, + "logps/chosen": -278.9237060546875, + "logps/rejected": -317.98974609375, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2235779762268066, + "rewards/margins": 4.9310503005981445, + "rewards/rejected": -8.154627799987793, + "step": 8971 + }, + { + "epoch": 1.4, + "learning_rate": 7.566905959852689e-06, + "logits/chosen": -2.174337863922119, + "logits/rejected": -2.456768035888672, + "logps/chosen": -137.33802795410156, + "logps/rejected": -239.9014892578125, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.465149402618408, + "rewards/margins": 5.325766086578369, + "rewards/rejected": -11.790915489196777, + "step": 8972 + }, + { + "epoch": 1.4, + "learning_rate": 7.566172519321543e-06, + "logits/chosen": -2.2399778366088867, + "logits/rejected": -2.5818939208984375, + "logps/chosen": -123.86528015136719, + "logps/rejected": -296.9069519042969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.991239547729492, + "rewards/margins": 8.29522705078125, + "rewards/rejected": -12.286466598510742, + "step": 8973 + }, + { + "epoch": 1.4, + "learning_rate": 7.5654390787903945e-06, + "logits/chosen": -0.7022836208343506, + "logits/rejected": -0.8991726636886597, + "logps/chosen": -172.2328643798828, + "logps/rejected": -259.3802490234375, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.900059223175049, + "rewards/margins": 5.253305912017822, + "rewards/rejected": -8.153365135192871, + "step": 8974 + }, + { + "epoch": 1.4, + "learning_rate": 7.564705638259246e-06, + "logits/chosen": -1.9314697980880737, + "logits/rejected": -2.772777795791626, + "logps/chosen": -184.8686065673828, + "logps/rejected": -340.856201171875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5589449405670166, + "rewards/margins": 7.7811479568481445, + "rewards/rejected": -11.340092658996582, + "step": 8975 + }, + { + "epoch": 1.4, + "learning_rate": 7.563972197728098e-06, + "logits/chosen": -1.5854145288467407, + "logits/rejected": -2.5420596599578857, + "logps/chosen": -196.0135498046875, + "logps/rejected": -458.96649169921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.217326641082764, + "rewards/margins": 8.668500900268555, + "rewards/rejected": -12.885828018188477, + "step": 8976 + }, + { + "epoch": 1.4, + "learning_rate": 7.56323875719695e-06, + "logits/chosen": -3.0475172996520996, + "logits/rejected": -2.6363489627838135, + "logps/chosen": -199.16006469726562, + "logps/rejected": -118.42444610595703, + "loss": 1.2357, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.405794620513916, + "rewards/margins": 1.611314058303833, + "rewards/rejected": -6.017108917236328, + "step": 8977 + }, + { + "epoch": 1.4, + "learning_rate": 7.562505316665802e-06, + "logits/chosen": -0.6911746859550476, + "logits/rejected": -2.3118274211883545, + "logps/chosen": -166.43765258789062, + "logps/rejected": -424.7569580078125, + "loss": 0.4068, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.060861110687256, + "rewards/margins": 6.9950337409973145, + "rewards/rejected": -12.05589485168457, + "step": 8978 + }, + { + "epoch": 1.4, + "learning_rate": 7.561771876134654e-06, + "logits/chosen": -1.6865696907043457, + "logits/rejected": -2.7603113651275635, + "logps/chosen": -158.4398956298828, + "logps/rejected": -295.0628967285156, + "loss": 0.6243, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.301092147827148, + "rewards/margins": 1.9185714721679688, + "rewards/rejected": -10.219663619995117, + "step": 8979 + }, + { + "epoch": 1.4, + "learning_rate": 7.561038435603506e-06, + "logits/chosen": -2.441124200820923, + "logits/rejected": -3.1077916622161865, + "logps/chosen": -98.81400299072266, + "logps/rejected": -412.48590087890625, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2004135847091675, + "rewards/margins": 9.002272605895996, + "rewards/rejected": -10.202686309814453, + "step": 8980 + }, + { + "epoch": 1.4, + "learning_rate": 7.560304995072358e-06, + "logits/chosen": -2.0327649116516113, + "logits/rejected": -2.7552378177642822, + "logps/chosen": -108.60662841796875, + "logps/rejected": -323.4634094238281, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.075105667114258, + "rewards/margins": 8.501914978027344, + "rewards/rejected": -12.577020645141602, + "step": 8981 + }, + { + "epoch": 1.4, + "learning_rate": 7.559571554541211e-06, + "logits/chosen": -2.773256540298462, + "logits/rejected": -2.5807650089263916, + "logps/chosen": -279.98748779296875, + "logps/rejected": -341.9572448730469, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9460511207580566, + "rewards/margins": 5.727014541625977, + "rewards/rejected": -7.673066139221191, + "step": 8982 + }, + { + "epoch": 1.4, + "learning_rate": 7.558838114010063e-06, + "logits/chosen": -1.8847193717956543, + "logits/rejected": -2.730651617050171, + "logps/chosen": -341.1298828125, + "logps/rejected": -261.1946105957031, + "loss": 0.1727, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.993008613586426, + "rewards/margins": 2.3663721084594727, + "rewards/rejected": -7.359380722045898, + "step": 8983 + }, + { + "epoch": 1.4, + "learning_rate": 7.558104673478915e-06, + "logits/chosen": -2.0629594326019287, + "logits/rejected": -2.8442776203155518, + "logps/chosen": -151.96844482421875, + "logps/rejected": -590.8362426757812, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.977794647216797, + "rewards/margins": 5.733678817749023, + "rewards/rejected": -11.71147346496582, + "step": 8984 + }, + { + "epoch": 1.4, + "learning_rate": 7.557371232947767e-06, + "logits/chosen": -2.9608237743377686, + "logits/rejected": -3.0691452026367188, + "logps/chosen": -166.73768615722656, + "logps/rejected": -210.271484375, + "loss": 0.1367, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.972576141357422, + "rewards/margins": 1.9499328136444092, + "rewards/rejected": -8.92250919342041, + "step": 8985 + }, + { + "epoch": 1.4, + "learning_rate": 7.55663779241662e-06, + "logits/chosen": -1.8148175477981567, + "logits/rejected": -1.8497507572174072, + "logps/chosen": -311.04022216796875, + "logps/rejected": -178.9829559326172, + "loss": 0.0649, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.092719078063965, + "rewards/margins": 6.3119354248046875, + "rewards/rejected": -10.404654502868652, + "step": 8986 + }, + { + "epoch": 1.4, + "learning_rate": 7.5559043518854715e-06, + "logits/chosen": -2.876575469970703, + "logits/rejected": -3.011775255203247, + "logps/chosen": -233.3878173828125, + "logps/rejected": -210.23016357421875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3537135124206543, + "rewards/margins": 8.825920104980469, + "rewards/rejected": -12.179634094238281, + "step": 8987 + }, + { + "epoch": 1.4, + "learning_rate": 7.555170911354323e-06, + "logits/chosen": -1.547563076019287, + "logits/rejected": -2.5781924724578857, + "logps/chosen": -215.31915283203125, + "logps/rejected": -229.68772888183594, + "loss": 0.4232, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.606703281402588, + "rewards/margins": 2.230257272720337, + "rewards/rejected": -7.836960792541504, + "step": 8988 + }, + { + "epoch": 1.4, + "learning_rate": 7.554437470823175e-06, + "logits/chosen": -2.6061758995056152, + "logits/rejected": -2.1835014820098877, + "logps/chosen": -163.29322814941406, + "logps/rejected": -118.78399658203125, + "loss": 1.4073, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.776719093322754, + "rewards/margins": -0.010712742805480957, + "rewards/rejected": -5.7660064697265625, + "step": 8989 + }, + { + "epoch": 1.4, + "learning_rate": 7.553704030292027e-06, + "logits/chosen": -2.7091004848480225, + "logits/rejected": -2.9235308170318604, + "logps/chosen": -71.47227478027344, + "logps/rejected": -244.2288818359375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5364325046539307, + "rewards/margins": 7.881048679351807, + "rewards/rejected": -11.41748046875, + "step": 8990 + }, + { + "epoch": 1.4, + "learning_rate": 7.552970589760881e-06, + "logits/chosen": -2.6800615787506104, + "logits/rejected": -2.930694103240967, + "logps/chosen": -122.43226623535156, + "logps/rejected": -241.93252563476562, + "loss": 0.2561, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.775754928588867, + "rewards/margins": 2.2648355960845947, + "rewards/rejected": -6.040590763092041, + "step": 8991 + }, + { + "epoch": 1.4, + "learning_rate": 7.5522371492297325e-06, + "logits/chosen": -2.396885871887207, + "logits/rejected": -2.8158175945281982, + "logps/chosen": -142.0001678466797, + "logps/rejected": -238.76223754882812, + "loss": 0.388, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.958087921142578, + "rewards/margins": 2.8124473094940186, + "rewards/rejected": -7.770535469055176, + "step": 8992 + }, + { + "epoch": 1.4, + "learning_rate": 7.5515037086985844e-06, + "logits/chosen": -2.9905407428741455, + "logits/rejected": -2.83540678024292, + "logps/chosen": -586.96484375, + "logps/rejected": -930.43212890625, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3047738075256348, + "rewards/margins": 5.2536234855651855, + "rewards/rejected": -8.55839729309082, + "step": 8993 + }, + { + "epoch": 1.4, + "learning_rate": 7.550770268167436e-06, + "logits/chosen": -2.8003363609313965, + "logits/rejected": -2.3963797092437744, + "logps/chosen": -306.3517761230469, + "logps/rejected": -312.2120666503906, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.865206241607666, + "rewards/margins": 9.331521987915039, + "rewards/rejected": -11.196727752685547, + "step": 8994 + }, + { + "epoch": 1.4, + "learning_rate": 7.550036827636288e-06, + "logits/chosen": -2.207181215286255, + "logits/rejected": -2.6708152294158936, + "logps/chosen": -152.28652954101562, + "logps/rejected": -404.0637512207031, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.194698810577393, + "rewards/margins": 8.960426330566406, + "rewards/rejected": -13.155125617980957, + "step": 8995 + }, + { + "epoch": 1.4, + "learning_rate": 7.54930338710514e-06, + "logits/chosen": -2.72892165184021, + "logits/rejected": -1.9512196779251099, + "logps/chosen": -287.0952453613281, + "logps/rejected": -297.0533447265625, + "loss": 0.0313, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6772308349609375, + "rewards/margins": 4.941612243652344, + "rewards/rejected": -8.618843078613281, + "step": 8996 + }, + { + "epoch": 1.4, + "learning_rate": 7.548569946573992e-06, + "logits/chosen": -1.911517858505249, + "logits/rejected": -2.7916581630706787, + "logps/chosen": -277.1496887207031, + "logps/rejected": -457.78472900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1546874046325684, + "rewards/margins": 11.566883087158203, + "rewards/rejected": -14.721570014953613, + "step": 8997 + }, + { + "epoch": 1.4, + "learning_rate": 7.547836506042844e-06, + "logits/chosen": -2.8054001331329346, + "logits/rejected": -2.9400596618652344, + "logps/chosen": -518.1788940429688, + "logps/rejected": -512.8733520507812, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2980637550354, + "rewards/margins": 7.021629333496094, + "rewards/rejected": -11.319693565368652, + "step": 8998 + }, + { + "epoch": 1.4, + "learning_rate": 7.547103065511696e-06, + "logits/chosen": -1.4248294830322266, + "logits/rejected": -2.807518482208252, + "logps/chosen": -115.96187591552734, + "logps/rejected": -218.40890502929688, + "loss": 1.2514, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.829596519470215, + "rewards/margins": 1.1078448295593262, + "rewards/rejected": -8.9374418258667, + "step": 8999 + }, + { + "epoch": 1.4, + "learning_rate": 7.546369624980549e-06, + "logits/chosen": -2.9956741333007812, + "logits/rejected": -3.0647525787353516, + "logps/chosen": -67.15170288085938, + "logps/rejected": -306.8861083984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.700795650482178, + "rewards/margins": 7.509388446807861, + "rewards/rejected": -12.210184097290039, + "step": 9000 + }, + { + "epoch": 1.4, + "learning_rate": 7.545636184449401e-06, + "logits/chosen": -2.871290683746338, + "logits/rejected": -2.1610593795776367, + "logps/chosen": -323.3927917480469, + "logps/rejected": -364.00738525390625, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.93609619140625, + "rewards/margins": 3.421710252761841, + "rewards/rejected": -7.357806205749512, + "step": 9001 + }, + { + "epoch": 1.4, + "learning_rate": 7.544902743918253e-06, + "logits/chosen": -2.7661123275756836, + "logits/rejected": -2.1081089973449707, + "logps/chosen": -154.00738525390625, + "logps/rejected": -354.27740478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.020621299743652, + "rewards/margins": 10.974964141845703, + "rewards/rejected": -14.995584487915039, + "step": 9002 + }, + { + "epoch": 1.4, + "learning_rate": 7.544169303387106e-06, + "logits/chosen": -1.0965458154678345, + "logits/rejected": -1.749985694885254, + "logps/chosen": -177.308837890625, + "logps/rejected": -426.031982421875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.508062839508057, + "rewards/margins": 8.572381019592285, + "rewards/rejected": -13.0804443359375, + "step": 9003 + }, + { + "epoch": 1.4, + "learning_rate": 7.543435862855958e-06, + "logits/chosen": -2.2998673915863037, + "logits/rejected": -3.051509141921997, + "logps/chosen": -166.74801635742188, + "logps/rejected": -381.3485107421875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0902373790740967, + "rewards/margins": 7.709560394287109, + "rewards/rejected": -9.799798011779785, + "step": 9004 + }, + { + "epoch": 1.4, + "learning_rate": 7.5427024223248095e-06, + "logits/chosen": -2.7133748531341553, + "logits/rejected": -1.7688872814178467, + "logps/chosen": -202.009765625, + "logps/rejected": -226.39218139648438, + "loss": 0.0325, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.370450019836426, + "rewards/margins": 4.585055351257324, + "rewards/rejected": -8.95550537109375, + "step": 9005 + }, + { + "epoch": 1.4, + "learning_rate": 7.541968981793661e-06, + "logits/chosen": -2.5454039573669434, + "logits/rejected": -2.929266929626465, + "logps/chosen": -553.3670043945312, + "logps/rejected": -554.40234375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.819550514221191, + "rewards/margins": 5.161595344543457, + "rewards/rejected": -9.981145858764648, + "step": 9006 + }, + { + "epoch": 1.4, + "learning_rate": 7.541235541262513e-06, + "logits/chosen": -2.5334291458129883, + "logits/rejected": -3.225358724594116, + "logps/chosen": -199.2830352783203, + "logps/rejected": -445.3324279785156, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.212188482284546, + "rewards/margins": 7.408886909484863, + "rewards/rejected": -10.621075630187988, + "step": 9007 + }, + { + "epoch": 1.4, + "learning_rate": 7.540502100731365e-06, + "logits/chosen": -1.607302188873291, + "logits/rejected": -2.8172693252563477, + "logps/chosen": -94.19232177734375, + "logps/rejected": -383.0828857421875, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2732629776000977, + "rewards/margins": 8.239197731018066, + "rewards/rejected": -11.512460708618164, + "step": 9008 + }, + { + "epoch": 1.4, + "learning_rate": 7.539768660200219e-06, + "logits/chosen": -1.4667211771011353, + "logits/rejected": -2.954371929168701, + "logps/chosen": -302.00262451171875, + "logps/rejected": -673.1030883789062, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.000466346740723, + "rewards/margins": 6.92220401763916, + "rewards/rejected": -10.922670364379883, + "step": 9009 + }, + { + "epoch": 1.4, + "learning_rate": 7.539035219669071e-06, + "logits/chosen": -2.3007779121398926, + "logits/rejected": -2.871464967727661, + "logps/chosen": -389.73504638671875, + "logps/rejected": -406.83685302734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.541353702545166, + "rewards/margins": 9.974380493164062, + "rewards/rejected": -11.51573371887207, + "step": 9010 + }, + { + "epoch": 1.4, + "learning_rate": 7.5383017791379225e-06, + "logits/chosen": -2.447406530380249, + "logits/rejected": -2.8005011081695557, + "logps/chosen": -111.07342529296875, + "logps/rejected": -356.1558837890625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5751311779022217, + "rewards/margins": 6.674762725830078, + "rewards/rejected": -10.249893188476562, + "step": 9011 + }, + { + "epoch": 1.4, + "learning_rate": 7.537568338606774e-06, + "logits/chosen": -3.0788943767547607, + "logits/rejected": -2.8192598819732666, + "logps/chosen": -503.1512451171875, + "logps/rejected": -430.75164794921875, + "loss": 1.5509, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.8191070556640625, + "rewards/margins": 1.910383701324463, + "rewards/rejected": -8.729491233825684, + "step": 9012 + }, + { + "epoch": 1.4, + "learning_rate": 7.536834898075626e-06, + "logits/chosen": -2.5301923751831055, + "logits/rejected": -2.897712469100952, + "logps/chosen": -591.2434692382812, + "logps/rejected": -577.1617431640625, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0199217796325684, + "rewards/margins": 5.014248847961426, + "rewards/rejected": -7.034170150756836, + "step": 9013 + }, + { + "epoch": 1.4, + "learning_rate": 7.536101457544478e-06, + "logits/chosen": -2.8208377361297607, + "logits/rejected": -1.4099644422531128, + "logps/chosen": -599.0330810546875, + "logps/rejected": -416.0760192871094, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3668651580810547, + "rewards/margins": 6.662978172302246, + "rewards/rejected": -10.029844284057617, + "step": 9014 + }, + { + "epoch": 1.4, + "learning_rate": 7.53536801701333e-06, + "logits/chosen": -1.8455699682235718, + "logits/rejected": -2.8304595947265625, + "logps/chosen": -174.88526916503906, + "logps/rejected": -367.88720703125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.377248764038086, + "rewards/margins": 6.8496809005737305, + "rewards/rejected": -12.2269287109375, + "step": 9015 + }, + { + "epoch": 1.4, + "learning_rate": 7.534634576482182e-06, + "logits/chosen": -2.622602701187134, + "logits/rejected": -2.9723496437072754, + "logps/chosen": -91.67991638183594, + "logps/rejected": -325.7728576660156, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.477670192718506, + "rewards/margins": 9.014184951782227, + "rewards/rejected": -11.49185562133789, + "step": 9016 + }, + { + "epoch": 1.4, + "learning_rate": 7.533901135951034e-06, + "logits/chosen": -2.8193576335906982, + "logits/rejected": -2.6793015003204346, + "logps/chosen": -403.72247314453125, + "logps/rejected": -282.0457458496094, + "loss": 0.3618, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.681981086730957, + "rewards/margins": 3.6497251987457275, + "rewards/rejected": -8.331706047058105, + "step": 9017 + }, + { + "epoch": 1.4, + "learning_rate": 7.533167695419887e-06, + "logits/chosen": -2.7729227542877197, + "logits/rejected": -2.2281837463378906, + "logps/chosen": -263.285400390625, + "logps/rejected": -372.24188232421875, + "loss": 0.0795, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.412223815917969, + "rewards/margins": 6.28916597366333, + "rewards/rejected": -11.701390266418457, + "step": 9018 + }, + { + "epoch": 1.4, + "learning_rate": 7.532434254888739e-06, + "logits/chosen": -2.9305734634399414, + "logits/rejected": -2.4625916481018066, + "logps/chosen": -217.81439208984375, + "logps/rejected": -226.43429565429688, + "loss": 1.7848, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.492105960845947, + "rewards/margins": 1.5537797212600708, + "rewards/rejected": -7.0458855628967285, + "step": 9019 + }, + { + "epoch": 1.4, + "learning_rate": 7.531700814357592e-06, + "logits/chosen": -1.7896453142166138, + "logits/rejected": -2.7403178215026855, + "logps/chosen": -172.4875030517578, + "logps/rejected": -489.61474609375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2172999382019043, + "rewards/margins": 10.611482620239258, + "rewards/rejected": -13.82878303527832, + "step": 9020 + }, + { + "epoch": 1.4, + "learning_rate": 7.530967373826444e-06, + "logits/chosen": -2.9750359058380127, + "logits/rejected": -2.819404125213623, + "logps/chosen": -200.8892822265625, + "logps/rejected": -181.5487823486328, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.597525119781494, + "rewards/margins": 5.620246410369873, + "rewards/rejected": -10.217771530151367, + "step": 9021 + }, + { + "epoch": 1.4, + "learning_rate": 7.530233933295296e-06, + "logits/chosen": -2.4329614639282227, + "logits/rejected": -1.9754924774169922, + "logps/chosen": -121.15817260742188, + "logps/rejected": -150.58148193359375, + "loss": 1.0489, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.223215579986572, + "rewards/margins": 3.412991523742676, + "rewards/rejected": -9.636207580566406, + "step": 9022 + }, + { + "epoch": 1.4, + "learning_rate": 7.5295004927641476e-06, + "logits/chosen": -1.5413140058517456, + "logits/rejected": -2.4936575889587402, + "logps/chosen": -183.13006591796875, + "logps/rejected": -596.3056030273438, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.396019220352173, + "rewards/margins": 8.304220199584961, + "rewards/rejected": -11.700240135192871, + "step": 9023 + }, + { + "epoch": 1.4, + "learning_rate": 7.5287670522329994e-06, + "logits/chosen": -2.200113534927368, + "logits/rejected": -2.597817897796631, + "logps/chosen": -112.75129699707031, + "logps/rejected": -266.8044128417969, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.836495876312256, + "rewards/margins": 8.082866668701172, + "rewards/rejected": -10.91936206817627, + "step": 9024 + }, + { + "epoch": 1.4, + "learning_rate": 7.528033611701851e-06, + "logits/chosen": -2.8399112224578857, + "logits/rejected": -2.6524055004119873, + "logps/chosen": -369.764892578125, + "logps/rejected": -531.9231567382812, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.02324104309082, + "rewards/margins": 7.282364845275879, + "rewards/rejected": -11.305606842041016, + "step": 9025 + }, + { + "epoch": 1.4, + "learning_rate": 7.527300171170703e-06, + "logits/chosen": -1.6060984134674072, + "logits/rejected": -2.636265277862549, + "logps/chosen": -167.8552703857422, + "logps/rejected": -416.1941223144531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5985615253448486, + "rewards/margins": 13.087581634521484, + "rewards/rejected": -14.68614387512207, + "step": 9026 + }, + { + "epoch": 1.4, + "learning_rate": 7.526566730639557e-06, + "logits/chosen": -2.127688407897949, + "logits/rejected": -3.033656597137451, + "logps/chosen": -177.3549346923828, + "logps/rejected": -363.5690612792969, + "loss": 0.5648, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.084090232849121, + "rewards/margins": 7.322303771972656, + "rewards/rejected": -11.406394004821777, + "step": 9027 + }, + { + "epoch": 1.4, + "learning_rate": 7.525833290108409e-06, + "logits/chosen": -2.5722219944000244, + "logits/rejected": -2.9739794731140137, + "logps/chosen": -131.2208251953125, + "logps/rejected": -163.85452270507812, + "loss": 1.8178, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.417381286621094, + "rewards/margins": 1.7255439758300781, + "rewards/rejected": -10.142925262451172, + "step": 9028 + }, + { + "epoch": 1.4, + "learning_rate": 7.5250998495772605e-06, + "logits/chosen": -2.703805923461914, + "logits/rejected": -2.9483602046966553, + "logps/chosen": -554.3185424804688, + "logps/rejected": -544.8406982421875, + "loss": 0.3744, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.593545913696289, + "rewards/margins": 5.179894924163818, + "rewards/rejected": -10.773441314697266, + "step": 9029 + }, + { + "epoch": 1.4, + "learning_rate": 7.524366409046112e-06, + "logits/chosen": -2.8033318519592285, + "logits/rejected": -2.977156639099121, + "logps/chosen": -89.47978210449219, + "logps/rejected": -164.4402313232422, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.071115732192993, + "rewards/margins": 4.581653594970703, + "rewards/rejected": -7.652769088745117, + "step": 9030 + }, + { + "epoch": 1.4, + "learning_rate": 7.523632968514964e-06, + "logits/chosen": -1.1322753429412842, + "logits/rejected": -2.3066704273223877, + "logps/chosen": -120.79083251953125, + "logps/rejected": -357.0384216308594, + "loss": 0.1908, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.952065467834473, + "rewards/margins": 7.334641933441162, + "rewards/rejected": -12.286706924438477, + "step": 9031 + }, + { + "epoch": 1.4, + "learning_rate": 7.522899527983816e-06, + "logits/chosen": -2.9275221824645996, + "logits/rejected": -2.6430459022521973, + "logps/chosen": -573.3762817382812, + "logps/rejected": -448.2568054199219, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.138380527496338, + "rewards/margins": 8.522005081176758, + "rewards/rejected": -12.660385131835938, + "step": 9032 + }, + { + "epoch": 1.4, + "learning_rate": 7.522166087452668e-06, + "logits/chosen": -1.1224946975708008, + "logits/rejected": -2.8074166774749756, + "logps/chosen": -74.79595947265625, + "logps/rejected": -290.5146484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2767672538757324, + "rewards/margins": 9.04838752746582, + "rewards/rejected": -12.325155258178711, + "step": 9033 + }, + { + "epoch": 1.4, + "learning_rate": 7.52143264692152e-06, + "logits/chosen": -2.722853660583496, + "logits/rejected": -2.5641307830810547, + "logps/chosen": -251.8297119140625, + "logps/rejected": -264.33148193359375, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7456111907958984, + "rewards/margins": 5.9358320236206055, + "rewards/rejected": -9.681443214416504, + "step": 9034 + }, + { + "epoch": 1.41, + "learning_rate": 7.520699206390373e-06, + "logits/chosen": -2.0959744453430176, + "logits/rejected": -2.6112804412841797, + "logps/chosen": -162.20223999023438, + "logps/rejected": -310.2503662109375, + "loss": 0.6677, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.337070941925049, + "rewards/margins": 4.0175089836120605, + "rewards/rejected": -9.35457992553711, + "step": 9035 + }, + { + "epoch": 1.41, + "learning_rate": 7.519965765859225e-06, + "logits/chosen": -2.081897258758545, + "logits/rejected": -2.9675614833831787, + "logps/chosen": -124.8760986328125, + "logps/rejected": -278.423828125, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1257781982421875, + "rewards/margins": 5.192536354064941, + "rewards/rejected": -9.318314552307129, + "step": 9036 + }, + { + "epoch": 1.41, + "learning_rate": 7.519232325328078e-06, + "logits/chosen": -1.397606372833252, + "logits/rejected": -2.8517372608184814, + "logps/chosen": -158.6565704345703, + "logps/rejected": -488.35992431640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7088685035705566, + "rewards/margins": 9.47564697265625, + "rewards/rejected": -12.184514999389648, + "step": 9037 + }, + { + "epoch": 1.41, + "learning_rate": 7.51849888479693e-06, + "logits/chosen": -3.159085273742676, + "logits/rejected": -3.12507963180542, + "logps/chosen": -539.3197021484375, + "logps/rejected": -290.2025146484375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.302947998046875, + "rewards/margins": 7.063739776611328, + "rewards/rejected": -12.366687774658203, + "step": 9038 + }, + { + "epoch": 1.41, + "learning_rate": 7.517765444265782e-06, + "logits/chosen": -2.7594926357269287, + "logits/rejected": -0.6578835844993591, + "logps/chosen": -383.9049072265625, + "logps/rejected": -230.42056274414062, + "loss": 1.8322, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.955554962158203, + "rewards/margins": 2.1809234619140625, + "rewards/rejected": -8.136478424072266, + "step": 9039 + }, + { + "epoch": 1.41, + "learning_rate": 7.517032003734634e-06, + "logits/chosen": -2.0763587951660156, + "logits/rejected": -2.387850761413574, + "logps/chosen": -168.64993286132812, + "logps/rejected": -326.59942626953125, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8527917861938477, + "rewards/margins": 6.120560646057129, + "rewards/rejected": -9.973352432250977, + "step": 9040 + }, + { + "epoch": 1.41, + "learning_rate": 7.516298563203486e-06, + "logits/chosen": -3.0374534130096436, + "logits/rejected": -2.4077987670898438, + "logps/chosen": -208.59518432617188, + "logps/rejected": -143.8349609375, + "loss": 0.7294, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.765687942504883, + "rewards/margins": 3.2453773021698, + "rewards/rejected": -9.011064529418945, + "step": 9041 + }, + { + "epoch": 1.41, + "learning_rate": 7.5155651226723375e-06, + "logits/chosen": -2.5666959285736084, + "logits/rejected": -2.656240940093994, + "logps/chosen": -319.12872314453125, + "logps/rejected": -386.1735534667969, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7113547325134277, + "rewards/margins": 5.571990013122559, + "rewards/rejected": -9.283345222473145, + "step": 9042 + }, + { + "epoch": 1.41, + "learning_rate": 7.514831682141189e-06, + "logits/chosen": -2.3394312858581543, + "logits/rejected": -2.9895834922790527, + "logps/chosen": -131.75265502929688, + "logps/rejected": -272.45697021484375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3939803838729858, + "rewards/margins": 6.123402118682861, + "rewards/rejected": -7.517382621765137, + "step": 9043 + }, + { + "epoch": 1.41, + "learning_rate": 7.514098241610043e-06, + "logits/chosen": -3.0579769611358643, + "logits/rejected": -2.596874475479126, + "logps/chosen": -268.8834228515625, + "logps/rejected": -283.4512939453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4038116931915283, + "rewards/margins": 9.380132675170898, + "rewards/rejected": -10.783944129943848, + "step": 9044 + }, + { + "epoch": 1.41, + "learning_rate": 7.513364801078895e-06, + "logits/chosen": -1.9818860292434692, + "logits/rejected": -2.9279332160949707, + "logps/chosen": -333.96282958984375, + "logps/rejected": -697.748291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.438380479812622, + "rewards/margins": 12.231550216674805, + "rewards/rejected": -15.669931411743164, + "step": 9045 + }, + { + "epoch": 1.41, + "learning_rate": 7.512631360547747e-06, + "logits/chosen": -1.480980634689331, + "logits/rejected": -2.3125391006469727, + "logps/chosen": -97.2773666381836, + "logps/rejected": -323.56768798828125, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1373114585876465, + "rewards/margins": 7.319955825805664, + "rewards/rejected": -12.457267761230469, + "step": 9046 + }, + { + "epoch": 1.41, + "learning_rate": 7.5118979200165986e-06, + "logits/chosen": -1.3569562435150146, + "logits/rejected": -2.8993771076202393, + "logps/chosen": -196.475830078125, + "logps/rejected": -400.49365234375, + "loss": 2.8424, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.458218574523926, + "rewards/margins": 0.6099429130554199, + "rewards/rejected": -7.068161487579346, + "step": 9047 + }, + { + "epoch": 1.41, + "learning_rate": 7.5111644794854504e-06, + "logits/chosen": -2.795499563217163, + "logits/rejected": -2.81900691986084, + "logps/chosen": -113.73924255371094, + "logps/rejected": -237.9555206298828, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.189013719558716, + "rewards/margins": 8.917767524719238, + "rewards/rejected": -11.106781005859375, + "step": 9048 + }, + { + "epoch": 1.41, + "learning_rate": 7.510431038954302e-06, + "logits/chosen": -2.1279842853546143, + "logits/rejected": -2.6884727478027344, + "logps/chosen": -177.1868438720703, + "logps/rejected": -252.71368408203125, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.07940149307251, + "rewards/margins": 5.304782867431641, + "rewards/rejected": -9.384184837341309, + "step": 9049 + }, + { + "epoch": 1.41, + "learning_rate": 7.509697598423154e-06, + "logits/chosen": -1.4799284934997559, + "logits/rejected": -2.0724143981933594, + "logps/chosen": -280.8586120605469, + "logps/rejected": -366.321533203125, + "loss": 0.2236, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6010541915893555, + "rewards/margins": 3.4056689739227295, + "rewards/rejected": -9.006723403930664, + "step": 9050 + }, + { + "epoch": 1.41, + "learning_rate": 7.508964157892006e-06, + "logits/chosen": -3.0516204833984375, + "logits/rejected": -2.3527886867523193, + "logps/chosen": -220.6053466796875, + "logps/rejected": -104.60713195800781, + "loss": 0.37, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.354441165924072, + "rewards/margins": 0.9803273677825928, + "rewards/rejected": -5.334768295288086, + "step": 9051 + }, + { + "epoch": 1.41, + "learning_rate": 7.508230717360859e-06, + "logits/chosen": -3.1010870933532715, + "logits/rejected": -3.148782253265381, + "logps/chosen": -96.04098510742188, + "logps/rejected": -223.26116943359375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4077816009521484, + "rewards/margins": 7.599433898925781, + "rewards/rejected": -11.00721549987793, + "step": 9052 + }, + { + "epoch": 1.41, + "learning_rate": 7.5074972768297115e-06, + "logits/chosen": -2.122952699661255, + "logits/rejected": -2.778585433959961, + "logps/chosen": -137.07562255859375, + "logps/rejected": -276.2244873046875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6481475830078125, + "rewards/margins": 7.816370964050293, + "rewards/rejected": -11.464518547058105, + "step": 9053 + }, + { + "epoch": 1.41, + "learning_rate": 7.506763836298564e-06, + "logits/chosen": -1.1911183595657349, + "logits/rejected": -2.627969264984131, + "logps/chosen": -124.84336853027344, + "logps/rejected": -303.2000427246094, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.920628547668457, + "rewards/margins": 6.815612316131592, + "rewards/rejected": -11.73624038696289, + "step": 9054 + }, + { + "epoch": 1.41, + "learning_rate": 7.506030395767416e-06, + "logits/chosen": -0.9799315929412842, + "logits/rejected": -2.472416639328003, + "logps/chosen": -91.65498352050781, + "logps/rejected": -408.07061767578125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.341278076171875, + "rewards/margins": 8.075187683105469, + "rewards/rejected": -13.416465759277344, + "step": 9055 + }, + { + "epoch": 1.41, + "learning_rate": 7.505296955236268e-06, + "logits/chosen": -2.8389475345611572, + "logits/rejected": -2.742309808731079, + "logps/chosen": -297.71771240234375, + "logps/rejected": -280.92041015625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.479196071624756, + "rewards/margins": 5.7520928382873535, + "rewards/rejected": -9.23128890991211, + "step": 9056 + }, + { + "epoch": 1.41, + "learning_rate": 7.50456351470512e-06, + "logits/chosen": -2.7644340991973877, + "logits/rejected": -3.0145721435546875, + "logps/chosen": -92.01380157470703, + "logps/rejected": -222.531982421875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8237364292144775, + "rewards/margins": 6.5829176902771, + "rewards/rejected": -9.406654357910156, + "step": 9057 + }, + { + "epoch": 1.41, + "learning_rate": 7.503830074173972e-06, + "logits/chosen": -0.9753437042236328, + "logits/rejected": -2.793921709060669, + "logps/chosen": -124.93122863769531, + "logps/rejected": -437.35546875, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.954151630401611, + "rewards/margins": 5.61521577835083, + "rewards/rejected": -11.569367408752441, + "step": 9058 + }, + { + "epoch": 1.41, + "learning_rate": 7.503096633642824e-06, + "logits/chosen": -2.1520578861236572, + "logits/rejected": -0.9101822376251221, + "logps/chosen": -1050.2958984375, + "logps/rejected": -439.913330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4574470520019531, + "rewards/margins": 11.706693649291992, + "rewards/rejected": -13.164140701293945, + "step": 9059 + }, + { + "epoch": 1.41, + "learning_rate": 7.5023631931116755e-06, + "logits/chosen": -1.9010123014450073, + "logits/rejected": -2.827282667160034, + "logps/chosen": -234.46609497070312, + "logps/rejected": -246.25831604003906, + "loss": 1.6871, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.656454086303711, + "rewards/margins": 1.0166666507720947, + "rewards/rejected": -8.673120498657227, + "step": 9060 + }, + { + "epoch": 1.41, + "learning_rate": 7.501629752580527e-06, + "logits/chosen": -3.1394450664520264, + "logits/rejected": -2.36651873588562, + "logps/chosen": -314.6993408203125, + "logps/rejected": -264.453125, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3869307041168213, + "rewards/margins": 7.007530212402344, + "rewards/rejected": -9.394460678100586, + "step": 9061 + }, + { + "epoch": 1.41, + "learning_rate": 7.500896312049381e-06, + "logits/chosen": -2.912611722946167, + "logits/rejected": -3.0532312393188477, + "logps/chosen": -88.9994125366211, + "logps/rejected": -191.7766876220703, + "loss": 0.119, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.961062431335449, + "rewards/margins": 2.310804605484009, + "rewards/rejected": -10.271867752075195, + "step": 9062 + }, + { + "epoch": 1.41, + "learning_rate": 7.500162871518233e-06, + "logits/chosen": -2.7749359607696533, + "logits/rejected": -3.0614700317382812, + "logps/chosen": -171.20091247558594, + "logps/rejected": -170.27523803710938, + "loss": 0.3796, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.536432266235352, + "rewards/margins": 4.334103584289551, + "rewards/rejected": -10.870535850524902, + "step": 9063 + }, + { + "epoch": 1.41, + "learning_rate": 7.499429430987085e-06, + "logits/chosen": -2.904059410095215, + "logits/rejected": -3.09006929397583, + "logps/chosen": -208.4709930419922, + "logps/rejected": -366.6153564453125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.05784797668457, + "rewards/margins": 8.025100708007812, + "rewards/rejected": -12.082947731018066, + "step": 9064 + }, + { + "epoch": 1.41, + "learning_rate": 7.498695990455937e-06, + "logits/chosen": -2.555340051651001, + "logits/rejected": -3.0839266777038574, + "logps/chosen": -478.5028076171875, + "logps/rejected": -512.9790649414062, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.161644697189331, + "rewards/margins": 5.72180700302124, + "rewards/rejected": -8.883451461791992, + "step": 9065 + }, + { + "epoch": 1.41, + "learning_rate": 7.4979625499247885e-06, + "logits/chosen": -2.29773211479187, + "logits/rejected": -2.769289493560791, + "logps/chosen": -74.86521911621094, + "logps/rejected": -327.6972351074219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.018850326538086, + "rewards/margins": 9.482461929321289, + "rewards/rejected": -14.501312255859375, + "step": 9066 + }, + { + "epoch": 1.41, + "learning_rate": 7.49722910939364e-06, + "logits/chosen": -1.636850118637085, + "logits/rejected": -2.9259350299835205, + "logps/chosen": -263.23193359375, + "logps/rejected": -544.0660400390625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.969613552093506, + "rewards/margins": 7.042917251586914, + "rewards/rejected": -10.012531280517578, + "step": 9067 + }, + { + "epoch": 1.41, + "learning_rate": 7.496495668862492e-06, + "logits/chosen": -2.555225372314453, + "logits/rejected": -2.9360296726226807, + "logps/chosen": -252.8077850341797, + "logps/rejected": -323.8169860839844, + "loss": 0.0707, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.197833061218262, + "rewards/margins": 3.2981767654418945, + "rewards/rejected": -7.496009826660156, + "step": 9068 + }, + { + "epoch": 1.41, + "learning_rate": 7.495762228331345e-06, + "logits/chosen": -3.0521230697631836, + "logits/rejected": -2.1171157360076904, + "logps/chosen": -264.8443603515625, + "logps/rejected": -475.98876953125, + "loss": 0.1808, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0511274337768555, + "rewards/margins": 8.200922012329102, + "rewards/rejected": -12.252049446105957, + "step": 9069 + }, + { + "epoch": 1.41, + "learning_rate": 7.495028787800197e-06, + "logits/chosen": -0.6888653635978699, + "logits/rejected": -1.7908681631088257, + "logps/chosen": -215.02235412597656, + "logps/rejected": -546.5416870117188, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.807219505310059, + "rewards/margins": 9.637954711914062, + "rewards/rejected": -14.445175170898438, + "step": 9070 + }, + { + "epoch": 1.41, + "learning_rate": 7.49429534726905e-06, + "logits/chosen": -1.8977466821670532, + "logits/rejected": -2.635478973388672, + "logps/chosen": -357.8433837890625, + "logps/rejected": -466.78509521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9403197765350342, + "rewards/margins": 12.827330589294434, + "rewards/rejected": -14.767650604248047, + "step": 9071 + }, + { + "epoch": 1.41, + "learning_rate": 7.493561906737902e-06, + "logits/chosen": -2.2560582160949707, + "logits/rejected": -2.668123960494995, + "logps/chosen": -93.08100891113281, + "logps/rejected": -309.2649230957031, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.02987813949585, + "rewards/margins": 8.410552978515625, + "rewards/rejected": -12.440430641174316, + "step": 9072 + }, + { + "epoch": 1.41, + "learning_rate": 7.492828466206754e-06, + "logits/chosen": -2.619997262954712, + "logits/rejected": -2.828763723373413, + "logps/chosen": -271.478271484375, + "logps/rejected": -366.7481384277344, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7625458240509033, + "rewards/margins": 6.632996082305908, + "rewards/rejected": -8.39554214477539, + "step": 9073 + }, + { + "epoch": 1.41, + "learning_rate": 7.492095025675606e-06, + "logits/chosen": -2.787179946899414, + "logits/rejected": -2.865772247314453, + "logps/chosen": -432.56756591796875, + "logps/rejected": -272.43463134765625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.30940580368042, + "rewards/margins": 7.295584678649902, + "rewards/rejected": -10.604990005493164, + "step": 9074 + }, + { + "epoch": 1.41, + "learning_rate": 7.491361585144458e-06, + "logits/chosen": -2.897707462310791, + "logits/rejected": -1.893716812133789, + "logps/chosen": -836.8743896484375, + "logps/rejected": -402.93035888671875, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.010871887207031, + "rewards/margins": 5.216588020324707, + "rewards/rejected": -9.227459907531738, + "step": 9075 + }, + { + "epoch": 1.41, + "learning_rate": 7.49062814461331e-06, + "logits/chosen": -2.528606653213501, + "logits/rejected": -2.4367592334747314, + "logps/chosen": -172.30453491210938, + "logps/rejected": -221.6627960205078, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.226474761962891, + "rewards/margins": 7.131680965423584, + "rewards/rejected": -12.358156204223633, + "step": 9076 + }, + { + "epoch": 1.41, + "learning_rate": 7.489894704082162e-06, + "logits/chosen": -3.023130178451538, + "logits/rejected": -2.9379525184631348, + "logps/chosen": -307.7590026855469, + "logps/rejected": -329.95947265625, + "loss": 0.5937, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.567187786102295, + "rewards/margins": 3.039334774017334, + "rewards/rejected": -9.606522560119629, + "step": 9077 + }, + { + "epoch": 1.41, + "learning_rate": 7.4891612635510136e-06, + "logits/chosen": -2.9390199184417725, + "logits/rejected": -3.0678601264953613, + "logps/chosen": -162.52003479003906, + "logps/rejected": -405.3160095214844, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.85776948928833, + "rewards/margins": 5.745637893676758, + "rewards/rejected": -10.60340690612793, + "step": 9078 + }, + { + "epoch": 1.41, + "learning_rate": 7.4884278230198654e-06, + "logits/chosen": -2.428607940673828, + "logits/rejected": -2.9053616523742676, + "logps/chosen": -145.9654541015625, + "logps/rejected": -377.36700439453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.024590492248535, + "rewards/margins": 10.132696151733398, + "rewards/rejected": -13.157285690307617, + "step": 9079 + }, + { + "epoch": 1.41, + "learning_rate": 7.487694382488719e-06, + "logits/chosen": -2.925746440887451, + "logits/rejected": -2.7142457962036133, + "logps/chosen": -150.29933166503906, + "logps/rejected": -212.9355010986328, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6020145416259766, + "rewards/margins": 6.096832275390625, + "rewards/rejected": -9.698846817016602, + "step": 9080 + }, + { + "epoch": 1.41, + "learning_rate": 7.486960941957571e-06, + "logits/chosen": -2.7175071239471436, + "logits/rejected": -2.638784885406494, + "logps/chosen": -462.82476806640625, + "logps/rejected": -413.1759033203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.74796462059021, + "rewards/margins": 9.435979843139648, + "rewards/rejected": -13.183944702148438, + "step": 9081 + }, + { + "epoch": 1.41, + "learning_rate": 7.486227501426423e-06, + "logits/chosen": -2.0826046466827393, + "logits/rejected": -2.709674835205078, + "logps/chosen": -334.81884765625, + "logps/rejected": -352.4969482421875, + "loss": 0.5735, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.193751335144043, + "rewards/margins": 3.437358856201172, + "rewards/rejected": -8.631110191345215, + "step": 9082 + }, + { + "epoch": 1.41, + "learning_rate": 7.485494060895275e-06, + "logits/chosen": -1.8911248445510864, + "logits/rejected": -2.911290407180786, + "logps/chosen": -94.11660766601562, + "logps/rejected": -201.27822875976562, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4337053298950195, + "rewards/margins": 4.568720817565918, + "rewards/rejected": -11.002426147460938, + "step": 9083 + }, + { + "epoch": 1.41, + "learning_rate": 7.4847606203641265e-06, + "logits/chosen": -2.744304656982422, + "logits/rejected": -1.09807550907135, + "logps/chosen": -287.8240966796875, + "logps/rejected": -126.27208709716797, + "loss": 2.5323, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.559518337249756, + "rewards/margins": -2.4097442626953125, + "rewards/rejected": -5.149774074554443, + "step": 9084 + }, + { + "epoch": 1.41, + "learning_rate": 7.484027179832978e-06, + "logits/chosen": -3.0051684379577637, + "logits/rejected": -2.075197458267212, + "logps/chosen": -681.7967529296875, + "logps/rejected": -508.3849792480469, + "loss": 0.1242, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.404576301574707, + "rewards/margins": 4.678859710693359, + "rewards/rejected": -9.083436012268066, + "step": 9085 + }, + { + "epoch": 1.41, + "learning_rate": 7.483293739301831e-06, + "logits/chosen": -2.100759506225586, + "logits/rejected": -3.040644884109497, + "logps/chosen": -402.7541809082031, + "logps/rejected": -513.8984375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.491643905639648, + "rewards/margins": 7.041930198669434, + "rewards/rejected": -12.533574104309082, + "step": 9086 + }, + { + "epoch": 1.41, + "learning_rate": 7.482560298770683e-06, + "logits/chosen": -2.1675777435302734, + "logits/rejected": -2.9171032905578613, + "logps/chosen": -127.76972198486328, + "logps/rejected": -383.8924865722656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0169906616210938, + "rewards/margins": 10.685802459716797, + "rewards/rejected": -12.70279312133789, + "step": 9087 + }, + { + "epoch": 1.41, + "learning_rate": 7.481826858239535e-06, + "logits/chosen": -2.191901683807373, + "logits/rejected": -2.1547951698303223, + "logps/chosen": -359.6786804199219, + "logps/rejected": -342.37347412109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.670863151550293, + "rewards/margins": 8.649269104003906, + "rewards/rejected": -12.320131301879883, + "step": 9088 + }, + { + "epoch": 1.41, + "learning_rate": 7.4810934177083885e-06, + "logits/chosen": -2.6598939895629883, + "logits/rejected": -2.7553513050079346, + "logps/chosen": -101.94374084472656, + "logps/rejected": -446.6614990234375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.629661560058594, + "rewards/margins": 8.767294883728027, + "rewards/rejected": -13.396956443786621, + "step": 9089 + }, + { + "epoch": 1.41, + "learning_rate": 7.48035997717724e-06, + "logits/chosen": -1.2895281314849854, + "logits/rejected": -2.574120283126831, + "logps/chosen": -135.09713745117188, + "logps/rejected": -293.3377685546875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.807401418685913, + "rewards/margins": 8.363675117492676, + "rewards/rejected": -11.171076774597168, + "step": 9090 + }, + { + "epoch": 1.41, + "learning_rate": 7.479626536646092e-06, + "logits/chosen": -2.196202039718628, + "logits/rejected": -3.051832437515259, + "logps/chosen": -72.66080474853516, + "logps/rejected": -310.9150390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4264847040176392, + "rewards/margins": 8.263030052185059, + "rewards/rejected": -9.689515113830566, + "step": 9091 + }, + { + "epoch": 1.41, + "learning_rate": 7.478893096114944e-06, + "logits/chosen": -2.341632127761841, + "logits/rejected": -2.5756139755249023, + "logps/chosen": -319.61590576171875, + "logps/rejected": -480.09210205078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6770336627960205, + "rewards/margins": 8.667420387268066, + "rewards/rejected": -12.344453811645508, + "step": 9092 + }, + { + "epoch": 1.41, + "learning_rate": 7.478159655583796e-06, + "logits/chosen": -2.7359511852264404, + "logits/rejected": -2.4113194942474365, + "logps/chosen": -251.0111083984375, + "logps/rejected": -240.5271453857422, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.127497673034668, + "rewards/margins": 6.241331100463867, + "rewards/rejected": -8.368828773498535, + "step": 9093 + }, + { + "epoch": 1.41, + "learning_rate": 7.477426215052648e-06, + "logits/chosen": -1.6028095483779907, + "logits/rejected": -2.2835006713867188, + "logps/chosen": -263.94921875, + "logps/rejected": -270.7348327636719, + "loss": 0.4402, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.664851665496826, + "rewards/margins": 4.834489822387695, + "rewards/rejected": -9.499341011047363, + "step": 9094 + }, + { + "epoch": 1.41, + "learning_rate": 7.4766927745215e-06, + "logits/chosen": -2.105916976928711, + "logits/rejected": -2.9844534397125244, + "logps/chosen": -121.80322265625, + "logps/rejected": -411.94342041015625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6301257610321045, + "rewards/margins": 6.661740303039551, + "rewards/rejected": -10.291866302490234, + "step": 9095 + }, + { + "epoch": 1.41, + "learning_rate": 7.475959333990352e-06, + "logits/chosen": -2.0924489498138428, + "logits/rejected": -2.896627426147461, + "logps/chosen": -122.63803100585938, + "logps/rejected": -292.0860290527344, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.828515529632568, + "rewards/margins": 7.58446741104126, + "rewards/rejected": -12.412982940673828, + "step": 9096 + }, + { + "epoch": 1.41, + "learning_rate": 7.4752258934592035e-06, + "logits/chosen": -2.5963568687438965, + "logits/rejected": -3.051400899887085, + "logps/chosen": -92.50177764892578, + "logps/rejected": -250.97604370117188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.417694330215454, + "rewards/margins": 9.24351692199707, + "rewards/rejected": -10.661211013793945, + "step": 9097 + }, + { + "epoch": 1.41, + "learning_rate": 7.474492452928057e-06, + "logits/chosen": -2.6342175006866455, + "logits/rejected": -3.024315118789673, + "logps/chosen": -75.98329162597656, + "logps/rejected": -203.0374298095703, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0229597091674805, + "rewards/margins": 7.23291015625, + "rewards/rejected": -12.255870819091797, + "step": 9098 + }, + { + "epoch": 1.42, + "learning_rate": 7.473759012396909e-06, + "logits/chosen": -2.814742088317871, + "logits/rejected": -1.5798181295394897, + "logps/chosen": -323.16015625, + "logps/rejected": -340.2312927246094, + "loss": 1.439, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.38864803314209, + "rewards/margins": 3.2079787254333496, + "rewards/rejected": -9.596626281738281, + "step": 9099 + }, + { + "epoch": 1.42, + "learning_rate": 7.473025571865761e-06, + "logits/chosen": -1.9586081504821777, + "logits/rejected": -2.6338438987731934, + "logps/chosen": -165.2007293701172, + "logps/rejected": -266.5645751953125, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3373842239379883, + "rewards/margins": 5.431196212768555, + "rewards/rejected": -7.768580436706543, + "step": 9100 + }, + { + "epoch": 1.42, + "learning_rate": 7.472292131334613e-06, + "logits/chosen": -1.7706719636917114, + "logits/rejected": -2.726816415786743, + "logps/chosen": -182.57447814941406, + "logps/rejected": -390.2470703125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.761814594268799, + "rewards/margins": 8.48817253112793, + "rewards/rejected": -11.24998664855957, + "step": 9101 + }, + { + "epoch": 1.42, + "learning_rate": 7.4715586908034646e-06, + "logits/chosen": -3.066272735595703, + "logits/rejected": -3.043118953704834, + "logps/chosen": -104.43437957763672, + "logps/rejected": -341.03204345703125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3108723163604736, + "rewards/margins": 8.59689712524414, + "rewards/rejected": -10.907769203186035, + "step": 9102 + }, + { + "epoch": 1.42, + "learning_rate": 7.470825250272317e-06, + "logits/chosen": -2.51971697807312, + "logits/rejected": -3.0741002559661865, + "logps/chosen": -132.17520141601562, + "logps/rejected": -413.75115966796875, + "loss": 0.0688, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.960954666137695, + "rewards/margins": 5.660763740539551, + "rewards/rejected": -12.621718406677246, + "step": 9103 + }, + { + "epoch": 1.42, + "learning_rate": 7.470091809741169e-06, + "logits/chosen": -2.8060736656188965, + "logits/rejected": -2.93601131439209, + "logps/chosen": -157.5634765625, + "logps/rejected": -334.8174743652344, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.989448070526123, + "rewards/margins": 7.460483551025391, + "rewards/rejected": -12.449932098388672, + "step": 9104 + }, + { + "epoch": 1.42, + "learning_rate": 7.469358369210021e-06, + "logits/chosen": -2.8691399097442627, + "logits/rejected": -1.8287982940673828, + "logps/chosen": -193.159423828125, + "logps/rejected": -199.1785125732422, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2467079162597656, + "rewards/margins": 5.671572208404541, + "rewards/rejected": -7.918280124664307, + "step": 9105 + }, + { + "epoch": 1.42, + "learning_rate": 7.468624928678873e-06, + "logits/chosen": -2.9373347759246826, + "logits/rejected": -2.29129958152771, + "logps/chosen": -394.7474670410156, + "logps/rejected": -466.5994873046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8289299011230469, + "rewards/margins": 11.889568328857422, + "rewards/rejected": -13.718498229980469, + "step": 9106 + }, + { + "epoch": 1.42, + "learning_rate": 7.4678914881477265e-06, + "logits/chosen": -2.8377232551574707, + "logits/rejected": -1.9136922359466553, + "logps/chosen": -275.262451171875, + "logps/rejected": -229.14779663085938, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.974260091781616, + "rewards/margins": 7.620718955993652, + "rewards/rejected": -10.594979286193848, + "step": 9107 + }, + { + "epoch": 1.42, + "learning_rate": 7.467158047616578e-06, + "logits/chosen": -2.1640331745147705, + "logits/rejected": -2.46970272064209, + "logps/chosen": -253.89175415039062, + "logps/rejected": -463.8011779785156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7849113941192627, + "rewards/margins": 8.862879753112793, + "rewards/rejected": -11.647790908813477, + "step": 9108 + }, + { + "epoch": 1.42, + "learning_rate": 7.46642460708543e-06, + "logits/chosen": -2.4486262798309326, + "logits/rejected": -2.863413095474243, + "logps/chosen": -282.56072998046875, + "logps/rejected": -276.2364501953125, + "loss": 0.9582, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.8018906116485596, + "rewards/margins": 3.113464593887329, + "rewards/rejected": -6.915355205535889, + "step": 9109 + }, + { + "epoch": 1.42, + "learning_rate": 7.465691166554282e-06, + "logits/chosen": -2.412423610687256, + "logits/rejected": -2.906055212020874, + "logps/chosen": -142.4967803955078, + "logps/rejected": -268.78802490234375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1834664344787598, + "rewards/margins": 7.190097808837891, + "rewards/rejected": -10.373564720153809, + "step": 9110 + }, + { + "epoch": 1.42, + "learning_rate": 7.464957726023134e-06, + "logits/chosen": -1.1124109029769897, + "logits/rejected": -2.9322850704193115, + "logps/chosen": -172.56748962402344, + "logps/rejected": -739.7745971679688, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.127250671386719, + "rewards/margins": 8.573209762573242, + "rewards/rejected": -14.700460433959961, + "step": 9111 + }, + { + "epoch": 1.42, + "learning_rate": 7.464224285491986e-06, + "logits/chosen": -1.9814453125, + "logits/rejected": -2.568796396255493, + "logps/chosen": -84.22688293457031, + "logps/rejected": -251.82664489746094, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5783491134643555, + "rewards/margins": 7.382213115692139, + "rewards/rejected": -9.960561752319336, + "step": 9112 + }, + { + "epoch": 1.42, + "learning_rate": 7.463490844960838e-06, + "logits/chosen": -2.9368720054626465, + "logits/rejected": -2.067934274673462, + "logps/chosen": -741.1622924804688, + "logps/rejected": -511.0603942871094, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3996292352676392, + "rewards/margins": 9.500290870666504, + "rewards/rejected": -10.899919509887695, + "step": 9113 + }, + { + "epoch": 1.42, + "learning_rate": 7.46275740442969e-06, + "logits/chosen": -1.7870354652404785, + "logits/rejected": -2.958756923675537, + "logps/chosen": -137.38250732421875, + "logps/rejected": -356.74493408203125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3716840744018555, + "rewards/margins": 7.11224365234375, + "rewards/rejected": -12.483927726745605, + "step": 9114 + }, + { + "epoch": 1.42, + "learning_rate": 7.4620239638985415e-06, + "logits/chosen": -2.1818151473999023, + "logits/rejected": -2.8960187435150146, + "logps/chosen": -172.1727294921875, + "logps/rejected": -589.9971923828125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.267569065093994, + "rewards/margins": 7.619688987731934, + "rewards/rejected": -11.88725757598877, + "step": 9115 + }, + { + "epoch": 1.42, + "learning_rate": 7.461290523367395e-06, + "logits/chosen": -1.1235191822052002, + "logits/rejected": -2.612513303756714, + "logps/chosen": -104.95037078857422, + "logps/rejected": -388.6280822753906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.973479270935059, + "rewards/margins": 9.520408630371094, + "rewards/rejected": -15.493886947631836, + "step": 9116 + }, + { + "epoch": 1.42, + "learning_rate": 7.460557082836247e-06, + "logits/chosen": -3.0964767932891846, + "logits/rejected": -2.9827685356140137, + "logps/chosen": -127.02568054199219, + "logps/rejected": -140.95816040039062, + "loss": 0.0345, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.552772045135498, + "rewards/margins": 3.9044365882873535, + "rewards/rejected": -7.457208633422852, + "step": 9117 + }, + { + "epoch": 1.42, + "learning_rate": 7.459823642305099e-06, + "logits/chosen": -3.121774673461914, + "logits/rejected": -2.533263683319092, + "logps/chosen": -174.0923614501953, + "logps/rejected": -107.03927612304688, + "loss": 1.2187, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.269865989685059, + "rewards/margins": 0.7285964488983154, + "rewards/rejected": -8.998462677001953, + "step": 9118 + }, + { + "epoch": 1.42, + "learning_rate": 7.459090201773951e-06, + "logits/chosen": -2.442713499069214, + "logits/rejected": -3.078397512435913, + "logps/chosen": -175.36282348632812, + "logps/rejected": -267.863525390625, + "loss": 0.2557, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.891063690185547, + "rewards/margins": 2.318553924560547, + "rewards/rejected": -8.209617614746094, + "step": 9119 + }, + { + "epoch": 1.42, + "learning_rate": 7.4583567612428035e-06, + "logits/chosen": -1.9686731100082397, + "logits/rejected": -2.660972833633423, + "logps/chosen": -167.24517822265625, + "logps/rejected": -335.8116760253906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4941794872283936, + "rewards/margins": 10.975738525390625, + "rewards/rejected": -13.469918251037598, + "step": 9120 + }, + { + "epoch": 1.42, + "learning_rate": 7.457623320711655e-06, + "logits/chosen": -2.0214598178863525, + "logits/rejected": -2.9207403659820557, + "logps/chosen": -150.54391479492188, + "logps/rejected": -358.9722595214844, + "loss": 0.8894, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.237991809844971, + "rewards/margins": 3.5557384490966797, + "rewards/rejected": -8.793730735778809, + "step": 9121 + }, + { + "epoch": 1.42, + "learning_rate": 7.456889880180507e-06, + "logits/chosen": -2.5699234008789062, + "logits/rejected": -3.0422301292419434, + "logps/chosen": -121.32965850830078, + "logps/rejected": -174.2022705078125, + "loss": 1.886, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.2936811447143555, + "rewards/margins": 1.2231526374816895, + "rewards/rejected": -6.516833782196045, + "step": 9122 + }, + { + "epoch": 1.42, + "learning_rate": 7.456156439649359e-06, + "logits/chosen": -2.0883169174194336, + "logits/rejected": -2.7762739658355713, + "logps/chosen": -168.47828674316406, + "logps/rejected": -374.1637268066406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9207191467285156, + "rewards/margins": 8.312039375305176, + "rewards/rejected": -11.232758522033691, + "step": 9123 + }, + { + "epoch": 1.42, + "learning_rate": 7.455422999118211e-06, + "logits/chosen": -2.9279656410217285, + "logits/rejected": -2.7177047729492188, + "logps/chosen": -106.83366394042969, + "logps/rejected": -165.19297790527344, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.450757026672363, + "rewards/margins": 4.098962783813477, + "rewards/rejected": -10.549718856811523, + "step": 9124 + }, + { + "epoch": 1.42, + "learning_rate": 7.4546895585870645e-06, + "logits/chosen": -2.3474721908569336, + "logits/rejected": -1.4994926452636719, + "logps/chosen": -482.2049560546875, + "logps/rejected": -228.68270874023438, + "loss": 1.4192, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.5198469161987305, + "rewards/margins": 1.7896273136138916, + "rewards/rejected": -9.309473991394043, + "step": 9125 + }, + { + "epoch": 1.42, + "learning_rate": 7.4539561180559164e-06, + "logits/chosen": -2.6542234420776367, + "logits/rejected": -2.284925699234009, + "logps/chosen": -251.92298889160156, + "logps/rejected": -208.0145263671875, + "loss": 0.0457, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.579280853271484, + "rewards/margins": 6.082526206970215, + "rewards/rejected": -13.6618070602417, + "step": 9126 + }, + { + "epoch": 1.42, + "learning_rate": 7.453222677524768e-06, + "logits/chosen": -2.1124467849731445, + "logits/rejected": -1.9096804857254028, + "logps/chosen": -518.1165771484375, + "logps/rejected": -468.6280517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.289225101470947, + "rewards/margins": 11.665529251098633, + "rewards/rejected": -15.954753875732422, + "step": 9127 + }, + { + "epoch": 1.42, + "learning_rate": 7.45248923699362e-06, + "logits/chosen": -2.617304563522339, + "logits/rejected": -2.410080671310425, + "logps/chosen": -116.85697174072266, + "logps/rejected": -213.20416259765625, + "loss": 0.161, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.797464370727539, + "rewards/margins": 4.281520843505859, + "rewards/rejected": -10.078985214233398, + "step": 9128 + }, + { + "epoch": 1.42, + "learning_rate": 7.451755796462472e-06, + "logits/chosen": -1.6253983974456787, + "logits/rejected": -2.5361876487731934, + "logps/chosen": -132.11703491210938, + "logps/rejected": -574.0894775390625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8521437644958496, + "rewards/margins": 7.8115644454956055, + "rewards/rejected": -10.663708686828613, + "step": 9129 + }, + { + "epoch": 1.42, + "learning_rate": 7.451022355931324e-06, + "logits/chosen": -3.0170044898986816, + "logits/rejected": -1.277638554573059, + "logps/chosen": -632.861328125, + "logps/rejected": -328.63189697265625, + "loss": 0.3637, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9024720191955566, + "rewards/margins": 2.59352707862854, + "rewards/rejected": -6.495999336242676, + "step": 9130 + }, + { + "epoch": 1.42, + "learning_rate": 7.450288915400176e-06, + "logits/chosen": -2.9518463611602783, + "logits/rejected": -1.160348892211914, + "logps/chosen": -183.25607299804688, + "logps/rejected": -179.49313354492188, + "loss": 0.1094, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.347105026245117, + "rewards/margins": 6.222485065460205, + "rewards/rejected": -10.56959056854248, + "step": 9131 + }, + { + "epoch": 1.42, + "learning_rate": 7.449555474869028e-06, + "logits/chosen": -3.0380678176879883, + "logits/rejected": -2.113638401031494, + "logps/chosen": -424.5914306640625, + "logps/rejected": -365.8218688964844, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1341753005981445, + "rewards/margins": 5.312984466552734, + "rewards/rejected": -8.447159767150879, + "step": 9132 + }, + { + "epoch": 1.42, + "learning_rate": 7.4488220343378796e-06, + "logits/chosen": -2.2442758083343506, + "logits/rejected": -2.861419200897217, + "logps/chosen": -659.5648193359375, + "logps/rejected": -524.2256469726562, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8150627613067627, + "rewards/margins": 9.096315383911133, + "rewards/rejected": -12.911378860473633, + "step": 9133 + }, + { + "epoch": 1.42, + "learning_rate": 7.448088593806733e-06, + "logits/chosen": -2.908751964569092, + "logits/rejected": -2.846074342727661, + "logps/chosen": -275.98419189453125, + "logps/rejected": -315.0821533203125, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6850228309631348, + "rewards/margins": 5.892722129821777, + "rewards/rejected": -7.57774543762207, + "step": 9134 + }, + { + "epoch": 1.42, + "learning_rate": 7.447355153275585e-06, + "logits/chosen": -1.9084676504135132, + "logits/rejected": -2.5888431072235107, + "logps/chosen": -184.6018829345703, + "logps/rejected": -364.7367858886719, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6537327766418457, + "rewards/margins": 6.1852922439575195, + "rewards/rejected": -8.839024543762207, + "step": 9135 + }, + { + "epoch": 1.42, + "learning_rate": 7.446621712744437e-06, + "logits/chosen": -2.1796021461486816, + "logits/rejected": -2.8666739463806152, + "logps/chosen": -187.83609008789062, + "logps/rejected": -253.59310913085938, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.51047945022583, + "rewards/margins": 5.803918838500977, + "rewards/rejected": -11.314397811889648, + "step": 9136 + }, + { + "epoch": 1.42, + "learning_rate": 7.44588827221329e-06, + "logits/chosen": -2.95512318611145, + "logits/rejected": -2.6927549839019775, + "logps/chosen": -173.17135620117188, + "logps/rejected": -428.35150146484375, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.503049612045288, + "rewards/margins": 9.057863235473633, + "rewards/rejected": -12.5609130859375, + "step": 9137 + }, + { + "epoch": 1.42, + "learning_rate": 7.4451548316821415e-06, + "logits/chosen": -2.8154103755950928, + "logits/rejected": -2.935312509536743, + "logps/chosen": -156.6961669921875, + "logps/rejected": -379.82110595703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7224175930023193, + "rewards/margins": 8.56847858428955, + "rewards/rejected": -12.290895462036133, + "step": 9138 + }, + { + "epoch": 1.42, + "learning_rate": 7.444421391150993e-06, + "logits/chosen": -2.131605625152588, + "logits/rejected": -3.1737139225006104, + "logps/chosen": -210.68344116210938, + "logps/rejected": -388.3650817871094, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.431632041931152, + "rewards/margins": 7.073019981384277, + "rewards/rejected": -11.50465202331543, + "step": 9139 + }, + { + "epoch": 1.42, + "learning_rate": 7.443687950619845e-06, + "logits/chosen": -1.3609000444412231, + "logits/rejected": -2.86875319480896, + "logps/chosen": -121.46348571777344, + "logps/rejected": -356.6644287109375, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.581076622009277, + "rewards/margins": 6.082050800323486, + "rewards/rejected": -11.663127899169922, + "step": 9140 + }, + { + "epoch": 1.42, + "learning_rate": 7.442954510088697e-06, + "logits/chosen": -2.335238218307495, + "logits/rejected": -2.817307472229004, + "logps/chosen": -255.724365234375, + "logps/rejected": -307.4843444824219, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7341551780700684, + "rewards/margins": 8.417693138122559, + "rewards/rejected": -12.151847839355469, + "step": 9141 + }, + { + "epoch": 1.42, + "learning_rate": 7.442221069557551e-06, + "logits/chosen": -2.5889458656311035, + "logits/rejected": -2.6747453212738037, + "logps/chosen": -221.36044311523438, + "logps/rejected": -348.10382080078125, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.305937767028809, + "rewards/margins": 7.809258937835693, + "rewards/rejected": -13.115196228027344, + "step": 9142 + }, + { + "epoch": 1.42, + "learning_rate": 7.441487629026403e-06, + "logits/chosen": -2.8569133281707764, + "logits/rejected": -1.1393173933029175, + "logps/chosen": -638.46630859375, + "logps/rejected": -441.6673889160156, + "loss": 0.032, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.764115810394287, + "rewards/margins": 6.411409378051758, + "rewards/rejected": -11.175525665283203, + "step": 9143 + }, + { + "epoch": 1.42, + "learning_rate": 7.4407541884952545e-06, + "logits/chosen": -3.0673046112060547, + "logits/rejected": -2.513655424118042, + "logps/chosen": -251.71337890625, + "logps/rejected": -213.5000762939453, + "loss": 0.0538, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.474442958831787, + "rewards/margins": 4.048186302185059, + "rewards/rejected": -7.5226287841796875, + "step": 9144 + }, + { + "epoch": 1.42, + "learning_rate": 7.440020747964106e-06, + "logits/chosen": -2.422034978866577, + "logits/rejected": -2.962137222290039, + "logps/chosen": -281.40130615234375, + "logps/rejected": -429.1673583984375, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6017894744873047, + "rewards/margins": 6.899039268493652, + "rewards/rejected": -10.500828742980957, + "step": 9145 + }, + { + "epoch": 1.42, + "learning_rate": 7.439287307432958e-06, + "logits/chosen": -2.855708122253418, + "logits/rejected": -2.938467502593994, + "logps/chosen": -146.20535278320312, + "logps/rejected": -255.41058349609375, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.579176902770996, + "rewards/margins": 4.476694107055664, + "rewards/rejected": -10.05587100982666, + "step": 9146 + }, + { + "epoch": 1.42, + "learning_rate": 7.43855386690181e-06, + "logits/chosen": -2.360597848892212, + "logits/rejected": -3.079972743988037, + "logps/chosen": -97.76557922363281, + "logps/rejected": -259.29876708984375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.662939071655273, + "rewards/margins": 7.030369758605957, + "rewards/rejected": -11.69330883026123, + "step": 9147 + }, + { + "epoch": 1.42, + "learning_rate": 7.437820426370662e-06, + "logits/chosen": -2.9529120922088623, + "logits/rejected": -2.4010403156280518, + "logps/chosen": -816.6905517578125, + "logps/rejected": -607.7005615234375, + "loss": 0.2844, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.42210054397583, + "rewards/margins": 5.498127460479736, + "rewards/rejected": -10.920228004455566, + "step": 9148 + }, + { + "epoch": 1.42, + "learning_rate": 7.437086985839514e-06, + "logits/chosen": -2.898987293243408, + "logits/rejected": -2.972240447998047, + "logps/chosen": -605.0299072265625, + "logps/rejected": -442.46124267578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1613953113555908, + "rewards/margins": 8.779548645019531, + "rewards/rejected": -9.94094467163086, + "step": 9149 + }, + { + "epoch": 1.42, + "learning_rate": 7.436353545308366e-06, + "logits/chosen": -2.4662492275238037, + "logits/rejected": -2.649379253387451, + "logps/chosen": -332.65570068359375, + "logps/rejected": -343.01812744140625, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1041669845581055, + "rewards/margins": 5.200927734375, + "rewards/rejected": -9.305094718933105, + "step": 9150 + }, + { + "epoch": 1.42, + "learning_rate": 7.435620104777219e-06, + "logits/chosen": -2.914118766784668, + "logits/rejected": -2.7573134899139404, + "logps/chosen": -613.940185546875, + "logps/rejected": -725.7300415039062, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.153106689453125, + "rewards/margins": 7.555453300476074, + "rewards/rejected": -10.7085599899292, + "step": 9151 + }, + { + "epoch": 1.42, + "learning_rate": 7.434886664246071e-06, + "logits/chosen": -2.7566256523132324, + "logits/rejected": -3.045008897781372, + "logps/chosen": -81.94992065429688, + "logps/rejected": -322.2975158691406, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.371218204498291, + "rewards/margins": 4.654013633728027, + "rewards/rejected": -8.025232315063477, + "step": 9152 + }, + { + "epoch": 1.42, + "learning_rate": 7.434153223714923e-06, + "logits/chosen": -2.4719960689544678, + "logits/rejected": -3.051802396774292, + "logps/chosen": -40.57709503173828, + "logps/rejected": -178.92523193359375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9378981590270996, + "rewards/margins": 6.938103675842285, + "rewards/rejected": -9.876001358032227, + "step": 9153 + }, + { + "epoch": 1.42, + "learning_rate": 7.433419783183776e-06, + "logits/chosen": -1.8034909963607788, + "logits/rejected": -2.6834990978240967, + "logps/chosen": -141.5943603515625, + "logps/rejected": -242.3679656982422, + "loss": 0.0772, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.124973773956299, + "rewards/margins": 2.6251540184020996, + "rewards/rejected": -9.750127792358398, + "step": 9154 + }, + { + "epoch": 1.42, + "learning_rate": 7.432686342652628e-06, + "logits/chosen": -2.909390926361084, + "logits/rejected": -2.45290207862854, + "logps/chosen": -445.482177734375, + "logps/rejected": -403.02105712890625, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.304433822631836, + "rewards/margins": 5.219775199890137, + "rewards/rejected": -10.524209022521973, + "step": 9155 + }, + { + "epoch": 1.42, + "learning_rate": 7.4319529021214796e-06, + "logits/chosen": -1.7679301500320435, + "logits/rejected": -2.9431231021881104, + "logps/chosen": -62.96854019165039, + "logps/rejected": -377.7159729003906, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.83286190032959, + "rewards/margins": 6.381366729736328, + "rewards/rejected": -11.214228630065918, + "step": 9156 + }, + { + "epoch": 1.42, + "learning_rate": 7.4312194615903314e-06, + "logits/chosen": -2.488537073135376, + "logits/rejected": -2.792726993560791, + "logps/chosen": -223.75778198242188, + "logps/rejected": -341.43780517578125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.323349952697754, + "rewards/margins": 8.049947738647461, + "rewards/rejected": -11.373297691345215, + "step": 9157 + }, + { + "epoch": 1.42, + "learning_rate": 7.430486021059183e-06, + "logits/chosen": -2.782287120819092, + "logits/rejected": -2.6172866821289062, + "logps/chosen": -216.5703125, + "logps/rejected": -270.249755859375, + "loss": 0.2634, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.847475528717041, + "rewards/margins": 3.5836987495422363, + "rewards/rejected": -8.431174278259277, + "step": 9158 + }, + { + "epoch": 1.42, + "learning_rate": 7.429752580528035e-06, + "logits/chosen": -3.0073533058166504, + "logits/rejected": -2.9363014698028564, + "logps/chosen": -224.86962890625, + "logps/rejected": -258.2763671875, + "loss": 0.0814, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.894047737121582, + "rewards/margins": 4.041958332061768, + "rewards/rejected": -9.936006546020508, + "step": 9159 + }, + { + "epoch": 1.42, + "learning_rate": 7.429019139996889e-06, + "logits/chosen": -2.9308648109436035, + "logits/rejected": -2.5745561122894287, + "logps/chosen": -516.9769287109375, + "logps/rejected": -729.3839111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5748889446258545, + "rewards/margins": 12.486000061035156, + "rewards/rejected": -15.06088924407959, + "step": 9160 + }, + { + "epoch": 1.42, + "learning_rate": 7.428285699465741e-06, + "logits/chosen": -2.063483476638794, + "logits/rejected": -2.949770450592041, + "logps/chosen": -87.97671508789062, + "logps/rejected": -361.8428955078125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.355825901031494, + "rewards/margins": 7.304610252380371, + "rewards/rejected": -11.660435676574707, + "step": 9161 + }, + { + "epoch": 1.42, + "learning_rate": 7.4275522589345925e-06, + "logits/chosen": -2.04168963432312, + "logits/rejected": -2.9620699882507324, + "logps/chosen": -329.85888671875, + "logps/rejected": -418.2647705078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.20531702041626, + "rewards/margins": 10.48274040222168, + "rewards/rejected": -14.688056945800781, + "step": 9162 + }, + { + "epoch": 1.43, + "learning_rate": 7.426818818403444e-06, + "logits/chosen": -1.3830865621566772, + "logits/rejected": -2.6824941635131836, + "logps/chosen": -263.1634521484375, + "logps/rejected": -575.853271484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.435343265533447, + "rewards/margins": 12.066110610961914, + "rewards/rejected": -17.501453399658203, + "step": 9163 + }, + { + "epoch": 1.43, + "learning_rate": 7.426085377872296e-06, + "logits/chosen": -2.832838773727417, + "logits/rejected": -1.7609306573867798, + "logps/chosen": -328.2562255859375, + "logps/rejected": -187.4478302001953, + "loss": 0.0563, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.906662940979004, + "rewards/margins": 5.305805683135986, + "rewards/rejected": -8.212469100952148, + "step": 9164 + }, + { + "epoch": 1.43, + "learning_rate": 7.425351937341148e-06, + "logits/chosen": -2.59230637550354, + "logits/rejected": -2.7039966583251953, + "logps/chosen": -233.8200225830078, + "logps/rejected": -315.74908447265625, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.192760944366455, + "rewards/margins": 7.011586666107178, + "rewards/rejected": -12.204347610473633, + "step": 9165 + }, + { + "epoch": 1.43, + "learning_rate": 7.42461849681e-06, + "logits/chosen": -1.9719740152359009, + "logits/rejected": -2.465773820877075, + "logps/chosen": -85.988525390625, + "logps/rejected": -337.7342224121094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.022243976593018, + "rewards/margins": 12.615716934204102, + "rewards/rejected": -16.63796043395996, + "step": 9166 + }, + { + "epoch": 1.43, + "learning_rate": 7.423885056278852e-06, + "logits/chosen": -2.618079900741577, + "logits/rejected": -3.004704236984253, + "logps/chosen": -178.37252807617188, + "logps/rejected": -194.21463012695312, + "loss": 1.5111, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.971436977386475, + "rewards/margins": 2.5465192794799805, + "rewards/rejected": -10.517955780029297, + "step": 9167 + }, + { + "epoch": 1.43, + "learning_rate": 7.423151615747704e-06, + "logits/chosen": -2.7992851734161377, + "logits/rejected": -1.9614652395248413, + "logps/chosen": -146.3223876953125, + "logps/rejected": -328.9897155761719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5732064247131348, + "rewards/margins": 13.234725952148438, + "rewards/rejected": -16.807933807373047, + "step": 9168 + }, + { + "epoch": 1.43, + "learning_rate": 7.422418175216557e-06, + "logits/chosen": -2.4309821128845215, + "logits/rejected": -1.9588299989700317, + "logps/chosen": -189.83226013183594, + "logps/rejected": -285.2364196777344, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.070804119110107, + "rewards/margins": 5.840715408325195, + "rewards/rejected": -9.911520004272461, + "step": 9169 + }, + { + "epoch": 1.43, + "learning_rate": 7.421684734685409e-06, + "logits/chosen": -2.654416799545288, + "logits/rejected": -2.281184673309326, + "logps/chosen": -146.64930725097656, + "logps/rejected": -384.614501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4817867279052734, + "rewards/margins": 14.5889892578125, + "rewards/rejected": -18.07077407836914, + "step": 9170 + }, + { + "epoch": 1.43, + "learning_rate": 7.420951294154262e-06, + "logits/chosen": -2.6621010303497314, + "logits/rejected": -2.9852685928344727, + "logps/chosen": -156.6573486328125, + "logps/rejected": -354.3316650390625, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.503101348876953, + "rewards/margins": 5.695474624633789, + "rewards/rejected": -13.198575973510742, + "step": 9171 + }, + { + "epoch": 1.43, + "learning_rate": 7.420217853623114e-06, + "logits/chosen": -2.8631362915039062, + "logits/rejected": -2.6491332054138184, + "logps/chosen": -380.2857666015625, + "logps/rejected": -395.4679870605469, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.082511901855469, + "rewards/margins": 10.368379592895508, + "rewards/rejected": -14.450891494750977, + "step": 9172 + }, + { + "epoch": 1.43, + "learning_rate": 7.419484413091966e-06, + "logits/chosen": -1.971737265586853, + "logits/rejected": -3.031728982925415, + "logps/chosen": -65.54554748535156, + "logps/rejected": -256.8853454589844, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.509911060333252, + "rewards/margins": 5.852898120880127, + "rewards/rejected": -10.362809181213379, + "step": 9173 + }, + { + "epoch": 1.43, + "learning_rate": 7.418750972560818e-06, + "logits/chosen": -1.5156790018081665, + "logits/rejected": -2.2322447299957275, + "logps/chosen": -338.3409729003906, + "logps/rejected": -335.71343994140625, + "loss": 3.4353, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.060626983642578, + "rewards/margins": 1.2725231647491455, + "rewards/rejected": -10.333149909973145, + "step": 9174 + }, + { + "epoch": 1.43, + "learning_rate": 7.4180175320296695e-06, + "logits/chosen": -2.911003828048706, + "logits/rejected": -3.000145196914673, + "logps/chosen": -365.3965759277344, + "logps/rejected": -415.9874572753906, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.566951274871826, + "rewards/margins": 6.95609188079834, + "rewards/rejected": -12.523042678833008, + "step": 9175 + }, + { + "epoch": 1.43, + "learning_rate": 7.417284091498521e-06, + "logits/chosen": -3.022265911102295, + "logits/rejected": -3.0860824584960938, + "logps/chosen": -94.06207275390625, + "logps/rejected": -307.512451171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8125624656677246, + "rewards/margins": 8.127065658569336, + "rewards/rejected": -11.939628601074219, + "step": 9176 + }, + { + "epoch": 1.43, + "learning_rate": 7.416550650967373e-06, + "logits/chosen": -2.976112127304077, + "logits/rejected": -2.6522748470306396, + "logps/chosen": -171.31692504882812, + "logps/rejected": -250.22604370117188, + "loss": 0.0519, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4289984703063965, + "rewards/margins": 5.549877166748047, + "rewards/rejected": -10.978876113891602, + "step": 9177 + }, + { + "epoch": 1.43, + "learning_rate": 7.415817210436227e-06, + "logits/chosen": -1.8292286396026611, + "logits/rejected": -2.3941702842712402, + "logps/chosen": -188.58612060546875, + "logps/rejected": -299.97039794921875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.740328311920166, + "rewards/margins": 5.306972026824951, + "rewards/rejected": -12.047300338745117, + "step": 9178 + }, + { + "epoch": 1.43, + "learning_rate": 7.415083769905079e-06, + "logits/chosen": -1.6170177459716797, + "logits/rejected": -2.8654279708862305, + "logps/chosen": -102.32023620605469, + "logps/rejected": -274.4176330566406, + "loss": 0.0888, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.489772796630859, + "rewards/margins": 5.394314765930176, + "rewards/rejected": -10.884086608886719, + "step": 9179 + }, + { + "epoch": 1.43, + "learning_rate": 7.4143503293739306e-06, + "logits/chosen": -1.5238431692123413, + "logits/rejected": -2.566859006881714, + "logps/chosen": -289.46942138671875, + "logps/rejected": -515.2098388671875, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7070722579956055, + "rewards/margins": 8.83995246887207, + "rewards/rejected": -14.547024726867676, + "step": 9180 + }, + { + "epoch": 1.43, + "learning_rate": 7.4136168888427824e-06, + "logits/chosen": -2.903240919113159, + "logits/rejected": -3.0806877613067627, + "logps/chosen": -201.77407836914062, + "logps/rejected": -329.5290832519531, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8957200050354004, + "rewards/margins": 6.29945182800293, + "rewards/rejected": -9.195172309875488, + "step": 9181 + }, + { + "epoch": 1.43, + "learning_rate": 7.412883448311634e-06, + "logits/chosen": -2.2902514934539795, + "logits/rejected": -2.906795024871826, + "logps/chosen": -234.7633514404297, + "logps/rejected": -432.08282470703125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.590235710144043, + "rewards/margins": 7.6201910972595215, + "rewards/rejected": -15.210427284240723, + "step": 9182 + }, + { + "epoch": 1.43, + "learning_rate": 7.412150007780486e-06, + "logits/chosen": -1.852249026298523, + "logits/rejected": -2.766874313354492, + "logps/chosen": -155.86785888671875, + "logps/rejected": -423.5924072265625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.115208625793457, + "rewards/margins": 7.75425910949707, + "rewards/rejected": -12.869466781616211, + "step": 9183 + }, + { + "epoch": 1.43, + "learning_rate": 7.411416567249338e-06, + "logits/chosen": -2.988720655441284, + "logits/rejected": -3.0360944271087646, + "logps/chosen": -62.768531799316406, + "logps/rejected": -296.0597839355469, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.58889102935791, + "rewards/margins": 8.355337142944336, + "rewards/rejected": -11.944228172302246, + "step": 9184 + }, + { + "epoch": 1.43, + "learning_rate": 7.41068312671819e-06, + "logits/chosen": -2.971297025680542, + "logits/rejected": -2.9705398082733154, + "logps/chosen": -440.975341796875, + "logps/rejected": -460.6878662109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.737919330596924, + "rewards/margins": 9.744868278503418, + "rewards/rejected": -12.4827880859375, + "step": 9185 + }, + { + "epoch": 1.43, + "learning_rate": 7.409949686187043e-06, + "logits/chosen": -2.898087739944458, + "logits/rejected": -1.655815839767456, + "logps/chosen": -201.03158569335938, + "logps/rejected": -208.8306884765625, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.344036102294922, + "rewards/margins": 6.855856418609619, + "rewards/rejected": -10.1998929977417, + "step": 9186 + }, + { + "epoch": 1.43, + "learning_rate": 7.409216245655895e-06, + "logits/chosen": -1.8036925792694092, + "logits/rejected": -1.9879961013793945, + "logps/chosen": -148.178955078125, + "logps/rejected": -209.70655822753906, + "loss": 0.4382, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.412703514099121, + "rewards/margins": 5.340959548950195, + "rewards/rejected": -11.753663063049316, + "step": 9187 + }, + { + "epoch": 1.43, + "learning_rate": 7.408482805124748e-06, + "logits/chosen": -2.7962424755096436, + "logits/rejected": -2.501513719558716, + "logps/chosen": -662.6361694335938, + "logps/rejected": -419.73681640625, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5267014503479, + "rewards/margins": 4.662001609802246, + "rewards/rejected": -9.188703536987305, + "step": 9188 + }, + { + "epoch": 1.43, + "learning_rate": 7.4077493645936e-06, + "logits/chosen": -3.049447536468506, + "logits/rejected": -2.959200620651245, + "logps/chosen": -137.5962371826172, + "logps/rejected": -99.02934265136719, + "loss": 0.6412, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.508719444274902, + "rewards/margins": 0.10667681694030762, + "rewards/rejected": -6.615396499633789, + "step": 9189 + }, + { + "epoch": 1.43, + "learning_rate": 7.407015924062452e-06, + "logits/chosen": -2.76723575592041, + "logits/rejected": -2.2609784603118896, + "logps/chosen": -566.782958984375, + "logps/rejected": -406.75384521484375, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.48859691619873, + "rewards/margins": 5.972349166870117, + "rewards/rejected": -14.460946083068848, + "step": 9190 + }, + { + "epoch": 1.43, + "learning_rate": 7.406282483531304e-06, + "logits/chosen": -1.5318139791488647, + "logits/rejected": -2.6844003200531006, + "logps/chosen": -170.398193359375, + "logps/rejected": -362.414306640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4671783447265625, + "rewards/margins": 9.089282989501953, + "rewards/rejected": -13.556461334228516, + "step": 9191 + }, + { + "epoch": 1.43, + "learning_rate": 7.405549043000156e-06, + "logits/chosen": -2.2414865493774414, + "logits/rejected": -1.4486064910888672, + "logps/chosen": -229.28244018554688, + "logps/rejected": -112.07032775878906, + "loss": 2.1238, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.268431186676025, + "rewards/margins": -1.6931263208389282, + "rewards/rejected": -4.575304985046387, + "step": 9192 + }, + { + "epoch": 1.43, + "learning_rate": 7.4048156024690075e-06, + "logits/chosen": -2.4386258125305176, + "logits/rejected": -2.771378517150879, + "logps/chosen": -58.53487777709961, + "logps/rejected": -265.6096496582031, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6046993732452393, + "rewards/margins": 7.231129169464111, + "rewards/rejected": -9.83582878112793, + "step": 9193 + }, + { + "epoch": 1.43, + "learning_rate": 7.404082161937859e-06, + "logits/chosen": -2.3645966053009033, + "logits/rejected": -2.906135320663452, + "logps/chosen": -93.55840301513672, + "logps/rejected": -237.5498809814453, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6373019218444824, + "rewards/margins": 5.634202003479004, + "rewards/rejected": -9.271503448486328, + "step": 9194 + }, + { + "epoch": 1.43, + "learning_rate": 7.403348721406711e-06, + "logits/chosen": -2.5898468494415283, + "logits/rejected": -2.8068854808807373, + "logps/chosen": -562.5698852539062, + "logps/rejected": -446.87310791015625, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.196784973144531, + "rewards/margins": 6.962604999542236, + "rewards/rejected": -11.15938949584961, + "step": 9195 + }, + { + "epoch": 1.43, + "learning_rate": 7.402615280875565e-06, + "logits/chosen": -2.623810052871704, + "logits/rejected": -3.008864164352417, + "logps/chosen": -58.606597900390625, + "logps/rejected": -221.73834228515625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6600236892700195, + "rewards/margins": 7.188635349273682, + "rewards/rejected": -10.84865951538086, + "step": 9196 + }, + { + "epoch": 1.43, + "learning_rate": 7.401881840344417e-06, + "logits/chosen": -3.053009510040283, + "logits/rejected": -1.3747620582580566, + "logps/chosen": -343.9593200683594, + "logps/rejected": -258.96282958984375, + "loss": 1.7021, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.18635082244873, + "rewards/margins": 0.6536617279052734, + "rewards/rejected": -8.840012550354004, + "step": 9197 + }, + { + "epoch": 1.43, + "learning_rate": 7.401148399813269e-06, + "logits/chosen": -2.3746042251586914, + "logits/rejected": -2.7501344680786133, + "logps/chosen": -54.95701217651367, + "logps/rejected": -300.86126708984375, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.142448902130127, + "rewards/margins": 8.331239700317383, + "rewards/rejected": -12.473688125610352, + "step": 9198 + }, + { + "epoch": 1.43, + "learning_rate": 7.4004149592821205e-06, + "logits/chosen": -2.5761773586273193, + "logits/rejected": -3.0134735107421875, + "logps/chosen": -290.8206787109375, + "logps/rejected": -354.2878112792969, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.392773151397705, + "rewards/margins": 8.056122779846191, + "rewards/rejected": -11.448896408081055, + "step": 9199 + }, + { + "epoch": 1.43, + "learning_rate": 7.399681518750972e-06, + "logits/chosen": -2.139927864074707, + "logits/rejected": -2.6894500255584717, + "logps/chosen": -133.65582275390625, + "logps/rejected": -331.727294921875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.339489936828613, + "rewards/margins": 7.893333435058594, + "rewards/rejected": -12.232823371887207, + "step": 9200 + }, + { + "epoch": 1.43, + "learning_rate": 7.398948078219824e-06, + "logits/chosen": -2.995940685272217, + "logits/rejected": -2.4628679752349854, + "logps/chosen": -237.16830444335938, + "logps/rejected": -322.12164306640625, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5498151779174805, + "rewards/margins": 7.243663787841797, + "rewards/rejected": -12.793478012084961, + "step": 9201 + }, + { + "epoch": 1.43, + "learning_rate": 7.398214637688676e-06, + "logits/chosen": -2.647676944732666, + "logits/rejected": -3.0132334232330322, + "logps/chosen": -117.58264923095703, + "logps/rejected": -314.4235534667969, + "loss": 0.0574, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.604841232299805, + "rewards/margins": 3.1902787685394287, + "rewards/rejected": -8.795120239257812, + "step": 9202 + }, + { + "epoch": 1.43, + "learning_rate": 7.397481197157529e-06, + "logits/chosen": -2.6832592487335205, + "logits/rejected": -2.535850763320923, + "logps/chosen": -290.0216064453125, + "logps/rejected": -242.24380493164062, + "loss": 0.1412, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.933311462402344, + "rewards/margins": 2.1584718227386475, + "rewards/rejected": -9.09178352355957, + "step": 9203 + }, + { + "epoch": 1.43, + "learning_rate": 7.396747756626381e-06, + "logits/chosen": -2.71759033203125, + "logits/rejected": -2.856250762939453, + "logps/chosen": -434.6007995605469, + "logps/rejected": -451.8271484375, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.693153381347656, + "rewards/margins": 6.042471885681152, + "rewards/rejected": -10.735624313354492, + "step": 9204 + }, + { + "epoch": 1.43, + "learning_rate": 7.396014316095234e-06, + "logits/chosen": -2.9615979194641113, + "logits/rejected": -2.9793646335601807, + "logps/chosen": -104.1176986694336, + "logps/rejected": -178.4151153564453, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1034674644470215, + "rewards/margins": 5.9286675453186035, + "rewards/rejected": -11.032135009765625, + "step": 9205 + }, + { + "epoch": 1.43, + "learning_rate": 7.395280875564086e-06, + "logits/chosen": -1.6602813005447388, + "logits/rejected": -2.4861230850219727, + "logps/chosen": -563.3372802734375, + "logps/rejected": -614.2412109375, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.605357646942139, + "rewards/margins": 8.9083251953125, + "rewards/rejected": -15.513683319091797, + "step": 9206 + }, + { + "epoch": 1.43, + "learning_rate": 7.394547435032938e-06, + "logits/chosen": -2.8322629928588867, + "logits/rejected": -2.7554514408111572, + "logps/chosen": -218.92909240722656, + "logps/rejected": -267.35992431640625, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.607879638671875, + "rewards/margins": 5.340053558349609, + "rewards/rejected": -9.947933197021484, + "step": 9207 + }, + { + "epoch": 1.43, + "learning_rate": 7.39381399450179e-06, + "logits/chosen": -2.9050612449645996, + "logits/rejected": -2.7477989196777344, + "logps/chosen": -699.08935546875, + "logps/rejected": -664.966796875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.466006278991699, + "rewards/margins": 6.520971298217773, + "rewards/rejected": -10.986978530883789, + "step": 9208 + }, + { + "epoch": 1.43, + "learning_rate": 7.393080553970642e-06, + "logits/chosen": -2.7622015476226807, + "logits/rejected": -2.853170156478882, + "logps/chosen": -686.960693359375, + "logps/rejected": -607.9152221679688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43482673168182373, + "rewards/margins": 10.737060546875, + "rewards/rejected": -11.171887397766113, + "step": 9209 + }, + { + "epoch": 1.43, + "learning_rate": 7.392347113439494e-06, + "logits/chosen": -1.0202701091766357, + "logits/rejected": -2.574742078781128, + "logps/chosen": -162.55746459960938, + "logps/rejected": -513.06787109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.675014019012451, + "rewards/margins": 12.569402694702148, + "rewards/rejected": -16.244417190551758, + "step": 9210 + }, + { + "epoch": 1.43, + "learning_rate": 7.3916136729083456e-06, + "logits/chosen": -2.83217453956604, + "logits/rejected": -2.313779830932617, + "logps/chosen": -545.7698974609375, + "logps/rejected": -560.6347045898438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.281579494476318, + "rewards/margins": 10.41356372833252, + "rewards/rejected": -14.69514274597168, + "step": 9211 + }, + { + "epoch": 1.43, + "learning_rate": 7.3908802323771974e-06, + "logits/chosen": -2.6622562408447266, + "logits/rejected": -1.6615574359893799, + "logps/chosen": -197.89739990234375, + "logps/rejected": -250.2055206298828, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.34297513961792, + "rewards/margins": 10.026073455810547, + "rewards/rejected": -16.369049072265625, + "step": 9212 + }, + { + "epoch": 1.43, + "learning_rate": 7.390146791846049e-06, + "logits/chosen": -2.6278016567230225, + "logits/rejected": -1.7917778491973877, + "logps/chosen": -492.90850830078125, + "logps/rejected": -567.5455322265625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3538312911987305, + "rewards/margins": 7.226858615875244, + "rewards/rejected": -12.580690383911133, + "step": 9213 + }, + { + "epoch": 1.43, + "learning_rate": 7.389413351314903e-06, + "logits/chosen": -2.930182933807373, + "logits/rejected": -1.3117908239364624, + "logps/chosen": -407.65545654296875, + "logps/rejected": -272.435302734375, + "loss": 0.6566, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.122586727142334, + "rewards/margins": 1.837808609008789, + "rewards/rejected": -8.960395812988281, + "step": 9214 + }, + { + "epoch": 1.43, + "learning_rate": 7.388679910783755e-06, + "logits/chosen": -2.5159242153167725, + "logits/rejected": -2.6934781074523926, + "logps/chosen": -270.3840026855469, + "logps/rejected": -249.89813232421875, + "loss": 0.1891, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3755903244018555, + "rewards/margins": 3.3846142292022705, + "rewards/rejected": -9.760204315185547, + "step": 9215 + }, + { + "epoch": 1.43, + "learning_rate": 7.387946470252607e-06, + "logits/chosen": -2.8590598106384277, + "logits/rejected": -2.48587965965271, + "logps/chosen": -382.8292541503906, + "logps/rejected": -576.82568359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.27266001701355, + "rewards/margins": 9.524883270263672, + "rewards/rejected": -12.7975435256958, + "step": 9216 + }, + { + "epoch": 1.43, + "learning_rate": 7.3872130297214585e-06, + "logits/chosen": -2.063366413116455, + "logits/rejected": -2.6764094829559326, + "logps/chosen": -214.31326293945312, + "logps/rejected": -430.5136413574219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.393446922302246, + "rewards/margins": 11.039139747619629, + "rewards/rejected": -16.432586669921875, + "step": 9217 + }, + { + "epoch": 1.43, + "learning_rate": 7.38647958919031e-06, + "logits/chosen": -1.8151332139968872, + "logits/rejected": -2.8849878311157227, + "logps/chosen": -148.59616088867188, + "logps/rejected": -539.4363403320312, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8874354362487793, + "rewards/margins": 9.046998977661133, + "rewards/rejected": -12.93443489074707, + "step": 9218 + }, + { + "epoch": 1.43, + "learning_rate": 7.385746148659162e-06, + "logits/chosen": -2.9112603664398193, + "logits/rejected": -2.970515727996826, + "logps/chosen": -151.61077880859375, + "logps/rejected": -239.12583923339844, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.599530816078186, + "rewards/margins": 6.502496719360352, + "rewards/rejected": -8.102027893066406, + "step": 9219 + }, + { + "epoch": 1.43, + "learning_rate": 7.385012708128015e-06, + "logits/chosen": -1.8263119459152222, + "logits/rejected": -3.0362963676452637, + "logps/chosen": -76.42778015136719, + "logps/rejected": -278.7908935546875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.649188041687012, + "rewards/margins": 6.30162239074707, + "rewards/rejected": -10.950810432434082, + "step": 9220 + }, + { + "epoch": 1.43, + "learning_rate": 7.384279267596867e-06, + "logits/chosen": -2.3652117252349854, + "logits/rejected": -2.876112699508667, + "logps/chosen": -375.7320556640625, + "logps/rejected": -312.43359375, + "loss": 2.1767, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.964453220367432, + "rewards/margins": 2.894453525543213, + "rewards/rejected": -10.858906745910645, + "step": 9221 + }, + { + "epoch": 1.43, + "learning_rate": 7.383545827065719e-06, + "logits/chosen": -2.2143633365631104, + "logits/rejected": -2.6976633071899414, + "logps/chosen": -286.3095703125, + "logps/rejected": -534.72802734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.216871738433838, + "rewards/margins": 10.089247703552246, + "rewards/rejected": -15.306118965148926, + "step": 9222 + }, + { + "epoch": 1.43, + "learning_rate": 7.382812386534572e-06, + "logits/chosen": -2.903751850128174, + "logits/rejected": -2.475707530975342, + "logps/chosen": -688.6298828125, + "logps/rejected": -567.4301147460938, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.905688285827637, + "rewards/margins": 6.871878623962402, + "rewards/rejected": -12.777566909790039, + "step": 9223 + }, + { + "epoch": 1.43, + "learning_rate": 7.382078946003424e-06, + "logits/chosen": -2.7580087184906006, + "logits/rejected": -3.0192110538482666, + "logps/chosen": -128.97268676757812, + "logps/rejected": -312.110595703125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.328336715698242, + "rewards/margins": 8.257858276367188, + "rewards/rejected": -12.58619499206543, + "step": 9224 + }, + { + "epoch": 1.43, + "learning_rate": 7.381345505472276e-06, + "logits/chosen": -2.8223564624786377, + "logits/rejected": -2.9372756481170654, + "logps/chosen": -618.126708984375, + "logps/rejected": -523.3428344726562, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.144933700561523, + "rewards/margins": 6.999137878417969, + "rewards/rejected": -11.144071578979492, + "step": 9225 + }, + { + "epoch": 1.43, + "learning_rate": 7.380612064941128e-06, + "logits/chosen": -2.497851610183716, + "logits/rejected": -2.5977251529693604, + "logps/chosen": -345.3836669921875, + "logps/rejected": -345.9786682128906, + "loss": 0.0287, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.173937797546387, + "rewards/margins": 4.1830644607543945, + "rewards/rejected": -11.357002258300781, + "step": 9226 + }, + { + "epoch": 1.43, + "learning_rate": 7.37987862440998e-06, + "logits/chosen": -2.8004631996154785, + "logits/rejected": -2.7771003246307373, + "logps/chosen": -203.26889038085938, + "logps/rejected": -323.1946105957031, + "loss": 0.3848, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.099464416503906, + "rewards/margins": 6.3726677894592285, + "rewards/rejected": -14.472131729125977, + "step": 9227 + }, + { + "epoch": 1.44, + "learning_rate": 7.379145183878832e-06, + "logits/chosen": -1.2562403678894043, + "logits/rejected": -2.8016345500946045, + "logps/chosen": -174.02496337890625, + "logps/rejected": -547.1979370117188, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7209367752075195, + "rewards/margins": 7.379138469696045, + "rewards/rejected": -13.100074768066406, + "step": 9228 + }, + { + "epoch": 1.44, + "learning_rate": 7.378411743347684e-06, + "logits/chosen": -2.8095922470092773, + "logits/rejected": -1.6595690250396729, + "logps/chosen": -257.54583740234375, + "logps/rejected": -162.59942626953125, + "loss": 0.5035, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.785614967346191, + "rewards/margins": 2.01550555229187, + "rewards/rejected": -9.80112075805664, + "step": 9229 + }, + { + "epoch": 1.44, + "learning_rate": 7.3776783028165355e-06, + "logits/chosen": -2.9874095916748047, + "logits/rejected": -2.45578670501709, + "logps/chosen": -175.78884887695312, + "logps/rejected": -186.78282165527344, + "loss": 3.9303, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.0025835037231445, + "rewards/margins": -0.6863892078399658, + "rewards/rejected": -6.316194534301758, + "step": 9230 + }, + { + "epoch": 1.44, + "learning_rate": 7.376944862285389e-06, + "logits/chosen": -2.728959083557129, + "logits/rejected": -2.992764472961426, + "logps/chosen": -74.5769271850586, + "logps/rejected": -207.85198974609375, + "loss": 0.0905, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.17802619934082, + "rewards/margins": 4.441145896911621, + "rewards/rejected": -10.619172096252441, + "step": 9231 + }, + { + "epoch": 1.44, + "learning_rate": 7.376211421754241e-06, + "logits/chosen": -3.1017987728118896, + "logits/rejected": -1.8853411674499512, + "logps/chosen": -230.2372589111328, + "logps/rejected": -363.0473937988281, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.714261531829834, + "rewards/margins": 8.118684768676758, + "rewards/rejected": -9.83294677734375, + "step": 9232 + }, + { + "epoch": 1.44, + "learning_rate": 7.375477981223093e-06, + "logits/chosen": -1.533976674079895, + "logits/rejected": -2.812729597091675, + "logps/chosen": -105.69839477539062, + "logps/rejected": -330.5256652832031, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.158513069152832, + "rewards/margins": 7.858025550842285, + "rewards/rejected": -13.016538619995117, + "step": 9233 + }, + { + "epoch": 1.44, + "learning_rate": 7.374744540691945e-06, + "logits/chosen": -2.228095769882202, + "logits/rejected": -2.6439082622528076, + "logps/chosen": -194.28826904296875, + "logps/rejected": -185.1849365234375, + "loss": 0.3731, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.91089391708374, + "rewards/margins": 3.6174561977386475, + "rewards/rejected": -8.528350830078125, + "step": 9234 + }, + { + "epoch": 1.44, + "learning_rate": 7.3740111001607966e-06, + "logits/chosen": -2.3429884910583496, + "logits/rejected": -2.7018585205078125, + "logps/chosen": -132.79071044921875, + "logps/rejected": -356.0039367675781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4056601524353027, + "rewards/margins": 8.37717342376709, + "rewards/rejected": -11.78283405303955, + "step": 9235 + }, + { + "epoch": 1.44, + "learning_rate": 7.3732776596296485e-06, + "logits/chosen": -2.724191188812256, + "logits/rejected": -2.620722770690918, + "logps/chosen": -163.72779846191406, + "logps/rejected": -288.5641784667969, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.374859809875488, + "rewards/margins": 8.201200485229492, + "rewards/rejected": -13.57606029510498, + "step": 9236 + }, + { + "epoch": 1.44, + "learning_rate": 7.372544219098501e-06, + "logits/chosen": -2.1330947875976562, + "logits/rejected": -2.540799856185913, + "logps/chosen": -420.7291259765625, + "logps/rejected": -465.1860046386719, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.174419403076172, + "rewards/margins": 10.401439666748047, + "rewards/rejected": -14.575859069824219, + "step": 9237 + }, + { + "epoch": 1.44, + "learning_rate": 7.371810778567353e-06, + "logits/chosen": -2.6566500663757324, + "logits/rejected": -2.9720349311828613, + "logps/chosen": -126.44398498535156, + "logps/rejected": -317.12811279296875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.296849250793457, + "rewards/margins": 7.150759696960449, + "rewards/rejected": -11.447608947753906, + "step": 9238 + }, + { + "epoch": 1.44, + "learning_rate": 7.371077338036205e-06, + "logits/chosen": -2.7468197345733643, + "logits/rejected": -1.5613832473754883, + "logps/chosen": -499.40509033203125, + "logps/rejected": -395.41510009765625, + "loss": 0.0261, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0455169677734375, + "rewards/margins": 5.921131134033203, + "rewards/rejected": -10.96664810180664, + "step": 9239 + }, + { + "epoch": 1.44, + "learning_rate": 7.3703438975050585e-06, + "logits/chosen": -2.981340169906616, + "logits/rejected": -2.8198888301849365, + "logps/chosen": -198.12225341796875, + "logps/rejected": -358.40631103515625, + "loss": 0.0314, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.785290718078613, + "rewards/margins": 4.955232620239258, + "rewards/rejected": -9.740523338317871, + "step": 9240 + }, + { + "epoch": 1.44, + "learning_rate": 7.36961045697391e-06, + "logits/chosen": -2.8351027965545654, + "logits/rejected": -2.655496120452881, + "logps/chosen": -129.69277954101562, + "logps/rejected": -192.18743896484375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.433352470397949, + "rewards/margins": 6.6968793869018555, + "rewards/rejected": -10.130231857299805, + "step": 9241 + }, + { + "epoch": 1.44, + "learning_rate": 7.368877016442762e-06, + "logits/chosen": -2.794036388397217, + "logits/rejected": -2.640242099761963, + "logps/chosen": -434.681884765625, + "logps/rejected": -289.8125305175781, + "loss": 1.4021, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.855711460113525, + "rewards/margins": 4.591244220733643, + "rewards/rejected": -10.446955680847168, + "step": 9242 + }, + { + "epoch": 1.44, + "learning_rate": 7.368143575911614e-06, + "logits/chosen": -2.668227434158325, + "logits/rejected": -2.856128215789795, + "logps/chosen": -111.2723388671875, + "logps/rejected": -199.64024353027344, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3240270614624023, + "rewards/margins": 6.233145236968994, + "rewards/rejected": -9.557172775268555, + "step": 9243 + }, + { + "epoch": 1.44, + "learning_rate": 7.367410135380466e-06, + "logits/chosen": -2.507223606109619, + "logits/rejected": -2.6656954288482666, + "logps/chosen": -110.55125427246094, + "logps/rejected": -273.6123046875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.887923240661621, + "rewards/margins": 8.028305053710938, + "rewards/rejected": -13.916227340698242, + "step": 9244 + }, + { + "epoch": 1.44, + "learning_rate": 7.366676694849318e-06, + "logits/chosen": -2.8231287002563477, + "logits/rejected": -2.203193426132202, + "logps/chosen": -209.35914611816406, + "logps/rejected": -303.2914733886719, + "loss": 0.2161, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.167584419250488, + "rewards/margins": 3.2102279663085938, + "rewards/rejected": -9.377812385559082, + "step": 9245 + }, + { + "epoch": 1.44, + "learning_rate": 7.36594325431817e-06, + "logits/chosen": -1.8217048645019531, + "logits/rejected": -2.249141216278076, + "logps/chosen": -417.9473571777344, + "logps/rejected": -369.7227783203125, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2749738693237305, + "rewards/margins": 4.331096172332764, + "rewards/rejected": -10.606069564819336, + "step": 9246 + }, + { + "epoch": 1.44, + "learning_rate": 7.365209813787022e-06, + "logits/chosen": -1.6631542444229126, + "logits/rejected": -2.5852231979370117, + "logps/chosen": -360.1392822265625, + "logps/rejected": -490.7339172363281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.076415538787842, + "rewards/margins": 8.47266960144043, + "rewards/rejected": -13.549084663391113, + "step": 9247 + }, + { + "epoch": 1.44, + "learning_rate": 7.3644763732558735e-06, + "logits/chosen": -2.915048837661743, + "logits/rejected": -3.013080358505249, + "logps/chosen": -89.60603332519531, + "logps/rejected": -278.5933837890625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6068878173828125, + "rewards/margins": 8.338861465454102, + "rewards/rejected": -11.945749282836914, + "step": 9248 + }, + { + "epoch": 1.44, + "learning_rate": 7.363742932724727e-06, + "logits/chosen": -0.8295994997024536, + "logits/rejected": -1.5107262134552002, + "logps/chosen": -183.0382537841797, + "logps/rejected": -249.220703125, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.479538440704346, + "rewards/margins": 4.953287124633789, + "rewards/rejected": -9.432825088500977, + "step": 9249 + }, + { + "epoch": 1.44, + "learning_rate": 7.363009492193579e-06, + "logits/chosen": -2.354968309402466, + "logits/rejected": -2.924592971801758, + "logps/chosen": -553.5921630859375, + "logps/rejected": -599.1522216796875, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0448198318481445, + "rewards/margins": 4.817835807800293, + "rewards/rejected": -7.8626556396484375, + "step": 9250 + }, + { + "epoch": 1.44, + "learning_rate": 7.362276051662431e-06, + "logits/chosen": -1.6619549989700317, + "logits/rejected": -3.0759942531585693, + "logps/chosen": -73.36687469482422, + "logps/rejected": -402.03802490234375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.386955976486206, + "rewards/margins": 7.361083984375, + "rewards/rejected": -10.748039245605469, + "step": 9251 + }, + { + "epoch": 1.44, + "learning_rate": 7.361542611131283e-06, + "logits/chosen": -2.3312907218933105, + "logits/rejected": -3.06418776512146, + "logps/chosen": -130.84532165527344, + "logps/rejected": -430.33746337890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.069558620452881, + "rewards/margins": 8.734855651855469, + "rewards/rejected": -13.804414749145508, + "step": 9252 + }, + { + "epoch": 1.44, + "learning_rate": 7.360809170600135e-06, + "logits/chosen": -2.534501552581787, + "logits/rejected": -3.039005756378174, + "logps/chosen": -221.4456787109375, + "logps/rejected": -529.2422485351562, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.458043098449707, + "rewards/margins": 5.015179634094238, + "rewards/rejected": -11.473222732543945, + "step": 9253 + }, + { + "epoch": 1.44, + "learning_rate": 7.360075730068987e-06, + "logits/chosen": -2.976768732070923, + "logits/rejected": -2.9167349338531494, + "logps/chosen": -161.32774353027344, + "logps/rejected": -199.2109832763672, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.077770233154297, + "rewards/margins": 7.522138595581055, + "rewards/rejected": -11.599908828735352, + "step": 9254 + }, + { + "epoch": 1.44, + "learning_rate": 7.359342289537839e-06, + "logits/chosen": -2.361453056335449, + "logits/rejected": -3.1391477584838867, + "logps/chosen": -130.6327667236328, + "logps/rejected": -293.9769287109375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6799769401550293, + "rewards/margins": 8.435220718383789, + "rewards/rejected": -10.11519718170166, + "step": 9255 + }, + { + "epoch": 1.44, + "learning_rate": 7.358608849006691e-06, + "logits/chosen": -3.0186879634857178, + "logits/rejected": -2.829111337661743, + "logps/chosen": -576.5621337890625, + "logps/rejected": -531.7291870117188, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.004315376281738, + "rewards/margins": 6.3113203048706055, + "rewards/rejected": -11.315635681152344, + "step": 9256 + }, + { + "epoch": 1.44, + "learning_rate": 7.357875408475543e-06, + "logits/chosen": -2.615920305252075, + "logits/rejected": -2.743021011352539, + "logps/chosen": -538.7197265625, + "logps/rejected": -489.7232666015625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7898576259613037, + "rewards/margins": 6.487468242645264, + "rewards/rejected": -9.277325630187988, + "step": 9257 + }, + { + "epoch": 1.44, + "learning_rate": 7.3571419679443965e-06, + "logits/chosen": -2.665201187133789, + "logits/rejected": -2.8567981719970703, + "logps/chosen": -108.03237915039062, + "logps/rejected": -317.0888977050781, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.619381904602051, + "rewards/margins": 6.24232292175293, + "rewards/rejected": -11.861703872680664, + "step": 9258 + }, + { + "epoch": 1.44, + "learning_rate": 7.3564085274132484e-06, + "logits/chosen": -3.0127954483032227, + "logits/rejected": -2.7233834266662598, + "logps/chosen": -605.40380859375, + "logps/rejected": -567.78173828125, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.76307487487793, + "rewards/margins": 5.62314510345459, + "rewards/rejected": -10.38621997833252, + "step": 9259 + }, + { + "epoch": 1.44, + "learning_rate": 7.3556750868821e-06, + "logits/chosen": -2.821199417114258, + "logits/rejected": -2.884232997894287, + "logps/chosen": -186.60202026367188, + "logps/rejected": -340.6542053222656, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.487907886505127, + "rewards/margins": 5.139287948608398, + "rewards/rejected": -11.627195358276367, + "step": 9260 + }, + { + "epoch": 1.44, + "learning_rate": 7.354941646350952e-06, + "logits/chosen": -2.83007550239563, + "logits/rejected": -2.2720048427581787, + "logps/chosen": -378.3682861328125, + "logps/rejected": -362.603515625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.968615531921387, + "rewards/margins": 7.767662525177002, + "rewards/rejected": -13.736278533935547, + "step": 9261 + }, + { + "epoch": 1.44, + "learning_rate": 7.354208205819804e-06, + "logits/chosen": -2.8958752155303955, + "logits/rejected": -2.668527364730835, + "logps/chosen": -285.41058349609375, + "logps/rejected": -321.69610595703125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5461950302124023, + "rewards/margins": 5.986446380615234, + "rewards/rejected": -9.532641410827637, + "step": 9262 + }, + { + "epoch": 1.44, + "learning_rate": 7.353474765288656e-06, + "logits/chosen": -2.5077812671661377, + "logits/rejected": -2.935210704803467, + "logps/chosen": -101.9566650390625, + "logps/rejected": -345.6152038574219, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.064969539642334, + "rewards/margins": 7.849232196807861, + "rewards/rejected": -11.914201736450195, + "step": 9263 + }, + { + "epoch": 1.44, + "learning_rate": 7.352741324757508e-06, + "logits/chosen": -2.285996675491333, + "logits/rejected": -2.93137526512146, + "logps/chosen": -213.25555419921875, + "logps/rejected": -403.20416259765625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.290315628051758, + "rewards/margins": 6.52488899230957, + "rewards/rejected": -10.815204620361328, + "step": 9264 + }, + { + "epoch": 1.44, + "learning_rate": 7.35200788422636e-06, + "logits/chosen": -2.519313097000122, + "logits/rejected": -1.3931427001953125, + "logps/chosen": -135.29678344726562, + "logps/rejected": -164.49362182617188, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4800357818603516, + "rewards/margins": 7.710075378417969, + "rewards/rejected": -11.19011116027832, + "step": 9265 + }, + { + "epoch": 1.44, + "learning_rate": 7.3512744436952116e-06, + "logits/chosen": -2.971116781234741, + "logits/rejected": -3.0087814331054688, + "logps/chosen": -725.94482421875, + "logps/rejected": -717.620361328125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.223446369171143, + "rewards/margins": 7.994488716125488, + "rewards/rejected": -12.217935562133789, + "step": 9266 + }, + { + "epoch": 1.44, + "learning_rate": 7.350541003164065e-06, + "logits/chosen": -2.779858112335205, + "logits/rejected": -3.1663260459899902, + "logps/chosen": -43.80801010131836, + "logps/rejected": -201.69100952148438, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2047863006591797, + "rewards/margins": 8.402656555175781, + "rewards/rejected": -11.607442855834961, + "step": 9267 + }, + { + "epoch": 1.44, + "learning_rate": 7.349807562632917e-06, + "logits/chosen": -2.003573179244995, + "logits/rejected": -3.014536142349243, + "logps/chosen": -274.71490478515625, + "logps/rejected": -545.9879150390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1309914588928223, + "rewards/margins": 7.827362060546875, + "rewards/rejected": -10.958353042602539, + "step": 9268 + }, + { + "epoch": 1.44, + "learning_rate": 7.349074122101769e-06, + "logits/chosen": -3.086362361907959, + "logits/rejected": -2.98694109916687, + "logps/chosen": -125.43618774414062, + "logps/rejected": -331.82257080078125, + "loss": 1.5344, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.268107891082764, + "rewards/margins": 4.042519569396973, + "rewards/rejected": -9.310626983642578, + "step": 9269 + }, + { + "epoch": 1.44, + "learning_rate": 7.348340681570621e-06, + "logits/chosen": -2.841188907623291, + "logits/rejected": -2.6020750999450684, + "logps/chosen": -362.45574951171875, + "logps/rejected": -414.28265380859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5928670167922974, + "rewards/margins": 11.903043746948242, + "rewards/rejected": -12.49591064453125, + "step": 9270 + }, + { + "epoch": 1.44, + "learning_rate": 7.3476072410394735e-06, + "logits/chosen": -2.7149105072021484, + "logits/rejected": -2.530332088470459, + "logps/chosen": -461.6231689453125, + "logps/rejected": -401.6773681640625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.679281711578369, + "rewards/margins": 7.152444839477539, + "rewards/rejected": -12.83172607421875, + "step": 9271 + }, + { + "epoch": 1.44, + "learning_rate": 7.346873800508325e-06, + "logits/chosen": -2.650063991546631, + "logits/rejected": -2.896664619445801, + "logps/chosen": -716.9866943359375, + "logps/rejected": -696.618896484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.506457567214966, + "rewards/margins": 9.993249893188477, + "rewards/rejected": -13.499707221984863, + "step": 9272 + }, + { + "epoch": 1.44, + "learning_rate": 7.346140359977177e-06, + "logits/chosen": -2.68953013420105, + "logits/rejected": -3.023649215698242, + "logps/chosen": -130.89559936523438, + "logps/rejected": -331.37530517578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3103208541870117, + "rewards/margins": 8.702911376953125, + "rewards/rejected": -12.013233184814453, + "step": 9273 + }, + { + "epoch": 1.44, + "learning_rate": 7.345406919446029e-06, + "logits/chosen": -2.977834939956665, + "logits/rejected": -2.631135940551758, + "logps/chosen": -275.5437927246094, + "logps/rejected": -316.7266845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.990438938140869, + "rewards/margins": 9.936622619628906, + "rewards/rejected": -12.927061080932617, + "step": 9274 + }, + { + "epoch": 1.44, + "learning_rate": 7.344673478914881e-06, + "logits/chosen": -2.271439552307129, + "logits/rejected": -3.0496532917022705, + "logps/chosen": -249.39404296875, + "logps/rejected": -543.37548828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.888247013092041, + "rewards/margins": 9.869413375854492, + "rewards/rejected": -15.757660865783691, + "step": 9275 + }, + { + "epoch": 1.44, + "learning_rate": 7.343940038383735e-06, + "logits/chosen": -1.7902445793151855, + "logits/rejected": -2.149163246154785, + "logps/chosen": -363.01226806640625, + "logps/rejected": -362.4604187011719, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.529357671737671, + "rewards/margins": 9.08261489868164, + "rewards/rejected": -11.61197280883789, + "step": 9276 + }, + { + "epoch": 1.44, + "learning_rate": 7.3432065978525865e-06, + "logits/chosen": -2.7007455825805664, + "logits/rejected": -1.9451433420181274, + "logps/chosen": -322.390625, + "logps/rejected": -266.60693359375, + "loss": 0.0444, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9888343811035156, + "rewards/margins": 6.356986045837402, + "rewards/rejected": -10.345821380615234, + "step": 9277 + }, + { + "epoch": 1.44, + "learning_rate": 7.342473157321438e-06, + "logits/chosen": -2.637730121612549, + "logits/rejected": -3.1815285682678223, + "logps/chosen": -55.367515563964844, + "logps/rejected": -239.41299438476562, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4492340087890625, + "rewards/margins": 8.12108039855957, + "rewards/rejected": -12.570314407348633, + "step": 9278 + }, + { + "epoch": 1.44, + "learning_rate": 7.34173971679029e-06, + "logits/chosen": -2.603343963623047, + "logits/rejected": -2.8447630405426025, + "logps/chosen": -203.71682739257812, + "logps/rejected": -442.8404541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0355920791625977, + "rewards/margins": 12.085582733154297, + "rewards/rejected": -15.121173858642578, + "step": 9279 + }, + { + "epoch": 1.44, + "learning_rate": 7.341006276259142e-06, + "logits/chosen": -2.8211326599121094, + "logits/rejected": -2.5255842208862305, + "logps/chosen": -293.4793701171875, + "logps/rejected": -513.5719604492188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3701438903808594, + "rewards/margins": 11.311960220336914, + "rewards/rejected": -14.682104110717773, + "step": 9280 + }, + { + "epoch": 1.44, + "learning_rate": 7.340272835727994e-06, + "logits/chosen": -3.0545363426208496, + "logits/rejected": -1.8127501010894775, + "logps/chosen": -289.2940979003906, + "logps/rejected": -158.09609985351562, + "loss": 0.2687, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.317361831665039, + "rewards/margins": 3.6922669410705566, + "rewards/rejected": -9.009628295898438, + "step": 9281 + }, + { + "epoch": 1.44, + "learning_rate": 7.339539395196846e-06, + "logits/chosen": -2.643524169921875, + "logits/rejected": -1.9618550539016724, + "logps/chosen": -797.6206665039062, + "logps/rejected": -455.5445251464844, + "loss": 0.9397, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.123877048492432, + "rewards/margins": 1.601562738418579, + "rewards/rejected": -8.72544002532959, + "step": 9282 + }, + { + "epoch": 1.44, + "learning_rate": 7.338805954665698e-06, + "logits/chosen": -1.9589239358901978, + "logits/rejected": -2.9745242595672607, + "logps/chosen": -106.54600524902344, + "logps/rejected": -265.5617370605469, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.888319969177246, + "rewards/margins": 4.8865814208984375, + "rewards/rejected": -8.774901390075684, + "step": 9283 + }, + { + "epoch": 1.44, + "learning_rate": 7.33807251413455e-06, + "logits/chosen": -2.4609155654907227, + "logits/rejected": -2.884948253631592, + "logps/chosen": -245.04022216796875, + "logps/rejected": -479.1460876464844, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.539160251617432, + "rewards/margins": 6.100157260894775, + "rewards/rejected": -10.639317512512207, + "step": 9284 + }, + { + "epoch": 1.44, + "learning_rate": 7.337339073603403e-06, + "logits/chosen": -2.183344841003418, + "logits/rejected": -2.346914052963257, + "logps/chosen": -140.1385955810547, + "logps/rejected": -397.54058837890625, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.652810096740723, + "rewards/margins": 5.488241195678711, + "rewards/rejected": -11.141050338745117, + "step": 9285 + }, + { + "epoch": 1.44, + "learning_rate": 7.336605633072255e-06, + "logits/chosen": -1.244037389755249, + "logits/rejected": -2.8531224727630615, + "logps/chosen": -169.97543334960938, + "logps/rejected": -499.36505126953125, + "loss": 0.2117, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.751981735229492, + "rewards/margins": 1.5242724418640137, + "rewards/rejected": -7.276254653930664, + "step": 9286 + }, + { + "epoch": 1.44, + "learning_rate": 7.335872192541107e-06, + "logits/chosen": -2.681316375732422, + "logits/rejected": -2.9380950927734375, + "logps/chosen": -91.20573425292969, + "logps/rejected": -342.6799621582031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.775644779205322, + "rewards/margins": 10.8462553024292, + "rewards/rejected": -15.62190055847168, + "step": 9287 + }, + { + "epoch": 1.44, + "learning_rate": 7.335138752009959e-06, + "logits/chosen": -2.3872554302215576, + "logits/rejected": -2.8285486698150635, + "logps/chosen": -625.8253784179688, + "logps/rejected": -572.96875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.056449890136719, + "rewards/margins": 10.447796821594238, + "rewards/rejected": -14.504246711730957, + "step": 9288 + }, + { + "epoch": 1.44, + "learning_rate": 7.3344053114788116e-06, + "logits/chosen": -2.917994976043701, + "logits/rejected": -2.9555084705352783, + "logps/chosen": -84.48101806640625, + "logps/rejected": -217.02267456054688, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6555395126342773, + "rewards/margins": 6.953604698181152, + "rewards/rejected": -10.60914421081543, + "step": 9289 + }, + { + "epoch": 1.44, + "learning_rate": 7.3336718709476634e-06, + "logits/chosen": -2.7118279933929443, + "logits/rejected": -3.0201382637023926, + "logps/chosen": -76.53368377685547, + "logps/rejected": -424.8654479980469, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.881309509277344, + "rewards/margins": 5.315670013427734, + "rewards/rejected": -10.196979522705078, + "step": 9290 + }, + { + "epoch": 1.44, + "learning_rate": 7.332938430416515e-06, + "logits/chosen": -2.1539533138275146, + "logits/rejected": -2.904417037963867, + "logps/chosen": -188.7783966064453, + "logps/rejected": -374.5748596191406, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.484048843383789, + "rewards/margins": 9.12728500366211, + "rewards/rejected": -13.611332893371582, + "step": 9291 + }, + { + "epoch": 1.45, + "learning_rate": 7.332204989885367e-06, + "logits/chosen": -2.2660601139068604, + "logits/rejected": -2.85416316986084, + "logps/chosen": -375.26141357421875, + "logps/rejected": -321.4462890625, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0241594314575195, + "rewards/margins": 3.999164581298828, + "rewards/rejected": -10.023324012756348, + "step": 9292 + }, + { + "epoch": 1.45, + "learning_rate": 7.331471549354219e-06, + "logits/chosen": -2.215902805328369, + "logits/rejected": -2.598799467086792, + "logps/chosen": -420.43408203125, + "logps/rejected": -642.9739990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0365082025527954, + "rewards/margins": 13.462848663330078, + "rewards/rejected": -14.499357223510742, + "step": 9293 + }, + { + "epoch": 1.45, + "learning_rate": 7.330738108823073e-06, + "logits/chosen": -2.9478416442871094, + "logits/rejected": -2.954293727874756, + "logps/chosen": -92.283935546875, + "logps/rejected": -166.51077270507812, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.059586048126221, + "rewards/margins": 3.9510622024536133, + "rewards/rejected": -10.010648727416992, + "step": 9294 + }, + { + "epoch": 1.45, + "learning_rate": 7.3300046682919245e-06, + "logits/chosen": -1.5267417430877686, + "logits/rejected": -2.508615493774414, + "logps/chosen": -201.4005889892578, + "logps/rejected": -483.7545166015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.965503692626953, + "rewards/margins": 8.663251876831055, + "rewards/rejected": -12.628755569458008, + "step": 9295 + }, + { + "epoch": 1.45, + "learning_rate": 7.329271227760776e-06, + "logits/chosen": -2.261984348297119, + "logits/rejected": -2.7694437503814697, + "logps/chosen": -68.64691925048828, + "logps/rejected": -161.7818145751953, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.880095958709717, + "rewards/margins": 4.259494781494141, + "rewards/rejected": -10.139591217041016, + "step": 9296 + }, + { + "epoch": 1.45, + "learning_rate": 7.328537787229628e-06, + "logits/chosen": -2.6877737045288086, + "logits/rejected": -3.086878538131714, + "logps/chosen": -285.8183898925781, + "logps/rejected": -299.5986328125, + "loss": 0.3935, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.43629264831543, + "rewards/margins": 3.6191656589508057, + "rewards/rejected": -8.055458068847656, + "step": 9297 + }, + { + "epoch": 1.45, + "learning_rate": 7.32780434669848e-06, + "logits/chosen": -1.7915499210357666, + "logits/rejected": -2.5510342121124268, + "logps/chosen": -204.0518035888672, + "logps/rejected": -385.0475158691406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.774838447570801, + "rewards/margins": 8.223941802978516, + "rewards/rejected": -11.998780250549316, + "step": 9298 + }, + { + "epoch": 1.45, + "learning_rate": 7.327070906167332e-06, + "logits/chosen": -1.5883405208587646, + "logits/rejected": -2.483266592025757, + "logps/chosen": -73.41436004638672, + "logps/rejected": -398.5014953613281, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.885657787322998, + "rewards/margins": 7.226461410522461, + "rewards/rejected": -12.1121187210083, + "step": 9299 + }, + { + "epoch": 1.45, + "learning_rate": 7.326337465636184e-06, + "logits/chosen": -2.953869342803955, + "logits/rejected": -3.0577285289764404, + "logps/chosen": -191.96063232421875, + "logps/rejected": -348.03326416015625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.186469316482544, + "rewards/margins": 7.892452239990234, + "rewards/rejected": -11.078922271728516, + "step": 9300 + }, + { + "epoch": 1.45, + "learning_rate": 7.325604025105036e-06, + "logits/chosen": -2.148925304412842, + "logits/rejected": -2.759397029876709, + "logps/chosen": -418.87164306640625, + "logps/rejected": -488.0513610839844, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.009676933288574, + "rewards/margins": 7.7985687255859375, + "rewards/rejected": -12.808245658874512, + "step": 9301 + }, + { + "epoch": 1.45, + "learning_rate": 7.324870584573888e-06, + "logits/chosen": -2.702394962310791, + "logits/rejected": -2.8800952434539795, + "logps/chosen": -116.76185607910156, + "logps/rejected": -294.72607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4742746353149414, + "rewards/margins": 10.533880233764648, + "rewards/rejected": -13.008153915405273, + "step": 9302 + }, + { + "epoch": 1.45, + "learning_rate": 7.324137144042741e-06, + "logits/chosen": -1.8991928100585938, + "logits/rejected": -2.7114572525024414, + "logps/chosen": -169.8148651123047, + "logps/rejected": -306.723876953125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.410824775695801, + "rewards/margins": 8.407363891601562, + "rewards/rejected": -13.81818962097168, + "step": 9303 + }, + { + "epoch": 1.45, + "learning_rate": 7.323403703511593e-06, + "logits/chosen": -2.2034709453582764, + "logits/rejected": -2.491550922393799, + "logps/chosen": -121.33709716796875, + "logps/rejected": -154.4682159423828, + "loss": 0.1224, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.089703559875488, + "rewards/margins": 3.992987871170044, + "rewards/rejected": -11.082691192626953, + "step": 9304 + }, + { + "epoch": 1.45, + "learning_rate": 7.322670262980445e-06, + "logits/chosen": -2.9159493446350098, + "logits/rejected": -2.227787494659424, + "logps/chosen": -239.46456909179688, + "logps/rejected": -279.69842529296875, + "loss": 2.1538, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.799521446228027, + "rewards/margins": 0.6702845096588135, + "rewards/rejected": -6.46980619430542, + "step": 9305 + }, + { + "epoch": 1.45, + "learning_rate": 7.321936822449298e-06, + "logits/chosen": -0.9861254096031189, + "logits/rejected": -2.688995361328125, + "logps/chosen": -130.9322509765625, + "logps/rejected": -296.52252197265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.588047504425049, + "rewards/margins": 9.119722366333008, + "rewards/rejected": -12.707770347595215, + "step": 9306 + }, + { + "epoch": 1.45, + "learning_rate": 7.32120338191815e-06, + "logits/chosen": -1.830277442932129, + "logits/rejected": -2.378394842147827, + "logps/chosen": -162.38775634765625, + "logps/rejected": -460.13385009765625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.210956573486328, + "rewards/margins": 10.556975364685059, + "rewards/rejected": -14.767931938171387, + "step": 9307 + }, + { + "epoch": 1.45, + "learning_rate": 7.3204699413870015e-06, + "logits/chosen": -1.6213757991790771, + "logits/rejected": -2.9964334964752197, + "logps/chosen": -476.3846435546875, + "logps/rejected": -926.3786010742188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0494065284729004, + "rewards/margins": 12.359908103942871, + "rewards/rejected": -15.40931510925293, + "step": 9308 + }, + { + "epoch": 1.45, + "learning_rate": 7.319736500855853e-06, + "logits/chosen": -3.038234233856201, + "logits/rejected": -2.6424639225006104, + "logps/chosen": -656.4678344726562, + "logps/rejected": -580.4276123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.029898166656494, + "rewards/margins": 11.052864074707031, + "rewards/rejected": -16.082761764526367, + "step": 9309 + }, + { + "epoch": 1.45, + "learning_rate": 7.319003060324705e-06, + "logits/chosen": -2.482170820236206, + "logits/rejected": -3.1811134815216064, + "logps/chosen": -108.26069641113281, + "logps/rejected": -283.4128723144531, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.899858236312866, + "rewards/margins": 9.50495433807373, + "rewards/rejected": -13.404812812805176, + "step": 9310 + }, + { + "epoch": 1.45, + "learning_rate": 7.318269619793557e-06, + "logits/chosen": -2.8955767154693604, + "logits/rejected": -2.709465503692627, + "logps/chosen": -317.5998840332031, + "logps/rejected": -434.72998046875, + "loss": 1.1061, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.081228256225586, + "rewards/margins": 5.297128677368164, + "rewards/rejected": -12.37835693359375, + "step": 9311 + }, + { + "epoch": 1.45, + "learning_rate": 7.317536179262411e-06, + "logits/chosen": -2.5814716815948486, + "logits/rejected": -2.9929358959198, + "logps/chosen": -142.03370666503906, + "logps/rejected": -240.0833740234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.927781105041504, + "rewards/margins": 8.254477500915527, + "rewards/rejected": -12.182258605957031, + "step": 9312 + }, + { + "epoch": 1.45, + "learning_rate": 7.3168027387312626e-06, + "logits/chosen": -2.3834593296051025, + "logits/rejected": -2.8329758644104004, + "logps/chosen": -149.9950714111328, + "logps/rejected": -336.13983154296875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.750277996063232, + "rewards/margins": 7.277458190917969, + "rewards/rejected": -14.02773666381836, + "step": 9313 + }, + { + "epoch": 1.45, + "learning_rate": 7.3160692982001144e-06, + "logits/chosen": -2.6286139488220215, + "logits/rejected": -2.6315231323242188, + "logps/chosen": -361.5068359375, + "logps/rejected": -457.551513671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.02859354019165, + "rewards/margins": 8.597585678100586, + "rewards/rejected": -12.626179695129395, + "step": 9314 + }, + { + "epoch": 1.45, + "learning_rate": 7.315335857668966e-06, + "logits/chosen": -2.7047860622406006, + "logits/rejected": -2.185035467147827, + "logps/chosen": -292.35284423828125, + "logps/rejected": -220.93798828125, + "loss": 1.2754, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.678705215454102, + "rewards/margins": -0.811007022857666, + "rewards/rejected": -7.8676981925964355, + "step": 9315 + }, + { + "epoch": 1.45, + "learning_rate": 7.314602417137818e-06, + "logits/chosen": -3.086148500442505, + "logits/rejected": -2.8684914112091064, + "logps/chosen": -253.08212280273438, + "logps/rejected": -413.2900390625, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9968180656433105, + "rewards/margins": 8.47696304321289, + "rewards/rejected": -12.473780632019043, + "step": 9316 + }, + { + "epoch": 1.45, + "learning_rate": 7.31386897660667e-06, + "logits/chosen": -2.4829204082489014, + "logits/rejected": -2.892644166946411, + "logps/chosen": -70.34417724609375, + "logps/rejected": -345.2498779296875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0398426055908203, + "rewards/margins": 8.345707893371582, + "rewards/rejected": -10.385551452636719, + "step": 9317 + }, + { + "epoch": 1.45, + "learning_rate": 7.313135536075522e-06, + "logits/chosen": -2.3814857006073, + "logits/rejected": -2.835069179534912, + "logps/chosen": -272.8271484375, + "logps/rejected": -378.0198059082031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.552110195159912, + "rewards/margins": 9.742737770080566, + "rewards/rejected": -13.29484748840332, + "step": 9318 + }, + { + "epoch": 1.45, + "learning_rate": 7.312402095544374e-06, + "logits/chosen": -2.3316762447357178, + "logits/rejected": -2.8855831623077393, + "logps/chosen": -738.7973022460938, + "logps/rejected": -617.5094604492188, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.084546089172363, + "rewards/margins": 7.292202472686768, + "rewards/rejected": -13.376749038696289, + "step": 9319 + }, + { + "epoch": 1.45, + "learning_rate": 7.311668655013226e-06, + "logits/chosen": -2.7850735187530518, + "logits/rejected": -2.6398587226867676, + "logps/chosen": -453.9132385253906, + "logps/rejected": -461.85198974609375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5010693073272705, + "rewards/margins": 7.467218399047852, + "rewards/rejected": -10.96828842163086, + "step": 9320 + }, + { + "epoch": 1.45, + "learning_rate": 7.310935214482079e-06, + "logits/chosen": -2.985417604446411, + "logits/rejected": -2.504164457321167, + "logps/chosen": -200.71206665039062, + "logps/rejected": -162.57449340820312, + "loss": 4.473, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.460899353027344, + "rewards/margins": -1.3229103088378906, + "rewards/rejected": -7.137989044189453, + "step": 9321 + }, + { + "epoch": 1.45, + "learning_rate": 7.310201773950931e-06, + "logits/chosen": -2.552082061767578, + "logits/rejected": -2.108983039855957, + "logps/chosen": -224.4756317138672, + "logps/rejected": -248.19915771484375, + "loss": 2.1514, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.039922714233398, + "rewards/margins": 0.9869678020477295, + "rewards/rejected": -9.02688980102539, + "step": 9322 + }, + { + "epoch": 1.45, + "learning_rate": 7.309468333419784e-06, + "logits/chosen": -2.8662266731262207, + "logits/rejected": -2.844594717025757, + "logps/chosen": -125.23052978515625, + "logps/rejected": -290.66375732421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5664381980896, + "rewards/margins": 8.631662368774414, + "rewards/rejected": -13.198101043701172, + "step": 9323 + }, + { + "epoch": 1.45, + "learning_rate": 7.308734892888636e-06, + "logits/chosen": -1.5249552726745605, + "logits/rejected": -2.9312188625335693, + "logps/chosen": -147.14892578125, + "logps/rejected": -425.05096435546875, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.137462615966797, + "rewards/margins": 6.309083938598633, + "rewards/rejected": -11.44654655456543, + "step": 9324 + }, + { + "epoch": 1.45, + "learning_rate": 7.308001452357488e-06, + "logits/chosen": -1.976598858833313, + "logits/rejected": -2.8808400630950928, + "logps/chosen": -164.27984619140625, + "logps/rejected": -490.3555908203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.682729721069336, + "rewards/margins": 9.09573745727539, + "rewards/rejected": -12.778467178344727, + "step": 9325 + }, + { + "epoch": 1.45, + "learning_rate": 7.3072680118263395e-06, + "logits/chosen": -2.1251280307769775, + "logits/rejected": -3.0003201961517334, + "logps/chosen": -136.68270874023438, + "logps/rejected": -462.5501708984375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.331694602966309, + "rewards/margins": 7.020917892456055, + "rewards/rejected": -13.352612495422363, + "step": 9326 + }, + { + "epoch": 1.45, + "learning_rate": 7.306534571295191e-06, + "logits/chosen": -2.7867112159729004, + "logits/rejected": -2.5967233180999756, + "logps/chosen": -311.9498291015625, + "logps/rejected": -143.95040893554688, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.281907081604004, + "rewards/margins": 5.544958591461182, + "rewards/rejected": -7.8268656730651855, + "step": 9327 + }, + { + "epoch": 1.45, + "learning_rate": 7.305801130764043e-06, + "logits/chosen": -2.917945384979248, + "logits/rejected": -2.9120259284973145, + "logps/chosen": -223.83660888671875, + "logps/rejected": -341.97027587890625, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9706850051879883, + "rewards/margins": 7.119043350219727, + "rewards/rejected": -10.089728355407715, + "step": 9328 + }, + { + "epoch": 1.45, + "learning_rate": 7.305067690232897e-06, + "logits/chosen": -1.8241381645202637, + "logits/rejected": -2.717926263809204, + "logps/chosen": -343.504150390625, + "logps/rejected": -532.009765625, + "loss": 0.0553, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.259007453918457, + "rewards/margins": 3.7353150844573975, + "rewards/rejected": -11.994321823120117, + "step": 9329 + }, + { + "epoch": 1.45, + "learning_rate": 7.304334249701749e-06, + "logits/chosen": -1.751442313194275, + "logits/rejected": -2.3740317821502686, + "logps/chosen": -212.08740234375, + "logps/rejected": -398.90325927734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.439581871032715, + "rewards/margins": 9.30211353302002, + "rewards/rejected": -14.741695404052734, + "step": 9330 + }, + { + "epoch": 1.45, + "learning_rate": 7.303600809170601e-06, + "logits/chosen": -0.43386924266815186, + "logits/rejected": -1.3563138246536255, + "logps/chosen": -125.80249786376953, + "logps/rejected": -556.0327758789062, + "loss": 0.0383, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1983203887939453, + "rewards/margins": 31.82769012451172, + "rewards/rejected": -35.02600860595703, + "step": 9331 + }, + { + "epoch": 1.45, + "learning_rate": 7.3028673686394525e-06, + "logits/chosen": -1.221476435661316, + "logits/rejected": -2.873063564300537, + "logps/chosen": -106.71285247802734, + "logps/rejected": -399.20635986328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.993009090423584, + "rewards/margins": 9.160633087158203, + "rewards/rejected": -12.153642654418945, + "step": 9332 + }, + { + "epoch": 1.45, + "learning_rate": 7.302133928108304e-06, + "logits/chosen": -2.8021135330200195, + "logits/rejected": -2.94028902053833, + "logps/chosen": -805.9615478515625, + "logps/rejected": -596.93896484375, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.784497261047363, + "rewards/margins": 8.318501472473145, + "rewards/rejected": -13.102998733520508, + "step": 9333 + }, + { + "epoch": 1.45, + "learning_rate": 7.301400487577156e-06, + "logits/chosen": -2.9290194511413574, + "logits/rejected": -2.9650862216949463, + "logps/chosen": -77.5351333618164, + "logps/rejected": -146.5974884033203, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.959735870361328, + "rewards/margins": 5.223522663116455, + "rewards/rejected": -10.183258056640625, + "step": 9334 + }, + { + "epoch": 1.45, + "learning_rate": 7.300667047046008e-06, + "logits/chosen": -1.8462806940078735, + "logits/rejected": -2.700603723526001, + "logps/chosen": -138.08840942382812, + "logps/rejected": -341.9442138671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.509828567504883, + "rewards/margins": 8.148529052734375, + "rewards/rejected": -10.658357620239258, + "step": 9335 + }, + { + "epoch": 1.45, + "learning_rate": 7.29993360651486e-06, + "logits/chosen": -2.6457667350769043, + "logits/rejected": -2.77248477935791, + "logps/chosen": -263.828369140625, + "logps/rejected": -384.9617919921875, + "loss": 0.0356, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.952923774719238, + "rewards/margins": 3.5580568313598633, + "rewards/rejected": -8.510980606079102, + "step": 9336 + }, + { + "epoch": 1.45, + "learning_rate": 7.299200165983712e-06, + "logits/chosen": -2.067713975906372, + "logits/rejected": -2.7271177768707275, + "logps/chosen": -179.669921875, + "logps/rejected": -285.9290771484375, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.300492286682129, + "rewards/margins": 5.3286895751953125, + "rewards/rejected": -11.629181861877441, + "step": 9337 + }, + { + "epoch": 1.45, + "learning_rate": 7.2984667254525654e-06, + "logits/chosen": -2.9910576343536377, + "logits/rejected": -1.5887268781661987, + "logps/chosen": -440.65435791015625, + "logps/rejected": -275.70208740234375, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.044759273529053, + "rewards/margins": 7.086265563964844, + "rewards/rejected": -12.131025314331055, + "step": 9338 + }, + { + "epoch": 1.45, + "learning_rate": 7.297733284921417e-06, + "logits/chosen": -2.999999523162842, + "logits/rejected": -2.6087515354156494, + "logps/chosen": -430.482666015625, + "logps/rejected": -419.9342346191406, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9799365997314453, + "rewards/margins": 8.385714530944824, + "rewards/rejected": -12.36565113067627, + "step": 9339 + }, + { + "epoch": 1.45, + "learning_rate": 7.29699984439027e-06, + "logits/chosen": -2.3781538009643555, + "logits/rejected": -2.8044323921203613, + "logps/chosen": -96.51419067382812, + "logps/rejected": -364.0084228515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.694643974304199, + "rewards/margins": 9.65806770324707, + "rewards/rejected": -14.352710723876953, + "step": 9340 + }, + { + "epoch": 1.45, + "learning_rate": 7.296266403859122e-06, + "logits/chosen": -1.9168621301651, + "logits/rejected": -2.750304937362671, + "logps/chosen": -108.14452362060547, + "logps/rejected": -327.66162109375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.650732040405273, + "rewards/margins": 7.349518775939941, + "rewards/rejected": -13.000249862670898, + "step": 9341 + }, + { + "epoch": 1.45, + "learning_rate": 7.295532963327974e-06, + "logits/chosen": -2.685587167739868, + "logits/rejected": -2.7564873695373535, + "logps/chosen": -713.7494506835938, + "logps/rejected": -610.2807006835938, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8198394775390625, + "rewards/margins": 7.425298690795898, + "rewards/rejected": -13.245138168334961, + "step": 9342 + }, + { + "epoch": 1.45, + "learning_rate": 7.294799522796826e-06, + "logits/chosen": -2.312206983566284, + "logits/rejected": -2.985820770263672, + "logps/chosen": -48.60047149658203, + "logps/rejected": -328.5462646484375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.062937021255493, + "rewards/margins": 7.349203109741211, + "rewards/rejected": -9.412139892578125, + "step": 9343 + }, + { + "epoch": 1.45, + "learning_rate": 7.2940660822656776e-06, + "logits/chosen": -2.4437432289123535, + "logits/rejected": -2.83461332321167, + "logps/chosen": -170.66294860839844, + "logps/rejected": -287.64166259765625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.789340972900391, + "rewards/margins": 7.956995964050293, + "rewards/rejected": -12.746336936950684, + "step": 9344 + }, + { + "epoch": 1.45, + "learning_rate": 7.2933326417345294e-06, + "logits/chosen": -2.8545637130737305, + "logits/rejected": -2.1659882068634033, + "logps/chosen": -392.6836853027344, + "logps/rejected": -313.432861328125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.758024215698242, + "rewards/margins": 8.286985397338867, + "rewards/rejected": -12.04500961303711, + "step": 9345 + }, + { + "epoch": 1.45, + "learning_rate": 7.292599201203381e-06, + "logits/chosen": -2.6716692447662354, + "logits/rejected": -2.567136287689209, + "logps/chosen": -209.16848754882812, + "logps/rejected": -301.73956298828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.382803440093994, + "rewards/margins": 9.412480354309082, + "rewards/rejected": -12.795284271240234, + "step": 9346 + }, + { + "epoch": 1.45, + "learning_rate": 7.291865760672235e-06, + "logits/chosen": -2.8192601203918457, + "logits/rejected": -3.059781551361084, + "logps/chosen": -277.71331787109375, + "logps/rejected": -459.0558166503906, + "loss": 0.2002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7432835102081299, + "rewards/margins": 6.397252082824707, + "rewards/rejected": -8.140535354614258, + "step": 9347 + }, + { + "epoch": 1.45, + "learning_rate": 7.291132320141087e-06, + "logits/chosen": -2.845261335372925, + "logits/rejected": -3.0130245685577393, + "logps/chosen": -236.47702026367188, + "logps/rejected": -376.43804931640625, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8492980003356934, + "rewards/margins": 4.880131721496582, + "rewards/rejected": -7.729429721832275, + "step": 9348 + }, + { + "epoch": 1.45, + "learning_rate": 7.290398879609939e-06, + "logits/chosen": -2.396286725997925, + "logits/rejected": -2.514895439147949, + "logps/chosen": -116.8580093383789, + "logps/rejected": -259.5592346191406, + "loss": 0.0541, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.058100700378418, + "rewards/margins": 5.087119102478027, + "rewards/rejected": -11.145219802856445, + "step": 9349 + }, + { + "epoch": 1.45, + "learning_rate": 7.2896654390787905e-06, + "logits/chosen": -2.944207191467285, + "logits/rejected": -2.63443660736084, + "logps/chosen": -653.3319091796875, + "logps/rejected": -496.15423583984375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8092429637908936, + "rewards/margins": 8.447399139404297, + "rewards/rejected": -11.256643295288086, + "step": 9350 + }, + { + "epoch": 1.45, + "learning_rate": 7.288931998547642e-06, + "logits/chosen": -2.005650758743286, + "logits/rejected": -2.293459177017212, + "logps/chosen": -293.3031311035156, + "logps/rejected": -602.541748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4981567859649658, + "rewards/margins": 19.946264266967773, + "rewards/rejected": -21.444419860839844, + "step": 9351 + }, + { + "epoch": 1.45, + "learning_rate": 7.288198558016494e-06, + "logits/chosen": -2.6811065673828125, + "logits/rejected": -2.9781925678253174, + "logps/chosen": -66.11178588867188, + "logps/rejected": -304.61199951171875, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4164671897888184, + "rewards/margins": 5.355517864227295, + "rewards/rejected": -8.771985054016113, + "step": 9352 + }, + { + "epoch": 1.45, + "learning_rate": 7.287465117485346e-06, + "logits/chosen": -2.7565338611602783, + "logits/rejected": -2.1827807426452637, + "logps/chosen": -372.5631103515625, + "logps/rejected": -318.8377990722656, + "loss": 2.3018, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.528921127319336, + "rewards/margins": 4.160954475402832, + "rewards/rejected": -10.689875602722168, + "step": 9353 + }, + { + "epoch": 1.45, + "learning_rate": 7.286731676954198e-06, + "logits/chosen": -1.2699298858642578, + "logits/rejected": -2.7720346450805664, + "logps/chosen": -78.21377563476562, + "logps/rejected": -341.0819091796875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.628185987472534, + "rewards/margins": 9.243064880371094, + "rewards/rejected": -11.87125015258789, + "step": 9354 + }, + { + "epoch": 1.45, + "learning_rate": 7.285998236423051e-06, + "logits/chosen": -2.9023306369781494, + "logits/rejected": -3.0380520820617676, + "logps/chosen": -290.9285888671875, + "logps/rejected": -337.3571472167969, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.16225004196167, + "rewards/margins": 7.537637233734131, + "rewards/rejected": -9.6998872756958, + "step": 9355 + }, + { + "epoch": 1.46, + "learning_rate": 7.2852647958919035e-06, + "logits/chosen": -2.8525662422180176, + "logits/rejected": -2.5806007385253906, + "logps/chosen": -335.12652587890625, + "logps/rejected": -437.8814697265625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.644063472747803, + "rewards/margins": 7.702432632446289, + "rewards/rejected": -12.34649658203125, + "step": 9356 + }, + { + "epoch": 1.46, + "learning_rate": 7.284531355360756e-06, + "logits/chosen": -2.8237786293029785, + "logits/rejected": -2.2601983547210693, + "logps/chosen": -623.5072021484375, + "logps/rejected": -605.4776000976562, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.677512168884277, + "rewards/margins": 8.343656539916992, + "rewards/rejected": -15.02116870880127, + "step": 9357 + }, + { + "epoch": 1.46, + "learning_rate": 7.283797914829608e-06, + "logits/chosen": -2.1948373317718506, + "logits/rejected": -2.450134038925171, + "logps/chosen": -184.92681884765625, + "logps/rejected": -450.501953125, + "loss": 0.0686, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.278613090515137, + "rewards/margins": 5.143585681915283, + "rewards/rejected": -12.422198295593262, + "step": 9358 + }, + { + "epoch": 1.46, + "learning_rate": 7.28306447429846e-06, + "logits/chosen": -2.2993621826171875, + "logits/rejected": -2.7331535816192627, + "logps/chosen": -259.473388671875, + "logps/rejected": -522.3134765625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.63892936706543, + "rewards/margins": 6.866454124450684, + "rewards/rejected": -12.505383491516113, + "step": 9359 + }, + { + "epoch": 1.46, + "learning_rate": 7.282331033767312e-06, + "logits/chosen": -1.701985478401184, + "logits/rejected": -2.147045612335205, + "logps/chosen": -180.26678466796875, + "logps/rejected": -184.75082397460938, + "loss": 0.1063, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.571505069732666, + "rewards/margins": 4.33888053894043, + "rewards/rejected": -7.910385608673096, + "step": 9360 + }, + { + "epoch": 1.46, + "learning_rate": 7.281597593236164e-06, + "logits/chosen": -2.6267733573913574, + "logits/rejected": -2.797149658203125, + "logps/chosen": -619.767822265625, + "logps/rejected": -561.4661865234375, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.856389045715332, + "rewards/margins": 6.621586799621582, + "rewards/rejected": -11.477975845336914, + "step": 9361 + }, + { + "epoch": 1.46, + "learning_rate": 7.280864152705016e-06, + "logits/chosen": -2.331108808517456, + "logits/rejected": -2.8557121753692627, + "logps/chosen": -172.89068603515625, + "logps/rejected": -219.43096923828125, + "loss": 1.6222, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.377002716064453, + "rewards/margins": 2.798887014389038, + "rewards/rejected": -9.17588996887207, + "step": 9362 + }, + { + "epoch": 1.46, + "learning_rate": 7.2801307121738675e-06, + "logits/chosen": -1.721199631690979, + "logits/rejected": -2.758476495742798, + "logps/chosen": -78.30545043945312, + "logps/rejected": -454.92803955078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.516378402709961, + "rewards/margins": 10.278607368469238, + "rewards/rejected": -14.7949857711792, + "step": 9363 + }, + { + "epoch": 1.46, + "learning_rate": 7.279397271642719e-06, + "logits/chosen": -2.2744011878967285, + "logits/rejected": -2.6411776542663574, + "logps/chosen": -238.06268310546875, + "logps/rejected": -463.6059875488281, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.006874084472656, + "rewards/margins": 6.65837287902832, + "rewards/rejected": -13.665246963500977, + "step": 9364 + }, + { + "epoch": 1.46, + "learning_rate": 7.278663831111573e-06, + "logits/chosen": -2.883138656616211, + "logits/rejected": -2.8111016750335693, + "logps/chosen": -116.17425537109375, + "logps/rejected": -235.60195922851562, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.086312770843506, + "rewards/margins": 5.356550216674805, + "rewards/rejected": -10.442862510681152, + "step": 9365 + }, + { + "epoch": 1.46, + "learning_rate": 7.277930390580425e-06, + "logits/chosen": -2.911529541015625, + "logits/rejected": -1.8743239641189575, + "logps/chosen": -247.87490844726562, + "logps/rejected": -66.60401916503906, + "loss": 3.8089, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.526755332946777, + "rewards/margins": -3.756805896759033, + "rewards/rejected": -3.7699499130249023, + "step": 9366 + }, + { + "epoch": 1.46, + "learning_rate": 7.277196950049277e-06, + "logits/chosen": -2.3267459869384766, + "logits/rejected": -2.346349000930786, + "logps/chosen": -121.06114959716797, + "logps/rejected": -313.35723876953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.800032615661621, + "rewards/margins": 9.175589561462402, + "rewards/rejected": -12.975622177124023, + "step": 9367 + }, + { + "epoch": 1.46, + "learning_rate": 7.2764635095181286e-06, + "logits/chosen": -2.155087947845459, + "logits/rejected": -2.61440372467041, + "logps/chosen": -219.98114013671875, + "logps/rejected": -496.3606262207031, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.537448406219482, + "rewards/margins": 6.459872245788574, + "rewards/rejected": -12.997320175170898, + "step": 9368 + }, + { + "epoch": 1.46, + "learning_rate": 7.2757300689869804e-06, + "logits/chosen": -2.6717019081115723, + "logits/rejected": -2.8075766563415527, + "logps/chosen": -270.1319580078125, + "logps/rejected": -355.71826171875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.178842544555664, + "rewards/margins": 6.365083694458008, + "rewards/rejected": -12.543926239013672, + "step": 9369 + }, + { + "epoch": 1.46, + "learning_rate": 7.274996628455832e-06, + "logits/chosen": -1.9053990840911865, + "logits/rejected": -2.937023639678955, + "logps/chosen": -141.5027618408203, + "logps/rejected": -222.7566375732422, + "loss": 0.1504, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.276911735534668, + "rewards/margins": 3.6439616680145264, + "rewards/rejected": -8.920873641967773, + "step": 9370 + }, + { + "epoch": 1.46, + "learning_rate": 7.274263187924684e-06, + "logits/chosen": -2.0779354572296143, + "logits/rejected": -1.7304985523223877, + "logps/chosen": -850.0171508789062, + "logps/rejected": -519.7423095703125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.564339637756348, + "rewards/margins": 9.108464241027832, + "rewards/rejected": -13.67280387878418, + "step": 9371 + }, + { + "epoch": 1.46, + "learning_rate": 7.273529747393537e-06, + "logits/chosen": -2.3422768115997314, + "logits/rejected": -2.975639581680298, + "logps/chosen": -344.7513122558594, + "logps/rejected": -506.87811279296875, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.014455795288086, + "rewards/margins": 7.8369245529174805, + "rewards/rejected": -13.851380348205566, + "step": 9372 + }, + { + "epoch": 1.46, + "learning_rate": 7.272796306862389e-06, + "logits/chosen": -2.625336170196533, + "logits/rejected": -3.0772573947906494, + "logps/chosen": -510.4719543457031, + "logps/rejected": -536.0325927734375, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.651956081390381, + "rewards/margins": 5.4972429275512695, + "rewards/rejected": -10.149199485778809, + "step": 9373 + }, + { + "epoch": 1.46, + "learning_rate": 7.272062866331242e-06, + "logits/chosen": -2.279008626937866, + "logits/rejected": -2.8383069038391113, + "logps/chosen": -144.40858459472656, + "logps/rejected": -298.682373046875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.320368766784668, + "rewards/margins": 8.094279289245605, + "rewards/rejected": -11.414648056030273, + "step": 9374 + }, + { + "epoch": 1.46, + "learning_rate": 7.271329425800094e-06, + "logits/chosen": -2.3823049068450928, + "logits/rejected": -2.6062521934509277, + "logps/chosen": -350.6759948730469, + "logps/rejected": -452.5174560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.288938999176025, + "rewards/margins": 15.775985717773438, + "rewards/rejected": -20.064924240112305, + "step": 9375 + }, + { + "epoch": 1.46, + "learning_rate": 7.270595985268946e-06, + "logits/chosen": -2.7763001918792725, + "logits/rejected": -2.3404757976531982, + "logps/chosen": -448.03955078125, + "logps/rejected": -492.16864013671875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.379789352416992, + "rewards/margins": 8.369085311889648, + "rewards/rejected": -14.74887466430664, + "step": 9376 + }, + { + "epoch": 1.46, + "learning_rate": 7.269862544737798e-06, + "logits/chosen": -2.5184624195098877, + "logits/rejected": -2.091376781463623, + "logps/chosen": -238.4056396484375, + "logps/rejected": -315.3511047363281, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9425768852233887, + "rewards/margins": 9.029223442077637, + "rewards/rejected": -12.971799850463867, + "step": 9377 + }, + { + "epoch": 1.46, + "learning_rate": 7.26912910420665e-06, + "logits/chosen": -1.1016106605529785, + "logits/rejected": -2.687960386276245, + "logps/chosen": -130.6573944091797, + "logps/rejected": -321.01080322265625, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.706018447875977, + "rewards/margins": 6.14815616607666, + "rewards/rejected": -11.854175567626953, + "step": 9378 + }, + { + "epoch": 1.46, + "learning_rate": 7.268395663675502e-06, + "logits/chosen": -2.951517343521118, + "logits/rejected": -2.6919426918029785, + "logps/chosen": -213.50054931640625, + "logps/rejected": -180.18795776367188, + "loss": 1.2201, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.949654579162598, + "rewards/margins": 1.7276902198791504, + "rewards/rejected": -6.67734432220459, + "step": 9379 + }, + { + "epoch": 1.46, + "learning_rate": 7.267662223144354e-06, + "logits/chosen": -2.9456937313079834, + "logits/rejected": -2.914130210876465, + "logps/chosen": -300.4889831542969, + "logps/rejected": -350.3692321777344, + "loss": 3.1707, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.670495986938477, + "rewards/margins": 0.7581439018249512, + "rewards/rejected": -7.428639888763428, + "step": 9380 + }, + { + "epoch": 1.46, + "learning_rate": 7.2669287826132055e-06, + "logits/chosen": -2.7709343433380127, + "logits/rejected": -2.9811110496520996, + "logps/chosen": -502.31842041015625, + "logps/rejected": -352.90765380859375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.230329990386963, + "rewards/margins": 8.435104370117188, + "rewards/rejected": -13.665433883666992, + "step": 9381 + }, + { + "epoch": 1.46, + "learning_rate": 7.266195342082057e-06, + "logits/chosen": -2.7938687801361084, + "logits/rejected": -2.030782461166382, + "logps/chosen": -272.8136901855469, + "logps/rejected": -301.353759765625, + "loss": 1.7867, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.700960159301758, + "rewards/margins": -0.23379158973693848, + "rewards/rejected": -9.467168807983398, + "step": 9382 + }, + { + "epoch": 1.46, + "learning_rate": 7.265461901550911e-06, + "logits/chosen": -2.991394519805908, + "logits/rejected": -2.0665016174316406, + "logps/chosen": -534.8028564453125, + "logps/rejected": -442.55206298828125, + "loss": 1.1153, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.030961990356445, + "rewards/margins": 0.916130781173706, + "rewards/rejected": -8.947092056274414, + "step": 9383 + }, + { + "epoch": 1.46, + "learning_rate": 7.264728461019763e-06, + "logits/chosen": -1.4945204257965088, + "logits/rejected": -2.6706995964050293, + "logps/chosen": -198.51177978515625, + "logps/rejected": -328.7064514160156, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7265396118164062, + "rewards/margins": 7.495873928070068, + "rewards/rejected": -11.222414016723633, + "step": 9384 + }, + { + "epoch": 1.46, + "learning_rate": 7.263995020488615e-06, + "logits/chosen": -1.0740951299667358, + "logits/rejected": -2.329392910003662, + "logps/chosen": -193.03433227539062, + "logps/rejected": -453.023681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2091217041015625, + "rewards/margins": 9.680732727050781, + "rewards/rejected": -12.889854431152344, + "step": 9385 + }, + { + "epoch": 1.46, + "learning_rate": 7.263261579957467e-06, + "logits/chosen": -1.3418116569519043, + "logits/rejected": -1.6103023290634155, + "logps/chosen": -380.53521728515625, + "logps/rejected": -303.4538269042969, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.444039821624756, + "rewards/margins": 6.506313323974609, + "rewards/rejected": -8.950352668762207, + "step": 9386 + }, + { + "epoch": 1.46, + "learning_rate": 7.2625281394263185e-06, + "logits/chosen": -1.9521914720535278, + "logits/rejected": -2.7905189990997314, + "logps/chosen": -122.52278137207031, + "logps/rejected": -266.9920349121094, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.154561996459961, + "rewards/margins": 5.65760612487793, + "rewards/rejected": -9.81216812133789, + "step": 9387 + }, + { + "epoch": 1.46, + "learning_rate": 7.26179469889517e-06, + "logits/chosen": -1.9008171558380127, + "logits/rejected": -2.645362138748169, + "logps/chosen": -342.8610534667969, + "logps/rejected": -577.936279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.124706745147705, + "rewards/margins": 11.48426628112793, + "rewards/rejected": -15.608972549438477, + "step": 9388 + }, + { + "epoch": 1.46, + "learning_rate": 7.261061258364023e-06, + "logits/chosen": -2.6041646003723145, + "logits/rejected": -2.796347141265869, + "logps/chosen": -603.7951049804688, + "logps/rejected": -442.2203369140625, + "loss": 1.8528, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.8998918533325195, + "rewards/margins": 1.3108367919921875, + "rewards/rejected": -9.210728645324707, + "step": 9389 + }, + { + "epoch": 1.46, + "learning_rate": 7.260327817832875e-06, + "logits/chosen": -1.4049835205078125, + "logits/rejected": -2.5980491638183594, + "logps/chosen": -267.3062744140625, + "logps/rejected": -514.4529418945312, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.081091403961182, + "rewards/margins": 7.429388046264648, + "rewards/rejected": -14.510478973388672, + "step": 9390 + }, + { + "epoch": 1.46, + "learning_rate": 7.259594377301727e-06, + "logits/chosen": -1.675594687461853, + "logits/rejected": -2.6678571701049805, + "logps/chosen": -66.84760284423828, + "logps/rejected": -394.47088623046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.44603157043457, + "rewards/margins": 10.889080047607422, + "rewards/rejected": -15.335111618041992, + "step": 9391 + }, + { + "epoch": 1.46, + "learning_rate": 7.2588609367705804e-06, + "logits/chosen": -2.4855690002441406, + "logits/rejected": -2.638695001602173, + "logps/chosen": -167.37777709960938, + "logps/rejected": -384.39471435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.597939968109131, + "rewards/margins": 10.169243812561035, + "rewards/rejected": -15.767183303833008, + "step": 9392 + }, + { + "epoch": 1.46, + "learning_rate": 7.258127496239432e-06, + "logits/chosen": -2.2458605766296387, + "logits/rejected": -2.7161543369293213, + "logps/chosen": -345.36865234375, + "logps/rejected": -356.72613525390625, + "loss": 0.5548, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.775705337524414, + "rewards/margins": 4.331962585449219, + "rewards/rejected": -10.107667922973633, + "step": 9393 + }, + { + "epoch": 1.46, + "learning_rate": 7.257394055708284e-06, + "logits/chosen": -2.896483898162842, + "logits/rejected": -2.103135347366333, + "logps/chosen": -249.7921142578125, + "logps/rejected": -319.0899658203125, + "loss": 0.4018, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.581461429595947, + "rewards/margins": 7.047698020935059, + "rewards/rejected": -12.629159927368164, + "step": 9394 + }, + { + "epoch": 1.46, + "learning_rate": 7.256660615177136e-06, + "logits/chosen": -1.4023206233978271, + "logits/rejected": -2.7501866817474365, + "logps/chosen": -311.3285217285156, + "logps/rejected": -653.7630615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8550362586975098, + "rewards/margins": 12.748920440673828, + "rewards/rejected": -15.60395622253418, + "step": 9395 + }, + { + "epoch": 1.46, + "learning_rate": 7.255927174645988e-06, + "logits/chosen": -2.6153147220611572, + "logits/rejected": -3.008981466293335, + "logps/chosen": -203.1038818359375, + "logps/rejected": -207.96046447753906, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.954313278198242, + "rewards/margins": 4.986752510070801, + "rewards/rejected": -10.94106674194336, + "step": 9396 + }, + { + "epoch": 1.46, + "learning_rate": 7.25519373411484e-06, + "logits/chosen": -2.710965394973755, + "logits/rejected": -1.579728364944458, + "logps/chosen": -304.0714416503906, + "logps/rejected": -325.41668701171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.828359127044678, + "rewards/margins": 9.733866691589355, + "rewards/rejected": -15.562225341796875, + "step": 9397 + }, + { + "epoch": 1.46, + "learning_rate": 7.254460293583692e-06, + "logits/chosen": -2.83461332321167, + "logits/rejected": -2.343865156173706, + "logps/chosen": -584.741943359375, + "logps/rejected": -448.69140625, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.823290824890137, + "rewards/margins": 8.08718204498291, + "rewards/rejected": -13.910472869873047, + "step": 9398 + }, + { + "epoch": 1.46, + "learning_rate": 7.2537268530525436e-06, + "logits/chosen": -2.9877982139587402, + "logits/rejected": -2.530270576477051, + "logps/chosen": -266.4711608886719, + "logps/rejected": -224.73110961914062, + "loss": 0.2284, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.188894748687744, + "rewards/margins": 3.025144338607788, + "rewards/rejected": -7.214038848876953, + "step": 9399 + }, + { + "epoch": 1.46, + "learning_rate": 7.2529934125213955e-06, + "logits/chosen": -2.538907766342163, + "logits/rejected": -2.928647994995117, + "logps/chosen": -689.2518310546875, + "logps/rejected": -569.1243896484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.545584201812744, + "rewards/margins": 12.50868034362793, + "rewards/rejected": -18.054264068603516, + "step": 9400 + }, + { + "epoch": 1.46, + "learning_rate": 7.252259971990249e-06, + "logits/chosen": -2.4583683013916016, + "logits/rejected": -2.9528369903564453, + "logps/chosen": -369.865966796875, + "logps/rejected": -524.10986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5960266590118408, + "rewards/margins": 11.350114822387695, + "rewards/rejected": -11.946141242980957, + "step": 9401 + }, + { + "epoch": 1.46, + "learning_rate": 7.251526531459101e-06, + "logits/chosen": -2.4114885330200195, + "logits/rejected": -2.7176942825317383, + "logps/chosen": -102.23662567138672, + "logps/rejected": -253.91464233398438, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.166079044342041, + "rewards/margins": 7.95100212097168, + "rewards/rejected": -12.117080688476562, + "step": 9402 + }, + { + "epoch": 1.46, + "learning_rate": 7.250793090927953e-06, + "logits/chosen": -2.8441832065582275, + "logits/rejected": -2.3031082153320312, + "logps/chosen": -553.2811279296875, + "logps/rejected": -458.85784912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9646546840667725, + "rewards/margins": 9.4017333984375, + "rewards/rejected": -12.366388320922852, + "step": 9403 + }, + { + "epoch": 1.46, + "learning_rate": 7.250059650396805e-06, + "logits/chosen": -2.8649847507476807, + "logits/rejected": -2.4440879821777344, + "logps/chosen": -298.9744567871094, + "logps/rejected": -171.67071533203125, + "loss": 2.586, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.911624908447266, + "rewards/margins": -2.505855083465576, + "rewards/rejected": -6.405770301818848, + "step": 9404 + }, + { + "epoch": 1.46, + "learning_rate": 7.2493262098656565e-06, + "logits/chosen": -2.876959800720215, + "logits/rejected": -2.759982109069824, + "logps/chosen": -131.2096710205078, + "logps/rejected": -233.47872924804688, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.106560230255127, + "rewards/margins": 8.966489791870117, + "rewards/rejected": -11.073050498962402, + "step": 9405 + }, + { + "epoch": 1.46, + "learning_rate": 7.248592769334509e-06, + "logits/chosen": -1.9062814712524414, + "logits/rejected": -2.5030393600463867, + "logps/chosen": -95.21588134765625, + "logps/rejected": -232.90274047851562, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.274191856384277, + "rewards/margins": 5.795864105224609, + "rewards/rejected": -10.070055961608887, + "step": 9406 + }, + { + "epoch": 1.46, + "learning_rate": 7.247859328803361e-06, + "logits/chosen": -2.8120720386505127, + "logits/rejected": -2.2663733959198, + "logps/chosen": -242.7922821044922, + "logps/rejected": -361.7764892578125, + "loss": 0.18, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.040018081665039, + "rewards/margins": 2.071307897567749, + "rewards/rejected": -11.111326217651367, + "step": 9407 + }, + { + "epoch": 1.46, + "learning_rate": 7.247125888272213e-06, + "logits/chosen": -2.587965726852417, + "logits/rejected": -1.3259391784667969, + "logps/chosen": -134.16908264160156, + "logps/rejected": -119.16217041015625, + "loss": 0.7785, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.747402667999268, + "rewards/margins": 1.6932151317596436, + "rewards/rejected": -7.440617561340332, + "step": 9408 + }, + { + "epoch": 1.46, + "learning_rate": 7.246392447741065e-06, + "logits/chosen": -2.8370115756988525, + "logits/rejected": -1.7571052312850952, + "logps/chosen": -373.3916015625, + "logps/rejected": -358.9819641113281, + "loss": 1.1824, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.846627235412598, + "rewards/margins": -0.8040482997894287, + "rewards/rejected": -6.04257869720459, + "step": 9409 + }, + { + "epoch": 1.46, + "learning_rate": 7.2456590072099185e-06, + "logits/chosen": -2.6561291217803955, + "logits/rejected": -2.640083074569702, + "logps/chosen": -218.41456604003906, + "logps/rejected": -243.4806365966797, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.359818935394287, + "rewards/margins": 7.916592597961426, + "rewards/rejected": -10.276412010192871, + "step": 9410 + }, + { + "epoch": 1.46, + "learning_rate": 7.24492556667877e-06, + "logits/chosen": -2.96146559715271, + "logits/rejected": -3.0458385944366455, + "logps/chosen": -329.7748718261719, + "logps/rejected": -377.03692626953125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.035345077514648, + "rewards/margins": 7.678742408752441, + "rewards/rejected": -11.714086532592773, + "step": 9411 + }, + { + "epoch": 1.46, + "learning_rate": 7.244192126147622e-06, + "logits/chosen": -1.434004306793213, + "logits/rejected": -2.9582221508026123, + "logps/chosen": -375.23785400390625, + "logps/rejected": -410.54266357421875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.516415596008301, + "rewards/margins": 7.359816551208496, + "rewards/rejected": -10.876232147216797, + "step": 9412 + }, + { + "epoch": 1.46, + "learning_rate": 7.243458685616474e-06, + "logits/chosen": -2.2431588172912598, + "logits/rejected": -2.040405750274658, + "logps/chosen": -248.85409545898438, + "logps/rejected": -307.20977783203125, + "loss": 1.8607, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.159640312194824, + "rewards/margins": 2.889664649963379, + "rewards/rejected": -10.049304962158203, + "step": 9413 + }, + { + "epoch": 1.46, + "learning_rate": 7.242725245085326e-06, + "logits/chosen": -1.2119359970092773, + "logits/rejected": -2.7160096168518066, + "logps/chosen": -181.33819580078125, + "logps/rejected": -445.7687072753906, + "loss": 0.2308, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.045943260192871, + "rewards/margins": 5.035260200500488, + "rewards/rejected": -10.081202507019043, + "step": 9414 + }, + { + "epoch": 1.46, + "learning_rate": 7.241991804554178e-06, + "logits/chosen": -1.9016848802566528, + "logits/rejected": -2.7688403129577637, + "logps/chosen": -139.95623779296875, + "logps/rejected": -421.68817138671875, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.561709403991699, + "rewards/margins": 6.060477256774902, + "rewards/rejected": -11.622186660766602, + "step": 9415 + }, + { + "epoch": 1.46, + "learning_rate": 7.24125836402303e-06, + "logits/chosen": -2.588719367980957, + "logits/rejected": -2.7288835048675537, + "logps/chosen": -292.27587890625, + "logps/rejected": -419.94866943359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.081015110015869, + "rewards/margins": 7.985030174255371, + "rewards/rejected": -15.066045761108398, + "step": 9416 + }, + { + "epoch": 1.46, + "learning_rate": 7.240524923491882e-06, + "logits/chosen": -2.8109004497528076, + "logits/rejected": -1.1167323589324951, + "logps/chosen": -364.92889404296875, + "logps/rejected": -345.9961853027344, + "loss": 0.0888, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.33390998840332, + "rewards/margins": 3.534996271133423, + "rewards/rejected": -9.868906021118164, + "step": 9417 + }, + { + "epoch": 1.46, + "learning_rate": 7.239791482960735e-06, + "logits/chosen": -2.623870372772217, + "logits/rejected": -2.9259891510009766, + "logps/chosen": -198.74038696289062, + "logps/rejected": -335.2502746582031, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.78890323638916, + "rewards/margins": 6.0391106605529785, + "rewards/rejected": -11.828014373779297, + "step": 9418 + }, + { + "epoch": 1.46, + "learning_rate": 7.239058042429587e-06, + "logits/chosen": -0.8798978328704834, + "logits/rejected": -2.488398551940918, + "logps/chosen": -178.18685913085938, + "logps/rejected": -513.5303955078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.200216293334961, + "rewards/margins": 8.301121711730957, + "rewards/rejected": -14.501338958740234, + "step": 9419 + }, + { + "epoch": 1.47, + "learning_rate": 7.238324601898439e-06, + "logits/chosen": -2.763960599899292, + "logits/rejected": -1.4384515285491943, + "logps/chosen": -547.9608154296875, + "logps/rejected": -346.9554138183594, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.355921268463135, + "rewards/margins": 4.477115154266357, + "rewards/rejected": -10.833036422729492, + "step": 9420 + }, + { + "epoch": 1.47, + "learning_rate": 7.237591161367291e-06, + "logits/chosen": -1.9181969165802002, + "logits/rejected": -2.7351505756378174, + "logps/chosen": -99.37171936035156, + "logps/rejected": -426.06500244140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9163544178009033, + "rewards/margins": 9.053834915161133, + "rewards/rejected": -12.970189094543457, + "step": 9421 + }, + { + "epoch": 1.47, + "learning_rate": 7.236857720836143e-06, + "logits/chosen": -2.103119134902954, + "logits/rejected": -2.3852946758270264, + "logps/chosen": -224.6450653076172, + "logps/rejected": -346.42449951171875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9763946533203125, + "rewards/margins": 6.423431396484375, + "rewards/rejected": -10.399826049804688, + "step": 9422 + }, + { + "epoch": 1.47, + "learning_rate": 7.2361242803049954e-06, + "logits/chosen": -2.235952854156494, + "logits/rejected": -2.9034409523010254, + "logps/chosen": -107.94042205810547, + "logps/rejected": -316.0068054199219, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.541180610656738, + "rewards/margins": 7.292295455932617, + "rewards/rejected": -12.833477020263672, + "step": 9423 + }, + { + "epoch": 1.47, + "learning_rate": 7.235390839773847e-06, + "logits/chosen": -2.1568078994750977, + "logits/rejected": -2.4130356311798096, + "logps/chosen": -66.83879089355469, + "logps/rejected": -356.739990234375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3499250411987305, + "rewards/margins": 6.475224018096924, + "rewards/rejected": -10.825149536132812, + "step": 9424 + }, + { + "epoch": 1.47, + "learning_rate": 7.234657399242699e-06, + "logits/chosen": -1.7602617740631104, + "logits/rejected": -2.764977216720581, + "logps/chosen": -85.35497283935547, + "logps/rejected": -262.487548828125, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.654224872589111, + "rewards/margins": 5.633913040161133, + "rewards/rejected": -11.288137435913086, + "step": 9425 + }, + { + "epoch": 1.47, + "learning_rate": 7.233923958711551e-06, + "logits/chosen": -2.941704750061035, + "logits/rejected": -2.623640775680542, + "logps/chosen": -518.101806640625, + "logps/rejected": -533.129150390625, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.800668239593506, + "rewards/margins": 7.942325115203857, + "rewards/rejected": -11.742993354797363, + "step": 9426 + }, + { + "epoch": 1.47, + "learning_rate": 7.233190518180405e-06, + "logits/chosen": -2.9193291664123535, + "logits/rejected": -2.942945957183838, + "logps/chosen": -119.96751403808594, + "logps/rejected": -226.88136291503906, + "loss": 0.0539, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.66888427734375, + "rewards/margins": 3.424532651901245, + "rewards/rejected": -8.093417167663574, + "step": 9427 + }, + { + "epoch": 1.47, + "learning_rate": 7.2324570776492565e-06, + "logits/chosen": -1.8519858121871948, + "logits/rejected": -2.8442249298095703, + "logps/chosen": -489.6044616699219, + "logps/rejected": -490.9735107421875, + "loss": 0.0639, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.777986526489258, + "rewards/margins": 3.515667200088501, + "rewards/rejected": -11.29365348815918, + "step": 9428 + }, + { + "epoch": 1.47, + "learning_rate": 7.231723637118108e-06, + "logits/chosen": -2.8253891468048096, + "logits/rejected": -2.1129093170166016, + "logps/chosen": -166.37594604492188, + "logps/rejected": -191.05796813964844, + "loss": 0.5784, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.636947631835938, + "rewards/margins": 0.5747392177581787, + "rewards/rejected": -9.211686134338379, + "step": 9429 + }, + { + "epoch": 1.47, + "learning_rate": 7.23099019658696e-06, + "logits/chosen": -0.8966829180717468, + "logits/rejected": -2.8025941848754883, + "logps/chosen": -125.64411926269531, + "logps/rejected": -602.35791015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.861640930175781, + "rewards/margins": 9.252772331237793, + "rewards/rejected": -15.114413261413574, + "step": 9430 + }, + { + "epoch": 1.47, + "learning_rate": 7.230256756055812e-06, + "logits/chosen": -2.8339784145355225, + "logits/rejected": -2.9257326126098633, + "logps/chosen": -334.72601318359375, + "logps/rejected": -367.5204162597656, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2045204639434814, + "rewards/margins": 6.314509391784668, + "rewards/rejected": -9.51902961730957, + "step": 9431 + }, + { + "epoch": 1.47, + "learning_rate": 7.229523315524664e-06, + "logits/chosen": -2.3550026416778564, + "logits/rejected": -2.617885112762451, + "logps/chosen": -255.46551513671875, + "logps/rejected": -262.77508544921875, + "loss": 0.7861, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.090494632720947, + "rewards/margins": 1.668335199356079, + "rewards/rejected": -7.758829593658447, + "step": 9432 + }, + { + "epoch": 1.47, + "learning_rate": 7.228789874993516e-06, + "logits/chosen": -2.9763519763946533, + "logits/rejected": -2.6198391914367676, + "logps/chosen": -120.74356079101562, + "logps/rejected": -162.0336456298828, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6944689750671387, + "rewards/margins": 6.271358489990234, + "rewards/rejected": -9.965827941894531, + "step": 9433 + }, + { + "epoch": 1.47, + "learning_rate": 7.228056434462368e-06, + "logits/chosen": -1.9565186500549316, + "logits/rejected": -2.594804525375366, + "logps/chosen": -113.74732971191406, + "logps/rejected": -304.4215087890625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7689990997314453, + "rewards/margins": 10.488813400268555, + "rewards/rejected": -13.2578125, + "step": 9434 + }, + { + "epoch": 1.47, + "learning_rate": 7.22732299393122e-06, + "logits/chosen": -2.1481945514678955, + "logits/rejected": -2.7610342502593994, + "logps/chosen": -215.8511505126953, + "logps/rejected": -327.117431640625, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.12846565246582, + "rewards/margins": 5.368556022644043, + "rewards/rejected": -9.497021675109863, + "step": 9435 + }, + { + "epoch": 1.47, + "learning_rate": 7.226589553400073e-06, + "logits/chosen": -2.951539993286133, + "logits/rejected": -2.149179220199585, + "logps/chosen": -1122.57958984375, + "logps/rejected": -574.022705078125, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7852783203125, + "rewards/margins": 4.505464553833008, + "rewards/rejected": -11.290742874145508, + "step": 9436 + }, + { + "epoch": 1.47, + "learning_rate": 7.225856112868925e-06, + "logits/chosen": -2.6754627227783203, + "logits/rejected": -2.708493947982788, + "logps/chosen": -102.88273620605469, + "logps/rejected": -220.56265258789062, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.520262241363525, + "rewards/margins": 7.194404602050781, + "rewards/rejected": -12.714667320251465, + "step": 9437 + }, + { + "epoch": 1.47, + "learning_rate": 7.225122672337777e-06, + "logits/chosen": -2.6873724460601807, + "logits/rejected": -2.6613523960113525, + "logps/chosen": -241.16354370117188, + "logps/rejected": -265.65264892578125, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1441802978515625, + "rewards/margins": 5.748345375061035, + "rewards/rejected": -9.892525672912598, + "step": 9438 + }, + { + "epoch": 1.47, + "learning_rate": 7.224389231806629e-06, + "logits/chosen": -1.1624438762664795, + "logits/rejected": -1.7920011281967163, + "logps/chosen": -310.19049072265625, + "logps/rejected": -571.112060546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.692104339599609, + "rewards/margins": 10.621756553649902, + "rewards/rejected": -15.313861846923828, + "step": 9439 + }, + { + "epoch": 1.47, + "learning_rate": 7.223655791275482e-06, + "logits/chosen": -2.829800605773926, + "logits/rejected": -1.795422911643982, + "logps/chosen": -196.10914611816406, + "logps/rejected": -201.19223022460938, + "loss": 0.1214, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.405633926391602, + "rewards/margins": 3.9466443061828613, + "rewards/rejected": -8.352277755737305, + "step": 9440 + }, + { + "epoch": 1.47, + "learning_rate": 7.2229223507443335e-06, + "logits/chosen": -2.809661865234375, + "logits/rejected": -2.9628195762634277, + "logps/chosen": -214.20870971679688, + "logps/rejected": -391.643310546875, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.873455047607422, + "rewards/margins": 7.449617385864258, + "rewards/rejected": -12.32307243347168, + "step": 9441 + }, + { + "epoch": 1.47, + "learning_rate": 7.222188910213185e-06, + "logits/chosen": -2.8018741607666016, + "logits/rejected": -2.595669746398926, + "logps/chosen": -213.56161499023438, + "logps/rejected": -227.62753295898438, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.825862407684326, + "rewards/margins": 4.774585723876953, + "rewards/rejected": -8.600447654724121, + "step": 9442 + }, + { + "epoch": 1.47, + "learning_rate": 7.221455469682037e-06, + "logits/chosen": -2.0414249897003174, + "logits/rejected": -2.629504442214966, + "logps/chosen": -180.55401611328125, + "logps/rejected": -315.78900146484375, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.744650363922119, + "rewards/margins": 7.689269065856934, + "rewards/rejected": -11.433918952941895, + "step": 9443 + }, + { + "epoch": 1.47, + "learning_rate": 7.220722029150889e-06, + "logits/chosen": -1.8247714042663574, + "logits/rejected": -2.674980878829956, + "logps/chosen": -240.67095947265625, + "logps/rejected": -490.73345947265625, + "loss": 0.3494, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.946861267089844, + "rewards/margins": 7.824285507202148, + "rewards/rejected": -14.771146774291992, + "step": 9444 + }, + { + "epoch": 1.47, + "learning_rate": 7.219988588619743e-06, + "logits/chosen": -1.99093496799469, + "logits/rejected": -2.695829391479492, + "logps/chosen": -285.6983642578125, + "logps/rejected": -527.7715454101562, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.683502197265625, + "rewards/margins": 8.709980964660645, + "rewards/rejected": -14.393482208251953, + "step": 9445 + }, + { + "epoch": 1.47, + "learning_rate": 7.2192551480885946e-06, + "logits/chosen": -1.8212915658950806, + "logits/rejected": -2.7917990684509277, + "logps/chosen": -139.33628845214844, + "logps/rejected": -306.8377685546875, + "loss": 0.2054, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.175409317016602, + "rewards/margins": 5.049160003662109, + "rewards/rejected": -11.224569320678711, + "step": 9446 + }, + { + "epoch": 1.47, + "learning_rate": 7.2185217075574464e-06, + "logits/chosen": -1.5256518125534058, + "logits/rejected": -2.6268064975738525, + "logps/chosen": -239.31446838378906, + "logps/rejected": -282.9271240234375, + "loss": 0.3733, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.823101997375488, + "rewards/margins": 3.4275195598602295, + "rewards/rejected": -10.250621795654297, + "step": 9447 + }, + { + "epoch": 1.47, + "learning_rate": 7.217788267026298e-06, + "logits/chosen": -1.3414639234542847, + "logits/rejected": -2.4711499214172363, + "logps/chosen": -85.60920715332031, + "logps/rejected": -193.88232421875, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.200161457061768, + "rewards/margins": 4.987222671508789, + "rewards/rejected": -9.187383651733398, + "step": 9448 + }, + { + "epoch": 1.47, + "learning_rate": 7.21705482649515e-06, + "logits/chosen": -2.7840702533721924, + "logits/rejected": -1.1006838083267212, + "logps/chosen": -230.9385223388672, + "logps/rejected": -179.6983642578125, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.807809352874756, + "rewards/margins": 5.338852405548096, + "rewards/rejected": -9.146661758422852, + "step": 9449 + }, + { + "epoch": 1.47, + "learning_rate": 7.216321385964002e-06, + "logits/chosen": -1.8979719877243042, + "logits/rejected": -2.2837109565734863, + "logps/chosen": -202.6840362548828, + "logps/rejected": -402.5435485839844, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.749262809753418, + "rewards/margins": 9.017571449279785, + "rewards/rejected": -13.766834259033203, + "step": 9450 + }, + { + "epoch": 1.47, + "learning_rate": 7.215587945432854e-06, + "logits/chosen": -2.07002329826355, + "logits/rejected": -2.969200372695923, + "logps/chosen": -210.20599365234375, + "logps/rejected": -393.4375, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.522985935211182, + "rewards/margins": 5.395007133483887, + "rewards/rejected": -10.91799259185791, + "step": 9451 + }, + { + "epoch": 1.47, + "learning_rate": 7.214854504901706e-06, + "logits/chosen": -2.56453800201416, + "logits/rejected": -1.7093144655227661, + "logps/chosen": -160.62802124023438, + "logps/rejected": -237.91397094726562, + "loss": 0.1278, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.083613872528076, + "rewards/margins": 5.808504104614258, + "rewards/rejected": -9.892118453979492, + "step": 9452 + }, + { + "epoch": 1.47, + "learning_rate": 7.214121064370558e-06, + "logits/chosen": -2.4972009658813477, + "logits/rejected": -3.0378737449645996, + "logps/chosen": -228.021484375, + "logps/rejected": -403.6620788574219, + "loss": 0.0946, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.314760208129883, + "rewards/margins": 3.4284636974334717, + "rewards/rejected": -11.743223190307617, + "step": 9453 + }, + { + "epoch": 1.47, + "learning_rate": 7.213387623839411e-06, + "logits/chosen": -2.8955435752868652, + "logits/rejected": -2.6869399547576904, + "logps/chosen": -883.7597045898438, + "logps/rejected": -850.4454345703125, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.932302951812744, + "rewards/margins": 5.258230686187744, + "rewards/rejected": -12.190533638000488, + "step": 9454 + }, + { + "epoch": 1.47, + "learning_rate": 7.212654183308263e-06, + "logits/chosen": -1.06521475315094, + "logits/rejected": -2.3954572677612305, + "logps/chosen": -169.39431762695312, + "logps/rejected": -452.8570556640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.062103271484375, + "rewards/margins": 10.676525115966797, + "rewards/rejected": -13.738628387451172, + "step": 9455 + }, + { + "epoch": 1.47, + "learning_rate": 7.211920742777115e-06, + "logits/chosen": -2.2805581092834473, + "logits/rejected": -2.9064407348632812, + "logps/chosen": -171.4673309326172, + "logps/rejected": -387.3902587890625, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.178282737731934, + "rewards/margins": 5.638660430908203, + "rewards/rejected": -10.816944122314453, + "step": 9456 + }, + { + "epoch": 1.47, + "learning_rate": 7.211187302245968e-06, + "logits/chosen": -1.6059308052062988, + "logits/rejected": -2.522580862045288, + "logps/chosen": -159.9964599609375, + "logps/rejected": -269.09100341796875, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4539833068847656, + "rewards/margins": 8.094894409179688, + "rewards/rejected": -11.548877716064453, + "step": 9457 + }, + { + "epoch": 1.47, + "learning_rate": 7.21045386171482e-06, + "logits/chosen": -2.837700366973877, + "logits/rejected": -1.5664619207382202, + "logps/chosen": -401.084716796875, + "logps/rejected": -340.60504150390625, + "loss": 0.0359, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.283125400543213, + "rewards/margins": 3.4354119300842285, + "rewards/rejected": -7.718537330627441, + "step": 9458 + }, + { + "epoch": 1.47, + "learning_rate": 7.2097204211836715e-06, + "logits/chosen": -2.9389917850494385, + "logits/rejected": -2.69624924659729, + "logps/chosen": -261.4503173828125, + "logps/rejected": -149.93490600585938, + "loss": 1.0522, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.534748077392578, + "rewards/margins": 1.314753770828247, + "rewards/rejected": -7.849501609802246, + "step": 9459 + }, + { + "epoch": 1.47, + "learning_rate": 7.208986980652523e-06, + "logits/chosen": -1.8517019748687744, + "logits/rejected": -2.0366036891937256, + "logps/chosen": -145.08375549316406, + "logps/rejected": -410.83441162109375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.348877429962158, + "rewards/margins": 6.389638900756836, + "rewards/rejected": -10.738515853881836, + "step": 9460 + }, + { + "epoch": 1.47, + "learning_rate": 7.208253540121375e-06, + "logits/chosen": -2.390241861343384, + "logits/rejected": -2.647595167160034, + "logps/chosen": -325.6315002441406, + "logps/rejected": -466.8841552734375, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.654490947723389, + "rewards/margins": 5.317165374755859, + "rewards/rejected": -10.971656799316406, + "step": 9461 + }, + { + "epoch": 1.47, + "learning_rate": 7.207520099590227e-06, + "logits/chosen": -2.097362995147705, + "logits/rejected": -2.6395490169525146, + "logps/chosen": -114.84207153320312, + "logps/rejected": -268.9704895019531, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.646917819976807, + "rewards/margins": 5.857522964477539, + "rewards/rejected": -10.504441261291504, + "step": 9462 + }, + { + "epoch": 1.47, + "learning_rate": 7.206786659059081e-06, + "logits/chosen": -2.8799283504486084, + "logits/rejected": -2.735644817352295, + "logps/chosen": -749.6944580078125, + "logps/rejected": -536.5185546875, + "loss": 0.048, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.090485572814941, + "rewards/margins": 5.75255012512207, + "rewards/rejected": -13.843035697937012, + "step": 9463 + }, + { + "epoch": 1.47, + "learning_rate": 7.206053218527933e-06, + "logits/chosen": -1.5822645425796509, + "logits/rejected": -3.082045078277588, + "logps/chosen": -211.194091796875, + "logps/rejected": -739.564453125, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.852590084075928, + "rewards/margins": 5.5629143714904785, + "rewards/rejected": -13.415504455566406, + "step": 9464 + }, + { + "epoch": 1.47, + "learning_rate": 7.2053197779967845e-06, + "logits/chosen": -3.0361194610595703, + "logits/rejected": -3.1358859539031982, + "logps/chosen": -127.09284973144531, + "logps/rejected": -236.2543487548828, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.023571968078613, + "rewards/margins": 6.719885349273682, + "rewards/rejected": -11.743457794189453, + "step": 9465 + }, + { + "epoch": 1.47, + "learning_rate": 7.204586337465636e-06, + "logits/chosen": -3.0197536945343018, + "logits/rejected": -2.6945202350616455, + "logps/chosen": -133.98886108398438, + "logps/rejected": -118.05772399902344, + "loss": 3.2621, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.394638061523438, + "rewards/margins": -0.28397703170776367, + "rewards/rejected": -8.110661506652832, + "step": 9466 + }, + { + "epoch": 1.47, + "learning_rate": 7.203852896934488e-06, + "logits/chosen": -2.5827865600585938, + "logits/rejected": -2.641549825668335, + "logps/chosen": -322.942626953125, + "logps/rejected": -404.12969970703125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.027042388916016, + "rewards/margins": 8.102201461791992, + "rewards/rejected": -16.129243850708008, + "step": 9467 + }, + { + "epoch": 1.47, + "learning_rate": 7.20311945640334e-06, + "logits/chosen": -2.714503526687622, + "logits/rejected": -2.9327008724212646, + "logps/chosen": -778.5884399414062, + "logps/rejected": -678.6278076171875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.003649711608887, + "rewards/margins": 5.929562568664551, + "rewards/rejected": -9.933212280273438, + "step": 9468 + }, + { + "epoch": 1.47, + "learning_rate": 7.202386015872192e-06, + "logits/chosen": -3.025252103805542, + "logits/rejected": -3.1008434295654297, + "logps/chosen": -72.44085693359375, + "logps/rejected": -285.7284851074219, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.284093379974365, + "rewards/margins": 8.402447700500488, + "rewards/rejected": -12.686540603637695, + "step": 9469 + }, + { + "epoch": 1.47, + "learning_rate": 7.201652575341044e-06, + "logits/chosen": -1.6015195846557617, + "logits/rejected": -2.8138017654418945, + "logps/chosen": -98.22795104980469, + "logps/rejected": -306.95672607421875, + "loss": 0.3738, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.3547682762146, + "rewards/margins": 4.650101661682129, + "rewards/rejected": -11.00486946105957, + "step": 9470 + }, + { + "epoch": 1.47, + "learning_rate": 7.200919134809896e-06, + "logits/chosen": -1.4185174703598022, + "logits/rejected": -2.0695037841796875, + "logps/chosen": -161.92884826660156, + "logps/rejected": -351.3849792480469, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6103081703186035, + "rewards/margins": 9.411511421203613, + "rewards/rejected": -14.021819114685059, + "step": 9471 + }, + { + "epoch": 1.47, + "learning_rate": 7.200185694278749e-06, + "logits/chosen": -2.7761993408203125, + "logits/rejected": -2.8234057426452637, + "logps/chosen": -295.28851318359375, + "logps/rejected": -341.50518798828125, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7518906593322754, + "rewards/margins": 5.401909828186035, + "rewards/rejected": -8.153800010681152, + "step": 9472 + }, + { + "epoch": 1.47, + "learning_rate": 7.199452253747601e-06, + "logits/chosen": -2.2099714279174805, + "logits/rejected": -2.8447940349578857, + "logps/chosen": -177.83389282226562, + "logps/rejected": -433.542724609375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.393632888793945, + "rewards/margins": 7.595022201538086, + "rewards/rejected": -12.988655090332031, + "step": 9473 + }, + { + "epoch": 1.47, + "learning_rate": 7.198718813216454e-06, + "logits/chosen": -2.3711190223693848, + "logits/rejected": -2.758922576904297, + "logps/chosen": -229.08302307128906, + "logps/rejected": -387.5583190917969, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.115129470825195, + "rewards/margins": 5.438285827636719, + "rewards/rejected": -11.553415298461914, + "step": 9474 + }, + { + "epoch": 1.47, + "learning_rate": 7.197985372685306e-06, + "logits/chosen": -2.0992484092712402, + "logits/rejected": -2.795245885848999, + "logps/chosen": -153.32394409179688, + "logps/rejected": -483.06072998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.855130195617676, + "rewards/margins": 11.524490356445312, + "rewards/rejected": -16.379619598388672, + "step": 9475 + }, + { + "epoch": 1.47, + "learning_rate": 7.197251932154158e-06, + "logits/chosen": -2.495378017425537, + "logits/rejected": -3.000962495803833, + "logps/chosen": -86.79411315917969, + "logps/rejected": -240.10546875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3914947509765625, + "rewards/margins": 6.06904411315918, + "rewards/rejected": -11.460538864135742, + "step": 9476 + }, + { + "epoch": 1.47, + "learning_rate": 7.1965184916230096e-06, + "logits/chosen": -1.9961318969726562, + "logits/rejected": -3.0167627334594727, + "logps/chosen": -123.88067626953125, + "logps/rejected": -440.3227233886719, + "loss": 2.0922, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.642279624938965, + "rewards/margins": 0.8815498352050781, + "rewards/rejected": -9.523829460144043, + "step": 9477 + }, + { + "epoch": 1.47, + "learning_rate": 7.1957850510918614e-06, + "logits/chosen": -2.8302953243255615, + "logits/rejected": -3.0267956256866455, + "logps/chosen": -171.9134521484375, + "logps/rejected": -143.70143127441406, + "loss": 0.7764, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.568634986877441, + "rewards/margins": 3.129605531692505, + "rewards/rejected": -7.698240280151367, + "step": 9478 + }, + { + "epoch": 1.47, + "learning_rate": 7.195051610560713e-06, + "logits/chosen": -2.8490195274353027, + "logits/rejected": -2.073147773742676, + "logps/chosen": -461.80120849609375, + "logps/rejected": -502.787353515625, + "loss": 0.0444, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2064712047576904, + "rewards/margins": 8.535995483398438, + "rewards/rejected": -10.742466926574707, + "step": 9479 + }, + { + "epoch": 1.47, + "learning_rate": 7.194318170029565e-06, + "logits/chosen": -3.043104887008667, + "logits/rejected": -2.393310070037842, + "logps/chosen": -241.3199920654297, + "logps/rejected": -194.71815490722656, + "loss": 0.0716, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1568551063537598, + "rewards/margins": 7.677276611328125, + "rewards/rejected": -9.834131240844727, + "step": 9480 + }, + { + "epoch": 1.47, + "learning_rate": 7.193584729498419e-06, + "logits/chosen": -2.7826943397521973, + "logits/rejected": -2.777390480041504, + "logps/chosen": -315.4327392578125, + "logps/rejected": -562.009033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0421364307403564, + "rewards/margins": 10.304790496826172, + "rewards/rejected": -13.346927642822266, + "step": 9481 + }, + { + "epoch": 1.47, + "learning_rate": 7.192851288967271e-06, + "logits/chosen": -2.2232913970947266, + "logits/rejected": -2.7588722705841064, + "logps/chosen": -142.9007110595703, + "logps/rejected": -377.8920593261719, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.884967803955078, + "rewards/margins": 8.809320449829102, + "rewards/rejected": -12.69428825378418, + "step": 9482 + }, + { + "epoch": 1.47, + "learning_rate": 7.1921178484361225e-06, + "logits/chosen": -2.759571075439453, + "logits/rejected": -1.3216296434402466, + "logps/chosen": -478.6978759765625, + "logps/rejected": -202.50579833984375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.624214172363281, + "rewards/margins": 6.871871471405029, + "rewards/rejected": -13.496086120605469, + "step": 9483 + }, + { + "epoch": 1.47, + "learning_rate": 7.191384407904974e-06, + "logits/chosen": -2.07529616355896, + "logits/rejected": -2.814223527908325, + "logps/chosen": -362.0987854003906, + "logps/rejected": -530.437255859375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8692984580993652, + "rewards/margins": 9.104019165039062, + "rewards/rejected": -12.973318099975586, + "step": 9484 + }, + { + "epoch": 1.48, + "learning_rate": 7.190650967373826e-06, + "logits/chosen": -1.7119778394699097, + "logits/rejected": -2.5829851627349854, + "logps/chosen": -79.45809173583984, + "logps/rejected": -200.8211669921875, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.833771705627441, + "rewards/margins": 5.854683876037598, + "rewards/rejected": -10.688455581665039, + "step": 9485 + }, + { + "epoch": 1.48, + "learning_rate": 7.189917526842678e-06, + "logits/chosen": -2.5137922763824463, + "logits/rejected": -2.996727228164673, + "logps/chosen": -176.30514526367188, + "logps/rejected": -501.4871520996094, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.724599361419678, + "rewards/margins": 8.349715232849121, + "rewards/rejected": -15.07431411743164, + "step": 9486 + }, + { + "epoch": 1.48, + "learning_rate": 7.18918408631153e-06, + "logits/chosen": -1.9715205430984497, + "logits/rejected": -2.8963165283203125, + "logps/chosen": -147.08099365234375, + "logps/rejected": -310.5997314453125, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.437864303588867, + "rewards/margins": 4.733325958251953, + "rewards/rejected": -14.17119026184082, + "step": 9487 + }, + { + "epoch": 1.48, + "learning_rate": 7.188450645780382e-06, + "logits/chosen": -1.850010871887207, + "logits/rejected": -2.1524081230163574, + "logps/chosen": -281.3779296875, + "logps/rejected": -345.6576843261719, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4727044105529785, + "rewards/margins": 7.420652866363525, + "rewards/rejected": -11.893357276916504, + "step": 9488 + }, + { + "epoch": 1.48, + "learning_rate": 7.187717205249235e-06, + "logits/chosen": -1.9212158918380737, + "logits/rejected": -3.069878339767456, + "logps/chosen": -403.1847229003906, + "logps/rejected": -644.2305908203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.660099983215332, + "rewards/margins": 8.424551963806152, + "rewards/rejected": -16.084651947021484, + "step": 9489 + }, + { + "epoch": 1.48, + "learning_rate": 7.186983764718087e-06, + "logits/chosen": -2.5307512283325195, + "logits/rejected": -2.852996826171875, + "logps/chosen": -731.3850708007812, + "logps/rejected": -657.6478271484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.637320041656494, + "rewards/margins": 9.218509674072266, + "rewards/rejected": -11.855829238891602, + "step": 9490 + }, + { + "epoch": 1.48, + "learning_rate": 7.18625032418694e-06, + "logits/chosen": -2.393354654312134, + "logits/rejected": -2.664828062057495, + "logps/chosen": -324.4985046386719, + "logps/rejected": -401.0707702636719, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.899921417236328, + "rewards/margins": 5.736286163330078, + "rewards/rejected": -11.636207580566406, + "step": 9491 + }, + { + "epoch": 1.48, + "learning_rate": 7.185516883655792e-06, + "logits/chosen": -2.462442636489868, + "logits/rejected": -2.013427972793579, + "logps/chosen": -602.8510131835938, + "logps/rejected": -682.71240234375, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.026278495788574, + "rewards/margins": 9.833211898803711, + "rewards/rejected": -15.859491348266602, + "step": 9492 + }, + { + "epoch": 1.48, + "learning_rate": 7.184783443124644e-06, + "logits/chosen": -2.959346055984497, + "logits/rejected": -2.93725323677063, + "logps/chosen": -201.79141235351562, + "logps/rejected": -180.00372314453125, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8348069190979, + "rewards/margins": 3.915637969970703, + "rewards/rejected": -10.750444412231445, + "step": 9493 + }, + { + "epoch": 1.48, + "learning_rate": 7.184050002593496e-06, + "logits/chosen": -2.505174160003662, + "logits/rejected": -2.6348984241485596, + "logps/chosen": -295.58660888671875, + "logps/rejected": -531.8165893554688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.326986312866211, + "rewards/margins": 12.736165046691895, + "rewards/rejected": -17.063152313232422, + "step": 9494 + }, + { + "epoch": 1.48, + "learning_rate": 7.183316562062348e-06, + "logits/chosen": -2.9054670333862305, + "logits/rejected": -2.586005210876465, + "logps/chosen": -609.7041625976562, + "logps/rejected": -563.2476196289062, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.347738742828369, + "rewards/margins": 6.111337661743164, + "rewards/rejected": -12.459075927734375, + "step": 9495 + }, + { + "epoch": 1.48, + "learning_rate": 7.1825831215311995e-06, + "logits/chosen": -2.9503211975097656, + "logits/rejected": -2.1278722286224365, + "logps/chosen": -362.7004699707031, + "logps/rejected": -262.65740966796875, + "loss": 1.1534, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.149188995361328, + "rewards/margins": 1.442107915878296, + "rewards/rejected": -10.591296195983887, + "step": 9496 + }, + { + "epoch": 1.48, + "learning_rate": 7.181849681000051e-06, + "logits/chosen": -2.7389986515045166, + "logits/rejected": -2.091203451156616, + "logps/chosen": -399.2073974609375, + "logps/rejected": -294.041259765625, + "loss": 0.131, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.908442497253418, + "rewards/margins": 2.2519571781158447, + "rewards/rejected": -8.160400390625, + "step": 9497 + }, + { + "epoch": 1.48, + "learning_rate": 7.181116240468903e-06, + "logits/chosen": -1.3787827491760254, + "logits/rejected": -2.748375415802002, + "logps/chosen": -92.58451843261719, + "logps/rejected": -423.5174560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.492068290710449, + "rewards/margins": 11.617767333984375, + "rewards/rejected": -16.10983657836914, + "step": 9498 + }, + { + "epoch": 1.48, + "learning_rate": 7.180382799937757e-06, + "logits/chosen": -2.362842321395874, + "logits/rejected": -2.9320623874664307, + "logps/chosen": -174.89529418945312, + "logps/rejected": -192.28872680664062, + "loss": 1.6994, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.799613952636719, + "rewards/margins": 2.899265766143799, + "rewards/rejected": -10.698880195617676, + "step": 9499 + }, + { + "epoch": 1.48, + "learning_rate": 7.179649359406609e-06, + "logits/chosen": -1.171678900718689, + "logits/rejected": -2.954705238342285, + "logps/chosen": -243.1464080810547, + "logps/rejected": -879.4588623046875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4111199378967285, + "rewards/margins": 10.815789222717285, + "rewards/rejected": -16.226909637451172, + "step": 9500 + }, + { + "epoch": 1.48, + "learning_rate": 7.1789159188754606e-06, + "logits/chosen": -1.783717393875122, + "logits/rejected": -2.9284329414367676, + "logps/chosen": -234.9429931640625, + "logps/rejected": -324.20721435546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.803597450256348, + "rewards/margins": 8.190009117126465, + "rewards/rejected": -12.993606567382812, + "step": 9501 + }, + { + "epoch": 1.48, + "learning_rate": 7.1781824783443124e-06, + "logits/chosen": -2.7561557292938232, + "logits/rejected": -2.285911798477173, + "logps/chosen": -245.35105895996094, + "logps/rejected": -367.96148681640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3457119464874268, + "rewards/margins": 8.336362838745117, + "rewards/rejected": -11.682074546813965, + "step": 9502 + }, + { + "epoch": 1.48, + "learning_rate": 7.177449037813164e-06, + "logits/chosen": -2.6681694984436035, + "logits/rejected": -2.24174427986145, + "logps/chosen": -439.5285949707031, + "logps/rejected": -345.93243408203125, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6811628341674805, + "rewards/margins": 6.600738525390625, + "rewards/rejected": -12.281902313232422, + "step": 9503 + }, + { + "epoch": 1.48, + "learning_rate": 7.176715597282016e-06, + "logits/chosen": -2.6588900089263916, + "logits/rejected": -2.77164626121521, + "logps/chosen": -279.15496826171875, + "logps/rejected": -405.2362365722656, + "loss": 0.8402, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.871216297149658, + "rewards/margins": 4.234862804412842, + "rewards/rejected": -12.1060791015625, + "step": 9504 + }, + { + "epoch": 1.48, + "learning_rate": 7.175982156750868e-06, + "logits/chosen": -3.1160824298858643, + "logits/rejected": -2.4326705932617188, + "logps/chosen": -230.32421875, + "logps/rejected": -156.55899047851562, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.344449520111084, + "rewards/margins": 6.034328937530518, + "rewards/rejected": -8.378778457641602, + "step": 9505 + }, + { + "epoch": 1.48, + "learning_rate": 7.175248716219721e-06, + "logits/chosen": -2.786005735397339, + "logits/rejected": -2.193726062774658, + "logps/chosen": -367.6129455566406, + "logps/rejected": -337.6512451171875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.980480194091797, + "rewards/margins": 10.216798782348633, + "rewards/rejected": -16.19727897644043, + "step": 9506 + }, + { + "epoch": 1.48, + "learning_rate": 7.174515275688573e-06, + "logits/chosen": -2.8528285026550293, + "logits/rejected": -2.1590375900268555, + "logps/chosen": -273.41522216796875, + "logps/rejected": -228.2438201904297, + "loss": 0.0766, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.083150386810303, + "rewards/margins": 3.715393304824829, + "rewards/rejected": -8.798543930053711, + "step": 9507 + }, + { + "epoch": 1.48, + "learning_rate": 7.173781835157426e-06, + "logits/chosen": -2.669283390045166, + "logits/rejected": -2.6189088821411133, + "logps/chosen": -227.10147094726562, + "logps/rejected": -471.327880859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.672553062438965, + "rewards/margins": 11.353104591369629, + "rewards/rejected": -16.025657653808594, + "step": 9508 + }, + { + "epoch": 1.48, + "learning_rate": 7.173048394626278e-06, + "logits/chosen": -2.9376473426818848, + "logits/rejected": -3.0118155479431152, + "logps/chosen": -84.20770263671875, + "logps/rejected": -214.1864013671875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.930278778076172, + "rewards/margins": 7.5602288246154785, + "rewards/rejected": -12.490507125854492, + "step": 9509 + }, + { + "epoch": 1.48, + "learning_rate": 7.17231495409513e-06, + "logits/chosen": -2.9653282165527344, + "logits/rejected": -2.3588194847106934, + "logps/chosen": -515.58203125, + "logps/rejected": -643.925048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8106369972229, + "rewards/margins": 11.678662300109863, + "rewards/rejected": -16.489299774169922, + "step": 9510 + }, + { + "epoch": 1.48, + "learning_rate": 7.171581513563982e-06, + "logits/chosen": -1.1348108053207397, + "logits/rejected": -2.254772901535034, + "logps/chosen": -382.16143798828125, + "logps/rejected": -450.47637939453125, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.188575744628906, + "rewards/margins": 6.735861778259277, + "rewards/rejected": -11.9244384765625, + "step": 9511 + }, + { + "epoch": 1.48, + "learning_rate": 7.170848073032834e-06, + "logits/chosen": -1.4460020065307617, + "logits/rejected": -2.6754612922668457, + "logps/chosen": -111.75137329101562, + "logps/rejected": -405.7791748046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.94687557220459, + "rewards/margins": 8.755399703979492, + "rewards/rejected": -15.702274322509766, + "step": 9512 + }, + { + "epoch": 1.48, + "learning_rate": 7.170114632501686e-06, + "logits/chosen": -0.783801257610321, + "logits/rejected": -2.6823863983154297, + "logps/chosen": -159.207763671875, + "logps/rejected": -603.3661499023438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.087348937988281, + "rewards/margins": 10.537845611572266, + "rewards/rejected": -16.625194549560547, + "step": 9513 + }, + { + "epoch": 1.48, + "learning_rate": 7.1693811919705375e-06, + "logits/chosen": -1.7586902379989624, + "logits/rejected": -2.9040300846099854, + "logps/chosen": -168.119140625, + "logps/rejected": -408.54864501953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5686535835266113, + "rewards/margins": 8.70669174194336, + "rewards/rejected": -12.275344848632812, + "step": 9514 + }, + { + "epoch": 1.48, + "learning_rate": 7.168647751439389e-06, + "logits/chosen": -2.883296489715576, + "logits/rejected": -1.6296082735061646, + "logps/chosen": -358.6085205078125, + "logps/rejected": -285.7718505859375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.219041347503662, + "rewards/margins": 6.529969215393066, + "rewards/rejected": -8.74901008605957, + "step": 9515 + }, + { + "epoch": 1.48, + "learning_rate": 7.167914310908243e-06, + "logits/chosen": -3.003112316131592, + "logits/rejected": -2.3226141929626465, + "logps/chosen": -306.0606689453125, + "logps/rejected": -374.98046875, + "loss": 0.0484, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.635201454162598, + "rewards/margins": 4.558844566345215, + "rewards/rejected": -11.194046020507812, + "step": 9516 + }, + { + "epoch": 1.48, + "learning_rate": 7.167180870377095e-06, + "logits/chosen": -2.8476836681365967, + "logits/rejected": -2.685035228729248, + "logps/chosen": -466.11492919921875, + "logps/rejected": -653.8185424804688, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2686643600463867, + "rewards/margins": 7.7527642250061035, + "rewards/rejected": -11.021429061889648, + "step": 9517 + }, + { + "epoch": 1.48, + "learning_rate": 7.166447429845947e-06, + "logits/chosen": -2.660619020462036, + "logits/rejected": -1.790488839149475, + "logps/chosen": -462.5030822753906, + "logps/rejected": -436.2412414550781, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.080509185791016, + "rewards/margins": 7.802553176879883, + "rewards/rejected": -11.883062362670898, + "step": 9518 + }, + { + "epoch": 1.48, + "learning_rate": 7.165713989314799e-06, + "logits/chosen": -1.033155083656311, + "logits/rejected": -1.6953644752502441, + "logps/chosen": -81.65398406982422, + "logps/rejected": -362.91156005859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.819931983947754, + "rewards/margins": 11.07822322845459, + "rewards/rejected": -15.898155212402344, + "step": 9519 + }, + { + "epoch": 1.48, + "learning_rate": 7.1649805487836505e-06, + "logits/chosen": -2.8300437927246094, + "logits/rejected": -2.9138598442077637, + "logps/chosen": -112.53752899169922, + "logps/rejected": -222.58035278320312, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8839898109436035, + "rewards/margins": 7.242928504943848, + "rewards/rejected": -13.12691879272461, + "step": 9520 + }, + { + "epoch": 1.48, + "learning_rate": 7.164247108252502e-06, + "logits/chosen": -2.870819568634033, + "logits/rejected": -2.3063583374023438, + "logps/chosen": -178.44293212890625, + "logps/rejected": -219.73056030273438, + "loss": 1.834, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.235809326171875, + "rewards/margins": 2.9195005893707275, + "rewards/rejected": -10.15531063079834, + "step": 9521 + }, + { + "epoch": 1.48, + "learning_rate": 7.163513667721354e-06, + "logits/chosen": -2.0856213569641113, + "logits/rejected": -2.380784273147583, + "logps/chosen": -223.49122619628906, + "logps/rejected": -420.6209716796875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.634716033935547, + "rewards/margins": 9.388650894165039, + "rewards/rejected": -14.023366928100586, + "step": 9522 + }, + { + "epoch": 1.48, + "learning_rate": 7.162780227190207e-06, + "logits/chosen": -2.9555811882019043, + "logits/rejected": -2.3234524726867676, + "logps/chosen": -791.2003173828125, + "logps/rejected": -722.6502685546875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.569640159606934, + "rewards/margins": 7.648496627807617, + "rewards/rejected": -14.218137741088867, + "step": 9523 + }, + { + "epoch": 1.48, + "learning_rate": 7.162046786659059e-06, + "logits/chosen": -3.014725685119629, + "logits/rejected": -2.7813222408294678, + "logps/chosen": -178.87490844726562, + "logps/rejected": -241.0225372314453, + "loss": 2.6088, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.5707688331604, + "rewards/margins": 2.5339181423187256, + "rewards/rejected": -10.104686737060547, + "step": 9524 + }, + { + "epoch": 1.48, + "learning_rate": 7.1613133461279124e-06, + "logits/chosen": -2.4327104091644287, + "logits/rejected": -2.8596091270446777, + "logps/chosen": -171.44601440429688, + "logps/rejected": -296.7510986328125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9533965587615967, + "rewards/margins": 10.411312103271484, + "rewards/rejected": -13.36470890045166, + "step": 9525 + }, + { + "epoch": 1.48, + "learning_rate": 7.160579905596764e-06, + "logits/chosen": -2.7161340713500977, + "logits/rejected": -2.9511640071868896, + "logps/chosen": -774.40380859375, + "logps/rejected": -686.3028564453125, + "loss": 0.1623, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.357521057128906, + "rewards/margins": 4.997476100921631, + "rewards/rejected": -10.354997634887695, + "step": 9526 + }, + { + "epoch": 1.48, + "learning_rate": 7.159846465065616e-06, + "logits/chosen": -2.733858346939087, + "logits/rejected": -1.0396583080291748, + "logps/chosen": -452.25201416015625, + "logps/rejected": -322.92877197265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.216623783111572, + "rewards/margins": 8.548479080200195, + "rewards/rejected": -13.76510238647461, + "step": 9527 + }, + { + "epoch": 1.48, + "learning_rate": 7.159113024534468e-06, + "logits/chosen": -2.7523248195648193, + "logits/rejected": -2.7319514751434326, + "logps/chosen": -446.94281005859375, + "logps/rejected": -531.1065063476562, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.707075595855713, + "rewards/margins": 7.5963897705078125, + "rewards/rejected": -12.303464889526367, + "step": 9528 + }, + { + "epoch": 1.48, + "learning_rate": 7.15837958400332e-06, + "logits/chosen": -2.0043540000915527, + "logits/rejected": -2.9064223766326904, + "logps/chosen": -110.69246673583984, + "logps/rejected": -267.56988525390625, + "loss": 2.4921, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.390803813934326, + "rewards/margins": 3.167820453643799, + "rewards/rejected": -9.558624267578125, + "step": 9529 + }, + { + "epoch": 1.48, + "learning_rate": 7.157646143472172e-06, + "logits/chosen": -1.7288405895233154, + "logits/rejected": -2.467226028442383, + "logps/chosen": -184.17037963867188, + "logps/rejected": -670.65478515625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.142354965209961, + "rewards/margins": 11.376016616821289, + "rewards/rejected": -16.51837158203125, + "step": 9530 + }, + { + "epoch": 1.48, + "learning_rate": 7.156912702941024e-06, + "logits/chosen": -1.9861055612564087, + "logits/rejected": -2.781872272491455, + "logps/chosen": -146.36868286132812, + "logps/rejected": -314.8216552734375, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.381858825683594, + "rewards/margins": 6.9855217933654785, + "rewards/rejected": -14.367380142211914, + "step": 9531 + }, + { + "epoch": 1.48, + "learning_rate": 7.1561792624098756e-06, + "logits/chosen": -2.3693907260894775, + "logits/rejected": -2.673844337463379, + "logps/chosen": -286.3995056152344, + "logps/rejected": -360.2689208984375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.889115333557129, + "rewards/margins": 7.152748107910156, + "rewards/rejected": -11.041863441467285, + "step": 9532 + }, + { + "epoch": 1.48, + "learning_rate": 7.1554458218787275e-06, + "logits/chosen": -1.042017936706543, + "logits/rejected": -2.7178854942321777, + "logps/chosen": -108.45026397705078, + "logps/rejected": -244.60794067382812, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.237695693969727, + "rewards/margins": 4.623599529266357, + "rewards/rejected": -9.861295700073242, + "step": 9533 + }, + { + "epoch": 1.48, + "learning_rate": 7.154712381347581e-06, + "logits/chosen": -2.950451374053955, + "logits/rejected": -2.8372395038604736, + "logps/chosen": -205.68753051757812, + "logps/rejected": -309.0938415527344, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.029569625854492, + "rewards/margins": 6.027628421783447, + "rewards/rejected": -12.057197570800781, + "step": 9534 + }, + { + "epoch": 1.48, + "learning_rate": 7.153978940816433e-06, + "logits/chosen": -2.8539512157440186, + "logits/rejected": -2.643794059753418, + "logps/chosen": -317.58172607421875, + "logps/rejected": -339.80120849609375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.936779975891113, + "rewards/margins": 8.102107048034668, + "rewards/rejected": -13.038887023925781, + "step": 9535 + }, + { + "epoch": 1.48, + "learning_rate": 7.153245500285285e-06, + "logits/chosen": -1.7366797924041748, + "logits/rejected": -2.9407780170440674, + "logps/chosen": -195.19927978515625, + "logps/rejected": -494.1880798339844, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.915354251861572, + "rewards/margins": 6.272520542144775, + "rewards/rejected": -11.187874794006348, + "step": 9536 + }, + { + "epoch": 1.48, + "learning_rate": 7.152512059754137e-06, + "logits/chosen": -2.309089422225952, + "logits/rejected": -2.9733245372772217, + "logps/chosen": -273.9974365234375, + "logps/rejected": -517.7821044921875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.01488971710205, + "rewards/margins": 6.205497741699219, + "rewards/rejected": -14.22038745880127, + "step": 9537 + }, + { + "epoch": 1.48, + "learning_rate": 7.1517786192229885e-06, + "logits/chosen": -2.6041994094848633, + "logits/rejected": -2.812685012817383, + "logps/chosen": -61.62262725830078, + "logps/rejected": -239.40310668945312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.199859142303467, + "rewards/margins": 8.908281326293945, + "rewards/rejected": -12.10814094543457, + "step": 9538 + }, + { + "epoch": 1.48, + "learning_rate": 7.15104517869184e-06, + "logits/chosen": -2.8562660217285156, + "logits/rejected": -2.8530266284942627, + "logps/chosen": -99.21340942382812, + "logps/rejected": -290.89306640625, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.614869117736816, + "rewards/margins": 5.5175933837890625, + "rewards/rejected": -11.132462501525879, + "step": 9539 + }, + { + "epoch": 1.48, + "learning_rate": 7.150311738160693e-06, + "logits/chosen": -1.9479612112045288, + "logits/rejected": -2.5863101482391357, + "logps/chosen": -239.29122924804688, + "logps/rejected": -469.07318115234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.420541763305664, + "rewards/margins": 9.497360229492188, + "rewards/rejected": -14.917901992797852, + "step": 9540 + }, + { + "epoch": 1.48, + "learning_rate": 7.149578297629545e-06, + "logits/chosen": -2.8803353309631348, + "logits/rejected": -2.99013614654541, + "logps/chosen": -118.827880859375, + "logps/rejected": -355.4552307128906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.130925178527832, + "rewards/margins": 11.115032196044922, + "rewards/rejected": -15.245957374572754, + "step": 9541 + }, + { + "epoch": 1.48, + "learning_rate": 7.148844857098397e-06, + "logits/chosen": -2.769331693649292, + "logits/rejected": -2.7596497535705566, + "logps/chosen": -161.612548828125, + "logps/rejected": -184.49154663085938, + "loss": 2.1248, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.597966194152832, + "rewards/margins": 1.4355378150939941, + "rewards/rejected": -7.033503532409668, + "step": 9542 + }, + { + "epoch": 1.48, + "learning_rate": 7.1481114165672505e-06, + "logits/chosen": -2.9038963317871094, + "logits/rejected": -2.4868850708007812, + "logps/chosen": -177.04965209960938, + "logps/rejected": -349.359130859375, + "loss": 0.0903, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.485177040100098, + "rewards/margins": 4.362033843994141, + "rewards/rejected": -10.847211837768555, + "step": 9543 + }, + { + "epoch": 1.48, + "learning_rate": 7.147377976036102e-06, + "logits/chosen": -1.2179042100906372, + "logits/rejected": -2.6104018688201904, + "logps/chosen": -71.68376922607422, + "logps/rejected": -322.51861572265625, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.062032222747803, + "rewards/margins": 6.120669364929199, + "rewards/rejected": -12.182701110839844, + "step": 9544 + }, + { + "epoch": 1.48, + "learning_rate": 7.146644535504954e-06, + "logits/chosen": -0.9062026739120483, + "logits/rejected": -2.8049352169036865, + "logps/chosen": -122.16262817382812, + "logps/rejected": -414.3153076171875, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.153779029846191, + "rewards/margins": 6.539633274078369, + "rewards/rejected": -11.693412780761719, + "step": 9545 + }, + { + "epoch": 1.48, + "learning_rate": 7.145911094973806e-06, + "logits/chosen": -1.135074257850647, + "logits/rejected": -1.4636343717575073, + "logps/chosen": -133.5854034423828, + "logps/rejected": -253.9786376953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.386831045150757, + "rewards/margins": 8.629899024963379, + "rewards/rejected": -12.016729354858398, + "step": 9546 + }, + { + "epoch": 1.48, + "learning_rate": 7.145177654442658e-06, + "logits/chosen": -1.569055199623108, + "logits/rejected": -2.907871723175049, + "logps/chosen": -153.1322021484375, + "logps/rejected": -454.406494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.954987525939941, + "rewards/margins": 10.898130416870117, + "rewards/rejected": -15.853116989135742, + "step": 9547 + }, + { + "epoch": 1.48, + "learning_rate": 7.14444421391151e-06, + "logits/chosen": -2.8196403980255127, + "logits/rejected": -2.281200885772705, + "logps/chosen": -340.3321228027344, + "logps/rejected": -250.19680786132812, + "loss": 2.0188, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.382508277893066, + "rewards/margins": 5.303465843200684, + "rewards/rejected": -13.68597412109375, + "step": 9548 + }, + { + "epoch": 1.49, + "learning_rate": 7.143710773380362e-06, + "logits/chosen": -2.4072344303131104, + "logits/rejected": -2.435701847076416, + "logps/chosen": -250.28759765625, + "logps/rejected": -485.921142578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.700043678283691, + "rewards/margins": 8.696966171264648, + "rewards/rejected": -14.39700984954834, + "step": 9549 + }, + { + "epoch": 1.49, + "learning_rate": 7.142977332849214e-06, + "logits/chosen": -0.9619234800338745, + "logits/rejected": -2.38775372505188, + "logps/chosen": -153.85809326171875, + "logps/rejected": -495.0300598144531, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.274907112121582, + "rewards/margins": 8.578226089477539, + "rewards/rejected": -13.853133201599121, + "step": 9550 + }, + { + "epoch": 1.49, + "learning_rate": 7.1422438923180655e-06, + "logits/chosen": -2.997032403945923, + "logits/rejected": -1.7523475885391235, + "logps/chosen": -363.56842041015625, + "logps/rejected": -244.49240112304688, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.431685447692871, + "rewards/margins": 7.0796284675598145, + "rewards/rejected": -12.511314392089844, + "step": 9551 + }, + { + "epoch": 1.49, + "learning_rate": 7.141510451786919e-06, + "logits/chosen": -2.324571132659912, + "logits/rejected": -2.939103126525879, + "logps/chosen": -99.23696899414062, + "logps/rejected": -451.70965576171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1783974170684814, + "rewards/margins": 11.180642127990723, + "rewards/rejected": -14.359039306640625, + "step": 9552 + }, + { + "epoch": 1.49, + "learning_rate": 7.140777011255771e-06, + "logits/chosen": -2.7971813678741455, + "logits/rejected": -2.928960084915161, + "logps/chosen": -199.13742065429688, + "logps/rejected": -264.4873046875, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.17195987701416, + "rewards/margins": 4.283964157104492, + "rewards/rejected": -11.455923080444336, + "step": 9553 + }, + { + "epoch": 1.49, + "learning_rate": 7.140043570724623e-06, + "logits/chosen": -2.3796753883361816, + "logits/rejected": -2.6821258068084717, + "logps/chosen": -395.66644287109375, + "logps/rejected": -379.2397766113281, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.13146448135376, + "rewards/margins": 6.356850624084473, + "rewards/rejected": -12.48831558227539, + "step": 9554 + }, + { + "epoch": 1.49, + "learning_rate": 7.139310130193475e-06, + "logits/chosen": -3.2123942375183105, + "logits/rejected": -2.8166568279266357, + "logps/chosen": -295.66162109375, + "logps/rejected": -185.531982421875, + "loss": 2.4046, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.9237060546875, + "rewards/margins": 4.116818428039551, + "rewards/rejected": -10.04052448272705, + "step": 9555 + }, + { + "epoch": 1.49, + "learning_rate": 7.138576689662327e-06, + "logits/chosen": -3.1311328411102295, + "logits/rejected": -3.142392635345459, + "logps/chosen": -116.1917724609375, + "logps/rejected": -210.33961486816406, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3665542602539062, + "rewards/margins": 6.903794288635254, + "rewards/rejected": -10.270349502563477, + "step": 9556 + }, + { + "epoch": 1.49, + "learning_rate": 7.137843249131179e-06, + "logits/chosen": -2.65899920463562, + "logits/rejected": -1.807350754737854, + "logps/chosen": -215.08995056152344, + "logps/rejected": -140.68637084960938, + "loss": 2.0859, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.728190898895264, + "rewards/margins": -1.9450339078903198, + "rewards/rejected": -4.783156871795654, + "step": 9557 + }, + { + "epoch": 1.49, + "learning_rate": 7.137109808600031e-06, + "logits/chosen": -0.8427697420120239, + "logits/rejected": -2.221048593521118, + "logps/chosen": -144.72137451171875, + "logps/rejected": -341.68212890625, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.174320220947266, + "rewards/margins": 4.571221351623535, + "rewards/rejected": -11.7455415725708, + "step": 9558 + }, + { + "epoch": 1.49, + "learning_rate": 7.136376368068883e-06, + "logits/chosen": -2.9935882091522217, + "logits/rejected": -2.823300838470459, + "logps/chosen": -431.1082458496094, + "logps/rejected": -319.3369140625, + "loss": 0.2223, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.845895290374756, + "rewards/margins": 6.176450252532959, + "rewards/rejected": -10.022345542907715, + "step": 9559 + }, + { + "epoch": 1.49, + "learning_rate": 7.135642927537735e-06, + "logits/chosen": -2.742701768875122, + "logits/rejected": -3.065979242324829, + "logps/chosen": -230.44288635253906, + "logps/rejected": -237.27462768554688, + "loss": 0.1278, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5966591835021973, + "rewards/margins": 2.464641571044922, + "rewards/rejected": -6.061300754547119, + "step": 9560 + }, + { + "epoch": 1.49, + "learning_rate": 7.1349094870065885e-06, + "logits/chosen": -2.0088181495666504, + "logits/rejected": -2.9502062797546387, + "logps/chosen": -108.25326538085938, + "logps/rejected": -472.83221435546875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.346380233764648, + "rewards/margins": 10.202132225036621, + "rewards/rejected": -16.548511505126953, + "step": 9561 + }, + { + "epoch": 1.49, + "learning_rate": 7.13417604647544e-06, + "logits/chosen": -2.5794482231140137, + "logits/rejected": -2.120450496673584, + "logps/chosen": -353.8955078125, + "logps/rejected": -294.17340087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.610606670379639, + "rewards/margins": 11.323844909667969, + "rewards/rejected": -15.934452056884766, + "step": 9562 + }, + { + "epoch": 1.49, + "learning_rate": 7.133442605944292e-06, + "logits/chosen": -1.423927903175354, + "logits/rejected": -2.7695348262786865, + "logps/chosen": -172.67221069335938, + "logps/rejected": -594.3679809570312, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4458465576171875, + "rewards/margins": 7.836956977844238, + "rewards/rejected": -15.282804489135742, + "step": 9563 + }, + { + "epoch": 1.49, + "learning_rate": 7.132709165413144e-06, + "logits/chosen": -1.4770976305007935, + "logits/rejected": -2.725837230682373, + "logps/chosen": -226.42701721191406, + "logps/rejected": -382.68353271484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.062066555023193, + "rewards/margins": 9.824064254760742, + "rewards/rejected": -13.886131286621094, + "step": 9564 + }, + { + "epoch": 1.49, + "learning_rate": 7.131975724881996e-06, + "logits/chosen": -1.8014799356460571, + "logits/rejected": -2.8317456245422363, + "logps/chosen": -119.23480224609375, + "logps/rejected": -343.7087097167969, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.736158847808838, + "rewards/margins": 7.654537200927734, + "rewards/rejected": -11.39069652557373, + "step": 9565 + }, + { + "epoch": 1.49, + "learning_rate": 7.131242284350848e-06, + "logits/chosen": -2.590862512588501, + "logits/rejected": -3.1026928424835205, + "logps/chosen": -202.07586669921875, + "logps/rejected": -446.40216064453125, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.186553955078125, + "rewards/margins": 6.4898834228515625, + "rewards/rejected": -9.676437377929688, + "step": 9566 + }, + { + "epoch": 1.49, + "learning_rate": 7.1305088438197e-06, + "logits/chosen": -2.468050718307495, + "logits/rejected": -2.487618923187256, + "logps/chosen": -197.78106689453125, + "logps/rejected": -226.00453186035156, + "loss": 0.1334, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.560568809509277, + "rewards/margins": 5.027248382568359, + "rewards/rejected": -12.587818145751953, + "step": 9567 + }, + { + "epoch": 1.49, + "learning_rate": 7.129775403288552e-06, + "logits/chosen": -2.7531092166900635, + "logits/rejected": -2.448387622833252, + "logps/chosen": -270.2528076171875, + "logps/rejected": -234.98487854003906, + "loss": 0.1735, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.281863689422607, + "rewards/margins": 5.583091735839844, + "rewards/rejected": -11.864954948425293, + "step": 9568 + }, + { + "epoch": 1.49, + "learning_rate": 7.1290419627574035e-06, + "logits/chosen": -1.5260685682296753, + "logits/rejected": -2.6400632858276367, + "logps/chosen": -130.44522094726562, + "logps/rejected": -494.7520446777344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.250170707702637, + "rewards/margins": 11.255330085754395, + "rewards/rejected": -16.50550079345703, + "step": 9569 + }, + { + "epoch": 1.49, + "learning_rate": 7.128308522226257e-06, + "logits/chosen": -2.770045042037964, + "logits/rejected": -2.3017287254333496, + "logps/chosen": -291.4993591308594, + "logps/rejected": -194.11273193359375, + "loss": 0.1601, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.088687896728516, + "rewards/margins": 3.7689051628112793, + "rewards/rejected": -9.857593536376953, + "step": 9570 + }, + { + "epoch": 1.49, + "learning_rate": 7.127575081695109e-06, + "logits/chosen": -2.625430107116699, + "logits/rejected": -1.4393941164016724, + "logps/chosen": -284.63482666015625, + "logps/rejected": -308.06536865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.884880781173706, + "rewards/margins": 11.684164047241211, + "rewards/rejected": -14.569045066833496, + "step": 9571 + }, + { + "epoch": 1.49, + "learning_rate": 7.126841641163961e-06, + "logits/chosen": -2.8999972343444824, + "logits/rejected": -1.8206931352615356, + "logps/chosen": -592.8101806640625, + "logps/rejected": -377.03656005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8231887817382812, + "rewards/margins": 11.685763359069824, + "rewards/rejected": -14.508952140808105, + "step": 9572 + }, + { + "epoch": 1.49, + "learning_rate": 7.126108200632813e-06, + "logits/chosen": -1.8374967575073242, + "logits/rejected": -1.1972098350524902, + "logps/chosen": -509.9861145019531, + "logps/rejected": -515.4369506835938, + "loss": 0.1968, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.892865180969238, + "rewards/margins": 6.22329568862915, + "rewards/rejected": -11.116161346435547, + "step": 9573 + }, + { + "epoch": 1.49, + "learning_rate": 7.1253747601016655e-06, + "logits/chosen": -1.3327655792236328, + "logits/rejected": -2.759981393814087, + "logps/chosen": -196.13125610351562, + "logps/rejected": -301.9533386230469, + "loss": 0.2581, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.053929328918457, + "rewards/margins": 4.94082498550415, + "rewards/rejected": -11.994754791259766, + "step": 9574 + }, + { + "epoch": 1.49, + "learning_rate": 7.124641319570517e-06, + "logits/chosen": -2.721834182739258, + "logits/rejected": -1.8655356168746948, + "logps/chosen": -498.215576171875, + "logps/rejected": -388.7751770019531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.729234457015991, + "rewards/margins": 8.676473617553711, + "rewards/rejected": -12.405708312988281, + "step": 9575 + }, + { + "epoch": 1.49, + "learning_rate": 7.123907879039369e-06, + "logits/chosen": -2.6858556270599365, + "logits/rejected": -2.87377667427063, + "logps/chosen": -194.66323852539062, + "logps/rejected": -154.908203125, + "loss": 0.796, + "rewards/accuracies": 0.0, + "rewards/chosen": -10.209054946899414, + "rewards/margins": -0.1954641342163086, + "rewards/rejected": -10.013590812683105, + "step": 9576 + }, + { + "epoch": 1.49, + "learning_rate": 7.123174438508221e-06, + "logits/chosen": -2.53580904006958, + "logits/rejected": -1.7986793518066406, + "logps/chosen": -197.26361083984375, + "logps/rejected": -184.41090393066406, + "loss": 0.7783, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.7201409339904785, + "rewards/margins": 3.3891186714172363, + "rewards/rejected": -8.109259605407715, + "step": 9577 + }, + { + "epoch": 1.49, + "learning_rate": 7.122440997977073e-06, + "logits/chosen": -2.441805362701416, + "logits/rejected": -3.0346028804779053, + "logps/chosen": -468.9657897949219, + "logps/rejected": -541.654296875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0342912673950195, + "rewards/margins": 7.378378391265869, + "rewards/rejected": -11.412670135498047, + "step": 9578 + }, + { + "epoch": 1.49, + "learning_rate": 7.1217075574459266e-06, + "logits/chosen": -2.873807668685913, + "logits/rejected": -1.3903683423995972, + "logps/chosen": -893.8900756835938, + "logps/rejected": -354.55657958984375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.062091827392578, + "rewards/margins": 6.30142068862915, + "rewards/rejected": -11.36351203918457, + "step": 9579 + }, + { + "epoch": 1.49, + "learning_rate": 7.1209741169147784e-06, + "logits/chosen": -1.8576252460479736, + "logits/rejected": -2.845759868621826, + "logps/chosen": -308.76959228515625, + "logps/rejected": -494.0228271484375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.202794075012207, + "rewards/margins": 6.623040199279785, + "rewards/rejected": -12.825834274291992, + "step": 9580 + }, + { + "epoch": 1.49, + "learning_rate": 7.12024067638363e-06, + "logits/chosen": -2.507256031036377, + "logits/rejected": -2.811352491378784, + "logps/chosen": -688.0827026367188, + "logps/rejected": -571.822509765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3572492599487305, + "rewards/margins": 9.657374382019043, + "rewards/rejected": -14.014623641967773, + "step": 9581 + }, + { + "epoch": 1.49, + "learning_rate": 7.119507235852482e-06, + "logits/chosen": -2.460353136062622, + "logits/rejected": -2.7245492935180664, + "logps/chosen": -91.92781066894531, + "logps/rejected": -318.63677978515625, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.877050876617432, + "rewards/margins": 6.958751678466797, + "rewards/rejected": -11.835803031921387, + "step": 9582 + }, + { + "epoch": 1.49, + "learning_rate": 7.118773795321334e-06, + "logits/chosen": -1.7640408277511597, + "logits/rejected": -2.7901177406311035, + "logps/chosen": -296.4993591308594, + "logps/rejected": -454.33880615234375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.367933750152588, + "rewards/margins": 6.517299175262451, + "rewards/rejected": -9.885232925415039, + "step": 9583 + }, + { + "epoch": 1.49, + "learning_rate": 7.118040354790186e-06, + "logits/chosen": -3.017245054244995, + "logits/rejected": -2.260248899459839, + "logps/chosen": -291.716064453125, + "logps/rejected": -282.9494934082031, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.025925397872925, + "rewards/margins": 7.693572044372559, + "rewards/rejected": -10.719497680664062, + "step": 9584 + }, + { + "epoch": 1.49, + "learning_rate": 7.117306914259038e-06, + "logits/chosen": -3.158245325088501, + "logits/rejected": -2.3384463787078857, + "logps/chosen": -222.71084594726562, + "logps/rejected": -248.1090850830078, + "loss": 0.2985, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3278021812438965, + "rewards/margins": 4.737803936004639, + "rewards/rejected": -10.065606117248535, + "step": 9585 + }, + { + "epoch": 1.49, + "learning_rate": 7.11657347372789e-06, + "logits/chosen": -1.939670443534851, + "logits/rejected": -2.947218179702759, + "logps/chosen": -305.5538330078125, + "logps/rejected": -389.26129150390625, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1160783767700195, + "rewards/margins": 6.481211185455322, + "rewards/rejected": -11.5972900390625, + "step": 9586 + }, + { + "epoch": 1.49, + "learning_rate": 7.115840033196742e-06, + "logits/chosen": -2.7865309715270996, + "logits/rejected": -2.90797758102417, + "logps/chosen": -130.62588500976562, + "logps/rejected": -402.260009765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.32134485244751, + "rewards/margins": 9.582345962524414, + "rewards/rejected": -14.903691291809082, + "step": 9587 + }, + { + "epoch": 1.49, + "learning_rate": 7.115106592665595e-06, + "logits/chosen": -2.058626890182495, + "logits/rejected": -2.7305729389190674, + "logps/chosen": -129.5113525390625, + "logps/rejected": -457.6976318359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3297500610351562, + "rewards/margins": 12.29671859741211, + "rewards/rejected": -15.626468658447266, + "step": 9588 + }, + { + "epoch": 1.49, + "learning_rate": 7.114373152134447e-06, + "logits/chosen": -2.9429407119750977, + "logits/rejected": -2.8941285610198975, + "logps/chosen": -244.7895965576172, + "logps/rejected": -532.4618530273438, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1737446784973145, + "rewards/margins": 8.40965461730957, + "rewards/rejected": -12.583398818969727, + "step": 9589 + }, + { + "epoch": 1.49, + "learning_rate": 7.113639711603299e-06, + "logits/chosen": -2.771036148071289, + "logits/rejected": -2.4118921756744385, + "logps/chosen": -443.889892578125, + "logps/rejected": -326.1600341796875, + "loss": 0.2135, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.429448127746582, + "rewards/margins": 2.886538028717041, + "rewards/rejected": -8.315986633300781, + "step": 9590 + }, + { + "epoch": 1.49, + "learning_rate": 7.112906271072152e-06, + "logits/chosen": -2.92293643951416, + "logits/rejected": -2.4845235347747803, + "logps/chosen": -225.0456085205078, + "logps/rejected": -259.09588623046875, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.186862468719482, + "rewards/margins": 4.608770847320557, + "rewards/rejected": -10.795633316040039, + "step": 9591 + }, + { + "epoch": 1.49, + "learning_rate": 7.1121728305410035e-06, + "logits/chosen": -2.88590145111084, + "logits/rejected": -2.9590370655059814, + "logps/chosen": -407.49554443359375, + "logps/rejected": -340.76483154296875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.98714542388916, + "rewards/margins": 6.25477933883667, + "rewards/rejected": -12.241924285888672, + "step": 9592 + }, + { + "epoch": 1.49, + "learning_rate": 7.111439390009855e-06, + "logits/chosen": -2.8612120151519775, + "logits/rejected": -2.7492659091949463, + "logps/chosen": -521.385009765625, + "logps/rejected": -314.8288269042969, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.650275230407715, + "rewards/margins": 8.292350769042969, + "rewards/rejected": -12.942625999450684, + "step": 9593 + }, + { + "epoch": 1.49, + "learning_rate": 7.110705949478707e-06, + "logits/chosen": -2.8519718647003174, + "logits/rejected": -2.0521562099456787, + "logps/chosen": -341.2127685546875, + "logps/rejected": -356.6026916503906, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.862288475036621, + "rewards/margins": 7.819654941558838, + "rewards/rejected": -12.681943893432617, + "step": 9594 + }, + { + "epoch": 1.49, + "learning_rate": 7.109972508947559e-06, + "logits/chosen": -2.630736827850342, + "logits/rejected": -1.2407435178756714, + "logps/chosen": -492.3858337402344, + "logps/rejected": -372.08648681640625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.117779731750488, + "rewards/margins": 6.36989688873291, + "rewards/rejected": -10.487676620483398, + "step": 9595 + }, + { + "epoch": 1.49, + "learning_rate": 7.109239068416411e-06, + "logits/chosen": -2.457871913909912, + "logits/rejected": -2.3398358821868896, + "logps/chosen": -184.2452392578125, + "logps/rejected": -306.9294738769531, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.393962860107422, + "rewards/margins": 6.009661674499512, + "rewards/rejected": -12.403624534606934, + "step": 9596 + }, + { + "epoch": 1.49, + "learning_rate": 7.108505627885265e-06, + "logits/chosen": -2.1549806594848633, + "logits/rejected": -2.6947505474090576, + "logps/chosen": -304.36053466796875, + "logps/rejected": -289.5247802734375, + "loss": 2.0802, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.191714763641357, + "rewards/margins": 0.9850714206695557, + "rewards/rejected": -7.176786422729492, + "step": 9597 + }, + { + "epoch": 1.49, + "learning_rate": 7.1077721873541165e-06, + "logits/chosen": -2.969489336013794, + "logits/rejected": -2.1392364501953125, + "logps/chosen": -324.2648010253906, + "logps/rejected": -322.9695739746094, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.62556791305542, + "rewards/margins": 9.87439250946045, + "rewards/rejected": -16.499961853027344, + "step": 9598 + }, + { + "epoch": 1.49, + "learning_rate": 7.107038746822968e-06, + "logits/chosen": -0.9823448061943054, + "logits/rejected": -2.736358880996704, + "logps/chosen": -112.56599426269531, + "logps/rejected": -574.380615234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.946214199066162, + "rewards/margins": 11.585460662841797, + "rewards/rejected": -15.531675338745117, + "step": 9599 + }, + { + "epoch": 1.49, + "learning_rate": 7.10630530629182e-06, + "logits/chosen": -1.2838112115859985, + "logits/rejected": -2.6158554553985596, + "logps/chosen": -134.86483764648438, + "logps/rejected": -451.9465026855469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.081844329833984, + "rewards/margins": 9.50149154663086, + "rewards/rejected": -14.583335876464844, + "step": 9600 + }, + { + "epoch": 1.49, + "learning_rate": 7.105571865760672e-06, + "logits/chosen": -1.4338617324829102, + "logits/rejected": -1.810601830482483, + "logps/chosen": -71.25135040283203, + "logps/rejected": -317.4181823730469, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.86339807510376, + "rewards/margins": 5.199726104736328, + "rewards/rejected": -10.06312370300293, + "step": 9601 + }, + { + "epoch": 1.49, + "learning_rate": 7.104838425229524e-06, + "logits/chosen": -2.776132106781006, + "logits/rejected": -3.14243221282959, + "logps/chosen": -62.216766357421875, + "logps/rejected": -231.06295776367188, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.542191505432129, + "rewards/margins": 5.65871524810791, + "rewards/rejected": -11.200906753540039, + "step": 9602 + }, + { + "epoch": 1.49, + "learning_rate": 7.104104984698376e-06, + "logits/chosen": -0.9746488928794861, + "logits/rejected": -2.190640449523926, + "logps/chosen": -230.95596313476562, + "logps/rejected": -533.247314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.093510627746582, + "rewards/margins": 11.518467903137207, + "rewards/rejected": -16.61197853088379, + "step": 9603 + }, + { + "epoch": 1.49, + "learning_rate": 7.103371544167228e-06, + "logits/chosen": -2.713146448135376, + "logits/rejected": -1.934486985206604, + "logps/chosen": -435.0946044921875, + "logps/rejected": -330.1561584472656, + "loss": 2.0579, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.380833625793457, + "rewards/margins": 2.7320971488952637, + "rewards/rejected": -9.112930297851562, + "step": 9604 + }, + { + "epoch": 1.49, + "learning_rate": 7.102638103636081e-06, + "logits/chosen": -2.717132091522217, + "logits/rejected": -2.905773162841797, + "logps/chosen": -394.18951416015625, + "logps/rejected": -558.3378295898438, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.803199768066406, + "rewards/margins": 5.974451065063477, + "rewards/rejected": -12.777650833129883, + "step": 9605 + }, + { + "epoch": 1.49, + "learning_rate": 7.101904663104933e-06, + "logits/chosen": -2.8135550022125244, + "logits/rejected": -2.5018162727355957, + "logps/chosen": -791.091796875, + "logps/rejected": -791.3375244140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.373984336853027, + "rewards/margins": 10.268633842468262, + "rewards/rejected": -15.642618179321289, + "step": 9606 + }, + { + "epoch": 1.49, + "learning_rate": 7.101171222573785e-06, + "logits/chosen": -2.9451515674591064, + "logits/rejected": -1.2754606008529663, + "logps/chosen": -803.3653564453125, + "logps/rejected": -680.7133178710938, + "loss": 1.0463, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.920179843902588, + "rewards/margins": 1.1713929176330566, + "rewards/rejected": -9.091572761535645, + "step": 9607 + }, + { + "epoch": 1.49, + "learning_rate": 7.100437782042638e-06, + "logits/chosen": -2.1322519779205322, + "logits/rejected": -2.8925817012786865, + "logps/chosen": -94.13385009765625, + "logps/rejected": -364.4830627441406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6623446941375732, + "rewards/margins": 10.870386123657227, + "rewards/rejected": -13.532730102539062, + "step": 9608 + }, + { + "epoch": 1.49, + "learning_rate": 7.09970434151149e-06, + "logits/chosen": -2.9818313121795654, + "logits/rejected": -2.971193313598633, + "logps/chosen": -754.5697631835938, + "logps/rejected": -423.0050048828125, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.622727870941162, + "rewards/margins": 4.908585071563721, + "rewards/rejected": -11.531312942504883, + "step": 9609 + }, + { + "epoch": 1.49, + "learning_rate": 7.0989709009803416e-06, + "logits/chosen": -2.558164119720459, + "logits/rejected": -2.931983709335327, + "logps/chosen": -137.91732788085938, + "logps/rejected": -181.2430877685547, + "loss": 0.1364, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.692805290222168, + "rewards/margins": 4.245322227478027, + "rewards/rejected": -12.938127517700195, + "step": 9610 + }, + { + "epoch": 1.49, + "learning_rate": 7.0982374604491934e-06, + "logits/chosen": -2.3906924724578857, + "logits/rejected": -2.6515092849731445, + "logps/chosen": -242.91650390625, + "logps/rejected": -242.0197296142578, + "loss": 1.7014, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.825983047485352, + "rewards/margins": 1.3933048248291016, + "rewards/rejected": -8.219287872314453, + "step": 9611 + }, + { + "epoch": 1.49, + "learning_rate": 7.097504019918045e-06, + "logits/chosen": -2.552727222442627, + "logits/rejected": -3.065150260925293, + "logps/chosen": -108.78793334960938, + "logps/rejected": -278.8377685546875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.327171802520752, + "rewards/margins": 8.290972709655762, + "rewards/rejected": -13.618144989013672, + "step": 9612 + }, + { + "epoch": 1.5, + "learning_rate": 7.096770579386897e-06, + "logits/chosen": -0.9490050077438354, + "logits/rejected": -2.6696791648864746, + "logps/chosen": -106.68751525878906, + "logps/rejected": -479.2646484375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.019892692565918, + "rewards/margins": 11.115238189697266, + "rewards/rejected": -15.135129928588867, + "step": 9613 + }, + { + "epoch": 1.5, + "learning_rate": 7.096037138855751e-06, + "logits/chosen": -1.79232656955719, + "logits/rejected": -2.890162706375122, + "logps/chosen": -122.48683166503906, + "logps/rejected": -372.75067138671875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.337585926055908, + "rewards/margins": 7.072044372558594, + "rewards/rejected": -11.409629821777344, + "step": 9614 + }, + { + "epoch": 1.5, + "learning_rate": 7.095303698324603e-06, + "logits/chosen": -2.436523914337158, + "logits/rejected": -2.8920981884002686, + "logps/chosen": -90.4173355102539, + "logps/rejected": -206.61708068847656, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.348722457885742, + "rewards/margins": 5.868027687072754, + "rewards/rejected": -12.216750144958496, + "step": 9615 + }, + { + "epoch": 1.5, + "learning_rate": 7.0945702577934545e-06, + "logits/chosen": -2.674367666244507, + "logits/rejected": -3.0549752712249756, + "logps/chosen": -743.4620971679688, + "logps/rejected": -934.7080078125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.943487167358398, + "rewards/margins": 7.211118698120117, + "rewards/rejected": -12.154605865478516, + "step": 9616 + }, + { + "epoch": 1.5, + "learning_rate": 7.093836817262306e-06, + "logits/chosen": -2.6781535148620605, + "logits/rejected": -2.409864664077759, + "logps/chosen": -228.0070037841797, + "logps/rejected": -407.52801513671875, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.900995254516602, + "rewards/margins": 7.573215484619141, + "rewards/rejected": -12.474210739135742, + "step": 9617 + }, + { + "epoch": 1.5, + "learning_rate": 7.093103376731158e-06, + "logits/chosen": -0.7735977172851562, + "logits/rejected": -2.252668619155884, + "logps/chosen": -161.4505157470703, + "logps/rejected": -379.8128662109375, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1972551345825195, + "rewards/margins": 6.4084792137146, + "rewards/rejected": -13.605733871459961, + "step": 9618 + }, + { + "epoch": 1.5, + "learning_rate": 7.09236993620001e-06, + "logits/chosen": -2.970994472503662, + "logits/rejected": -2.6412577629089355, + "logps/chosen": -194.91976928710938, + "logps/rejected": -279.01025390625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1923398971557617, + "rewards/margins": 8.216291427612305, + "rewards/rejected": -11.408631324768066, + "step": 9619 + }, + { + "epoch": 1.5, + "learning_rate": 7.091636495668862e-06, + "logits/chosen": -2.942774534225464, + "logits/rejected": -1.596557378768921, + "logps/chosen": -364.669921875, + "logps/rejected": -183.18048095703125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.705230712890625, + "rewards/margins": 8.871357917785645, + "rewards/rejected": -10.57658863067627, + "step": 9620 + }, + { + "epoch": 1.5, + "learning_rate": 7.090903055137714e-06, + "logits/chosen": -1.939842700958252, + "logits/rejected": -2.5000696182250977, + "logps/chosen": -324.046875, + "logps/rejected": -541.4073486328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.482505798339844, + "rewards/margins": 11.997532844543457, + "rewards/rejected": -17.480037689208984, + "step": 9621 + }, + { + "epoch": 1.5, + "learning_rate": 7.090169614606566e-06, + "logits/chosen": -2.8455116748809814, + "logits/rejected": -2.913449287414551, + "logps/chosen": -179.705322265625, + "logps/rejected": -277.548095703125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5560173988342285, + "rewards/margins": 6.568748474121094, + "rewards/rejected": -9.124765396118164, + "step": 9622 + }, + { + "epoch": 1.5, + "learning_rate": 7.089436174075419e-06, + "logits/chosen": -2.5991132259368896, + "logits/rejected": -2.983859062194824, + "logps/chosen": -442.8854675292969, + "logps/rejected": -505.6572265625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5500786304473877, + "rewards/margins": 6.857656002044678, + "rewards/rejected": -9.407734870910645, + "step": 9623 + }, + { + "epoch": 1.5, + "learning_rate": 7.088702733544271e-06, + "logits/chosen": -1.8921291828155518, + "logits/rejected": -2.6067473888397217, + "logps/chosen": -242.29714965820312, + "logps/rejected": -328.6980285644531, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.997348785400391, + "rewards/margins": 6.093329906463623, + "rewards/rejected": -11.090679168701172, + "step": 9624 + }, + { + "epoch": 1.5, + "learning_rate": 7.087969293013124e-06, + "logits/chosen": -0.7316649556159973, + "logits/rejected": -2.6544485092163086, + "logps/chosen": -106.64652252197266, + "logps/rejected": -430.2372741699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.315189838409424, + "rewards/margins": 10.520390510559082, + "rewards/rejected": -16.83557891845703, + "step": 9625 + }, + { + "epoch": 1.5, + "learning_rate": 7.087235852481976e-06, + "logits/chosen": -2.3236172199249268, + "logits/rejected": -2.989259719848633, + "logps/chosen": -121.99960327148438, + "logps/rejected": -303.5502624511719, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.411869049072266, + "rewards/margins": 6.456508636474609, + "rewards/rejected": -12.868377685546875, + "step": 9626 + }, + { + "epoch": 1.5, + "learning_rate": 7.086502411950828e-06, + "logits/chosen": -1.316118597984314, + "logits/rejected": -2.14146089553833, + "logps/chosen": -123.96702575683594, + "logps/rejected": -330.00494384765625, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.397976875305176, + "rewards/margins": 7.556942939758301, + "rewards/rejected": -13.954919815063477, + "step": 9627 + }, + { + "epoch": 1.5, + "learning_rate": 7.08576897141968e-06, + "logits/chosen": -1.40089750289917, + "logits/rejected": -1.7718842029571533, + "logps/chosen": -331.6496887207031, + "logps/rejected": -224.16761779785156, + "loss": 0.2067, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.867650508880615, + "rewards/margins": 3.686105251312256, + "rewards/rejected": -9.553755760192871, + "step": 9628 + }, + { + "epoch": 1.5, + "learning_rate": 7.0850355308885315e-06, + "logits/chosen": -1.8837530612945557, + "logits/rejected": -2.8671422004699707, + "logps/chosen": -239.45404052734375, + "logps/rejected": -440.40228271484375, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.598197937011719, + "rewards/margins": 5.3564958572387695, + "rewards/rejected": -9.954693794250488, + "step": 9629 + }, + { + "epoch": 1.5, + "learning_rate": 7.084302090357383e-06, + "logits/chosen": -2.6382083892822266, + "logits/rejected": -2.991234064102173, + "logps/chosen": -71.03742980957031, + "logps/rejected": -203.40826416015625, + "loss": 0.0666, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.610568046569824, + "rewards/margins": 4.142486572265625, + "rewards/rejected": -9.75305461883545, + "step": 9630 + }, + { + "epoch": 1.5, + "learning_rate": 7.083568649826235e-06, + "logits/chosen": -2.3737831115722656, + "logits/rejected": -2.7510695457458496, + "logps/chosen": -267.75762939453125, + "logps/rejected": -440.2485656738281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2350058555603027, + "rewards/margins": 9.193819046020508, + "rewards/rejected": -12.428825378417969, + "step": 9631 + }, + { + "epoch": 1.5, + "learning_rate": 7.082835209295089e-06, + "logits/chosen": -1.4074803590774536, + "logits/rejected": -2.6708056926727295, + "logps/chosen": -376.177490234375, + "logps/rejected": -434.4566345214844, + "loss": 2.6023, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.367935180664062, + "rewards/margins": 1.1431996822357178, + "rewards/rejected": -11.51113510131836, + "step": 9632 + }, + { + "epoch": 1.5, + "learning_rate": 7.082101768763941e-06, + "logits/chosen": -2.3329293727874756, + "logits/rejected": -3.007438898086548, + "logps/chosen": -59.71739959716797, + "logps/rejected": -296.8448791503906, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.009961128234863, + "rewards/margins": 6.3573455810546875, + "rewards/rejected": -11.36730670928955, + "step": 9633 + }, + { + "epoch": 1.5, + "learning_rate": 7.0813683282327926e-06, + "logits/chosen": -2.841036558151245, + "logits/rejected": -3.0519115924835205, + "logps/chosen": -649.7852783203125, + "logps/rejected": -743.6268310546875, + "loss": 0.0329, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2568359375, + "rewards/margins": 3.777426242828369, + "rewards/rejected": -10.034261703491211, + "step": 9634 + }, + { + "epoch": 1.5, + "learning_rate": 7.0806348877016444e-06, + "logits/chosen": -1.5950603485107422, + "logits/rejected": -2.7278833389282227, + "logps/chosen": -186.14108276367188, + "logps/rejected": -409.7513427734375, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.155318260192871, + "rewards/margins": 6.051400184631348, + "rewards/rejected": -11.206718444824219, + "step": 9635 + }, + { + "epoch": 1.5, + "learning_rate": 7.079901447170496e-06, + "logits/chosen": -2.3152527809143066, + "logits/rejected": -2.875298500061035, + "logps/chosen": -512.3118896484375, + "logps/rejected": -510.2882995605469, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.822314739227295, + "rewards/margins": 6.564480781555176, + "rewards/rejected": -10.386795043945312, + "step": 9636 + }, + { + "epoch": 1.5, + "learning_rate": 7.079168006639348e-06, + "logits/chosen": -1.4689947366714478, + "logits/rejected": -2.6871862411499023, + "logps/chosen": -242.81532287597656, + "logps/rejected": -487.92120361328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5701565742492676, + "rewards/margins": 9.662300109863281, + "rewards/rejected": -13.232457160949707, + "step": 9637 + }, + { + "epoch": 1.5, + "learning_rate": 7.0784345661082e-06, + "logits/chosen": -2.0049901008605957, + "logits/rejected": -2.7739410400390625, + "logps/chosen": -293.7643127441406, + "logps/rejected": -621.6653442382812, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5341386795043945, + "rewards/margins": 10.311314582824707, + "rewards/rejected": -14.845453262329102, + "step": 9638 + }, + { + "epoch": 1.5, + "learning_rate": 7.077701125577052e-06, + "logits/chosen": -2.807795763015747, + "logits/rejected": -3.0150768756866455, + "logps/chosen": -141.7906036376953, + "logps/rejected": -277.3856201171875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.499170303344727, + "rewards/margins": 6.624291896820068, + "rewards/rejected": -13.123461723327637, + "step": 9639 + }, + { + "epoch": 1.5, + "learning_rate": 7.076967685045904e-06, + "logits/chosen": -2.977219820022583, + "logits/rejected": -2.0168614387512207, + "logps/chosen": -238.59176635742188, + "logps/rejected": -313.0973815917969, + "loss": 0.1179, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.957160472869873, + "rewards/margins": 5.714996337890625, + "rewards/rejected": -12.672157287597656, + "step": 9640 + }, + { + "epoch": 1.5, + "learning_rate": 7.076234244514757e-06, + "logits/chosen": -3.062321424484253, + "logits/rejected": -2.721865177154541, + "logps/chosen": -130.1698455810547, + "logps/rejected": -234.61489868164062, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.08105731010437, + "rewards/margins": 5.831762313842773, + "rewards/rejected": -7.912819862365723, + "step": 9641 + }, + { + "epoch": 1.5, + "learning_rate": 7.07550080398361e-06, + "logits/chosen": -1.2926567792892456, + "logits/rejected": -2.587808847427368, + "logps/chosen": -132.20281982421875, + "logps/rejected": -272.1650390625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.317996978759766, + "rewards/margins": 8.053182601928711, + "rewards/rejected": -14.371179580688477, + "step": 9642 + }, + { + "epoch": 1.5, + "learning_rate": 7.074767363452462e-06, + "logits/chosen": -2.40187406539917, + "logits/rejected": -2.987355947494507, + "logps/chosen": -818.69140625, + "logps/rejected": -768.5380859375, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.390112400054932, + "rewards/margins": 6.0845489501953125, + "rewards/rejected": -13.474660873413086, + "step": 9643 + }, + { + "epoch": 1.5, + "learning_rate": 7.074033922921314e-06, + "logits/chosen": -3.1128885746002197, + "logits/rejected": -2.9218547344207764, + "logps/chosen": -126.29412841796875, + "logps/rejected": -181.58131408691406, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.807796478271484, + "rewards/margins": 5.303474426269531, + "rewards/rejected": -11.111270904541016, + "step": 9644 + }, + { + "epoch": 1.5, + "learning_rate": 7.073300482390166e-06, + "logits/chosen": -1.657045841217041, + "logits/rejected": -2.7928426265716553, + "logps/chosen": -321.9183349609375, + "logps/rejected": -377.24359130859375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.144996643066406, + "rewards/margins": 7.304610252380371, + "rewards/rejected": -12.449607849121094, + "step": 9645 + }, + { + "epoch": 1.5, + "learning_rate": 7.072567041859018e-06, + "logits/chosen": -2.2304866313934326, + "logits/rejected": -2.4354727268218994, + "logps/chosen": -494.26715087890625, + "logps/rejected": -417.86102294921875, + "loss": 1.2274, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.005351066589355, + "rewards/margins": 4.846685886383057, + "rewards/rejected": -12.85203742980957, + "step": 9646 + }, + { + "epoch": 1.5, + "learning_rate": 7.0718336013278695e-06, + "logits/chosen": -1.7035151720046997, + "logits/rejected": -2.6390697956085205, + "logps/chosen": -444.0430908203125, + "logps/rejected": -359.9547119140625, + "loss": 0.5584, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.522325038909912, + "rewards/margins": 5.429106712341309, + "rewards/rejected": -9.951431274414062, + "step": 9647 + }, + { + "epoch": 1.5, + "learning_rate": 7.071100160796721e-06, + "logits/chosen": -2.8360543251037598, + "logits/rejected": -2.795102834701538, + "logps/chosen": -475.80194091796875, + "logps/rejected": -518.1698608398438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.008693695068359, + "rewards/margins": 10.242775917053223, + "rewards/rejected": -14.251468658447266, + "step": 9648 + }, + { + "epoch": 1.5, + "learning_rate": 7.070366720265573e-06, + "logits/chosen": -2.5943474769592285, + "logits/rejected": -1.6419605016708374, + "logps/chosen": -698.85498046875, + "logps/rejected": -506.2851867675781, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.703863620758057, + "rewards/margins": 7.2229204177856445, + "rewards/rejected": -11.92678451538086, + "step": 9649 + }, + { + "epoch": 1.5, + "learning_rate": 7.069633279734426e-06, + "logits/chosen": -2.6619224548339844, + "logits/rejected": -2.9221668243408203, + "logps/chosen": -376.5069274902344, + "logps/rejected": -469.67340087890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.335470676422119, + "rewards/margins": 9.286422729492188, + "rewards/rejected": -13.621892929077148, + "step": 9650 + }, + { + "epoch": 1.5, + "learning_rate": 7.068899839203278e-06, + "logits/chosen": -2.5431911945343018, + "logits/rejected": -2.8030753135681152, + "logps/chosen": -61.46873092651367, + "logps/rejected": -323.8506774902344, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6078386306762695, + "rewards/margins": 5.525917053222656, + "rewards/rejected": -10.133755683898926, + "step": 9651 + }, + { + "epoch": 1.5, + "learning_rate": 7.068166398672131e-06, + "logits/chosen": -2.90712308883667, + "logits/rejected": -2.8756561279296875, + "logps/chosen": -340.8031921386719, + "logps/rejected": -447.258544921875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.329312324523926, + "rewards/margins": 8.828619956970215, + "rewards/rejected": -11.15793228149414, + "step": 9652 + }, + { + "epoch": 1.5, + "learning_rate": 7.0674329581409825e-06, + "logits/chosen": -2.83548641204834, + "logits/rejected": -2.0263755321502686, + "logps/chosen": -520.8734130859375, + "logps/rejected": -476.1340637207031, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2238922119140625, + "rewards/margins": 4.640402793884277, + "rewards/rejected": -9.86429500579834, + "step": 9653 + }, + { + "epoch": 1.5, + "learning_rate": 7.066699517609834e-06, + "logits/chosen": -2.6552889347076416, + "logits/rejected": -2.934232711791992, + "logps/chosen": -148.30154418945312, + "logps/rejected": -349.9656982421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.036309242248535, + "rewards/margins": 8.60915756225586, + "rewards/rejected": -12.645465850830078, + "step": 9654 + }, + { + "epoch": 1.5, + "learning_rate": 7.065966077078686e-06, + "logits/chosen": -2.715679168701172, + "logits/rejected": -2.796142578125, + "logps/chosen": -155.35536193847656, + "logps/rejected": -199.46163940429688, + "loss": 0.4108, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.860614776611328, + "rewards/margins": 4.402040481567383, + "rewards/rejected": -10.262655258178711, + "step": 9655 + }, + { + "epoch": 1.5, + "learning_rate": 7.065232636547538e-06, + "logits/chosen": -2.7648677825927734, + "logits/rejected": -2.8746848106384277, + "logps/chosen": -282.39678955078125, + "logps/rejected": -288.1915283203125, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.56204891204834, + "rewards/margins": 5.259220123291016, + "rewards/rejected": -11.821269035339355, + "step": 9656 + }, + { + "epoch": 1.5, + "learning_rate": 7.064499196016391e-06, + "logits/chosen": -2.788148880004883, + "logits/rejected": -1.7561373710632324, + "logps/chosen": -186.8075714111328, + "logps/rejected": -190.1387176513672, + "loss": 1.5885, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.1661810874938965, + "rewards/margins": 2.4915647506713867, + "rewards/rejected": -7.657745361328125, + "step": 9657 + }, + { + "epoch": 1.5, + "learning_rate": 7.063765755485243e-06, + "logits/chosen": -1.5731863975524902, + "logits/rejected": -2.3749091625213623, + "logps/chosen": -179.45480346679688, + "logps/rejected": -386.13751220703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9376096725463867, + "rewards/margins": 10.882013320922852, + "rewards/rejected": -14.819622993469238, + "step": 9658 + }, + { + "epoch": 1.5, + "learning_rate": 7.0630323149540955e-06, + "logits/chosen": -2.7360427379608154, + "logits/rejected": -2.141707420349121, + "logps/chosen": -286.3534240722656, + "logps/rejected": -339.7970275878906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3985090255737305, + "rewards/margins": 10.477215766906738, + "rewards/rejected": -15.875724792480469, + "step": 9659 + }, + { + "epoch": 1.5, + "learning_rate": 7.062298874422947e-06, + "logits/chosen": -2.1424243450164795, + "logits/rejected": -2.549260139465332, + "logps/chosen": -150.25616455078125, + "logps/rejected": -374.5121765136719, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.443366050720215, + "rewards/margins": 7.536470413208008, + "rewards/rejected": -10.979836463928223, + "step": 9660 + }, + { + "epoch": 1.5, + "learning_rate": 7.0615654338918e-06, + "logits/chosen": -2.259368896484375, + "logits/rejected": -2.0161521434783936, + "logps/chosen": -302.7674255371094, + "logps/rejected": -394.04022216796875, + "loss": 0.0936, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.68719482421875, + "rewards/margins": 4.337593078613281, + "rewards/rejected": -12.024787902832031, + "step": 9661 + }, + { + "epoch": 1.5, + "learning_rate": 7.060831993360652e-06, + "logits/chosen": -1.8617424964904785, + "logits/rejected": -2.290783643722534, + "logps/chosen": -139.70938110351562, + "logps/rejected": -271.54827880859375, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.720291614532471, + "rewards/margins": 5.486769199371338, + "rewards/rejected": -12.207060813903809, + "step": 9662 + }, + { + "epoch": 1.5, + "learning_rate": 7.060098552829504e-06, + "logits/chosen": -2.8966453075408936, + "logits/rejected": -2.460864543914795, + "logps/chosen": -366.7050476074219, + "logps/rejected": -375.069091796875, + "loss": 0.0251, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.508741855621338, + "rewards/margins": 4.271844863891602, + "rewards/rejected": -8.780586242675781, + "step": 9663 + }, + { + "epoch": 1.5, + "learning_rate": 7.059365112298356e-06, + "logits/chosen": -2.9375009536743164, + "logits/rejected": -3.138864517211914, + "logps/chosen": -200.41793823242188, + "logps/rejected": -350.245361328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6681370735168457, + "rewards/margins": 8.6597261428833, + "rewards/rejected": -12.327862739562988, + "step": 9664 + }, + { + "epoch": 1.5, + "learning_rate": 7.0586316717672076e-06, + "logits/chosen": -1.1936867237091064, + "logits/rejected": -2.667564630508423, + "logps/chosen": -234.26528930664062, + "logps/rejected": -570.2523193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.982149124145508, + "rewards/margins": 13.58682632446289, + "rewards/rejected": -18.56897735595703, + "step": 9665 + }, + { + "epoch": 1.5, + "learning_rate": 7.05789823123606e-06, + "logits/chosen": -2.2882847785949707, + "logits/rejected": -2.894063711166382, + "logps/chosen": -358.324462890625, + "logps/rejected": -510.3333435058594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2335357666015625, + "rewards/margins": 11.158111572265625, + "rewards/rejected": -12.391647338867188, + "step": 9666 + }, + { + "epoch": 1.5, + "learning_rate": 7.057164790704912e-06, + "logits/chosen": -2.4961800575256348, + "logits/rejected": -2.7721762657165527, + "logps/chosen": -526.7548217773438, + "logps/rejected": -640.0391235351562, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6027588844299316, + "rewards/margins": 8.667154312133789, + "rewards/rejected": -12.269912719726562, + "step": 9667 + }, + { + "epoch": 1.5, + "learning_rate": 7.056431350173764e-06, + "logits/chosen": -2.6673789024353027, + "logits/rejected": -3.150771379470825, + "logps/chosen": -88.24652862548828, + "logps/rejected": -357.4384765625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.200510025024414, + "rewards/margins": 5.9540205001831055, + "rewards/rejected": -10.15453052520752, + "step": 9668 + }, + { + "epoch": 1.5, + "learning_rate": 7.055697909642616e-06, + "logits/chosen": -2.6991426944732666, + "logits/rejected": -0.8466788530349731, + "logps/chosen": -287.5104064941406, + "logps/rejected": -137.89308166503906, + "loss": 0.228, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.59316873550415, + "rewards/margins": 3.4339635372161865, + "rewards/rejected": -9.027132034301758, + "step": 9669 + }, + { + "epoch": 1.5, + "learning_rate": 7.054964469111469e-06, + "logits/chosen": -1.4208229780197144, + "logits/rejected": -2.905668258666992, + "logps/chosen": -156.40029907226562, + "logps/rejected": -326.41192626953125, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.476356506347656, + "rewards/margins": 6.506856918334961, + "rewards/rejected": -11.983213424682617, + "step": 9670 + }, + { + "epoch": 1.5, + "learning_rate": 7.0542310285803205e-06, + "logits/chosen": -2.362077474594116, + "logits/rejected": -2.9437880516052246, + "logps/chosen": -94.38540649414062, + "logps/rejected": -470.35943603515625, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.693789958953857, + "rewards/margins": 6.927064895629883, + "rewards/rejected": -12.620855331420898, + "step": 9671 + }, + { + "epoch": 1.5, + "learning_rate": 7.053497588049172e-06, + "logits/chosen": -2.3704628944396973, + "logits/rejected": -3.00628662109375, + "logps/chosen": -171.92303466796875, + "logps/rejected": -242.7041778564453, + "loss": 0.0651, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.403837203979492, + "rewards/margins": 5.226487636566162, + "rewards/rejected": -13.630325317382812, + "step": 9672 + }, + { + "epoch": 1.5, + "learning_rate": 7.052764147518024e-06, + "logits/chosen": -1.1781343221664429, + "logits/rejected": -2.72169828414917, + "logps/chosen": -60.94829559326172, + "logps/rejected": -249.9461669921875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.302170753479004, + "rewards/margins": 7.215704441070557, + "rewards/rejected": -11.517875671386719, + "step": 9673 + }, + { + "epoch": 1.5, + "learning_rate": 7.052030706986877e-06, + "logits/chosen": -2.8752810955047607, + "logits/rejected": -2.9499948024749756, + "logps/chosen": -230.96658325195312, + "logps/rejected": -463.50262451171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.254631996154785, + "rewards/margins": 8.404866218566895, + "rewards/rejected": -15.65949821472168, + "step": 9674 + }, + { + "epoch": 1.5, + "learning_rate": 7.051297266455729e-06, + "logits/chosen": -2.966118574142456, + "logits/rejected": -3.0532820224761963, + "logps/chosen": -135.07992553710938, + "logps/rejected": -260.61492919921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.036664962768555, + "rewards/margins": 8.912203788757324, + "rewards/rejected": -12.948868751525879, + "step": 9675 + }, + { + "epoch": 1.5, + "learning_rate": 7.050563825924582e-06, + "logits/chosen": -1.5226399898529053, + "logits/rejected": -2.453289747238159, + "logps/chosen": -175.2164306640625, + "logps/rejected": -313.35406494140625, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.617544174194336, + "rewards/margins": 5.308999061584473, + "rewards/rejected": -9.926544189453125, + "step": 9676 + }, + { + "epoch": 1.5, + "learning_rate": 7.0498303853934335e-06, + "logits/chosen": -2.09519100189209, + "logits/rejected": -2.802823781967163, + "logps/chosen": -114.07586669921875, + "logps/rejected": -471.702880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.315402984619141, + "rewards/margins": 13.828657150268555, + "rewards/rejected": -19.144060134887695, + "step": 9677 + }, + { + "epoch": 1.51, + "learning_rate": 7.049096944862285e-06, + "logits/chosen": -1.2443089485168457, + "logits/rejected": -2.320645570755005, + "logps/chosen": -180.545166015625, + "logps/rejected": -372.2349853515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5839648246765137, + "rewards/margins": 8.927712440490723, + "rewards/rejected": -12.511676788330078, + "step": 9678 + }, + { + "epoch": 1.51, + "learning_rate": 7.048363504331138e-06, + "logits/chosen": -2.3676538467407227, + "logits/rejected": -2.802347421646118, + "logps/chosen": -369.2452087402344, + "logps/rejected": -424.178955078125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.635363817214966, + "rewards/margins": 7.828099250793457, + "rewards/rejected": -11.463462829589844, + "step": 9679 + }, + { + "epoch": 1.51, + "learning_rate": 7.04763006379999e-06, + "logits/chosen": -1.9103546142578125, + "logits/rejected": -2.9054863452911377, + "logps/chosen": -221.22564697265625, + "logps/rejected": -419.919189453125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.277813911437988, + "rewards/margins": 6.032693862915039, + "rewards/rejected": -10.310506820678711, + "step": 9680 + }, + { + "epoch": 1.51, + "learning_rate": 7.046896623268842e-06, + "logits/chosen": -3.2513961791992188, + "logits/rejected": -3.0014612674713135, + "logps/chosen": -198.8548583984375, + "logps/rejected": -171.15028381347656, + "loss": 1.0753, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.784685134887695, + "rewards/margins": 0.4799365997314453, + "rewards/rejected": -7.264621734619141, + "step": 9681 + }, + { + "epoch": 1.51, + "learning_rate": 7.046163182737694e-06, + "logits/chosen": -2.59965181350708, + "logits/rejected": -2.8619866371154785, + "logps/chosen": -193.92666625976562, + "logps/rejected": -323.0147705078125, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.358335494995117, + "rewards/margins": 5.994973182678223, + "rewards/rejected": -10.353307723999023, + "step": 9682 + }, + { + "epoch": 1.51, + "learning_rate": 7.0454297422065465e-06, + "logits/chosen": -1.9987598657608032, + "logits/rejected": -3.0244619846343994, + "logps/chosen": -358.51910400390625, + "logps/rejected": -557.0771484375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9261016845703125, + "rewards/margins": 6.630528450012207, + "rewards/rejected": -10.55663013458252, + "step": 9683 + }, + { + "epoch": 1.51, + "learning_rate": 7.044696301675398e-06, + "logits/chosen": -2.1393914222717285, + "logits/rejected": -2.5793662071228027, + "logps/chosen": -101.26411437988281, + "logps/rejected": -432.54620361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.304321050643921, + "rewards/margins": 12.865379333496094, + "rewards/rejected": -16.169700622558594, + "step": 9684 + }, + { + "epoch": 1.51, + "learning_rate": 7.04396286114425e-06, + "logits/chosen": -2.117614984512329, + "logits/rejected": -2.7746400833129883, + "logps/chosen": -87.68651580810547, + "logps/rejected": -205.73275756835938, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.10352087020874, + "rewards/margins": 5.844438076019287, + "rewards/rejected": -10.947958946228027, + "step": 9685 + }, + { + "epoch": 1.51, + "learning_rate": 7.043229420613102e-06, + "logits/chosen": -2.9536917209625244, + "logits/rejected": -3.1169466972351074, + "logps/chosen": -270.1792297363281, + "logps/rejected": -237.61306762695312, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.013829708099365, + "rewards/margins": 4.525138854980469, + "rewards/rejected": -8.538969039916992, + "step": 9686 + }, + { + "epoch": 1.51, + "learning_rate": 7.042495980081954e-06, + "logits/chosen": -2.791402816772461, + "logits/rejected": -2.9712729454040527, + "logps/chosen": -103.80835723876953, + "logps/rejected": -275.400634765625, + "loss": 0.0317, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.344052791595459, + "rewards/margins": 5.502262115478516, + "rewards/rejected": -10.846314430236816, + "step": 9687 + }, + { + "epoch": 1.51, + "learning_rate": 7.041762539550807e-06, + "logits/chosen": -2.354607582092285, + "logits/rejected": -2.852351427078247, + "logps/chosen": -276.08050537109375, + "logps/rejected": -522.2774658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.264862060546875, + "rewards/margins": 11.559962272644043, + "rewards/rejected": -16.824825286865234, + "step": 9688 + }, + { + "epoch": 1.51, + "learning_rate": 7.041029099019659e-06, + "logits/chosen": -1.9191051721572876, + "logits/rejected": -2.6772518157958984, + "logps/chosen": -242.8094024658203, + "logps/rejected": -293.7996826171875, + "loss": 2.1176, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.659208297729492, + "rewards/margins": 3.526324510574341, + "rewards/rejected": -12.185532569885254, + "step": 9689 + }, + { + "epoch": 1.51, + "learning_rate": 7.0402956584885105e-06, + "logits/chosen": -1.9719215631484985, + "logits/rejected": -2.78143048286438, + "logps/chosen": -202.10780334472656, + "logps/rejected": -367.86883544921875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4734954833984375, + "rewards/margins": 9.322578430175781, + "rewards/rejected": -14.796073913574219, + "step": 9690 + }, + { + "epoch": 1.51, + "learning_rate": 7.039562217957362e-06, + "logits/chosen": -2.613696813583374, + "logits/rejected": -2.980837106704712, + "logps/chosen": -94.92786407470703, + "logps/rejected": -345.4560241699219, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.28455924987793, + "rewards/margins": 6.954785346984863, + "rewards/rejected": -13.239343643188477, + "step": 9691 + }, + { + "epoch": 1.51, + "learning_rate": 7.038828777426215e-06, + "logits/chosen": -2.166196346282959, + "logits/rejected": -2.9948575496673584, + "logps/chosen": -82.64727783203125, + "logps/rejected": -219.64988708496094, + "loss": 0.448, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.071209907531738, + "rewards/margins": 2.9764444828033447, + "rewards/rejected": -10.04765510559082, + "step": 9692 + }, + { + "epoch": 1.51, + "learning_rate": 7.038095336895068e-06, + "logits/chosen": -3.0847837924957275, + "logits/rejected": -2.3306634426116943, + "logps/chosen": -516.765869140625, + "logps/rejected": -450.27825927734375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.83358097076416, + "rewards/margins": 8.403454780578613, + "rewards/rejected": -14.237035751342773, + "step": 9693 + }, + { + "epoch": 1.51, + "learning_rate": 7.03736189636392e-06, + "logits/chosen": -1.6799794435501099, + "logits/rejected": -2.964844226837158, + "logps/chosen": -274.91259765625, + "logps/rejected": -340.1904296875, + "loss": 0.0368, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.063946723937988, + "rewards/margins": 5.443601608276367, + "rewards/rejected": -10.507548332214355, + "step": 9694 + }, + { + "epoch": 1.51, + "learning_rate": 7.0366284558327715e-06, + "logits/chosen": -2.139280319213867, + "logits/rejected": -2.7451224327087402, + "logps/chosen": -197.422607421875, + "logps/rejected": -424.1468505859375, + "loss": 0.2016, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.079615116119385, + "rewards/margins": 4.676878929138184, + "rewards/rejected": -10.756494522094727, + "step": 9695 + }, + { + "epoch": 1.51, + "learning_rate": 7.0358950153016234e-06, + "logits/chosen": -2.4949381351470947, + "logits/rejected": -2.9722237586975098, + "logps/chosen": -772.1907348632812, + "logps/rejected": -1075.0333251953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.254361152648926, + "rewards/margins": 10.68549919128418, + "rewards/rejected": -13.939861297607422, + "step": 9696 + }, + { + "epoch": 1.51, + "learning_rate": 7.035161574770476e-06, + "logits/chosen": -1.4921948909759521, + "logits/rejected": -2.886770009994507, + "logps/chosen": -119.22572326660156, + "logps/rejected": -273.14471435546875, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.207950115203857, + "rewards/margins": 4.967101097106934, + "rewards/rejected": -10.175050735473633, + "step": 9697 + }, + { + "epoch": 1.51, + "learning_rate": 7.034428134239328e-06, + "logits/chosen": -3.0023231506347656, + "logits/rejected": -2.7492239475250244, + "logps/chosen": -382.2895202636719, + "logps/rejected": -556.8650512695312, + "loss": 0.7346, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.187167167663574, + "rewards/margins": 2.339632034301758, + "rewards/rejected": -7.526799201965332, + "step": 9698 + }, + { + "epoch": 1.51, + "learning_rate": 7.03369469370818e-06, + "logits/chosen": -2.489298105239868, + "logits/rejected": -2.986643075942993, + "logps/chosen": -157.52891540527344, + "logps/rejected": -229.65713500976562, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.77247428894043, + "rewards/margins": 8.112927436828613, + "rewards/rejected": -12.885401725769043, + "step": 9699 + }, + { + "epoch": 1.51, + "learning_rate": 7.032961253177032e-06, + "logits/chosen": -2.879587173461914, + "logits/rejected": -3.0685977935791016, + "logps/chosen": -101.6732177734375, + "logps/rejected": -272.624755859375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.82890510559082, + "rewards/margins": 7.926608562469482, + "rewards/rejected": -13.755514144897461, + "step": 9700 + }, + { + "epoch": 1.51, + "learning_rate": 7.0322278126458845e-06, + "logits/chosen": -1.6551645994186401, + "logits/rejected": -2.7047476768493652, + "logps/chosen": -164.47994995117188, + "logps/rejected": -434.5965576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.542379856109619, + "rewards/margins": 11.780019760131836, + "rewards/rejected": -16.322399139404297, + "step": 9701 + }, + { + "epoch": 1.51, + "learning_rate": 7.031494372114736e-06, + "logits/chosen": -3.0398616790771484, + "logits/rejected": -3.059709310531616, + "logps/chosen": -178.05421447753906, + "logps/rejected": -354.2929382324219, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.51361083984375, + "rewards/margins": 8.25851821899414, + "rewards/rejected": -11.77212905883789, + "step": 9702 + }, + { + "epoch": 1.51, + "learning_rate": 7.030760931583588e-06, + "logits/chosen": -3.0125904083251953, + "logits/rejected": -3.0812978744506836, + "logps/chosen": -111.55450439453125, + "logps/rejected": -253.32281494140625, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.933269500732422, + "rewards/margins": 4.266852378845215, + "rewards/rejected": -10.200121879577637, + "step": 9703 + }, + { + "epoch": 1.51, + "learning_rate": 7.03002749105244e-06, + "logits/chosen": -1.4173409938812256, + "logits/rejected": -2.6748783588409424, + "logps/chosen": -92.08673858642578, + "logps/rejected": -317.10736083984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.120511054992676, + "rewards/margins": 9.281898498535156, + "rewards/rejected": -13.402409553527832, + "step": 9704 + }, + { + "epoch": 1.51, + "learning_rate": 7.029294050521292e-06, + "logits/chosen": -2.8716530799865723, + "logits/rejected": -3.0076510906219482, + "logps/chosen": -266.60992431640625, + "logps/rejected": -241.09481811523438, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.271807909011841, + "rewards/margins": 5.710055351257324, + "rewards/rejected": -8.981863021850586, + "step": 9705 + }, + { + "epoch": 1.51, + "learning_rate": 7.028560609990145e-06, + "logits/chosen": -2.542318105697632, + "logits/rejected": -2.754607677459717, + "logps/chosen": -123.62505340576172, + "logps/rejected": -278.7833557128906, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5852646827697754, + "rewards/margins": 9.894692420959473, + "rewards/rejected": -13.479957580566406, + "step": 9706 + }, + { + "epoch": 1.51, + "learning_rate": 7.027827169458997e-06, + "logits/chosen": -2.684523582458496, + "logits/rejected": -3.1016602516174316, + "logps/chosen": -110.95541381835938, + "logps/rejected": -329.7724304199219, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.033766269683838, + "rewards/margins": 8.930152893066406, + "rewards/rejected": -14.963919639587402, + "step": 9707 + }, + { + "epoch": 1.51, + "learning_rate": 7.0270937289278485e-06, + "logits/chosen": -1.9435821771621704, + "logits/rejected": -2.4297118186950684, + "logps/chosen": -88.49734497070312, + "logps/rejected": -201.04823303222656, + "loss": 0.1057, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8305768966674805, + "rewards/margins": 4.4741716384887695, + "rewards/rejected": -11.30474853515625, + "step": 9708 + }, + { + "epoch": 1.51, + "learning_rate": 7.026360288396701e-06, + "logits/chosen": -2.8030896186828613, + "logits/rejected": -1.886635184288025, + "logps/chosen": -260.27239990234375, + "logps/rejected": -290.9648132324219, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6711835861206055, + "rewards/margins": 4.086342811584473, + "rewards/rejected": -9.757526397705078, + "step": 9709 + }, + { + "epoch": 1.51, + "learning_rate": 7.025626847865554e-06, + "logits/chosen": -2.9865615367889404, + "logits/rejected": -2.5049006938934326, + "logps/chosen": -901.8675537109375, + "logps/rejected": -329.2705383300781, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.034726142883301, + "rewards/margins": 7.104937553405762, + "rewards/rejected": -13.139663696289062, + "step": 9710 + }, + { + "epoch": 1.51, + "learning_rate": 7.024893407334406e-06, + "logits/chosen": -2.76082181930542, + "logits/rejected": -3.0755412578582764, + "logps/chosen": -51.4614143371582, + "logps/rejected": -217.0821075439453, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.687866687774658, + "rewards/margins": 6.179316520690918, + "rewards/rejected": -9.867183685302734, + "step": 9711 + }, + { + "epoch": 1.51, + "learning_rate": 7.024159966803258e-06, + "logits/chosen": -2.203996181488037, + "logits/rejected": -2.821216106414795, + "logps/chosen": -263.34417724609375, + "logps/rejected": -324.91094970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2159576416015625, + "rewards/margins": 10.24078369140625, + "rewards/rejected": -14.456741333007812, + "step": 9712 + }, + { + "epoch": 1.51, + "learning_rate": 7.02342652627211e-06, + "logits/chosen": -2.9510891437530518, + "logits/rejected": -2.6844098567962646, + "logps/chosen": -714.9794311523438, + "logps/rejected": -535.6117553710938, + "loss": 0.1471, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.639636039733887, + "rewards/margins": 2.645085334777832, + "rewards/rejected": -8.284721374511719, + "step": 9713 + }, + { + "epoch": 1.51, + "learning_rate": 7.0226930857409615e-06, + "logits/chosen": -2.6369686126708984, + "logits/rejected": -2.9867959022521973, + "logps/chosen": -71.54850769042969, + "logps/rejected": -219.94119262695312, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.177656650543213, + "rewards/margins": 9.122721672058105, + "rewards/rejected": -12.300378799438477, + "step": 9714 + }, + { + "epoch": 1.51, + "learning_rate": 7.021959645209814e-06, + "logits/chosen": -1.5936543941497803, + "logits/rejected": -3.0559287071228027, + "logps/chosen": -88.1282958984375, + "logps/rejected": -422.4302978515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9785330295562744, + "rewards/margins": 8.511488914489746, + "rewards/rejected": -12.490021705627441, + "step": 9715 + }, + { + "epoch": 1.51, + "learning_rate": 7.021226204678666e-06, + "logits/chosen": -2.9667139053344727, + "logits/rejected": -2.737682819366455, + "logps/chosen": -828.4871215820312, + "logps/rejected": -682.31396484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.632063865661621, + "rewards/margins": 9.26240062713623, + "rewards/rejected": -14.894464492797852, + "step": 9716 + }, + { + "epoch": 1.51, + "learning_rate": 7.020492764147518e-06, + "logits/chosen": -2.1559715270996094, + "logits/rejected": -2.7992897033691406, + "logps/chosen": -252.97679138183594, + "logps/rejected": -502.31793212890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.30447244644165, + "rewards/margins": 12.725852966308594, + "rewards/rejected": -17.03032684326172, + "step": 9717 + }, + { + "epoch": 1.51, + "learning_rate": 7.01975932361637e-06, + "logits/chosen": -2.94559645652771, + "logits/rejected": -1.1569452285766602, + "logps/chosen": -535.2548828125, + "logps/rejected": -201.3939666748047, + "loss": 0.0254, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.797196388244629, + "rewards/margins": 4.8610358238220215, + "rewards/rejected": -10.658231735229492, + "step": 9718 + }, + { + "epoch": 1.51, + "learning_rate": 7.0190258830852226e-06, + "logits/chosen": -3.1261417865753174, + "logits/rejected": -2.798814535140991, + "logps/chosen": -142.10104370117188, + "logps/rejected": -167.24578857421875, + "loss": 1.5763, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.344825744628906, + "rewards/margins": 1.825423240661621, + "rewards/rejected": -7.170248985290527, + "step": 9719 + }, + { + "epoch": 1.51, + "learning_rate": 7.0182924425540744e-06, + "logits/chosen": -2.5568110942840576, + "logits/rejected": -3.020285129547119, + "logps/chosen": -112.39906311035156, + "logps/rejected": -301.52886962890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5299673080444336, + "rewards/margins": 9.169149398803711, + "rewards/rejected": -12.699115753173828, + "step": 9720 + }, + { + "epoch": 1.51, + "learning_rate": 7.017559002022926e-06, + "logits/chosen": -2.5728142261505127, + "logits/rejected": -2.97800350189209, + "logps/chosen": -274.20745849609375, + "logps/rejected": -696.375, + "loss": 0.936, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.928953170776367, + "rewards/margins": 3.0518999099731445, + "rewards/rejected": -11.980854034423828, + "step": 9721 + }, + { + "epoch": 1.51, + "learning_rate": 7.016825561491778e-06, + "logits/chosen": -1.4972538948059082, + "logits/rejected": -2.6556384563446045, + "logps/chosen": -279.1551513671875, + "logps/rejected": -699.222900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3889412879943848, + "rewards/margins": 12.245174407958984, + "rewards/rejected": -15.634116172790527, + "step": 9722 + }, + { + "epoch": 1.51, + "learning_rate": 7.016092120960631e-06, + "logits/chosen": -2.9267008304595947, + "logits/rejected": -2.922581672668457, + "logps/chosen": -344.46856689453125, + "logps/rejected": -430.741455078125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.717466831207275, + "rewards/margins": 8.316080093383789, + "rewards/rejected": -16.033546447753906, + "step": 9723 + }, + { + "epoch": 1.51, + "learning_rate": 7.015358680429483e-06, + "logits/chosen": -2.689176321029663, + "logits/rejected": -2.7498557567596436, + "logps/chosen": -194.91419982910156, + "logps/rejected": -215.90463256835938, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2763848304748535, + "rewards/margins": 4.008415699005127, + "rewards/rejected": -10.28480052947998, + "step": 9724 + }, + { + "epoch": 1.51, + "learning_rate": 7.014625239898335e-06, + "logits/chosen": -1.30416738986969, + "logits/rejected": -2.6806867122650146, + "logps/chosen": -166.6983642578125, + "logps/rejected": -449.04345703125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.809599876403809, + "rewards/margins": 8.948113441467285, + "rewards/rejected": -15.757713317871094, + "step": 9725 + }, + { + "epoch": 1.51, + "learning_rate": 7.013891799367187e-06, + "logits/chosen": -2.940260648727417, + "logits/rejected": -2.9640440940856934, + "logps/chosen": -179.9687042236328, + "logps/rejected": -125.5020523071289, + "loss": 3.0276, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.625275611877441, + "rewards/margins": 0.5506582260131836, + "rewards/rejected": -9.175933837890625, + "step": 9726 + }, + { + "epoch": 1.51, + "learning_rate": 7.013158358836039e-06, + "logits/chosen": -2.2383151054382324, + "logits/rejected": -2.852919816970825, + "logps/chosen": -266.8941345214844, + "logps/rejected": -256.36297607421875, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.881154537200928, + "rewards/margins": 4.306730270385742, + "rewards/rejected": -12.187885284423828, + "step": 9727 + }, + { + "epoch": 1.51, + "learning_rate": 7.012424918304892e-06, + "logits/chosen": -1.8938382863998413, + "logits/rejected": -2.4740118980407715, + "logps/chosen": -293.5186767578125, + "logps/rejected": -555.980712890625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.332817077636719, + "rewards/margins": 6.273734092712402, + "rewards/rejected": -14.606551170349121, + "step": 9728 + }, + { + "epoch": 1.51, + "learning_rate": 7.011691477773744e-06, + "logits/chosen": -3.011380195617676, + "logits/rejected": -1.3112519979476929, + "logps/chosen": -544.4526977539062, + "logps/rejected": -343.87628173828125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6345794200897217, + "rewards/margins": 7.452795028686523, + "rewards/rejected": -10.087374687194824, + "step": 9729 + }, + { + "epoch": 1.51, + "learning_rate": 7.010958037242596e-06, + "logits/chosen": -2.827623128890991, + "logits/rejected": -1.6083128452301025, + "logps/chosen": -270.44842529296875, + "logps/rejected": -353.97528076171875, + "loss": 1.3834, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.5436601638793945, + "rewards/margins": 2.2069973945617676, + "rewards/rejected": -9.75065803527832, + "step": 9730 + }, + { + "epoch": 1.51, + "learning_rate": 7.010224596711448e-06, + "logits/chosen": -2.285573959350586, + "logits/rejected": -2.6003570556640625, + "logps/chosen": -264.6661682128906, + "logps/rejected": -533.8471069335938, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.189609527587891, + "rewards/margins": 7.953024864196777, + "rewards/rejected": -12.142635345458984, + "step": 9731 + }, + { + "epoch": 1.51, + "learning_rate": 7.0094911561803e-06, + "logits/chosen": -3.029975414276123, + "logits/rejected": -2.6860334873199463, + "logps/chosen": -215.03762817382812, + "logps/rejected": -202.9466552734375, + "loss": 0.5347, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.197200775146484, + "rewards/margins": 3.243654727935791, + "rewards/rejected": -9.440855026245117, + "step": 9732 + }, + { + "epoch": 1.51, + "learning_rate": 7.008757715649152e-06, + "logits/chosen": -3.0044493675231934, + "logits/rejected": -3.118107318878174, + "logps/chosen": -88.83338165283203, + "logps/rejected": -204.73788452148438, + "loss": 0.0417, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.486783981323242, + "rewards/margins": 5.1954545974731445, + "rewards/rejected": -9.682238578796387, + "step": 9733 + }, + { + "epoch": 1.51, + "learning_rate": 7.008024275118004e-06, + "logits/chosen": -2.064629316329956, + "logits/rejected": -2.790884494781494, + "logps/chosen": -381.4482116699219, + "logps/rejected": -484.57086181640625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.315021991729736, + "rewards/margins": 6.744096755981445, + "rewards/rejected": -11.059118270874023, + "step": 9734 + }, + { + "epoch": 1.51, + "learning_rate": 7.007290834586856e-06, + "logits/chosen": -2.518123149871826, + "logits/rejected": -2.993703842163086, + "logps/chosen": -129.74778747558594, + "logps/rejected": -378.2154541015625, + "loss": 0.6425, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.896038055419922, + "rewards/margins": 4.701501369476318, + "rewards/rejected": -10.597539901733398, + "step": 9735 + }, + { + "epoch": 1.51, + "learning_rate": 7.006557394055708e-06, + "logits/chosen": -3.045443534851074, + "logits/rejected": -3.1047720909118652, + "logps/chosen": -38.541385650634766, + "logps/rejected": -150.64915466308594, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9352869987487793, + "rewards/margins": 7.5667643547058105, + "rewards/rejected": -10.50205135345459, + "step": 9736 + }, + { + "epoch": 1.51, + "learning_rate": 7.005823953524561e-06, + "logits/chosen": -1.8154867887496948, + "logits/rejected": -2.7150609493255615, + "logps/chosen": -236.74435424804688, + "logps/rejected": -342.0611877441406, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.740894317626953, + "rewards/margins": 6.491057395935059, + "rewards/rejected": -10.231952667236328, + "step": 9737 + }, + { + "epoch": 1.51, + "learning_rate": 7.0050905129934125e-06, + "logits/chosen": -2.698028087615967, + "logits/rejected": -3.064561367034912, + "logps/chosen": -378.96575927734375, + "logps/rejected": -790.214111328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.492410182952881, + "rewards/margins": 9.217584609985352, + "rewards/rejected": -13.70999526977539, + "step": 9738 + }, + { + "epoch": 1.51, + "learning_rate": 7.004357072462264e-06, + "logits/chosen": -2.807955741882324, + "logits/rejected": -1.977869987487793, + "logps/chosen": -293.72467041015625, + "logps/rejected": -256.9178466796875, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.642339706420898, + "rewards/margins": 4.896751403808594, + "rewards/rejected": -14.539091110229492, + "step": 9739 + }, + { + "epoch": 1.51, + "learning_rate": 7.003623631931116e-06, + "logits/chosen": -1.9434105157852173, + "logits/rejected": -2.504620313644409, + "logps/chosen": -233.05967712402344, + "logps/rejected": -425.6085205078125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.430603504180908, + "rewards/margins": 8.109444618225098, + "rewards/rejected": -14.540048599243164, + "step": 9740 + }, + { + "epoch": 1.51, + "learning_rate": 7.002890191399969e-06, + "logits/chosen": -2.973191022872925, + "logits/rejected": -2.7607383728027344, + "logps/chosen": -199.0955352783203, + "logps/rejected": -311.87261962890625, + "loss": 0.0366, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.048487186431885, + "rewards/margins": 3.306506633758545, + "rewards/rejected": -10.35499382019043, + "step": 9741 + }, + { + "epoch": 1.52, + "learning_rate": 7.002156750868821e-06, + "logits/chosen": -3.059950113296509, + "logits/rejected": -3.0358903408050537, + "logps/chosen": -108.66661071777344, + "logps/rejected": -270.4933166503906, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.361538887023926, + "rewards/margins": 6.469724655151367, + "rewards/rejected": -11.831262588500977, + "step": 9742 + }, + { + "epoch": 1.52, + "learning_rate": 7.0014233103376736e-06, + "logits/chosen": -1.7916603088378906, + "logits/rejected": -2.549974203109741, + "logps/chosen": -251.86412048339844, + "logps/rejected": -561.3345336914062, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.703388214111328, + "rewards/margins": 6.2795939445495605, + "rewards/rejected": -13.982982635498047, + "step": 9743 + }, + { + "epoch": 1.52, + "learning_rate": 7.0006898698065254e-06, + "logits/chosen": -0.9806142449378967, + "logits/rejected": -2.4337353706359863, + "logps/chosen": -154.1243896484375, + "logps/rejected": -538.6326904296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0220441818237305, + "rewards/margins": 10.626973152160645, + "rewards/rejected": -15.649017333984375, + "step": 9744 + }, + { + "epoch": 1.52, + "learning_rate": 6.999956429275377e-06, + "logits/chosen": -2.897587537765503, + "logits/rejected": -1.661027431488037, + "logps/chosen": -259.4638366699219, + "logps/rejected": -136.91073608398438, + "loss": 1.3211, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.320008277893066, + "rewards/margins": 1.6140589714050293, + "rewards/rejected": -8.934066772460938, + "step": 9745 + }, + { + "epoch": 1.52, + "learning_rate": 6.99922298874423e-06, + "logits/chosen": -0.6903961896896362, + "logits/rejected": -2.817532539367676, + "logps/chosen": -97.04734802246094, + "logps/rejected": -408.84161376953125, + "loss": 0.7772, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.637224197387695, + "rewards/margins": 3.3032498359680176, + "rewards/rejected": -10.940473556518555, + "step": 9746 + }, + { + "epoch": 1.52, + "learning_rate": 6.998489548213082e-06, + "logits/chosen": -2.551947593688965, + "logits/rejected": -2.716001510620117, + "logps/chosen": -301.1877746582031, + "logps/rejected": -347.2514953613281, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3830766677856445, + "rewards/margins": 7.448660373687744, + "rewards/rejected": -11.831737518310547, + "step": 9747 + }, + { + "epoch": 1.52, + "learning_rate": 6.997756107681934e-06, + "logits/chosen": -2.7812812328338623, + "logits/rejected": -3.129934310913086, + "logps/chosen": -87.4878921508789, + "logps/rejected": -270.26556396484375, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.867112636566162, + "rewards/margins": 6.702138900756836, + "rewards/rejected": -10.569252014160156, + "step": 9748 + }, + { + "epoch": 1.52, + "learning_rate": 6.997022667150786e-06, + "logits/chosen": -2.0699880123138428, + "logits/rejected": -2.597929000854492, + "logps/chosen": -161.15963745117188, + "logps/rejected": -368.1953430175781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.659150123596191, + "rewards/margins": 11.835248947143555, + "rewards/rejected": -17.494400024414062, + "step": 9749 + }, + { + "epoch": 1.52, + "learning_rate": 6.996289226619638e-06, + "logits/chosen": -2.1174516677856445, + "logits/rejected": -2.6778950691223145, + "logps/chosen": -131.0727081298828, + "logps/rejected": -231.20953369140625, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.714357852935791, + "rewards/margins": 3.459312915802002, + "rewards/rejected": -10.173670768737793, + "step": 9750 + }, + { + "epoch": 1.52, + "learning_rate": 6.99555578608849e-06, + "logits/chosen": -1.0152817964553833, + "logits/rejected": -1.7251843214035034, + "logps/chosen": -159.03695678710938, + "logps/rejected": -497.6222229003906, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.128843784332275, + "rewards/margins": 8.424651145935059, + "rewards/rejected": -12.553494453430176, + "step": 9751 + }, + { + "epoch": 1.52, + "learning_rate": 6.994822345557342e-06, + "logits/chosen": -2.4287190437316895, + "logits/rejected": -2.7848029136657715, + "logps/chosen": -205.33541870117188, + "logps/rejected": -383.0188903808594, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.469697952270508, + "rewards/margins": 9.866579055786133, + "rewards/rejected": -16.33627700805664, + "step": 9752 + }, + { + "epoch": 1.52, + "learning_rate": 6.994088905026194e-06, + "logits/chosen": -2.003833055496216, + "logits/rejected": -2.9598381519317627, + "logps/chosen": -324.1121826171875, + "logps/rejected": -455.81170654296875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.669641494750977, + "rewards/margins": 6.703433990478516, + "rewards/rejected": -13.373075485229492, + "step": 9753 + }, + { + "epoch": 1.52, + "learning_rate": 6.993355464495046e-06, + "logits/chosen": -2.915639877319336, + "logits/rejected": -2.567638397216797, + "logps/chosen": -331.0126647949219, + "logps/rejected": -157.9584503173828, + "loss": 2.8315, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.125409126281738, + "rewards/margins": 0.40781545639038086, + "rewards/rejected": -9.533224105834961, + "step": 9754 + }, + { + "epoch": 1.52, + "learning_rate": 6.992622023963899e-06, + "logits/chosen": -2.670456886291504, + "logits/rejected": -1.9218668937683105, + "logps/chosen": -836.1483154296875, + "logps/rejected": -630.4259033203125, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.624975204467773, + "rewards/margins": 4.8081374168396, + "rewards/rejected": -13.433113098144531, + "step": 9755 + }, + { + "epoch": 1.52, + "learning_rate": 6.9918885834327505e-06, + "logits/chosen": -2.965662956237793, + "logits/rejected": -3.1550822257995605, + "logps/chosen": -334.40069580078125, + "logps/rejected": -379.0309143066406, + "loss": 1.9991, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.4957780838012695, + "rewards/margins": 1.5494531393051147, + "rewards/rejected": -9.045230865478516, + "step": 9756 + }, + { + "epoch": 1.52, + "learning_rate": 6.991155142901602e-06, + "logits/chosen": -2.3507814407348633, + "logits/rejected": -2.922642230987549, + "logps/chosen": -224.4207305908203, + "logps/rejected": -320.6278076171875, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5537261962890625, + "rewards/margins": 5.371942520141602, + "rewards/rejected": -10.925668716430664, + "step": 9757 + }, + { + "epoch": 1.52, + "learning_rate": 6.990421702370454e-06, + "logits/chosen": -2.7482686042785645, + "logits/rejected": -2.8873841762542725, + "logps/chosen": -86.99108123779297, + "logps/rejected": -319.4026184082031, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.500389575958252, + "rewards/margins": 9.067035675048828, + "rewards/rejected": -13.567424774169922, + "step": 9758 + }, + { + "epoch": 1.52, + "learning_rate": 6.989688261839307e-06, + "logits/chosen": -2.2043468952178955, + "logits/rejected": -2.6498401165008545, + "logps/chosen": -62.059669494628906, + "logps/rejected": -325.5589904785156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1361818313598633, + "rewards/margins": 9.879843711853027, + "rewards/rejected": -13.01602554321289, + "step": 9759 + }, + { + "epoch": 1.52, + "learning_rate": 6.98895482130816e-06, + "logits/chosen": -2.586500644683838, + "logits/rejected": -2.8682608604431152, + "logps/chosen": -778.359375, + "logps/rejected": -741.1692504882812, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5018982887268066, + "rewards/margins": 8.238555908203125, + "rewards/rejected": -11.74045467376709, + "step": 9760 + }, + { + "epoch": 1.52, + "learning_rate": 6.988221380777012e-06, + "logits/chosen": -1.8972910642623901, + "logits/rejected": -2.9047000408172607, + "logps/chosen": -380.402099609375, + "logps/rejected": -633.4122924804688, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.395022869110107, + "rewards/margins": 4.3762922286987305, + "rewards/rejected": -10.77131462097168, + "step": 9761 + }, + { + "epoch": 1.52, + "learning_rate": 6.9874879402458635e-06, + "logits/chosen": -1.184683918952942, + "logits/rejected": -2.4487156867980957, + "logps/chosen": -102.14312744140625, + "logps/rejected": -281.98638916015625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.563114166259766, + "rewards/margins": 7.372537612915039, + "rewards/rejected": -14.935651779174805, + "step": 9762 + }, + { + "epoch": 1.52, + "learning_rate": 6.986754499714715e-06, + "logits/chosen": -2.5617122650146484, + "logits/rejected": -2.6000750064849854, + "logps/chosen": -107.3745346069336, + "logps/rejected": -146.0951690673828, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.76314115524292, + "rewards/margins": 5.02054500579834, + "rewards/rejected": -10.783685684204102, + "step": 9763 + }, + { + "epoch": 1.52, + "learning_rate": 6.986021059183568e-06, + "logits/chosen": -2.3134877681732178, + "logits/rejected": -2.8557097911834717, + "logps/chosen": -184.6810302734375, + "logps/rejected": -313.71435546875, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.583222389221191, + "rewards/margins": 5.916215896606445, + "rewards/rejected": -11.499439239501953, + "step": 9764 + }, + { + "epoch": 1.52, + "learning_rate": 6.98528761865242e-06, + "logits/chosen": -2.7327358722686768, + "logits/rejected": -2.1729750633239746, + "logps/chosen": -499.042236328125, + "logps/rejected": -448.6380920410156, + "loss": 0.0603, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6061601638793945, + "rewards/margins": 5.849937438964844, + "rewards/rejected": -11.456096649169922, + "step": 9765 + }, + { + "epoch": 1.52, + "learning_rate": 6.984554178121272e-06, + "logits/chosen": -2.999746084213257, + "logits/rejected": -2.7581539154052734, + "logps/chosen": -156.4141845703125, + "logps/rejected": -278.4749755859375, + "loss": 1.8263, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.238960266113281, + "rewards/margins": 2.336304187774658, + "rewards/rejected": -10.575263977050781, + "step": 9766 + }, + { + "epoch": 1.52, + "learning_rate": 6.983820737590124e-06, + "logits/chosen": -2.6775577068328857, + "logits/rejected": -2.562635898590088, + "logps/chosen": -277.1352233886719, + "logps/rejected": -410.74090576171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8287193775177, + "rewards/margins": 10.651900291442871, + "rewards/rejected": -13.480619430541992, + "step": 9767 + }, + { + "epoch": 1.52, + "learning_rate": 6.9830872970589764e-06, + "logits/chosen": -2.445925712585449, + "logits/rejected": -3.119699478149414, + "logps/chosen": -136.0914764404297, + "logps/rejected": -545.5523681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.699051856994629, + "rewards/margins": 9.811383247375488, + "rewards/rejected": -16.510435104370117, + "step": 9768 + }, + { + "epoch": 1.52, + "learning_rate": 6.982353856527828e-06, + "logits/chosen": -2.7695696353912354, + "logits/rejected": -2.169869899749756, + "logps/chosen": -166.17379760742188, + "logps/rejected": -267.238037109375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9915337562561035, + "rewards/margins": 7.536189556121826, + "rewards/rejected": -10.52772331237793, + "step": 9769 + }, + { + "epoch": 1.52, + "learning_rate": 6.98162041599668e-06, + "logits/chosen": -2.7106189727783203, + "logits/rejected": -2.794590473175049, + "logps/chosen": -473.5623779296875, + "logps/rejected": -412.17779541015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.282288551330566, + "rewards/margins": 8.359454154968262, + "rewards/rejected": -13.641742706298828, + "step": 9770 + }, + { + "epoch": 1.52, + "learning_rate": 6.980886975465532e-06, + "logits/chosen": -1.3267818689346313, + "logits/rejected": -2.665821075439453, + "logps/chosen": -212.79324340820312, + "logps/rejected": -400.7354736328125, + "loss": 0.172, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.551550388336182, + "rewards/margins": 4.9286346435546875, + "rewards/rejected": -12.480184555053711, + "step": 9771 + }, + { + "epoch": 1.52, + "learning_rate": 6.980153534934385e-06, + "logits/chosen": -1.796277642250061, + "logits/rejected": -2.6214282512664795, + "logps/chosen": -104.24813842773438, + "logps/rejected": -196.1626739501953, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3228278160095215, + "rewards/margins": 4.029028415679932, + "rewards/rejected": -9.351856231689453, + "step": 9772 + }, + { + "epoch": 1.52, + "learning_rate": 6.979420094403237e-06, + "logits/chosen": -2.5034916400909424, + "logits/rejected": -2.8693485260009766, + "logps/chosen": -162.611328125, + "logps/rejected": -313.95391845703125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.120884895324707, + "rewards/margins": 7.759956359863281, + "rewards/rejected": -11.880842208862305, + "step": 9773 + }, + { + "epoch": 1.52, + "learning_rate": 6.9786866538720886e-06, + "logits/chosen": -1.937272548675537, + "logits/rejected": -2.765084981918335, + "logps/chosen": -136.8612060546875, + "logps/rejected": -365.3663330078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.507908582687378, + "rewards/margins": 8.757184982299805, + "rewards/rejected": -12.265094757080078, + "step": 9774 + }, + { + "epoch": 1.52, + "learning_rate": 6.9779532133409404e-06, + "logits/chosen": -1.0380662679672241, + "logits/rejected": -2.573864698410034, + "logps/chosen": -119.51121520996094, + "logps/rejected": -476.3990783691406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.411102294921875, + "rewards/margins": 10.260374069213867, + "rewards/rejected": -15.671476364135742, + "step": 9775 + }, + { + "epoch": 1.52, + "learning_rate": 6.977219772809793e-06, + "logits/chosen": -2.2948710918426514, + "logits/rejected": -2.7358851432800293, + "logps/chosen": -170.63543701171875, + "logps/rejected": -448.5074462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.91664981842041, + "rewards/margins": 13.888364791870117, + "rewards/rejected": -16.80501365661621, + "step": 9776 + }, + { + "epoch": 1.52, + "learning_rate": 6.976486332278646e-06, + "logits/chosen": -2.263463020324707, + "logits/rejected": -2.9111392498016357, + "logps/chosen": -78.29954528808594, + "logps/rejected": -326.460693359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.436255931854248, + "rewards/margins": 10.208911895751953, + "rewards/rejected": -15.64516830444336, + "step": 9777 + }, + { + "epoch": 1.52, + "learning_rate": 6.975752891747498e-06, + "logits/chosen": -2.6204614639282227, + "logits/rejected": -1.4390040636062622, + "logps/chosen": -172.7881317138672, + "logps/rejected": -133.21189880371094, + "loss": 1.6776, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.666234970092773, + "rewards/margins": 1.1777753829956055, + "rewards/rejected": -9.844010353088379, + "step": 9778 + }, + { + "epoch": 1.52, + "learning_rate": 6.97501945121635e-06, + "logits/chosen": -2.668172836303711, + "logits/rejected": -2.05830717086792, + "logps/chosen": -163.89120483398438, + "logps/rejected": -297.9252624511719, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6091980934143066, + "rewards/margins": 7.856246471405029, + "rewards/rejected": -11.465444564819336, + "step": 9779 + }, + { + "epoch": 1.52, + "learning_rate": 6.9742860106852015e-06, + "logits/chosen": -2.613124132156372, + "logits/rejected": -2.7509310245513916, + "logps/chosen": -99.51716613769531, + "logps/rejected": -439.5079345703125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9332072734832764, + "rewards/margins": 8.020352363586426, + "rewards/rejected": -10.953559875488281, + "step": 9780 + }, + { + "epoch": 1.52, + "learning_rate": 6.973552570154054e-06, + "logits/chosen": -2.4114718437194824, + "logits/rejected": -3.080138921737671, + "logps/chosen": -138.24427795410156, + "logps/rejected": -441.90972900390625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.802480220794678, + "rewards/margins": 7.795280456542969, + "rewards/rejected": -13.597761154174805, + "step": 9781 + }, + { + "epoch": 1.52, + "learning_rate": 6.972819129622906e-06, + "logits/chosen": -3.1198344230651855, + "logits/rejected": -3.0970876216888428, + "logps/chosen": -235.76593017578125, + "logps/rejected": -317.574951171875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.957958936691284, + "rewards/margins": 8.399134635925293, + "rewards/rejected": -11.357093811035156, + "step": 9782 + }, + { + "epoch": 1.52, + "learning_rate": 6.972085689091758e-06, + "logits/chosen": -2.657639503479004, + "logits/rejected": -2.5153698921203613, + "logps/chosen": -428.00341796875, + "logps/rejected": -308.55792236328125, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2775678634643555, + "rewards/margins": 6.60690975189209, + "rewards/rejected": -11.884477615356445, + "step": 9783 + }, + { + "epoch": 1.52, + "learning_rate": 6.97135224856061e-06, + "logits/chosen": -2.982130289077759, + "logits/rejected": -2.749920129776001, + "logps/chosen": -169.59996032714844, + "logps/rejected": -228.1300048828125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7412967681884766, + "rewards/margins": 6.853180408477783, + "rewards/rejected": -10.594476699829102, + "step": 9784 + }, + { + "epoch": 1.52, + "learning_rate": 6.970618808029462e-06, + "logits/chosen": -2.0801045894622803, + "logits/rejected": -3.0222294330596924, + "logps/chosen": -144.4141387939453, + "logps/rejected": -243.57708740234375, + "loss": 1.0791, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.414915561676025, + "rewards/margins": 2.4832184314727783, + "rewards/rejected": -7.898134231567383, + "step": 9785 + }, + { + "epoch": 1.52, + "learning_rate": 6.9698853674983145e-06, + "logits/chosen": -2.2863845825195312, + "logits/rejected": -2.3537302017211914, + "logps/chosen": -121.56188201904297, + "logps/rejected": -483.3182678222656, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.025158405303955, + "rewards/margins": 9.377786636352539, + "rewards/rejected": -15.402944564819336, + "step": 9786 + }, + { + "epoch": 1.52, + "learning_rate": 6.969151926967166e-06, + "logits/chosen": -1.53654146194458, + "logits/rejected": -2.9472646713256836, + "logps/chosen": -113.06067657470703, + "logps/rejected": -351.8143310546875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.999176979064941, + "rewards/margins": 5.727596282958984, + "rewards/rejected": -12.726773262023926, + "step": 9787 + }, + { + "epoch": 1.52, + "learning_rate": 6.968418486436018e-06, + "logits/chosen": -1.329581379890442, + "logits/rejected": -1.7960855960845947, + "logps/chosen": -272.7842102050781, + "logps/rejected": -428.8420715332031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7292227745056152, + "rewards/margins": 10.695611953735352, + "rewards/rejected": -14.424835205078125, + "step": 9788 + }, + { + "epoch": 1.52, + "learning_rate": 6.96768504590487e-06, + "logits/chosen": -2.9624533653259277, + "logits/rejected": -0.8670582175254822, + "logps/chosen": -794.6812133789062, + "logps/rejected": -289.43328857421875, + "loss": 2.542, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.66919231414795, + "rewards/margins": 1.1427299976348877, + "rewards/rejected": -10.811922073364258, + "step": 9789 + }, + { + "epoch": 1.52, + "learning_rate": 6.966951605373723e-06, + "logits/chosen": -2.7515041828155518, + "logits/rejected": -2.1906189918518066, + "logps/chosen": -411.1717529296875, + "logps/rejected": -243.3046875, + "loss": 2.1374, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.532015800476074, + "rewards/margins": -0.10210418701171875, + "rewards/rejected": -7.4299116134643555, + "step": 9790 + }, + { + "epoch": 1.52, + "learning_rate": 6.966218164842575e-06, + "logits/chosen": -2.9308249950408936, + "logits/rejected": -2.9819021224975586, + "logps/chosen": -1105.3941650390625, + "logps/rejected": -676.57861328125, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4964141845703125, + "rewards/margins": 1.303785800933838, + "rewards/rejected": -8.800199508666992, + "step": 9791 + }, + { + "epoch": 1.52, + "learning_rate": 6.965484724311427e-06, + "logits/chosen": -1.9460415840148926, + "logits/rejected": -2.939786434173584, + "logps/chosen": -215.95555114746094, + "logps/rejected": -389.6513977050781, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.097025156021118, + "rewards/margins": 10.225594520568848, + "rewards/rejected": -13.322619438171387, + "step": 9792 + }, + { + "epoch": 1.52, + "learning_rate": 6.964751283780279e-06, + "logits/chosen": -2.8212265968322754, + "logits/rejected": -2.7868151664733887, + "logps/chosen": -250.70030212402344, + "logps/rejected": -361.51385498046875, + "loss": 0.1834, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.29090690612793, + "rewards/margins": 7.748050689697266, + "rewards/rejected": -15.038957595825195, + "step": 9793 + }, + { + "epoch": 1.52, + "learning_rate": 6.964017843249131e-06, + "logits/chosen": -1.1057161092758179, + "logits/rejected": -2.6342732906341553, + "logps/chosen": -123.33914184570312, + "logps/rejected": -566.0562744140625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.530411720275879, + "rewards/margins": 9.843062400817871, + "rewards/rejected": -14.37347412109375, + "step": 9794 + }, + { + "epoch": 1.52, + "learning_rate": 6.963284402717984e-06, + "logits/chosen": -2.534306287765503, + "logits/rejected": -2.898939371109009, + "logps/chosen": -408.7726745605469, + "logps/rejected": -411.36480712890625, + "loss": 0.2596, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.885684013366699, + "rewards/margins": 2.7663958072662354, + "rewards/rejected": -8.652080535888672, + "step": 9795 + }, + { + "epoch": 1.52, + "learning_rate": 6.962550962186836e-06, + "logits/chosen": -2.8234381675720215, + "logits/rejected": -1.4787447452545166, + "logps/chosen": -219.00796508789062, + "logps/rejected": -373.0889892578125, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4636240005493164, + "rewards/margins": 8.28702163696289, + "rewards/rejected": -11.750645637512207, + "step": 9796 + }, + { + "epoch": 1.52, + "learning_rate": 6.961817521655688e-06, + "logits/chosen": -2.7150654792785645, + "logits/rejected": -2.9200501441955566, + "logps/chosen": -148.93032836914062, + "logps/rejected": -434.0899963378906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.906822204589844, + "rewards/margins": 9.118027687072754, + "rewards/rejected": -15.024849891662598, + "step": 9797 + }, + { + "epoch": 1.52, + "learning_rate": 6.9610840811245396e-06, + "logits/chosen": -2.7111098766326904, + "logits/rejected": -2.165074348449707, + "logps/chosen": -242.1826934814453, + "logps/rejected": -332.43963623046875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.116744518280029, + "rewards/margins": 8.009953498840332, + "rewards/rejected": -12.126697540283203, + "step": 9798 + }, + { + "epoch": 1.52, + "learning_rate": 6.960350640593392e-06, + "logits/chosen": -1.1543720960617065, + "logits/rejected": -2.306483268737793, + "logps/chosen": -146.77877807617188, + "logps/rejected": -479.7724609375, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.535428047180176, + "rewards/margins": 7.605442047119141, + "rewards/rejected": -12.140869140625, + "step": 9799 + }, + { + "epoch": 1.52, + "learning_rate": 6.959617200062244e-06, + "logits/chosen": -2.636484146118164, + "logits/rejected": -3.152346134185791, + "logps/chosen": -90.28311157226562, + "logps/rejected": -275.446044921875, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3855388164520264, + "rewards/margins": 5.073492050170898, + "rewards/rejected": -8.459031105041504, + "step": 9800 + }, + { + "epoch": 1.52, + "learning_rate": 6.958883759531096e-06, + "logits/chosen": -2.4079806804656982, + "logits/rejected": -2.7146778106689453, + "logps/chosen": -409.15924072265625, + "logps/rejected": -609.5240478515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6550843715667725, + "rewards/margins": 9.955377578735352, + "rewards/rejected": -13.610462188720703, + "step": 9801 + }, + { + "epoch": 1.52, + "learning_rate": 6.958150318999948e-06, + "logits/chosen": -1.9408767223358154, + "logits/rejected": -2.45434832572937, + "logps/chosen": -197.30361938476562, + "logps/rejected": -271.03485107421875, + "loss": 0.1596, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4403581619262695, + "rewards/margins": 3.895657539367676, + "rewards/rejected": -8.336015701293945, + "step": 9802 + }, + { + "epoch": 1.52, + "learning_rate": 6.9574168784688e-06, + "logits/chosen": -2.663905143737793, + "logits/rejected": -2.976501703262329, + "logps/chosen": -232.685546875, + "logps/rejected": -183.66180419921875, + "loss": 0.0606, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.657776355743408, + "rewards/margins": 4.683815956115723, + "rewards/rejected": -9.341591835021973, + "step": 9803 + }, + { + "epoch": 1.52, + "learning_rate": 6.9566834379376525e-06, + "logits/chosen": -1.825194239616394, + "logits/rejected": -2.690746307373047, + "logps/chosen": -155.5818328857422, + "logps/rejected": -247.71009826660156, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.391916275024414, + "rewards/margins": 7.7211527824401855, + "rewards/rejected": -12.113069534301758, + "step": 9804 + }, + { + "epoch": 1.52, + "learning_rate": 6.955949997406504e-06, + "logits/chosen": -2.1121718883514404, + "logits/rejected": -2.8307700157165527, + "logps/chosen": -633.0299682617188, + "logps/rejected": -625.6427001953125, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.84588623046875, + "rewards/margins": 5.5498738288879395, + "rewards/rejected": -10.395759582519531, + "step": 9805 + }, + { + "epoch": 1.53, + "learning_rate": 6.955216556875356e-06, + "logits/chosen": -1.7349621057510376, + "logits/rejected": -2.9436028003692627, + "logps/chosen": -170.89703369140625, + "logps/rejected": -414.68963623046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.265642166137695, + "rewards/margins": 8.51504898071289, + "rewards/rejected": -12.780691146850586, + "step": 9806 + }, + { + "epoch": 1.53, + "learning_rate": 6.954483116344208e-06, + "logits/chosen": -1.7266403436660767, + "logits/rejected": -2.879835605621338, + "logps/chosen": -147.39271545410156, + "logps/rejected": -450.02642822265625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.224024772644043, + "rewards/margins": 8.512789726257324, + "rewards/rejected": -14.736814498901367, + "step": 9807 + }, + { + "epoch": 1.53, + "learning_rate": 6.953749675813061e-06, + "logits/chosen": -3.015472888946533, + "logits/rejected": -3.0780086517333984, + "logps/chosen": -300.26824951171875, + "logps/rejected": -158.24520874023438, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2876787185668945, + "rewards/margins": 5.529130935668945, + "rewards/rejected": -8.81680965423584, + "step": 9808 + }, + { + "epoch": 1.53, + "learning_rate": 6.953016235281913e-06, + "logits/chosen": -2.5766093730926514, + "logits/rejected": -2.6045756340026855, + "logps/chosen": -244.80136108398438, + "logps/rejected": -293.548095703125, + "loss": 0.4182, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.830137729644775, + "rewards/margins": 6.346558570861816, + "rewards/rejected": -12.17669677734375, + "step": 9809 + }, + { + "epoch": 1.53, + "learning_rate": 6.9522827947507655e-06, + "logits/chosen": -2.15450119972229, + "logits/rejected": -2.821362257003784, + "logps/chosen": -320.63909912109375, + "logps/rejected": -417.6602783203125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.098355293273926, + "rewards/margins": 6.674689292907715, + "rewards/rejected": -10.77304458618164, + "step": 9810 + }, + { + "epoch": 1.53, + "learning_rate": 6.951549354219617e-06, + "logits/chosen": -2.645674467086792, + "logits/rejected": -1.1226774454116821, + "logps/chosen": -288.0233459472656, + "logps/rejected": -152.111328125, + "loss": 1.1814, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.061551094055176, + "rewards/margins": 1.7746977806091309, + "rewards/rejected": -10.836248397827148, + "step": 9811 + }, + { + "epoch": 1.53, + "learning_rate": 6.950815913688469e-06, + "logits/chosen": -2.7402639389038086, + "logits/rejected": -2.902599573135376, + "logps/chosen": -157.3582763671875, + "logps/rejected": -306.10565185546875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.012470722198486, + "rewards/margins": 7.4252519607543945, + "rewards/rejected": -11.437723159790039, + "step": 9812 + }, + { + "epoch": 1.53, + "learning_rate": 6.950082473157322e-06, + "logits/chosen": -2.604728937149048, + "logits/rejected": -3.0385382175445557, + "logps/chosen": -386.79022216796875, + "logps/rejected": -480.69866943359375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.180706024169922, + "rewards/margins": 6.310855865478516, + "rewards/rejected": -13.491561889648438, + "step": 9813 + }, + { + "epoch": 1.53, + "learning_rate": 6.949349032626174e-06, + "logits/chosen": -2.339926242828369, + "logits/rejected": -2.266519784927368, + "logps/chosen": -247.365234375, + "logps/rejected": -244.8349609375, + "loss": 1.0966, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.202410697937012, + "rewards/margins": 1.998915433883667, + "rewards/rejected": -10.201326370239258, + "step": 9814 + }, + { + "epoch": 1.53, + "learning_rate": 6.948615592095026e-06, + "logits/chosen": -2.9025895595550537, + "logits/rejected": -2.443169593811035, + "logps/chosen": -190.82591247558594, + "logps/rejected": -278.7034912109375, + "loss": 1.5477, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.2111711502075195, + "rewards/margins": 1.906221628189087, + "rewards/rejected": -9.117392539978027, + "step": 9815 + }, + { + "epoch": 1.53, + "learning_rate": 6.947882151563878e-06, + "logits/chosen": -1.5237246751785278, + "logits/rejected": -3.025785446166992, + "logps/chosen": -162.04727172851562, + "logps/rejected": -656.4010009765625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.323556900024414, + "rewards/margins": 7.405311584472656, + "rewards/rejected": -13.72886848449707, + "step": 9816 + }, + { + "epoch": 1.53, + "learning_rate": 6.94714871103273e-06, + "logits/chosen": -2.244541645050049, + "logits/rejected": -2.560917854309082, + "logps/chosen": -238.34442138671875, + "logps/rejected": -388.0806579589844, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.636924743652344, + "rewards/margins": 5.378772735595703, + "rewards/rejected": -10.015697479248047, + "step": 9817 + }, + { + "epoch": 1.53, + "learning_rate": 6.946415270501582e-06, + "logits/chosen": -1.0511870384216309, + "logits/rejected": -2.87136173248291, + "logps/chosen": -137.61790466308594, + "logps/rejected": -461.34698486328125, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.856508255004883, + "rewards/margins": 8.16667652130127, + "rewards/rejected": -16.02318572998047, + "step": 9818 + }, + { + "epoch": 1.53, + "learning_rate": 6.945681829970434e-06, + "logits/chosen": -2.363271951675415, + "logits/rejected": -2.995920181274414, + "logps/chosen": -152.13491821289062, + "logps/rejected": -554.77197265625, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9710187911987305, + "rewards/margins": 7.06605863571167, + "rewards/rejected": -12.037076950073242, + "step": 9819 + }, + { + "epoch": 1.53, + "learning_rate": 6.944948389439286e-06, + "logits/chosen": -2.7844972610473633, + "logits/rejected": -2.1871554851531982, + "logps/chosen": -314.95068359375, + "logps/rejected": -321.971435546875, + "loss": 0.7402, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.654684066772461, + "rewards/margins": 1.6099395751953125, + "rewards/rejected": -7.264623641967773, + "step": 9820 + }, + { + "epoch": 1.53, + "learning_rate": 6.944214948908139e-06, + "logits/chosen": -2.5464608669281006, + "logits/rejected": -2.8423006534576416, + "logps/chosen": -101.4463882446289, + "logps/rejected": -235.69552612304688, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5434136390686035, + "rewards/margins": 5.491807460784912, + "rewards/rejected": -10.035221099853516, + "step": 9821 + }, + { + "epoch": 1.53, + "learning_rate": 6.943481508376991e-06, + "logits/chosen": -2.859200954437256, + "logits/rejected": -2.7223644256591797, + "logps/chosen": -407.48931884765625, + "logps/rejected": -452.2901306152344, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.709621429443359, + "rewards/margins": 4.808134078979492, + "rewards/rejected": -10.517755508422852, + "step": 9822 + }, + { + "epoch": 1.53, + "learning_rate": 6.9427480678458425e-06, + "logits/chosen": -2.84273362159729, + "logits/rejected": -1.8416167497634888, + "logps/chosen": -593.87451171875, + "logps/rejected": -434.1365966796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.077846050262451, + "rewards/margins": 9.052009582519531, + "rewards/rejected": -13.12985610961914, + "step": 9823 + }, + { + "epoch": 1.53, + "learning_rate": 6.942014627314694e-06, + "logits/chosen": -2.7205824851989746, + "logits/rejected": -2.8481075763702393, + "logps/chosen": -356.87042236328125, + "logps/rejected": -443.7049255371094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.560976505279541, + "rewards/margins": 9.063620567321777, + "rewards/rejected": -12.624597549438477, + "step": 9824 + }, + { + "epoch": 1.53, + "learning_rate": 6.941281186783546e-06, + "logits/chosen": -1.4057329893112183, + "logits/rejected": -2.6202569007873535, + "logps/chosen": -159.5712432861328, + "logps/rejected": -377.2001037597656, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5564818382263184, + "rewards/margins": 8.4254150390625, + "rewards/rejected": -10.981897354125977, + "step": 9825 + }, + { + "epoch": 1.53, + "learning_rate": 6.940547746252399e-06, + "logits/chosen": -2.7744531631469727, + "logits/rejected": -2.2956385612487793, + "logps/chosen": -139.95594787597656, + "logps/rejected": -202.22793579101562, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.308025360107422, + "rewards/margins": 6.980556964874268, + "rewards/rejected": -10.288581848144531, + "step": 9826 + }, + { + "epoch": 1.53, + "learning_rate": 6.939814305721252e-06, + "logits/chosen": -2.983808755874634, + "logits/rejected": -2.742804527282715, + "logps/chosen": -675.2034301757812, + "logps/rejected": -642.505126953125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4332275390625, + "rewards/margins": 6.792755126953125, + "rewards/rejected": -11.225982666015625, + "step": 9827 + }, + { + "epoch": 1.53, + "learning_rate": 6.9390808651901035e-06, + "logits/chosen": -2.3530983924865723, + "logits/rejected": -2.8686673641204834, + "logps/chosen": -328.2132873535156, + "logps/rejected": -441.9272155761719, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.362937927246094, + "rewards/margins": 7.735015869140625, + "rewards/rejected": -12.097953796386719, + "step": 9828 + }, + { + "epoch": 1.53, + "learning_rate": 6.938347424658955e-06, + "logits/chosen": -2.550078868865967, + "logits/rejected": -3.063422203063965, + "logps/chosen": -393.50323486328125, + "logps/rejected": -506.8099670410156, + "loss": 2.0079, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.720216751098633, + "rewards/margins": 2.4610214233398438, + "rewards/rejected": -8.181238174438477, + "step": 9829 + }, + { + "epoch": 1.53, + "learning_rate": 6.937613984127808e-06, + "logits/chosen": -2.6853320598602295, + "logits/rejected": -2.2964487075805664, + "logps/chosen": -473.4390869140625, + "logps/rejected": -464.95025634765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8075714111328125, + "rewards/margins": 8.202201843261719, + "rewards/rejected": -12.009773254394531, + "step": 9830 + }, + { + "epoch": 1.53, + "learning_rate": 6.93688054359666e-06, + "logits/chosen": -2.8563132286071777, + "logits/rejected": -2.7463066577911377, + "logps/chosen": -197.5666961669922, + "logps/rejected": -236.00381469726562, + "loss": 0.4157, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.611559867858887, + "rewards/margins": 1.4472346305847168, + "rewards/rejected": -6.0587944984436035, + "step": 9831 + }, + { + "epoch": 1.53, + "learning_rate": 6.936147103065512e-06, + "logits/chosen": -2.6936514377593994, + "logits/rejected": -2.631578207015991, + "logps/chosen": -254.38165283203125, + "logps/rejected": -380.8622131347656, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3342812061309814, + "rewards/margins": 7.195818901062012, + "rewards/rejected": -10.530099868774414, + "step": 9832 + }, + { + "epoch": 1.53, + "learning_rate": 6.935413662534364e-06, + "logits/chosen": -2.7780954837799072, + "logits/rejected": -3.0045676231384277, + "logps/chosen": -298.6893005371094, + "logps/rejected": -214.26341247558594, + "loss": 1.2991, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.749936103820801, + "rewards/margins": -0.5675008296966553, + "rewards/rejected": -6.182435035705566, + "step": 9833 + }, + { + "epoch": 1.53, + "learning_rate": 6.934680222003216e-06, + "logits/chosen": -2.337606430053711, + "logits/rejected": -3.0275533199310303, + "logps/chosen": -268.9883117675781, + "logps/rejected": -362.2701416015625, + "loss": 0.0909, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.50822639465332, + "rewards/margins": 3.1181883811950684, + "rewards/rejected": -9.626415252685547, + "step": 9834 + }, + { + "epoch": 1.53, + "learning_rate": 6.933946781472068e-06, + "logits/chosen": -2.4541690349578857, + "logits/rejected": -2.864905595779419, + "logps/chosen": -111.76994323730469, + "logps/rejected": -272.2208251953125, + "loss": 0.0775, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7914838790893555, + "rewards/margins": 7.78635835647583, + "rewards/rejected": -12.577842712402344, + "step": 9835 + }, + { + "epoch": 1.53, + "learning_rate": 6.93321334094092e-06, + "logits/chosen": -2.8902552127838135, + "logits/rejected": -2.6725094318389893, + "logps/chosen": -469.9395751953125, + "logps/rejected": -545.4625244140625, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.308004856109619, + "rewards/margins": 6.728944778442383, + "rewards/rejected": -11.036949157714844, + "step": 9836 + }, + { + "epoch": 1.53, + "learning_rate": 6.932479900409772e-06, + "logits/chosen": -2.087451219558716, + "logits/rejected": -2.864401340484619, + "logps/chosen": -237.64578247070312, + "logps/rejected": -597.8868408203125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.590819358825684, + "rewards/margins": 7.7783308029174805, + "rewards/rejected": -16.369150161743164, + "step": 9837 + }, + { + "epoch": 1.53, + "learning_rate": 6.931746459878624e-06, + "logits/chosen": -2.5871012210845947, + "logits/rejected": -2.914910316467285, + "logps/chosen": -386.5503234863281, + "logps/rejected": -397.4289855957031, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.649426460266113, + "rewards/margins": 7.355101585388184, + "rewards/rejected": -14.004528045654297, + "step": 9838 + }, + { + "epoch": 1.53, + "learning_rate": 6.931013019347477e-06, + "logits/chosen": -0.9710543155670166, + "logits/rejected": -2.2212135791778564, + "logps/chosen": -155.905029296875, + "logps/rejected": -443.7445068359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.535317420959473, + "rewards/margins": 8.555656433105469, + "rewards/rejected": -13.090973854064941, + "step": 9839 + }, + { + "epoch": 1.53, + "learning_rate": 6.930279578816329e-06, + "logits/chosen": -2.6545145511627197, + "logits/rejected": -2.945443630218506, + "logps/chosen": -123.1866226196289, + "logps/rejected": -230.77464294433594, + "loss": 0.0404, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.086679935455322, + "rewards/margins": 4.125998497009277, + "rewards/rejected": -11.212678909301758, + "step": 9840 + }, + { + "epoch": 1.53, + "learning_rate": 6.9295461382851805e-06, + "logits/chosen": -2.6382784843444824, + "logits/rejected": -2.907949447631836, + "logps/chosen": -131.04629516601562, + "logps/rejected": -208.7037353515625, + "loss": 0.2733, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8247222900390625, + "rewards/margins": 5.138431072235107, + "rewards/rejected": -9.963152885437012, + "step": 9841 + }, + { + "epoch": 1.53, + "learning_rate": 6.928812697754032e-06, + "logits/chosen": -2.5404491424560547, + "logits/rejected": -1.665213942527771, + "logps/chosen": -218.29757690429688, + "logps/rejected": -287.83172607421875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.246507167816162, + "rewards/margins": 7.729415416717529, + "rewards/rejected": -10.975922584533691, + "step": 9842 + }, + { + "epoch": 1.53, + "learning_rate": 6.928079257222885e-06, + "logits/chosen": -1.9843486547470093, + "logits/rejected": -2.7894136905670166, + "logps/chosen": -206.99224853515625, + "logps/rejected": -466.1893005371094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9698379039764404, + "rewards/margins": 11.409807205200195, + "rewards/rejected": -15.379644393920898, + "step": 9843 + }, + { + "epoch": 1.53, + "learning_rate": 6.927345816691738e-06, + "logits/chosen": -0.9632381796836853, + "logits/rejected": -2.7664029598236084, + "logps/chosen": -70.7629623413086, + "logps/rejected": -412.6207275390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.767601013183594, + "rewards/margins": 10.273362159729004, + "rewards/rejected": -15.040962219238281, + "step": 9844 + }, + { + "epoch": 1.53, + "learning_rate": 6.92661237616059e-06, + "logits/chosen": -1.6733746528625488, + "logits/rejected": -2.8482635021209717, + "logps/chosen": -147.93917846679688, + "logps/rejected": -457.12640380859375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.853867053985596, + "rewards/margins": 7.986878871917725, + "rewards/rejected": -12.84074592590332, + "step": 9845 + }, + { + "epoch": 1.53, + "learning_rate": 6.925878935629442e-06, + "logits/chosen": -2.9398562908172607, + "logits/rejected": -3.023088216781616, + "logps/chosen": -323.3106384277344, + "logps/rejected": -241.44778442382812, + "loss": 0.1623, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.249919414520264, + "rewards/margins": 3.792416572570801, + "rewards/rejected": -8.042335510253906, + "step": 9846 + }, + { + "epoch": 1.53, + "learning_rate": 6.9251454950982935e-06, + "logits/chosen": -3.080655336380005, + "logits/rejected": -2.3086743354797363, + "logps/chosen": -696.68896484375, + "logps/rejected": -362.59814453125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3671631813049316, + "rewards/margins": 6.156193733215332, + "rewards/rejected": -9.523357391357422, + "step": 9847 + }, + { + "epoch": 1.53, + "learning_rate": 6.924412054567146e-06, + "logits/chosen": -2.735982894897461, + "logits/rejected": -2.4609076976776123, + "logps/chosen": -171.26791381835938, + "logps/rejected": -256.62603759765625, + "loss": 0.0543, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2592267990112305, + "rewards/margins": 4.401767253875732, + "rewards/rejected": -9.660994529724121, + "step": 9848 + }, + { + "epoch": 1.53, + "learning_rate": 6.923678614035998e-06, + "logits/chosen": -2.036139488220215, + "logits/rejected": -2.787266492843628, + "logps/chosen": -397.90948486328125, + "logps/rejected": -403.8011169433594, + "loss": 0.0459, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5312676429748535, + "rewards/margins": 3.881694793701172, + "rewards/rejected": -10.412961959838867, + "step": 9849 + }, + { + "epoch": 1.53, + "learning_rate": 6.92294517350485e-06, + "logits/chosen": -3.033008575439453, + "logits/rejected": -2.2540781497955322, + "logps/chosen": -289.51763916015625, + "logps/rejected": -246.99862670898438, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.852923631668091, + "rewards/margins": 6.857913970947266, + "rewards/rejected": -10.710837364196777, + "step": 9850 + }, + { + "epoch": 1.53, + "learning_rate": 6.922211732973702e-06, + "logits/chosen": -1.9420418739318848, + "logits/rejected": -1.345744252204895, + "logps/chosen": -235.90748596191406, + "logps/rejected": -265.882568359375, + "loss": 0.4911, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.570429801940918, + "rewards/margins": 1.9562108516693115, + "rewards/rejected": -7.526640892028809, + "step": 9851 + }, + { + "epoch": 1.53, + "learning_rate": 6.921478292442554e-06, + "logits/chosen": -2.7939908504486084, + "logits/rejected": -2.3250885009765625, + "logps/chosen": -497.98687744140625, + "logps/rejected": -466.4205017089844, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.22430419921875, + "rewards/margins": 6.271521091461182, + "rewards/rejected": -9.495824813842773, + "step": 9852 + }, + { + "epoch": 1.53, + "learning_rate": 6.9207448519114064e-06, + "logits/chosen": -2.0302441120147705, + "logits/rejected": -2.52128529548645, + "logps/chosen": -138.9981689453125, + "logps/rejected": -225.792236328125, + "loss": 0.0724, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.493419647216797, + "rewards/margins": 4.871702194213867, + "rewards/rejected": -9.365121841430664, + "step": 9853 + }, + { + "epoch": 1.53, + "learning_rate": 6.920011411380258e-06, + "logits/chosen": -2.801992654800415, + "logits/rejected": -1.9011671543121338, + "logps/chosen": -540.7916870117188, + "logps/rejected": -408.60809326171875, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.890072822570801, + "rewards/margins": 5.912357330322266, + "rewards/rejected": -11.802430152893066, + "step": 9854 + }, + { + "epoch": 1.53, + "learning_rate": 6.91927797084911e-06, + "logits/chosen": -3.012699842453003, + "logits/rejected": -2.807560443878174, + "logps/chosen": -252.07101440429688, + "logps/rejected": -272.4326171875, + "loss": 0.1756, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.509599208831787, + "rewards/margins": 3.123331308364868, + "rewards/rejected": -8.632930755615234, + "step": 9855 + }, + { + "epoch": 1.53, + "learning_rate": 6.918544530317962e-06, + "logits/chosen": -1.7976367473602295, + "logits/rejected": -2.7658238410949707, + "logps/chosen": -247.9459228515625, + "logps/rejected": -475.74688720703125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.004593849182129, + "rewards/margins": 9.897721290588379, + "rewards/rejected": -12.902315139770508, + "step": 9856 + }, + { + "epoch": 1.53, + "learning_rate": 6.917811089786815e-06, + "logits/chosen": -1.2778345346450806, + "logits/rejected": -2.820474147796631, + "logps/chosen": -190.16458129882812, + "logps/rejected": -409.87066650390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.988962650299072, + "rewards/margins": 8.80336856842041, + "rewards/rejected": -14.79233169555664, + "step": 9857 + }, + { + "epoch": 1.53, + "learning_rate": 6.917077649255667e-06, + "logits/chosen": -2.791827440261841, + "logits/rejected": -3.2145087718963623, + "logps/chosen": -65.29520416259766, + "logps/rejected": -430.83502197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5536022186279297, + "rewards/margins": 9.169427871704102, + "rewards/rejected": -11.723030090332031, + "step": 9858 + }, + { + "epoch": 1.53, + "learning_rate": 6.9163442087245185e-06, + "logits/chosen": -1.5043511390686035, + "logits/rejected": -2.534804582595825, + "logps/chosen": -52.10542297363281, + "logps/rejected": -341.73419189453125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.646915435791016, + "rewards/margins": 7.890003204345703, + "rewards/rejected": -12.536918640136719, + "step": 9859 + }, + { + "epoch": 1.53, + "learning_rate": 6.915610768193371e-06, + "logits/chosen": -3.114992141723633, + "logits/rejected": -2.168397903442383, + "logps/chosen": -242.50901794433594, + "logps/rejected": -162.90431213378906, + "loss": 3.2165, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.374147415161133, + "rewards/margins": -0.07370519638061523, + "rewards/rejected": -6.300442218780518, + "step": 9860 + }, + { + "epoch": 1.53, + "learning_rate": 6.914877327662224e-06, + "logits/chosen": -2.9622323513031006, + "logits/rejected": -1.8040084838867188, + "logps/chosen": -565.8804931640625, + "logps/rejected": -475.35821533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9006247520446777, + "rewards/margins": 10.880987167358398, + "rewards/rejected": -13.781610488891602, + "step": 9861 + }, + { + "epoch": 1.53, + "learning_rate": 6.914143887131076e-06, + "logits/chosen": -2.637269973754883, + "logits/rejected": -3.0964882373809814, + "logps/chosen": -121.44413757324219, + "logps/rejected": -267.6504821777344, + "loss": 0.0939, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.100449562072754, + "rewards/margins": 2.970560073852539, + "rewards/rejected": -9.071009635925293, + "step": 9862 + }, + { + "epoch": 1.53, + "learning_rate": 6.913410446599928e-06, + "logits/chosen": -3.0555102825164795, + "logits/rejected": -0.6013327240943909, + "logps/chosen": -457.9962158203125, + "logps/rejected": -231.17115783691406, + "loss": 0.0628, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.305550575256348, + "rewards/margins": 5.931450843811035, + "rewards/rejected": -12.237001419067383, + "step": 9863 + }, + { + "epoch": 1.53, + "learning_rate": 6.91267700606878e-06, + "logits/chosen": -2.4277522563934326, + "logits/rejected": -2.4352777004241943, + "logps/chosen": -160.3855743408203, + "logps/rejected": -450.05108642578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.349440813064575, + "rewards/margins": 11.667510986328125, + "rewards/rejected": -15.016950607299805, + "step": 9864 + }, + { + "epoch": 1.53, + "learning_rate": 6.9119435655376315e-06, + "logits/chosen": -2.568326473236084, + "logits/rejected": -3.0978100299835205, + "logps/chosen": -396.5478210449219, + "logps/rejected": -568.8848266601562, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.145476341247559, + "rewards/margins": 8.043834686279297, + "rewards/rejected": -13.189311027526855, + "step": 9865 + }, + { + "epoch": 1.53, + "learning_rate": 6.911210125006484e-06, + "logits/chosen": -2.8320930004119873, + "logits/rejected": -2.284381866455078, + "logps/chosen": -147.3362579345703, + "logps/rejected": -72.10279846191406, + "loss": 2.6878, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.833005905151367, + "rewards/margins": -2.613701343536377, + "rewards/rejected": -6.219304084777832, + "step": 9866 + }, + { + "epoch": 1.53, + "learning_rate": 6.910476684475336e-06, + "logits/chosen": -1.2518951892852783, + "logits/rejected": -2.7425525188446045, + "logps/chosen": -109.61409759521484, + "logps/rejected": -481.8565368652344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.334931373596191, + "rewards/margins": 11.049792289733887, + "rewards/rejected": -16.384723663330078, + "step": 9867 + }, + { + "epoch": 1.53, + "learning_rate": 6.909743243944188e-06, + "logits/chosen": -2.6807210445404053, + "logits/rejected": -3.0151543617248535, + "logps/chosen": -70.8455810546875, + "logps/rejected": -178.7666778564453, + "loss": 0.0957, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.418760776519775, + "rewards/margins": 5.591089725494385, + "rewards/rejected": -10.00985050201416, + "step": 9868 + }, + { + "epoch": 1.53, + "learning_rate": 6.90900980341304e-06, + "logits/chosen": -3.000066041946411, + "logits/rejected": -2.6386866569519043, + "logps/chosen": -192.81936645507812, + "logps/rejected": -221.37173461914062, + "loss": 0.1481, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.910386562347412, + "rewards/margins": 1.8639352321624756, + "rewards/rejected": -9.774321556091309, + "step": 9869 + }, + { + "epoch": 1.53, + "learning_rate": 6.908276362881893e-06, + "logits/chosen": -1.6860259771347046, + "logits/rejected": -1.8539389371871948, + "logps/chosen": -255.72756958007812, + "logps/rejected": -251.1276397705078, + "loss": 0.3986, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.7090559005737305, + "rewards/margins": 3.9769601821899414, + "rewards/rejected": -8.686016082763672, + "step": 9870 + }, + { + "epoch": 1.54, + "learning_rate": 6.9075429223507445e-06, + "logits/chosen": -1.9771476984024048, + "logits/rejected": -2.6004550457000732, + "logps/chosen": -162.24920654296875, + "logps/rejected": -409.80596923828125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.249693393707275, + "rewards/margins": 8.208011627197266, + "rewards/rejected": -15.457704544067383, + "step": 9871 + }, + { + "epoch": 1.54, + "learning_rate": 6.906809481819596e-06, + "logits/chosen": -2.857797622680664, + "logits/rejected": -1.933533787727356, + "logps/chosen": -332.7826843261719, + "logps/rejected": -305.9452819824219, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.562110424041748, + "rewards/margins": 4.561929702758789, + "rewards/rejected": -12.124040603637695, + "step": 9872 + }, + { + "epoch": 1.54, + "learning_rate": 6.906076041288448e-06, + "logits/chosen": -2.2032058238983154, + "logits/rejected": -2.9484243392944336, + "logps/chosen": -167.55380249023438, + "logps/rejected": -338.43463134765625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7935633659362793, + "rewards/margins": 9.023001670837402, + "rewards/rejected": -12.816564559936523, + "step": 9873 + }, + { + "epoch": 1.54, + "learning_rate": 6.9053426007573e-06, + "logits/chosen": -2.8211965560913086, + "logits/rejected": -2.6337971687316895, + "logps/chosen": -370.5704650878906, + "logps/rejected": -434.88433837890625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.011340141296387, + "rewards/margins": 5.592738628387451, + "rewards/rejected": -11.60407829284668, + "step": 9874 + }, + { + "epoch": 1.54, + "learning_rate": 6.904609160226153e-06, + "logits/chosen": -2.2959744930267334, + "logits/rejected": -2.755420684814453, + "logps/chosen": -240.0167999267578, + "logps/rejected": -501.4312744140625, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.046036243438721, + "rewards/margins": 4.302181720733643, + "rewards/rejected": -10.348217964172363, + "step": 9875 + }, + { + "epoch": 1.54, + "learning_rate": 6.903875719695005e-06, + "logits/chosen": -2.019943952560425, + "logits/rejected": -2.66774845123291, + "logps/chosen": -309.35430908203125, + "logps/rejected": -293.3145751953125, + "loss": 0.048, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5514421463012695, + "rewards/margins": 3.017331123352051, + "rewards/rejected": -8.56877326965332, + "step": 9876 + }, + { + "epoch": 1.54, + "learning_rate": 6.9031422791638574e-06, + "logits/chosen": -0.8305319547653198, + "logits/rejected": -2.450751781463623, + "logps/chosen": -258.01873779296875, + "logps/rejected": -486.24237060546875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.32633638381958, + "rewards/margins": 6.837203502655029, + "rewards/rejected": -13.16353988647461, + "step": 9877 + }, + { + "epoch": 1.54, + "learning_rate": 6.902408838632709e-06, + "logits/chosen": -3.0049426555633545, + "logits/rejected": -1.995420217514038, + "logps/chosen": -428.2884521484375, + "logps/rejected": -416.8674011230469, + "loss": 0.0429, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.62606954574585, + "rewards/margins": 3.3157730102539062, + "rewards/rejected": -7.941842555999756, + "step": 9878 + }, + { + "epoch": 1.54, + "learning_rate": 6.901675398101562e-06, + "logits/chosen": -2.836474657058716, + "logits/rejected": -2.2628672122955322, + "logps/chosen": -562.5396728515625, + "logps/rejected": -483.916015625, + "loss": 0.2216, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.254542827606201, + "rewards/margins": 5.989383697509766, + "rewards/rejected": -9.243926048278809, + "step": 9879 + }, + { + "epoch": 1.54, + "learning_rate": 6.900941957570414e-06, + "logits/chosen": -2.64428973197937, + "logits/rejected": -1.062086820602417, + "logps/chosen": -226.09710693359375, + "logps/rejected": -154.08358764648438, + "loss": 0.0817, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.181303024291992, + "rewards/margins": 3.0253689289093018, + "rewards/rejected": -11.206671714782715, + "step": 9880 + }, + { + "epoch": 1.54, + "learning_rate": 6.900208517039266e-06, + "logits/chosen": -2.814823627471924, + "logits/rejected": -2.9922268390655518, + "logps/chosen": -472.9023132324219, + "logps/rejected": -408.3682556152344, + "loss": 0.0684, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.130654335021973, + "rewards/margins": 2.710347890853882, + "rewards/rejected": -9.841002464294434, + "step": 9881 + }, + { + "epoch": 1.54, + "learning_rate": 6.899475076508118e-06, + "logits/chosen": -2.2738683223724365, + "logits/rejected": -2.7879436016082764, + "logps/chosen": -198.7050018310547, + "logps/rejected": -276.22998046875, + "loss": 0.4405, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.015353202819824, + "rewards/margins": 2.1000492572784424, + "rewards/rejected": -8.115402221679688, + "step": 9882 + }, + { + "epoch": 1.54, + "learning_rate": 6.8987416359769696e-06, + "logits/chosen": -1.9937878847122192, + "logits/rejected": -2.703223466873169, + "logps/chosen": -327.8990173339844, + "logps/rejected": -632.9810180664062, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.33981466293335, + "rewards/margins": 9.497552871704102, + "rewards/rejected": -14.83736801147461, + "step": 9883 + }, + { + "epoch": 1.54, + "learning_rate": 6.898008195445822e-06, + "logits/chosen": -2.9470858573913574, + "logits/rejected": -1.970107078552246, + "logps/chosen": -507.1673583984375, + "logps/rejected": -434.4436950683594, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.414433479309082, + "rewards/margins": 6.080970764160156, + "rewards/rejected": -10.495404243469238, + "step": 9884 + }, + { + "epoch": 1.54, + "learning_rate": 6.897274754914674e-06, + "logits/chosen": -2.689587354660034, + "logits/rejected": -2.9172146320343018, + "logps/chosen": -372.9747619628906, + "logps/rejected": -434.6342468261719, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.809178829193115, + "rewards/margins": 9.113940238952637, + "rewards/rejected": -13.92311954498291, + "step": 9885 + }, + { + "epoch": 1.54, + "learning_rate": 6.896541314383526e-06, + "logits/chosen": -2.8796026706695557, + "logits/rejected": -2.204343557357788, + "logps/chosen": -704.2454833984375, + "logps/rejected": -572.0721435546875, + "loss": 0.1964, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8694465160369873, + "rewards/margins": 3.8989381790161133, + "rewards/rejected": -7.76838493347168, + "step": 9886 + }, + { + "epoch": 1.54, + "learning_rate": 6.895807873852378e-06, + "logits/chosen": -1.2360830307006836, + "logits/rejected": -2.630502700805664, + "logps/chosen": -250.38040161132812, + "logps/rejected": -442.4062194824219, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.447551727294922, + "rewards/margins": 7.403903961181641, + "rewards/rejected": -13.851455688476562, + "step": 9887 + }, + { + "epoch": 1.54, + "learning_rate": 6.895074433321231e-06, + "logits/chosen": -1.5943411588668823, + "logits/rejected": -2.6856374740600586, + "logps/chosen": -271.00811767578125, + "logps/rejected": -596.611572265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5913896560668945, + "rewards/margins": 8.386220932006836, + "rewards/rejected": -15.977609634399414, + "step": 9888 + }, + { + "epoch": 1.54, + "learning_rate": 6.8943409927900825e-06, + "logits/chosen": -2.7422118186950684, + "logits/rejected": -2.9271109104156494, + "logps/chosen": -138.98963928222656, + "logps/rejected": -239.97933959960938, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6817193031311035, + "rewards/margins": 5.997282981872559, + "rewards/rejected": -9.67900276184082, + "step": 9889 + }, + { + "epoch": 1.54, + "learning_rate": 6.893607552258934e-06, + "logits/chosen": -2.3351070880889893, + "logits/rejected": -2.842992067337036, + "logps/chosen": -299.31524658203125, + "logps/rejected": -383.73931884765625, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.647078514099121, + "rewards/margins": 6.048911094665527, + "rewards/rejected": -11.695989608764648, + "step": 9890 + }, + { + "epoch": 1.54, + "learning_rate": 6.892874111727786e-06, + "logits/chosen": -2.856003522872925, + "logits/rejected": -2.026106834411621, + "logps/chosen": -260.8610534667969, + "logps/rejected": -248.0001220703125, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.31763219833374, + "rewards/margins": 6.350741386413574, + "rewards/rejected": -10.668373107910156, + "step": 9891 + }, + { + "epoch": 1.54, + "learning_rate": 6.892140671196638e-06, + "logits/chosen": -3.03312087059021, + "logits/rejected": -2.456352949142456, + "logps/chosen": -220.0602569580078, + "logps/rejected": -286.43121337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1381828784942627, + "rewards/margins": 10.862089157104492, + "rewards/rejected": -13.000272750854492, + "step": 9892 + }, + { + "epoch": 1.54, + "learning_rate": 6.891407230665491e-06, + "logits/chosen": -3.0509228706359863, + "logits/rejected": -2.9763245582580566, + "logps/chosen": -223.13250732421875, + "logps/rejected": -303.77276611328125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.879950761795044, + "rewards/margins": 9.319511413574219, + "rewards/rejected": -13.199462890625, + "step": 9893 + }, + { + "epoch": 1.54, + "learning_rate": 6.890673790134344e-06, + "logits/chosen": -3.0099799633026123, + "logits/rejected": -2.9013614654541016, + "logps/chosen": -299.65008544921875, + "logps/rejected": -376.79656982421875, + "loss": 0.0442, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.094399452209473, + "rewards/margins": 3.909274101257324, + "rewards/rejected": -8.003673553466797, + "step": 9894 + }, + { + "epoch": 1.54, + "learning_rate": 6.8899403496031955e-06, + "logits/chosen": -2.410787582397461, + "logits/rejected": -2.8853232860565186, + "logps/chosen": -68.5603256225586, + "logps/rejected": -215.73348999023438, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.34447979927063, + "rewards/margins": 7.493926048278809, + "rewards/rejected": -10.83840560913086, + "step": 9895 + }, + { + "epoch": 1.54, + "learning_rate": 6.889206909072047e-06, + "logits/chosen": -2.561537027359009, + "logits/rejected": -2.9491047859191895, + "logps/chosen": -723.4319458007812, + "logps/rejected": -649.83935546875, + "loss": 0.0913, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.197500228881836, + "rewards/margins": 4.311697959899902, + "rewards/rejected": -9.509198188781738, + "step": 9896 + }, + { + "epoch": 1.54, + "learning_rate": 6.8884734685409e-06, + "logits/chosen": -2.629520893096924, + "logits/rejected": -3.0408153533935547, + "logps/chosen": -210.2042236328125, + "logps/rejected": -185.13192749023438, + "loss": 0.0376, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.865883827209473, + "rewards/margins": 4.252721309661865, + "rewards/rejected": -9.11860466003418, + "step": 9897 + }, + { + "epoch": 1.54, + "learning_rate": 6.887740028009752e-06, + "logits/chosen": -2.050466775894165, + "logits/rejected": -2.820002794265747, + "logps/chosen": -188.87713623046875, + "logps/rejected": -415.2147216796875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.233664512634277, + "rewards/margins": 5.9486083984375, + "rewards/rejected": -12.182273864746094, + "step": 9898 + }, + { + "epoch": 1.54, + "learning_rate": 6.887006587478604e-06, + "logits/chosen": -2.844102382659912, + "logits/rejected": -2.72615647315979, + "logps/chosen": -267.6453552246094, + "logps/rejected": -436.3773498535156, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.398745059967041, + "rewards/margins": 7.291494369506836, + "rewards/rejected": -13.690238952636719, + "step": 9899 + }, + { + "epoch": 1.54, + "learning_rate": 6.886273146947456e-06, + "logits/chosen": -2.915984630584717, + "logits/rejected": -2.936241865158081, + "logps/chosen": -188.20562744140625, + "logps/rejected": -230.46096801757812, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3079681396484375, + "rewards/margins": 7.667769908905029, + "rewards/rejected": -9.975738525390625, + "step": 9900 + }, + { + "epoch": 1.54, + "learning_rate": 6.885539706416308e-06, + "logits/chosen": -2.9927940368652344, + "logits/rejected": -3.005664587020874, + "logps/chosen": -111.68846130371094, + "logps/rejected": -248.23812866210938, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.435378313064575, + "rewards/margins": 7.792272567749023, + "rewards/rejected": -11.227651596069336, + "step": 9901 + }, + { + "epoch": 1.54, + "learning_rate": 6.88480626588516e-06, + "logits/chosen": -2.4630234241485596, + "logits/rejected": -2.8648006916046143, + "logps/chosen": -229.0207061767578, + "logps/rejected": -289.06927490234375, + "loss": 1.8862, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.54932975769043, + "rewards/margins": 2.507347345352173, + "rewards/rejected": -9.056676864624023, + "step": 9902 + }, + { + "epoch": 1.54, + "learning_rate": 6.884072825354012e-06, + "logits/chosen": -2.74454402923584, + "logits/rejected": -3.0149447917938232, + "logps/chosen": -125.58358001708984, + "logps/rejected": -305.9597473144531, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.89080810546875, + "rewards/margins": 6.061951160430908, + "rewards/rejected": -12.952759742736816, + "step": 9903 + }, + { + "epoch": 1.54, + "learning_rate": 6.883339384822864e-06, + "logits/chosen": -2.910679340362549, + "logits/rejected": -2.21785306930542, + "logps/chosen": -206.89154052734375, + "logps/rejected": -292.703125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.427870035171509, + "rewards/margins": 6.537187576293945, + "rewards/rejected": -9.965057373046875, + "step": 9904 + }, + { + "epoch": 1.54, + "learning_rate": 6.882605944291716e-06, + "logits/chosen": -2.614311695098877, + "logits/rejected": -1.637131929397583, + "logps/chosen": -199.96388244628906, + "logps/rejected": -199.23460388183594, + "loss": 0.0534, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.845996856689453, + "rewards/margins": 3.02109694480896, + "rewards/rejected": -10.867094039916992, + "step": 9905 + }, + { + "epoch": 1.54, + "learning_rate": 6.881872503760569e-06, + "logits/chosen": -0.8190279603004456, + "logits/rejected": -2.014855146408081, + "logps/chosen": -154.11767578125, + "logps/rejected": -511.80645751953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.813567161560059, + "rewards/margins": 8.428279876708984, + "rewards/rejected": -13.241846084594727, + "step": 9906 + }, + { + "epoch": 1.54, + "learning_rate": 6.8811390632294206e-06, + "logits/chosen": -2.033876657485962, + "logits/rejected": -2.9803216457366943, + "logps/chosen": -230.02536010742188, + "logps/rejected": -434.1117858886719, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.557743072509766, + "rewards/margins": 7.678572177886963, + "rewards/rejected": -12.23631477355957, + "step": 9907 + }, + { + "epoch": 1.54, + "learning_rate": 6.8804056226982724e-06, + "logits/chosen": -3.041752815246582, + "logits/rejected": -2.0480926036834717, + "logps/chosen": -474.9825439453125, + "logps/rejected": -316.115966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.898889183998108, + "rewards/margins": 10.349756240844727, + "rewards/rejected": -12.248645782470703, + "step": 9908 + }, + { + "epoch": 1.54, + "learning_rate": 6.879672182167124e-06, + "logits/chosen": -2.1541762351989746, + "logits/rejected": -2.672668933868408, + "logps/chosen": -176.8282470703125, + "logps/rejected": -182.8739013671875, + "loss": 2.1584, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.399374008178711, + "rewards/margins": 1.6707653999328613, + "rewards/rejected": -8.070138931274414, + "step": 9909 + }, + { + "epoch": 1.54, + "learning_rate": 6.878938741635977e-06, + "logits/chosen": -2.6081082820892334, + "logits/rejected": -2.652073383331299, + "logps/chosen": -199.95718383789062, + "logps/rejected": -321.4173583984375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.937020301818848, + "rewards/margins": 8.563970565795898, + "rewards/rejected": -13.500991821289062, + "step": 9910 + }, + { + "epoch": 1.54, + "learning_rate": 6.87820530110483e-06, + "logits/chosen": -2.512610912322998, + "logits/rejected": -1.460027813911438, + "logps/chosen": -172.29129028320312, + "logps/rejected": -214.07894897460938, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.51168155670166, + "rewards/margins": 4.838469505310059, + "rewards/rejected": -11.350151062011719, + "step": 9911 + }, + { + "epoch": 1.54, + "learning_rate": 6.877471860573682e-06, + "logits/chosen": -2.381181001663208, + "logits/rejected": -2.993149995803833, + "logps/chosen": -552.124267578125, + "logps/rejected": -504.2463073730469, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.277469158172607, + "rewards/margins": 8.31692123413086, + "rewards/rejected": -12.594389915466309, + "step": 9912 + }, + { + "epoch": 1.54, + "learning_rate": 6.8767384200425335e-06, + "logits/chosen": -1.4975661039352417, + "logits/rejected": -2.814471960067749, + "logps/chosen": -267.23681640625, + "logps/rejected": -340.4354553222656, + "loss": 0.1121, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.460035800933838, + "rewards/margins": 6.990488052368164, + "rewards/rejected": -13.450523376464844, + "step": 9913 + }, + { + "epoch": 1.54, + "learning_rate": 6.876004979511385e-06, + "logits/chosen": -1.6642255783081055, + "logits/rejected": -2.648315668106079, + "logps/chosen": -124.4146499633789, + "logps/rejected": -390.7140808105469, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.699390411376953, + "rewards/margins": 7.207663536071777, + "rewards/rejected": -11.907054901123047, + "step": 9914 + }, + { + "epoch": 1.54, + "learning_rate": 6.875271538980238e-06, + "logits/chosen": -3.005143165588379, + "logits/rejected": -2.9506993293762207, + "logps/chosen": -232.45034790039062, + "logps/rejected": -185.73532104492188, + "loss": 0.0374, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.448174476623535, + "rewards/margins": 5.5250396728515625, + "rewards/rejected": -10.973214149475098, + "step": 9915 + }, + { + "epoch": 1.54, + "learning_rate": 6.87453809844909e-06, + "logits/chosen": -2.2754311561584473, + "logits/rejected": -2.691821336746216, + "logps/chosen": -422.1639404296875, + "logps/rejected": -369.7063903808594, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.641455173492432, + "rewards/margins": 6.576594829559326, + "rewards/rejected": -11.218050003051758, + "step": 9916 + }, + { + "epoch": 1.54, + "learning_rate": 6.873804657917942e-06, + "logits/chosen": -2.8935582637786865, + "logits/rejected": -2.0561869144439697, + "logps/chosen": -568.6449584960938, + "logps/rejected": -367.08551025390625, + "loss": 0.0751, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.954693794250488, + "rewards/margins": 5.505507946014404, + "rewards/rejected": -10.460201263427734, + "step": 9917 + }, + { + "epoch": 1.54, + "learning_rate": 6.873071217386794e-06, + "logits/chosen": -0.6621565818786621, + "logits/rejected": -2.8317952156066895, + "logps/chosen": -88.95364379882812, + "logps/rejected": -416.5210266113281, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.44459342956543, + "rewards/margins": 3.375800848007202, + "rewards/rejected": -10.820394515991211, + "step": 9918 + }, + { + "epoch": 1.54, + "learning_rate": 6.8723377768556465e-06, + "logits/chosen": -3.072441577911377, + "logits/rejected": -3.0662779808044434, + "logps/chosen": -169.67018127441406, + "logps/rejected": -217.01812744140625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.69254732131958, + "rewards/margins": 8.626998901367188, + "rewards/rejected": -12.31954574584961, + "step": 9919 + }, + { + "epoch": 1.54, + "learning_rate": 6.871604336324498e-06, + "logits/chosen": -3.049652099609375, + "logits/rejected": -2.9680070877075195, + "logps/chosen": -200.34368896484375, + "logps/rejected": -311.93536376953125, + "loss": 0.132, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.227255821228027, + "rewards/margins": 4.37501335144043, + "rewards/rejected": -10.602269172668457, + "step": 9920 + }, + { + "epoch": 1.54, + "learning_rate": 6.87087089579335e-06, + "logits/chosen": -2.6422173976898193, + "logits/rejected": -1.346516489982605, + "logps/chosen": -292.4593811035156, + "logps/rejected": -232.01544189453125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.518851280212402, + "rewards/margins": 6.869596481323242, + "rewards/rejected": -13.388446807861328, + "step": 9921 + }, + { + "epoch": 1.54, + "learning_rate": 6.870137455262202e-06, + "logits/chosen": -2.9292478561401367, + "logits/rejected": -2.937938690185547, + "logps/chosen": -362.119140625, + "logps/rejected": -377.2321472167969, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7246627807617188, + "rewards/margins": 5.990971565246582, + "rewards/rejected": -9.7156343460083, + "step": 9922 + }, + { + "epoch": 1.54, + "learning_rate": 6.869404014731054e-06, + "logits/chosen": -2.9817185401916504, + "logits/rejected": -2.9771056175231934, + "logps/chosen": -153.79351806640625, + "logps/rejected": -300.5605163574219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.42791223526001, + "rewards/margins": 9.357328414916992, + "rewards/rejected": -14.785240173339844, + "step": 9923 + }, + { + "epoch": 1.54, + "learning_rate": 6.868670574199907e-06, + "logits/chosen": -2.61905574798584, + "logits/rejected": -3.101170539855957, + "logps/chosen": -156.2898712158203, + "logps/rejected": -311.8209533691406, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.001399993896484, + "rewards/margins": 7.341772079467773, + "rewards/rejected": -17.343172073364258, + "step": 9924 + }, + { + "epoch": 1.54, + "learning_rate": 6.867937133668759e-06, + "logits/chosen": -2.561411142349243, + "logits/rejected": -2.716503620147705, + "logps/chosen": -574.1943969726562, + "logps/rejected": -705.3245239257812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7324953079223633, + "rewards/margins": 15.004239082336426, + "rewards/rejected": -17.73673439025879, + "step": 9925 + }, + { + "epoch": 1.54, + "learning_rate": 6.8672036931376105e-06, + "logits/chosen": -1.9293650388717651, + "logits/rejected": -2.741502046585083, + "logps/chosen": -226.6324462890625, + "logps/rejected": -376.59527587890625, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.040515899658203, + "rewards/margins": 8.006830215454102, + "rewards/rejected": -13.047346115112305, + "step": 9926 + }, + { + "epoch": 1.54, + "learning_rate": 6.866470252606463e-06, + "logits/chosen": -2.237119674682617, + "logits/rejected": -2.983543872833252, + "logps/chosen": -840.2482299804688, + "logps/rejected": -625.98486328125, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.315469264984131, + "rewards/margins": 6.436941146850586, + "rewards/rejected": -9.752410888671875, + "step": 9927 + }, + { + "epoch": 1.54, + "learning_rate": 6.865736812075316e-06, + "logits/chosen": -2.561511754989624, + "logits/rejected": -2.9496254920959473, + "logps/chosen": -537.1075439453125, + "logps/rejected": -588.9757080078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.552847385406494, + "rewards/margins": 9.686666488647461, + "rewards/rejected": -13.239514350891113, + "step": 9928 + }, + { + "epoch": 1.54, + "learning_rate": 6.865003371544168e-06, + "logits/chosen": -0.989713728427887, + "logits/rejected": -2.737739324569702, + "logps/chosen": -103.14352416992188, + "logps/rejected": -321.40203857421875, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.887565612792969, + "rewards/margins": 6.274466514587402, + "rewards/rejected": -15.162033081054688, + "step": 9929 + }, + { + "epoch": 1.54, + "learning_rate": 6.86426993101302e-06, + "logits/chosen": -2.68520188331604, + "logits/rejected": -1.7789028882980347, + "logps/chosen": -224.01898193359375, + "logps/rejected": -338.9559631347656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.063026428222656, + "rewards/margins": 9.473592758178711, + "rewards/rejected": -13.536619186401367, + "step": 9930 + }, + { + "epoch": 1.54, + "learning_rate": 6.8635364904818716e-06, + "logits/chosen": -1.6180028915405273, + "logits/rejected": -2.5848758220672607, + "logps/chosen": -201.57505798339844, + "logps/rejected": -497.3016662597656, + "loss": 0.3027, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9536051750183105, + "rewards/margins": 6.292684078216553, + "rewards/rejected": -12.246289253234863, + "step": 9931 + }, + { + "epoch": 1.54, + "learning_rate": 6.8628030499507235e-06, + "logits/chosen": -2.6017377376556396, + "logits/rejected": -2.0967347621917725, + "logps/chosen": -247.3621063232422, + "logps/rejected": -286.22900390625, + "loss": 0.1789, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.289612770080566, + "rewards/margins": 2.2607762813568115, + "rewards/rejected": -11.55038833618164, + "step": 9932 + }, + { + "epoch": 1.54, + "learning_rate": 6.862069609419576e-06, + "logits/chosen": -2.8815786838531494, + "logits/rejected": -1.6521800756454468, + "logps/chosen": -310.0229187011719, + "logps/rejected": -116.21047973632812, + "loss": 1.6905, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.2330985069274902, + "rewards/margins": 5.922057628631592, + "rewards/rejected": -8.155156135559082, + "step": 9933 + }, + { + "epoch": 1.54, + "learning_rate": 6.861336168888428e-06, + "logits/chosen": -2.8356547355651855, + "logits/rejected": -3.134552240371704, + "logps/chosen": -88.89665222167969, + "logps/rejected": -244.7447509765625, + "loss": 0.0872, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.21638822555542, + "rewards/margins": 2.570585012435913, + "rewards/rejected": -8.786972999572754, + "step": 9934 + }, + { + "epoch": 1.55, + "learning_rate": 6.86060272835728e-06, + "logits/chosen": -1.7804034948349, + "logits/rejected": -2.915531873703003, + "logps/chosen": -295.44512939453125, + "logps/rejected": -505.7760314941406, + "loss": 0.0337, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.789288520812988, + "rewards/margins": 3.491739273071289, + "rewards/rejected": -10.281027793884277, + "step": 9935 + }, + { + "epoch": 1.55, + "learning_rate": 6.859869287826132e-06, + "logits/chosen": -2.1170096397399902, + "logits/rejected": -2.4282689094543457, + "logps/chosen": -108.15414428710938, + "logps/rejected": -255.08575439453125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.015283584594727, + "rewards/margins": 7.659849166870117, + "rewards/rejected": -13.675132751464844, + "step": 9936 + }, + { + "epoch": 1.55, + "learning_rate": 6.8591358472949845e-06, + "logits/chosen": -2.7448601722717285, + "logits/rejected": -2.924037456512451, + "logps/chosen": -299.3708190917969, + "logps/rejected": -338.47283935546875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3092339038848877, + "rewards/margins": 9.394171714782715, + "rewards/rejected": -11.703405380249023, + "step": 9937 + }, + { + "epoch": 1.55, + "learning_rate": 6.858402406763836e-06, + "logits/chosen": -2.189547300338745, + "logits/rejected": -2.778059482574463, + "logps/chosen": -127.31800842285156, + "logps/rejected": -279.1318054199219, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.735194683074951, + "rewards/margins": 6.986030578613281, + "rewards/rejected": -11.72122573852539, + "step": 9938 + }, + { + "epoch": 1.55, + "learning_rate": 6.857668966232688e-06, + "logits/chosen": -2.5131125450134277, + "logits/rejected": -2.955507278442383, + "logps/chosen": -119.80003356933594, + "logps/rejected": -437.9176940917969, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2740108966827393, + "rewards/margins": 6.953182220458984, + "rewards/rejected": -10.227192878723145, + "step": 9939 + }, + { + "epoch": 1.55, + "learning_rate": 6.85693552570154e-06, + "logits/chosen": -2.6696064472198486, + "logits/rejected": -3.0484139919281006, + "logps/chosen": -98.17760467529297, + "logps/rejected": -383.9439697265625, + "loss": 0.0543, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.677135944366455, + "rewards/margins": 7.615840911865234, + "rewards/rejected": -12.292976379394531, + "step": 9940 + }, + { + "epoch": 1.55, + "learning_rate": 6.856202085170392e-06, + "logits/chosen": -2.916424512863159, + "logits/rejected": -2.9955081939697266, + "logps/chosen": -164.21600341796875, + "logps/rejected": -482.166259765625, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.954562187194824, + "rewards/margins": 10.570802688598633, + "rewards/rejected": -15.525365829467773, + "step": 9941 + }, + { + "epoch": 1.55, + "learning_rate": 6.855468644639245e-06, + "logits/chosen": -2.9990243911743164, + "logits/rejected": -2.19669508934021, + "logps/chosen": -232.26885986328125, + "logps/rejected": -127.5135498046875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0484619140625, + "rewards/margins": 6.1697587966918945, + "rewards/rejected": -10.218220710754395, + "step": 9942 + }, + { + "epoch": 1.55, + "learning_rate": 6.854735204108097e-06, + "logits/chosen": -3.178767204284668, + "logits/rejected": -3.0567069053649902, + "logps/chosen": -315.8228454589844, + "logps/rejected": -337.7969665527344, + "loss": 0.4821, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.500581741333008, + "rewards/margins": 0.9355611801147461, + "rewards/rejected": -8.436142921447754, + "step": 9943 + }, + { + "epoch": 1.55, + "learning_rate": 6.854001763576949e-06, + "logits/chosen": -2.945326805114746, + "logits/rejected": -2.106529712677002, + "logps/chosen": -310.67449951171875, + "logps/rejected": -384.4883117675781, + "loss": 0.4329, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.455979347229004, + "rewards/margins": 4.560555458068848, + "rewards/rejected": -9.016534805297852, + "step": 9944 + }, + { + "epoch": 1.55, + "learning_rate": 6.853268323045801e-06, + "logits/chosen": -1.613661289215088, + "logits/rejected": -2.7711098194122314, + "logps/chosen": -122.88127136230469, + "logps/rejected": -293.52996826171875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.575357437133789, + "rewards/margins": 5.9717817306518555, + "rewards/rejected": -12.547139167785645, + "step": 9945 + }, + { + "epoch": 1.55, + "learning_rate": 6.852534882514654e-06, + "logits/chosen": -2.2739779949188232, + "logits/rejected": -2.735302209854126, + "logps/chosen": -246.5078582763672, + "logps/rejected": -312.21759033203125, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.391660213470459, + "rewards/margins": 5.467937469482422, + "rewards/rejected": -9.859598159790039, + "step": 9946 + }, + { + "epoch": 1.55, + "learning_rate": 6.851801441983506e-06, + "logits/chosen": -3.060814380645752, + "logits/rejected": -2.2390222549438477, + "logps/chosen": -240.60821533203125, + "logps/rejected": -178.6807861328125, + "loss": 0.9058, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.3193206787109375, + "rewards/margins": 1.9087777137756348, + "rewards/rejected": -6.2280988693237305, + "step": 9947 + }, + { + "epoch": 1.55, + "learning_rate": 6.851068001452358e-06, + "logits/chosen": -2.6533191204071045, + "logits/rejected": -2.981437921524048, + "logps/chosen": -135.09671020507812, + "logps/rejected": -282.3482666015625, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.786406517028809, + "rewards/margins": 4.293442726135254, + "rewards/rejected": -12.079849243164062, + "step": 9948 + }, + { + "epoch": 1.55, + "learning_rate": 6.85033456092121e-06, + "logits/chosen": -2.504405975341797, + "logits/rejected": -3.0125975608825684, + "logps/chosen": -209.672119140625, + "logps/rejected": -359.0545654296875, + "loss": 0.159, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.45328426361084, + "rewards/margins": 2.543433427810669, + "rewards/rejected": -7.996717929840088, + "step": 9949 + }, + { + "epoch": 1.55, + "learning_rate": 6.8496011203900615e-06, + "logits/chosen": -1.488723635673523, + "logits/rejected": -2.892657518386841, + "logps/chosen": -130.14511108398438, + "logps/rejected": -338.65374755859375, + "loss": 0.6651, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.719292163848877, + "rewards/margins": 4.286298751831055, + "rewards/rejected": -10.005590438842773, + "step": 9950 + }, + { + "epoch": 1.55, + "learning_rate": 6.848867679858914e-06, + "logits/chosen": -2.4245924949645996, + "logits/rejected": -2.9974687099456787, + "logps/chosen": -124.39795684814453, + "logps/rejected": -427.84228515625, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.224521160125732, + "rewards/margins": 6.013157844543457, + "rewards/rejected": -11.237678527832031, + "step": 9951 + }, + { + "epoch": 1.55, + "learning_rate": 6.848134239327766e-06, + "logits/chosen": -2.4590859413146973, + "logits/rejected": -2.980854034423828, + "logps/chosen": -172.4755401611328, + "logps/rejected": -149.4383544921875, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.328188419342041, + "rewards/margins": 3.120436191558838, + "rewards/rejected": -7.448624610900879, + "step": 9952 + }, + { + "epoch": 1.55, + "learning_rate": 6.847400798796618e-06, + "logits/chosen": -2.569570302963257, + "logits/rejected": -2.884263515472412, + "logps/chosen": -188.0296630859375, + "logps/rejected": -565.1151733398438, + "loss": 2.1774, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.038731575012207, + "rewards/margins": -1.1490960121154785, + "rewards/rejected": -5.8896355628967285, + "step": 9953 + }, + { + "epoch": 1.55, + "learning_rate": 6.84666735826547e-06, + "logits/chosen": -2.0990991592407227, + "logits/rejected": -2.422513484954834, + "logps/chosen": -466.08685302734375, + "logps/rejected": -578.334716796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.413945198059082, + "rewards/margins": 9.42888355255127, + "rewards/rejected": -11.842828750610352, + "step": 9954 + }, + { + "epoch": 1.55, + "learning_rate": 6.845933917734323e-06, + "logits/chosen": -2.8252832889556885, + "logits/rejected": -2.600311756134033, + "logps/chosen": -224.82460021972656, + "logps/rejected": -300.67236328125, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.483443260192871, + "rewards/margins": 5.643950462341309, + "rewards/rejected": -10.12739372253418, + "step": 9955 + }, + { + "epoch": 1.55, + "learning_rate": 6.8452004772031745e-06, + "logits/chosen": -2.5071542263031006, + "logits/rejected": -2.7875254154205322, + "logps/chosen": -91.9551773071289, + "logps/rejected": -369.88067626953125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5261712074279785, + "rewards/margins": 9.278825759887695, + "rewards/rejected": -12.804996490478516, + "step": 9956 + }, + { + "epoch": 1.55, + "learning_rate": 6.844467036672026e-06, + "logits/chosen": -2.9907870292663574, + "logits/rejected": -2.7301182746887207, + "logps/chosen": -313.9571838378906, + "logps/rejected": -357.9097595214844, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8364593982696533, + "rewards/margins": 8.048179626464844, + "rewards/rejected": -9.884639739990234, + "step": 9957 + }, + { + "epoch": 1.55, + "learning_rate": 6.843733596140878e-06, + "logits/chosen": -2.7976298332214355, + "logits/rejected": -2.9345102310180664, + "logps/chosen": -138.73846435546875, + "logps/rejected": -321.0180358886719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2672014236450195, + "rewards/margins": 8.844179153442383, + "rewards/rejected": -14.111381530761719, + "step": 9958 + }, + { + "epoch": 1.55, + "learning_rate": 6.843000155609731e-06, + "logits/chosen": -2.900419235229492, + "logits/rejected": -2.150348663330078, + "logps/chosen": -368.88623046875, + "logps/rejected": -305.6337585449219, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.605898380279541, + "rewards/margins": 5.5691237449646, + "rewards/rejected": -9.17502212524414, + "step": 9959 + }, + { + "epoch": 1.55, + "learning_rate": 6.842266715078583e-06, + "logits/chosen": -2.122727632522583, + "logits/rejected": -2.982165575027466, + "logps/chosen": -243.85296630859375, + "logps/rejected": -352.2213134765625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4396133422851562, + "rewards/margins": 7.395127773284912, + "rewards/rejected": -10.834741592407227, + "step": 9960 + }, + { + "epoch": 1.55, + "learning_rate": 6.8415332745474355e-06, + "logits/chosen": -1.4694446325302124, + "logits/rejected": -2.596869707107544, + "logps/chosen": -179.95068359375, + "logps/rejected": -274.31561279296875, + "loss": 0.182, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.775646209716797, + "rewards/margins": 5.075351715087891, + "rewards/rejected": -11.850997924804688, + "step": 9961 + }, + { + "epoch": 1.55, + "learning_rate": 6.840799834016287e-06, + "logits/chosen": -1.9142601490020752, + "logits/rejected": -2.7231926918029785, + "logps/chosen": -221.02828979492188, + "logps/rejected": -240.12416076660156, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.959622859954834, + "rewards/margins": 7.231842041015625, + "rewards/rejected": -12.1914644241333, + "step": 9962 + }, + { + "epoch": 1.55, + "learning_rate": 6.840066393485139e-06, + "logits/chosen": -2.7063310146331787, + "logits/rejected": -1.5361838340759277, + "logps/chosen": -277.6756286621094, + "logps/rejected": -168.33438110351562, + "loss": 0.3486, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.683765411376953, + "rewards/margins": 1.9162001609802246, + "rewards/rejected": -7.599965572357178, + "step": 9963 + }, + { + "epoch": 1.55, + "learning_rate": 6.839332952953992e-06, + "logits/chosen": -2.8382012844085693, + "logits/rejected": -2.382721424102783, + "logps/chosen": -271.6365051269531, + "logps/rejected": -224.0469970703125, + "loss": 0.5058, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.264701843261719, + "rewards/margins": 0.41995692253112793, + "rewards/rejected": -6.684659004211426, + "step": 9964 + }, + { + "epoch": 1.55, + "learning_rate": 6.838599512422844e-06, + "logits/chosen": -2.201171875, + "logits/rejected": -2.960995674133301, + "logps/chosen": -477.3985595703125, + "logps/rejected": -669.7437744140625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.408592224121094, + "rewards/margins": 7.198211669921875, + "rewards/rejected": -11.606803894042969, + "step": 9965 + }, + { + "epoch": 1.55, + "learning_rate": 6.837866071891696e-06, + "logits/chosen": -2.6535797119140625, + "logits/rejected": -2.2781431674957275, + "logps/chosen": -158.29833984375, + "logps/rejected": -398.18475341796875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6866707801818848, + "rewards/margins": 9.021692276000977, + "rewards/rejected": -12.70836353302002, + "step": 9966 + }, + { + "epoch": 1.55, + "learning_rate": 6.837132631360548e-06, + "logits/chosen": -2.969825029373169, + "logits/rejected": -2.6089625358581543, + "logps/chosen": -439.4208679199219, + "logps/rejected": -485.85296630859375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.569591522216797, + "rewards/margins": 6.936602592468262, + "rewards/rejected": -12.506193161010742, + "step": 9967 + }, + { + "epoch": 1.55, + "learning_rate": 6.8363991908294e-06, + "logits/chosen": -1.7019017934799194, + "logits/rejected": -2.7994394302368164, + "logps/chosen": -266.62799072265625, + "logps/rejected": -483.6663818359375, + "loss": 0.8226, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.544116020202637, + "rewards/margins": 3.3337254524230957, + "rewards/rejected": -9.87784194946289, + "step": 9968 + }, + { + "epoch": 1.55, + "learning_rate": 6.835665750298252e-06, + "logits/chosen": -2.684971570968628, + "logits/rejected": -1.905941128730774, + "logps/chosen": -329.8622131347656, + "logps/rejected": -149.87045288085938, + "loss": 1.6335, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.349145889282227, + "rewards/margins": -0.2698073387145996, + "rewards/rejected": -10.079339027404785, + "step": 9969 + }, + { + "epoch": 1.55, + "learning_rate": 6.834932309767104e-06, + "logits/chosen": -2.945206642150879, + "logits/rejected": -3.045109272003174, + "logps/chosen": -111.78754425048828, + "logps/rejected": -357.1878662109375, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.051272392272949, + "rewards/margins": 3.672168254852295, + "rewards/rejected": -8.723441123962402, + "step": 9970 + }, + { + "epoch": 1.55, + "learning_rate": 6.834198869235956e-06, + "logits/chosen": -2.6265487670898438, + "logits/rejected": -2.701448678970337, + "logps/chosen": -122.93807220458984, + "logps/rejected": -373.97967529296875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.846909999847412, + "rewards/margins": 8.114126205444336, + "rewards/rejected": -12.961036682128906, + "step": 9971 + }, + { + "epoch": 1.55, + "learning_rate": 6.833465428704808e-06, + "logits/chosen": -3.0529396533966064, + "logits/rejected": -2.8135812282562256, + "logps/chosen": -216.95498657226562, + "logps/rejected": -200.5663604736328, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3944520950317383, + "rewards/margins": 8.615850448608398, + "rewards/rejected": -11.010303497314453, + "step": 9972 + }, + { + "epoch": 1.55, + "learning_rate": 6.832731988173661e-06, + "logits/chosen": -2.3438243865966797, + "logits/rejected": -2.9068338871002197, + "logps/chosen": -432.8045349121094, + "logps/rejected": -574.39501953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.924769639968872, + "rewards/margins": 9.334681510925293, + "rewards/rejected": -12.259450912475586, + "step": 9973 + }, + { + "epoch": 1.55, + "learning_rate": 6.8319985476425125e-06, + "logits/chosen": -2.978335380554199, + "logits/rejected": -2.463550329208374, + "logps/chosen": -181.62277221679688, + "logps/rejected": -247.41310119628906, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.29881477355957, + "rewards/margins": 4.621283531188965, + "rewards/rejected": -10.920098304748535, + "step": 9974 + }, + { + "epoch": 1.55, + "learning_rate": 6.831265107111364e-06, + "logits/chosen": -2.29105806350708, + "logits/rejected": -2.9105310440063477, + "logps/chosen": -145.36517333984375, + "logps/rejected": -241.69561767578125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.270196437835693, + "rewards/margins": 8.03847885131836, + "rewards/rejected": -13.308674812316895, + "step": 9975 + }, + { + "epoch": 1.55, + "learning_rate": 6.830531666580216e-06, + "logits/chosen": -2.6424367427825928, + "logits/rejected": -2.9807848930358887, + "logps/chosen": -285.0759582519531, + "logps/rejected": -266.73150634765625, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.448493003845215, + "rewards/margins": 4.790433883666992, + "rewards/rejected": -10.238926887512207, + "step": 9976 + }, + { + "epoch": 1.55, + "learning_rate": 6.829798226049069e-06, + "logits/chosen": -3.0364298820495605, + "logits/rejected": -2.9940099716186523, + "logps/chosen": -166.60308837890625, + "logps/rejected": -151.2073516845703, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.223566055297852, + "rewards/margins": 4.1784586906433105, + "rewards/rejected": -10.40202522277832, + "step": 9977 + }, + { + "epoch": 1.55, + "learning_rate": 6.829064785517922e-06, + "logits/chosen": -0.9926164746284485, + "logits/rejected": -1.8707666397094727, + "logps/chosen": -258.49560546875, + "logps/rejected": -378.04376220703125, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.142592906951904, + "rewards/margins": 10.579010963439941, + "rewards/rejected": -16.721603393554688, + "step": 9978 + }, + { + "epoch": 1.55, + "learning_rate": 6.828331344986774e-06, + "logits/chosen": -2.507977247238159, + "logits/rejected": -2.0029709339141846, + "logps/chosen": -236.54855346679688, + "logps/rejected": -339.70611572265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.369113922119141, + "rewards/margins": 8.219768524169922, + "rewards/rejected": -12.588882446289062, + "step": 9979 + }, + { + "epoch": 1.55, + "learning_rate": 6.8275979044556255e-06, + "logits/chosen": -2.880133867263794, + "logits/rejected": -1.6192331314086914, + "logps/chosen": -378.6502685546875, + "logps/rejected": -249.46630859375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.601444959640503, + "rewards/margins": 6.387862205505371, + "rewards/rejected": -9.989307403564453, + "step": 9980 + }, + { + "epoch": 1.55, + "learning_rate": 6.826864463924477e-06, + "logits/chosen": -2.8331408500671387, + "logits/rejected": -2.7478885650634766, + "logps/chosen": -1053.197509765625, + "logps/rejected": -752.533203125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.615386962890625, + "rewards/margins": 7.08870267868042, + "rewards/rejected": -11.704090118408203, + "step": 9981 + }, + { + "epoch": 1.55, + "learning_rate": 6.82613102339333e-06, + "logits/chosen": -2.701815366744995, + "logits/rejected": -1.1278332471847534, + "logps/chosen": -336.67852783203125, + "logps/rejected": -192.9906768798828, + "loss": 2.6864, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.35311508178711, + "rewards/margins": -1.9916410446166992, + "rewards/rejected": -7.36147403717041, + "step": 9982 + }, + { + "epoch": 1.55, + "learning_rate": 6.825397582862182e-06, + "logits/chosen": -2.48836350440979, + "logits/rejected": -2.9769387245178223, + "logps/chosen": -345.6399230957031, + "logps/rejected": -507.3611145019531, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.504834175109863, + "rewards/margins": 5.70703125, + "rewards/rejected": -11.211865425109863, + "step": 9983 + }, + { + "epoch": 1.55, + "learning_rate": 6.824664142331034e-06, + "logits/chosen": -2.208977222442627, + "logits/rejected": -2.405010938644409, + "logps/chosen": -228.7126922607422, + "logps/rejected": -387.67449951171875, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.193818092346191, + "rewards/margins": 6.106330394744873, + "rewards/rejected": -12.300148963928223, + "step": 9984 + }, + { + "epoch": 1.55, + "learning_rate": 6.823930701799886e-06, + "logits/chosen": -3.086376905441284, + "logits/rejected": -2.6472465991973877, + "logps/chosen": -116.49969482421875, + "logps/rejected": -184.0098876953125, + "loss": 1.5248, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.523794174194336, + "rewards/margins": 2.9468588829040527, + "rewards/rejected": -8.47065258026123, + "step": 9985 + }, + { + "epoch": 1.55, + "learning_rate": 6.8231972612687384e-06, + "logits/chosen": -2.411013126373291, + "logits/rejected": -2.928307056427002, + "logps/chosen": -67.2830810546875, + "logps/rejected": -277.00286865234375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.567883014678955, + "rewards/margins": 6.057806968688965, + "rewards/rejected": -11.625690460205078, + "step": 9986 + }, + { + "epoch": 1.55, + "learning_rate": 6.82246382073759e-06, + "logits/chosen": -2.591817617416382, + "logits/rejected": -2.8363876342773438, + "logps/chosen": -150.71810913085938, + "logps/rejected": -401.4913635253906, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.506189346313477, + "rewards/margins": 6.124546527862549, + "rewards/rejected": -10.630735397338867, + "step": 9987 + }, + { + "epoch": 1.55, + "learning_rate": 6.821730380206442e-06, + "logits/chosen": -3.0538201332092285, + "logits/rejected": -2.693463087081909, + "logps/chosen": -431.9081115722656, + "logps/rejected": -349.5584411621094, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.077363967895508, + "rewards/margins": 6.855466842651367, + "rewards/rejected": -9.932830810546875, + "step": 9988 + }, + { + "epoch": 1.55, + "learning_rate": 6.820996939675294e-06, + "logits/chosen": -2.972069025039673, + "logits/rejected": -2.997697353363037, + "logps/chosen": -184.373046875, + "logps/rejected": -242.40777587890625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.395845651626587, + "rewards/margins": 5.984469890594482, + "rewards/rejected": -9.380315780639648, + "step": 9989 + }, + { + "epoch": 1.55, + "learning_rate": 6.820263499144146e-06, + "logits/chosen": -2.4969048500061035, + "logits/rejected": -3.1031296253204346, + "logps/chosen": -96.94255828857422, + "logps/rejected": -325.130615234375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.41869854927063, + "rewards/margins": 8.385869026184082, + "rewards/rejected": -10.804567337036133, + "step": 9990 + }, + { + "epoch": 1.55, + "learning_rate": 6.819530058612999e-06, + "logits/chosen": -2.5619616508483887, + "logits/rejected": -2.834592819213867, + "logps/chosen": -126.85603332519531, + "logps/rejected": -229.39456176757812, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.073848009109497, + "rewards/margins": 5.112332344055176, + "rewards/rejected": -8.186180114746094, + "step": 9991 + }, + { + "epoch": 1.55, + "learning_rate": 6.8187966180818505e-06, + "logits/chosen": -3.0367348194122314, + "logits/rejected": -3.0805623531341553, + "logps/chosen": -141.56259155273438, + "logps/rejected": -122.03509521484375, + "loss": 0.3739, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.627878665924072, + "rewards/margins": 1.4206827878952026, + "rewards/rejected": -6.0485615730285645, + "step": 9992 + }, + { + "epoch": 1.55, + "learning_rate": 6.8180631775507024e-06, + "logits/chosen": -2.8418214321136475, + "logits/rejected": -2.997591495513916, + "logps/chosen": -516.3505249023438, + "logps/rejected": -469.25823974609375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.245507836341858, + "rewards/margins": 8.914323806762695, + "rewards/rejected": -10.159832000732422, + "step": 9993 + }, + { + "epoch": 1.55, + "learning_rate": 6.817329737019555e-06, + "logits/chosen": -1.3795063495635986, + "logits/rejected": -2.2895796298980713, + "logps/chosen": -112.12312316894531, + "logps/rejected": -290.64874267578125, + "loss": 0.0471, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.45531177520752, + "rewards/margins": 6.720335483551025, + "rewards/rejected": -15.175647735595703, + "step": 9994 + }, + { + "epoch": 1.55, + "learning_rate": 6.816596296488408e-06, + "logits/chosen": -1.2809898853302002, + "logits/rejected": -2.668198823928833, + "logps/chosen": -121.45603942871094, + "logps/rejected": -422.57061767578125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.124283313751221, + "rewards/margins": 8.041171073913574, + "rewards/rejected": -14.165454864501953, + "step": 9995 + }, + { + "epoch": 1.55, + "learning_rate": 6.81586285595726e-06, + "logits/chosen": -2.840628147125244, + "logits/rejected": -1.1226414442062378, + "logps/chosen": -249.54502868652344, + "logps/rejected": -252.14599609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0678067207336426, + "rewards/margins": 9.83822250366211, + "rewards/rejected": -12.906028747558594, + "step": 9996 + }, + { + "epoch": 1.55, + "learning_rate": 6.815129415426112e-06, + "logits/chosen": -2.9265780448913574, + "logits/rejected": -2.832817316055298, + "logps/chosen": -91.53621673583984, + "logps/rejected": -304.38360595703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.33635950088501, + "rewards/margins": 8.405464172363281, + "rewards/rejected": -13.741823196411133, + "step": 9997 + }, + { + "epoch": 1.55, + "learning_rate": 6.8143959748949635e-06, + "logits/chosen": -1.928175926208496, + "logits/rejected": -2.774878740310669, + "logps/chosen": -190.42636108398438, + "logps/rejected": -283.41900634765625, + "loss": 1.5452, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.55208683013916, + "rewards/margins": 2.1198465824127197, + "rewards/rejected": -9.6719331741333, + "step": 9998 + }, + { + "epoch": 1.56, + "learning_rate": 6.813662534363815e-06, + "logits/chosen": -2.8125483989715576, + "logits/rejected": -1.6217797994613647, + "logps/chosen": -388.9554138183594, + "logps/rejected": -340.3733825683594, + "loss": 1.6675, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.4281697273254395, + "rewards/margins": 5.1758832931518555, + "rewards/rejected": -12.604052543640137, + "step": 9999 + }, + { + "epoch": 1.56, + "learning_rate": 6.812929093832668e-06, + "logits/chosen": -3.032763719558716, + "logits/rejected": -3.1678357124328613, + "logps/chosen": -75.54530334472656, + "logps/rejected": -205.6185302734375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.597797870635986, + "rewards/margins": 6.186711311340332, + "rewards/rejected": -10.784509658813477, + "step": 10000 + }, + { + "epoch": 1.56, + "learning_rate": 6.81219565330152e-06, + "logits/chosen": -1.108549952507019, + "logits/rejected": -2.4622418880462646, + "logps/chosen": -148.74960327148438, + "logps/rejected": -517.6104736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5935299396514893, + "rewards/margins": 10.29830551147461, + "rewards/rejected": -13.89183521270752, + "step": 10001 + }, + { + "epoch": 1.56, + "learning_rate": 6.811462212770372e-06, + "logits/chosen": -2.88653564453125, + "logits/rejected": -2.946166515350342, + "logps/chosen": -277.61077880859375, + "logps/rejected": -244.95761108398438, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3146591186523438, + "rewards/margins": 5.725377559661865, + "rewards/rejected": -9.040037155151367, + "step": 10002 + }, + { + "epoch": 1.56, + "learning_rate": 6.810728772239224e-06, + "logits/chosen": -2.4383952617645264, + "logits/rejected": -2.739098310470581, + "logps/chosen": -312.11981201171875, + "logps/rejected": -431.6931457519531, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.027073860168457, + "rewards/margins": 5.6748456954956055, + "rewards/rejected": -10.701919555664062, + "step": 10003 + }, + { + "epoch": 1.56, + "learning_rate": 6.8099953317080765e-06, + "logits/chosen": -1.7276802062988281, + "logits/rejected": -2.8381261825561523, + "logps/chosen": -151.02914428710938, + "logps/rejected": -512.3834228515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0634639263153076, + "rewards/margins": 9.86317253112793, + "rewards/rejected": -12.9266357421875, + "step": 10004 + }, + { + "epoch": 1.56, + "learning_rate": 6.809261891176928e-06, + "logits/chosen": -2.0428202152252197, + "logits/rejected": -1.9079803228378296, + "logps/chosen": -142.16632080078125, + "logps/rejected": -300.5330810546875, + "loss": 0.053, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.821897029876709, + "rewards/margins": 9.610334396362305, + "rewards/rejected": -15.432231903076172, + "step": 10005 + }, + { + "epoch": 1.56, + "learning_rate": 6.80852845064578e-06, + "logits/chosen": -2.818146228790283, + "logits/rejected": -2.7168471813201904, + "logps/chosen": -245.44439697265625, + "logps/rejected": -237.00479125976562, + "loss": 0.9552, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.449804306030273, + "rewards/margins": 1.6477786302566528, + "rewards/rejected": -7.097582817077637, + "step": 10006 + }, + { + "epoch": 1.56, + "learning_rate": 6.807795010114632e-06, + "logits/chosen": -3.012939214706421, + "logits/rejected": -2.3008692264556885, + "logps/chosen": -160.68875122070312, + "logps/rejected": -140.20339965820312, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8110134601593018, + "rewards/margins": 8.04754638671875, + "rewards/rejected": -10.858560562133789, + "step": 10007 + }, + { + "epoch": 1.56, + "learning_rate": 6.807061569583485e-06, + "logits/chosen": -1.755478024482727, + "logits/rejected": -2.9354896545410156, + "logps/chosen": -168.96917724609375, + "logps/rejected": -617.46240234375, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.065341949462891, + "rewards/margins": 9.00306224822998, + "rewards/rejected": -15.068403244018555, + "step": 10008 + }, + { + "epoch": 1.56, + "learning_rate": 6.806328129052337e-06, + "logits/chosen": -2.925867795944214, + "logits/rejected": -2.4593591690063477, + "logps/chosen": -493.28338623046875, + "logps/rejected": -483.63836669921875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8118317127227783, + "rewards/margins": 8.684903144836426, + "rewards/rejected": -12.496734619140625, + "step": 10009 + }, + { + "epoch": 1.56, + "learning_rate": 6.805594688521189e-06, + "logits/chosen": -2.6212453842163086, + "logits/rejected": -2.2347819805145264, + "logps/chosen": -194.01138305664062, + "logps/rejected": -325.8416748046875, + "loss": 1.5068, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.7793073654174805, + "rewards/margins": 2.823415517807007, + "rewards/rejected": -10.602723121643066, + "step": 10010 + }, + { + "epoch": 1.56, + "learning_rate": 6.804861247990041e-06, + "logits/chosen": -0.7373389005661011, + "logits/rejected": -2.66386342048645, + "logps/chosen": -95.76708984375, + "logps/rejected": -323.6502380371094, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.26662540435791, + "rewards/margins": 6.4198455810546875, + "rewards/rejected": -10.686470031738281, + "step": 10011 + }, + { + "epoch": 1.56, + "learning_rate": 6.804127807458893e-06, + "logits/chosen": -2.833649158477783, + "logits/rejected": -2.1644797325134277, + "logps/chosen": -384.9815673828125, + "logps/rejected": -276.7674560546875, + "loss": 2.7236, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.851030349731445, + "rewards/margins": -1.0156762599945068, + "rewards/rejected": -4.835353851318359, + "step": 10012 + }, + { + "epoch": 1.56, + "learning_rate": 6.803394366927746e-06, + "logits/chosen": -2.3975415229797363, + "logits/rejected": -2.7502315044403076, + "logps/chosen": -269.47662353515625, + "logps/rejected": -372.10272216796875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8867111206054688, + "rewards/margins": 6.944366455078125, + "rewards/rejected": -10.831077575683594, + "step": 10013 + }, + { + "epoch": 1.56, + "learning_rate": 6.802660926396598e-06, + "logits/chosen": -2.9371039867401123, + "logits/rejected": -1.7745369672775269, + "logps/chosen": -773.82177734375, + "logps/rejected": -501.8360595703125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.25042724609375, + "rewards/margins": 6.586275577545166, + "rewards/rejected": -11.836702346801758, + "step": 10014 + }, + { + "epoch": 1.56, + "learning_rate": 6.80192748586545e-06, + "logits/chosen": -1.708747148513794, + "logits/rejected": -2.6173954010009766, + "logps/chosen": -238.4136962890625, + "logps/rejected": -375.1545104980469, + "loss": 0.0848, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.189565658569336, + "rewards/margins": 4.32282829284668, + "rewards/rejected": -11.512393951416016, + "step": 10015 + }, + { + "epoch": 1.56, + "learning_rate": 6.8011940453343016e-06, + "logits/chosen": -1.6041868925094604, + "logits/rejected": -2.927769422531128, + "logps/chosen": -184.48941040039062, + "logps/rejected": -439.0224304199219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8439478874206543, + "rewards/margins": 9.577230453491211, + "rewards/rejected": -13.421178817749023, + "step": 10016 + }, + { + "epoch": 1.56, + "learning_rate": 6.800460604803154e-06, + "logits/chosen": -2.714064836502075, + "logits/rejected": -1.2293756008148193, + "logps/chosen": -202.19754028320312, + "logps/rejected": -94.73332977294922, + "loss": 0.9125, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.404205322265625, + "rewards/margins": 2.536857843399048, + "rewards/rejected": -7.941062927246094, + "step": 10017 + }, + { + "epoch": 1.56, + "learning_rate": 6.799727164272006e-06, + "logits/chosen": -1.7513153553009033, + "logits/rejected": -2.7338764667510986, + "logps/chosen": -239.7645721435547, + "logps/rejected": -345.71734619140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6628851890563965, + "rewards/margins": 7.865719318389893, + "rewards/rejected": -11.528604507446289, + "step": 10018 + }, + { + "epoch": 1.56, + "learning_rate": 6.798993723740858e-06, + "logits/chosen": -2.8956093788146973, + "logits/rejected": -2.6963367462158203, + "logps/chosen": -279.8154296875, + "logps/rejected": -338.75390625, + "loss": 0.0364, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6375136375427246, + "rewards/margins": 7.7735466957092285, + "rewards/rejected": -10.411060333251953, + "step": 10019 + }, + { + "epoch": 1.56, + "learning_rate": 6.79826028320971e-06, + "logits/chosen": -3.0136876106262207, + "logits/rejected": -2.0641093254089355, + "logps/chosen": -214.20333862304688, + "logps/rejected": -227.55233764648438, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.243178606033325, + "rewards/margins": 5.881843566894531, + "rewards/rejected": -9.125021934509277, + "step": 10020 + }, + { + "epoch": 1.56, + "learning_rate": 6.797526842678562e-06, + "logits/chosen": -2.4108550548553467, + "logits/rejected": -2.4150800704956055, + "logps/chosen": -181.55267333984375, + "logps/rejected": -317.3475341796875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.292110443115234, + "rewards/margins": 7.5663228034973145, + "rewards/rejected": -12.85843276977539, + "step": 10021 + }, + { + "epoch": 1.56, + "learning_rate": 6.7967934021474145e-06, + "logits/chosen": -2.7970781326293945, + "logits/rejected": -2.3498547077178955, + "logps/chosen": -436.48333740234375, + "logps/rejected": -431.01507568359375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9700615406036377, + "rewards/margins": 7.730022430419922, + "rewards/rejected": -11.700084686279297, + "step": 10022 + }, + { + "epoch": 1.56, + "learning_rate": 6.796059961616266e-06, + "logits/chosen": -2.892301559448242, + "logits/rejected": -2.6495728492736816, + "logps/chosen": -499.28411865234375, + "logps/rejected": -440.6036376953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0590343475341797, + "rewards/margins": 7.9829792976379395, + "rewards/rejected": -11.042013168334961, + "step": 10023 + }, + { + "epoch": 1.56, + "learning_rate": 6.795326521085118e-06, + "logits/chosen": -1.8487064838409424, + "logits/rejected": -3.1514673233032227, + "logps/chosen": -120.37269592285156, + "logps/rejected": -569.6923828125, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.851252555847168, + "rewards/margins": 4.705347061157227, + "rewards/rejected": -10.556599617004395, + "step": 10024 + }, + { + "epoch": 1.56, + "learning_rate": 6.79459308055397e-06, + "logits/chosen": -3.025693893432617, + "logits/rejected": -3.1915838718414307, + "logps/chosen": -235.79934692382812, + "logps/rejected": -433.400390625, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.585071563720703, + "rewards/margins": 5.667109966278076, + "rewards/rejected": -10.252182006835938, + "step": 10025 + }, + { + "epoch": 1.56, + "learning_rate": 6.793859640022823e-06, + "logits/chosen": -3.0982766151428223, + "logits/rejected": -2.5567591190338135, + "logps/chosen": -336.1053466796875, + "logps/rejected": -383.5401306152344, + "loss": 0.0438, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.28990364074707, + "rewards/margins": 3.6250882148742676, + "rewards/rejected": -8.914992332458496, + "step": 10026 + }, + { + "epoch": 1.56, + "learning_rate": 6.793126199491675e-06, + "logits/chosen": -2.7661805152893066, + "logits/rejected": -1.8778752088546753, + "logps/chosen": -257.8396301269531, + "logps/rejected": -245.20675659179688, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7929162979125977, + "rewards/margins": 6.080422401428223, + "rewards/rejected": -8.87333869934082, + "step": 10027 + }, + { + "epoch": 1.56, + "learning_rate": 6.792392758960527e-06, + "logits/chosen": -2.8731179237365723, + "logits/rejected": -1.6914448738098145, + "logps/chosen": -532.596923828125, + "logps/rejected": -604.8560180664062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.325987339019775, + "rewards/margins": 10.639240264892578, + "rewards/rejected": -14.965227127075195, + "step": 10028 + }, + { + "epoch": 1.56, + "learning_rate": 6.791659318429379e-06, + "logits/chosen": -2.6840784549713135, + "logits/rejected": -2.405987501144409, + "logps/chosen": -148.9261474609375, + "logps/rejected": -336.3541259765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.088348388671875, + "rewards/margins": 8.888527870178223, + "rewards/rejected": -12.976876258850098, + "step": 10029 + }, + { + "epoch": 1.56, + "learning_rate": 6.790925877898231e-06, + "logits/chosen": -1.734894871711731, + "logits/rejected": -2.5750887393951416, + "logps/chosen": -161.51837158203125, + "logps/rejected": -406.16436767578125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6798839569091797, + "rewards/margins": 8.36335277557373, + "rewards/rejected": -12.04323673248291, + "step": 10030 + }, + { + "epoch": 1.56, + "learning_rate": 6.790192437367084e-06, + "logits/chosen": -3.200716018676758, + "logits/rejected": -2.508746385574341, + "logps/chosen": -706.1513061523438, + "logps/rejected": -367.28680419921875, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.168983459472656, + "rewards/margins": 5.700403213500977, + "rewards/rejected": -11.869386672973633, + "step": 10031 + }, + { + "epoch": 1.56, + "learning_rate": 6.789458996835936e-06, + "logits/chosen": -2.5191843509674072, + "logits/rejected": -3.0144600868225098, + "logps/chosen": -163.69607543945312, + "logps/rejected": -456.318603515625, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2303361892700195, + "rewards/margins": 4.417048454284668, + "rewards/rejected": -10.647384643554688, + "step": 10032 + }, + { + "epoch": 1.56, + "learning_rate": 6.788725556304788e-06, + "logits/chosen": -1.394781470298767, + "logits/rejected": -2.807804584503174, + "logps/chosen": -132.70010375976562, + "logps/rejected": -429.5658264160156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8629541397094727, + "rewards/margins": 12.07393741607666, + "rewards/rejected": -14.936891555786133, + "step": 10033 + }, + { + "epoch": 1.56, + "learning_rate": 6.78799211577364e-06, + "logits/chosen": -2.3791935443878174, + "logits/rejected": -2.914853572845459, + "logps/chosen": -112.34537506103516, + "logps/rejected": -228.5376434326172, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.774763345718384, + "rewards/margins": 6.128303527832031, + "rewards/rejected": -9.903066635131836, + "step": 10034 + }, + { + "epoch": 1.56, + "learning_rate": 6.787258675242492e-06, + "logits/chosen": -2.2519309520721436, + "logits/rejected": -2.671724319458008, + "logps/chosen": -196.01458740234375, + "logps/rejected": -672.9904174804688, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.987492561340332, + "rewards/margins": 5.885327339172363, + "rewards/rejected": -12.872819900512695, + "step": 10035 + }, + { + "epoch": 1.56, + "learning_rate": 6.786525234711344e-06, + "logits/chosen": -1.5560916662216187, + "logits/rejected": -2.3179428577423096, + "logps/chosen": -77.95215606689453, + "logps/rejected": -398.64276123046875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.623025417327881, + "rewards/margins": 8.658402442932129, + "rewards/rejected": -12.281427383422852, + "step": 10036 + }, + { + "epoch": 1.56, + "learning_rate": 6.785791794180196e-06, + "logits/chosen": -2.547351598739624, + "logits/rejected": -2.757582426071167, + "logps/chosen": -147.5784149169922, + "logps/rejected": -395.5954284667969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4291260242462158, + "rewards/margins": 10.280128479003906, + "rewards/rejected": -11.709254264831543, + "step": 10037 + }, + { + "epoch": 1.56, + "learning_rate": 6.785058353649048e-06, + "logits/chosen": -3.020311117172241, + "logits/rejected": -3.0098743438720703, + "logps/chosen": -469.17657470703125, + "logps/rejected": -507.527587890625, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.622976303100586, + "rewards/margins": 5.339698791503906, + "rewards/rejected": -8.962675094604492, + "step": 10038 + }, + { + "epoch": 1.56, + "learning_rate": 6.7843249131179e-06, + "logits/chosen": -2.784968852996826, + "logits/rejected": -1.5644001960754395, + "logps/chosen": -446.9901123046875, + "logps/rejected": -1281.8380126953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.670668601989746, + "rewards/margins": 14.665685653686523, + "rewards/rejected": -21.336355209350586, + "step": 10039 + }, + { + "epoch": 1.56, + "learning_rate": 6.7835914725867526e-06, + "logits/chosen": -1.189497709274292, + "logits/rejected": -2.755615711212158, + "logps/chosen": -132.1156768798828, + "logps/rejected": -465.0921325683594, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.730876922607422, + "rewards/margins": 11.01125717163086, + "rewards/rejected": -14.742134094238281, + "step": 10040 + }, + { + "epoch": 1.56, + "learning_rate": 6.7828580320556044e-06, + "logits/chosen": -2.797826051712036, + "logits/rejected": -3.1537258625030518, + "logps/chosen": -315.7589111328125, + "logps/rejected": -526.537109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7534023523330688, + "rewards/margins": 10.688177108764648, + "rewards/rejected": -12.44157886505127, + "step": 10041 + }, + { + "epoch": 1.56, + "learning_rate": 6.782124591524456e-06, + "logits/chosen": -2.0234479904174805, + "logits/rejected": -2.6635630130767822, + "logps/chosen": -119.0814437866211, + "logps/rejected": -226.25640869140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2702317237854004, + "rewards/margins": 7.794784069061279, + "rewards/rejected": -11.06501579284668, + "step": 10042 + }, + { + "epoch": 1.56, + "learning_rate": 6.781391150993308e-06, + "logits/chosen": -2.8202879428863525, + "logits/rejected": -3.0965683460235596, + "logps/chosen": -72.96736145019531, + "logps/rejected": -107.60051727294922, + "loss": 0.8066, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.282952308654785, + "rewards/margins": 1.7634897232055664, + "rewards/rejected": -6.046442031860352, + "step": 10043 + }, + { + "epoch": 1.56, + "learning_rate": 6.780657710462161e-06, + "logits/chosen": -3.102267026901245, + "logits/rejected": -2.947199583053589, + "logps/chosen": -304.6034240722656, + "logps/rejected": -374.17279052734375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9810588359832764, + "rewards/margins": 7.766307353973389, + "rewards/rejected": -9.747365951538086, + "step": 10044 + }, + { + "epoch": 1.56, + "learning_rate": 6.779924269931013e-06, + "logits/chosen": -3.069664716720581, + "logits/rejected": -2.3747599124908447, + "logps/chosen": -304.9052429199219, + "logps/rejected": -301.6955261230469, + "loss": 0.5232, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.915694713592529, + "rewards/margins": 3.744978904724121, + "rewards/rejected": -9.660673141479492, + "step": 10045 + }, + { + "epoch": 1.56, + "learning_rate": 6.7791908293998655e-06, + "logits/chosen": -2.890441656112671, + "logits/rejected": -2.807429552078247, + "logps/chosen": -220.64004516601562, + "logps/rejected": -151.80465698242188, + "loss": 0.0345, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.108756065368652, + "rewards/margins": 3.3852148056030273, + "rewards/rejected": -7.49397087097168, + "step": 10046 + }, + { + "epoch": 1.56, + "learning_rate": 6.778457388868717e-06, + "logits/chosen": -1.9530280828475952, + "logits/rejected": -3.1459968090057373, + "logps/chosen": -62.899253845214844, + "logps/rejected": -351.24200439453125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0953993797302246, + "rewards/margins": 7.65563440322876, + "rewards/rejected": -10.751033782958984, + "step": 10047 + }, + { + "epoch": 1.56, + "learning_rate": 6.77772394833757e-06, + "logits/chosen": -2.5479393005371094, + "logits/rejected": -2.998866081237793, + "logps/chosen": -135.4427490234375, + "logps/rejected": -472.23193359375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.608725547790527, + "rewards/margins": 9.668281555175781, + "rewards/rejected": -15.277006149291992, + "step": 10048 + }, + { + "epoch": 1.56, + "learning_rate": 6.776990507806422e-06, + "logits/chosen": -1.9664678573608398, + "logits/rejected": -2.7566030025482178, + "logps/chosen": -107.91957092285156, + "logps/rejected": -372.8101806640625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.671286106109619, + "rewards/margins": 11.64654541015625, + "rewards/rejected": -16.31783103942871, + "step": 10049 + }, + { + "epoch": 1.56, + "learning_rate": 6.776257067275274e-06, + "logits/chosen": -1.7020883560180664, + "logits/rejected": -2.994400978088379, + "logps/chosen": -140.69410705566406, + "logps/rejected": -467.1188659667969, + "loss": 0.0817, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2248969078063965, + "rewards/margins": 8.358384132385254, + "rewards/rejected": -12.583280563354492, + "step": 10050 + }, + { + "epoch": 1.56, + "learning_rate": 6.775523626744126e-06, + "logits/chosen": -2.6728665828704834, + "logits/rejected": -2.8680505752563477, + "logps/chosen": -96.4988784790039, + "logps/rejected": -327.86224365234375, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.58421516418457, + "rewards/margins": 6.759469985961914, + "rewards/rejected": -11.343685150146484, + "step": 10051 + }, + { + "epoch": 1.56, + "learning_rate": 6.774790186212978e-06, + "logits/chosen": -1.8569786548614502, + "logits/rejected": -2.623725175857544, + "logps/chosen": -146.24844360351562, + "logps/rejected": -439.3916320800781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.256595611572266, + "rewards/margins": 11.222078323364258, + "rewards/rejected": -17.478673934936523, + "step": 10052 + }, + { + "epoch": 1.56, + "learning_rate": 6.77405674568183e-06, + "logits/chosen": -2.901455879211426, + "logits/rejected": -2.23865008354187, + "logps/chosen": -391.04730224609375, + "logps/rejected": -341.3290710449219, + "loss": 0.1822, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.834684371948242, + "rewards/margins": 3.6326992511749268, + "rewards/rejected": -11.46738338470459, + "step": 10053 + }, + { + "epoch": 1.56, + "learning_rate": 6.773323305150682e-06, + "logits/chosen": -1.6734639406204224, + "logits/rejected": -2.790506362915039, + "logps/chosen": -106.68999481201172, + "logps/rejected": -301.052734375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.889803647994995, + "rewards/margins": 6.796478748321533, + "rewards/rejected": -10.686283111572266, + "step": 10054 + }, + { + "epoch": 1.56, + "learning_rate": 6.772589864619534e-06, + "logits/chosen": -1.281662106513977, + "logits/rejected": -2.6492486000061035, + "logps/chosen": -200.61102294921875, + "logps/rejected": -465.390380859375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.536577224731445, + "rewards/margins": 6.342068672180176, + "rewards/rejected": -11.878645896911621, + "step": 10055 + }, + { + "epoch": 1.56, + "learning_rate": 6.771856424088386e-06, + "logits/chosen": -2.717315673828125, + "logits/rejected": -2.2716009616851807, + "logps/chosen": -229.05172729492188, + "logps/rejected": -207.10626220703125, + "loss": 0.1066, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.119193077087402, + "rewards/margins": 2.243600606918335, + "rewards/rejected": -7.362793922424316, + "step": 10056 + }, + { + "epoch": 1.56, + "learning_rate": 6.771122983557239e-06, + "logits/chosen": -2.9942972660064697, + "logits/rejected": -1.8453588485717773, + "logps/chosen": -314.28472900390625, + "logps/rejected": -92.13667297363281, + "loss": 3.61, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.59523868560791, + "rewards/margins": -3.410043954849243, + "rewards/rejected": -4.185194492340088, + "step": 10057 + }, + { + "epoch": 1.56, + "learning_rate": 6.770389543026091e-06, + "logits/chosen": -3.0798802375793457, + "logits/rejected": -2.6936514377593994, + "logps/chosen": -185.28343200683594, + "logps/rejected": -128.20663452148438, + "loss": 0.0914, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.457979679107666, + "rewards/margins": 2.9629933834075928, + "rewards/rejected": -8.42097282409668, + "step": 10058 + }, + { + "epoch": 1.56, + "learning_rate": 6.7696561024949425e-06, + "logits/chosen": -3.035982370376587, + "logits/rejected": -3.1629061698913574, + "logps/chosen": -176.5849151611328, + "logps/rejected": -277.2342834472656, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.662421703338623, + "rewards/margins": 7.939990043640137, + "rewards/rejected": -11.602411270141602, + "step": 10059 + }, + { + "epoch": 1.56, + "learning_rate": 6.768922661963794e-06, + "logits/chosen": -2.6062698364257812, + "logits/rejected": -2.7907040119171143, + "logps/chosen": -234.972412109375, + "logps/rejected": -544.3660278320312, + "loss": 0.7473, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.175622940063477, + "rewards/margins": 5.595314025878906, + "rewards/rejected": -13.770936965942383, + "step": 10060 + }, + { + "epoch": 1.56, + "learning_rate": 6.768189221432646e-06, + "logits/chosen": -2.218039035797119, + "logits/rejected": -2.7713871002197266, + "logps/chosen": -288.2574462890625, + "logps/rejected": -474.15667724609375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.661442279815674, + "rewards/margins": 7.409923553466797, + "rewards/rejected": -13.071365356445312, + "step": 10061 + }, + { + "epoch": 1.56, + "learning_rate": 6.767455780901499e-06, + "logits/chosen": -2.80826735496521, + "logits/rejected": -2.34368896484375, + "logps/chosen": -284.5206298828125, + "logps/rejected": -253.87840270996094, + "loss": 0.5915, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.718043327331543, + "rewards/margins": 0.22632932662963867, + "rewards/rejected": -7.94437313079834, + "step": 10062 + }, + { + "epoch": 1.57, + "learning_rate": 6.766722340370352e-06, + "logits/chosen": -2.04929256439209, + "logits/rejected": -2.780116558074951, + "logps/chosen": -74.78680419921875, + "logps/rejected": -239.76791381835938, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8224010467529297, + "rewards/margins": 7.720293998718262, + "rewards/rejected": -11.542695045471191, + "step": 10063 + }, + { + "epoch": 1.57, + "learning_rate": 6.7659888998392036e-06, + "logits/chosen": -2.9859235286712646, + "logits/rejected": -3.006802797317505, + "logps/chosen": -82.50492858886719, + "logps/rejected": -148.47091674804688, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.755300521850586, + "rewards/margins": 5.449358940124512, + "rewards/rejected": -11.204659461975098, + "step": 10064 + }, + { + "epoch": 1.57, + "learning_rate": 6.7652554593080555e-06, + "logits/chosen": -2.784031391143799, + "logits/rejected": -2.161717414855957, + "logps/chosen": -396.5273742675781, + "logps/rejected": -358.4205322265625, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.904367446899414, + "rewards/margins": 5.840633392333984, + "rewards/rejected": -12.745000839233398, + "step": 10065 + }, + { + "epoch": 1.57, + "learning_rate": 6.764522018776908e-06, + "logits/chosen": -3.194934368133545, + "logits/rejected": -2.9222283363342285, + "logps/chosen": -502.017822265625, + "logps/rejected": -380.0275573730469, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2589354515075684, + "rewards/margins": 4.423243522644043, + "rewards/rejected": -6.682178497314453, + "step": 10066 + }, + { + "epoch": 1.57, + "learning_rate": 6.76378857824576e-06, + "logits/chosen": -2.7071213722229004, + "logits/rejected": -2.3977417945861816, + "logps/chosen": -168.72824096679688, + "logps/rejected": -197.5464630126953, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.975558280944824, + "rewards/margins": 6.030611038208008, + "rewards/rejected": -11.006169319152832, + "step": 10067 + }, + { + "epoch": 1.57, + "learning_rate": 6.763055137714612e-06, + "logits/chosen": -2.085832357406616, + "logits/rejected": -3.0771491527557373, + "logps/chosen": -259.7980651855469, + "logps/rejected": -375.08843994140625, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.608571767807007, + "rewards/margins": 5.9034342765808105, + "rewards/rejected": -8.512005805969238, + "step": 10068 + }, + { + "epoch": 1.57, + "learning_rate": 6.762321697183464e-06, + "logits/chosen": -2.858180284500122, + "logits/rejected": -2.7773048877716064, + "logps/chosen": -657.0546875, + "logps/rejected": -510.270263671875, + "loss": 0.2513, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.725198745727539, + "rewards/margins": 1.9584589004516602, + "rewards/rejected": -7.683657646179199, + "step": 10069 + }, + { + "epoch": 1.57, + "learning_rate": 6.761588256652316e-06, + "logits/chosen": -2.7318553924560547, + "logits/rejected": -1.3095414638519287, + "logps/chosen": -335.40667724609375, + "logps/rejected": -261.91754150390625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.937973976135254, + "rewards/margins": 8.029544830322266, + "rewards/rejected": -10.96751880645752, + "step": 10070 + }, + { + "epoch": 1.57, + "learning_rate": 6.760854816121168e-06, + "logits/chosen": -2.890348434448242, + "logits/rejected": -2.3587467670440674, + "logps/chosen": -304.3951110839844, + "logps/rejected": -168.0103759765625, + "loss": 2.0411, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.906669616699219, + "rewards/margins": 1.0130126476287842, + "rewards/rejected": -6.919682502746582, + "step": 10071 + }, + { + "epoch": 1.57, + "learning_rate": 6.76012137559002e-06, + "logits/chosen": -2.8656957149505615, + "logits/rejected": -2.634692668914795, + "logps/chosen": -470.12994384765625, + "logps/rejected": -453.4686279296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.409287452697754, + "rewards/margins": 9.574481964111328, + "rewards/rejected": -11.983770370483398, + "step": 10072 + }, + { + "epoch": 1.57, + "learning_rate": 6.759387935058872e-06, + "logits/chosen": -2.7760441303253174, + "logits/rejected": -2.9655392169952393, + "logps/chosen": -252.78431701660156, + "logps/rejected": -311.6717834472656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.526467800140381, + "rewards/margins": 7.628206729888916, + "rewards/rejected": -12.154674530029297, + "step": 10073 + }, + { + "epoch": 1.57, + "learning_rate": 6.758654494527724e-06, + "logits/chosen": -2.29455828666687, + "logits/rejected": -2.305619478225708, + "logps/chosen": -964.5021362304688, + "logps/rejected": -669.6607055664062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.346240043640137, + "rewards/margins": 10.027084350585938, + "rewards/rejected": -14.37332534790039, + "step": 10074 + }, + { + "epoch": 1.57, + "learning_rate": 6.757921053996577e-06, + "logits/chosen": -2.8945162296295166, + "logits/rejected": -2.051483392715454, + "logps/chosen": -386.37213134765625, + "logps/rejected": -331.3546142578125, + "loss": 0.1325, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.342817783355713, + "rewards/margins": 3.7972214221954346, + "rewards/rejected": -9.140039443969727, + "step": 10075 + }, + { + "epoch": 1.57, + "learning_rate": 6.757187613465429e-06, + "logits/chosen": -2.2466187477111816, + "logits/rejected": -2.5998659133911133, + "logps/chosen": -195.29782104492188, + "logps/rejected": -347.9838562011719, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.660323143005371, + "rewards/margins": 9.16978645324707, + "rewards/rejected": -13.830110549926758, + "step": 10076 + }, + { + "epoch": 1.57, + "learning_rate": 6.7564541729342805e-06, + "logits/chosen": -2.691807270050049, + "logits/rejected": -2.997248649597168, + "logps/chosen": -207.6209716796875, + "logps/rejected": -175.23153686523438, + "loss": 0.0873, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1301684379577637, + "rewards/margins": 3.589174270629883, + "rewards/rejected": -6.719342231750488, + "step": 10077 + }, + { + "epoch": 1.57, + "learning_rate": 6.755720732403132e-06, + "logits/chosen": -2.4794130325317383, + "logits/rejected": -3.0515267848968506, + "logps/chosen": -172.6023406982422, + "logps/rejected": -348.3448486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7989709377288818, + "rewards/margins": 9.987730979919434, + "rewards/rejected": -11.786702156066895, + "step": 10078 + }, + { + "epoch": 1.57, + "learning_rate": 6.754987291871985e-06, + "logits/chosen": -2.1907944679260254, + "logits/rejected": -1.990682601928711, + "logps/chosen": -115.80012512207031, + "logps/rejected": -274.91998291015625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.820470333099365, + "rewards/margins": 6.5659308433532715, + "rewards/rejected": -11.386401176452637, + "step": 10079 + }, + { + "epoch": 1.57, + "learning_rate": 6.754253851340838e-06, + "logits/chosen": -2.2633557319641113, + "logits/rejected": -1.5315910577774048, + "logps/chosen": -198.18821716308594, + "logps/rejected": -376.65045166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.971349239349365, + "rewards/margins": 9.164836883544922, + "rewards/rejected": -14.136186599731445, + "step": 10080 + }, + { + "epoch": 1.57, + "learning_rate": 6.75352041080969e-06, + "logits/chosen": -2.78899884223938, + "logits/rejected": -3.1775460243225098, + "logps/chosen": -172.53268432617188, + "logps/rejected": -442.90283203125, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.728268623352051, + "rewards/margins": 5.7833709716796875, + "rewards/rejected": -10.511639595031738, + "step": 10081 + }, + { + "epoch": 1.57, + "learning_rate": 6.752786970278542e-06, + "logits/chosen": -2.7098793983459473, + "logits/rejected": -2.450209617614746, + "logps/chosen": -238.55184936523438, + "logps/rejected": -430.05084228515625, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.305319309234619, + "rewards/margins": 9.027198791503906, + "rewards/rejected": -14.332517623901367, + "step": 10082 + }, + { + "epoch": 1.57, + "learning_rate": 6.7520535297473935e-06, + "logits/chosen": -3.1504461765289307, + "logits/rejected": -3.1414577960968018, + "logps/chosen": -196.10702514648438, + "logps/rejected": -297.7557067871094, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.993536949157715, + "rewards/margins": 4.720680236816406, + "rewards/rejected": -9.714217185974121, + "step": 10083 + }, + { + "epoch": 1.57, + "learning_rate": 6.751320089216246e-06, + "logits/chosen": -0.7641196846961975, + "logits/rejected": -2.9700469970703125, + "logps/chosen": -225.53001403808594, + "logps/rejected": -613.0650634765625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.436703681945801, + "rewards/margins": 7.115372657775879, + "rewards/rejected": -11.55207633972168, + "step": 10084 + }, + { + "epoch": 1.57, + "learning_rate": 6.750586648685098e-06, + "logits/chosen": -2.3396122455596924, + "logits/rejected": -2.9269726276397705, + "logps/chosen": -408.0858154296875, + "logps/rejected": -453.46148681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.95992374420166, + "rewards/margins": 9.881979942321777, + "rewards/rejected": -14.841903686523438, + "step": 10085 + }, + { + "epoch": 1.57, + "learning_rate": 6.74985320815395e-06, + "logits/chosen": -1.1938554048538208, + "logits/rejected": -2.881588935852051, + "logps/chosen": -154.17091369628906, + "logps/rejected": -555.2362060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6689352989196777, + "rewards/margins": 11.182537078857422, + "rewards/rejected": -14.851472854614258, + "step": 10086 + }, + { + "epoch": 1.57, + "learning_rate": 6.749119767622802e-06, + "logits/chosen": -1.7773932218551636, + "logits/rejected": -2.7352049350738525, + "logps/chosen": -289.66546630859375, + "logps/rejected": -421.53173828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.306665897369385, + "rewards/margins": 9.20631217956543, + "rewards/rejected": -13.512977600097656, + "step": 10087 + }, + { + "epoch": 1.57, + "learning_rate": 6.748386327091654e-06, + "logits/chosen": -2.8588948249816895, + "logits/rejected": -1.7927643060684204, + "logps/chosen": -305.67449951171875, + "logps/rejected": -124.11094665527344, + "loss": 3.599, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.182398319244385, + "rewards/margins": -1.4727201461791992, + "rewards/rejected": -5.7096781730651855, + "step": 10088 + }, + { + "epoch": 1.57, + "learning_rate": 6.7476528865605065e-06, + "logits/chosen": -2.066375255584717, + "logits/rejected": -2.763537883758545, + "logps/chosen": -243.0510711669922, + "logps/rejected": -414.111328125, + "loss": 0.0376, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.490715026855469, + "rewards/margins": 5.719773292541504, + "rewards/rejected": -12.210489273071289, + "step": 10089 + }, + { + "epoch": 1.57, + "learning_rate": 6.746919446029358e-06, + "logits/chosen": -2.074155807495117, + "logits/rejected": -2.887181282043457, + "logps/chosen": -291.8807678222656, + "logps/rejected": -440.9065856933594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4812703132629395, + "rewards/margins": 8.810404777526855, + "rewards/rejected": -12.291675567626953, + "step": 10090 + }, + { + "epoch": 1.57, + "learning_rate": 6.74618600549821e-06, + "logits/chosen": -2.9437191486358643, + "logits/rejected": -2.9261679649353027, + "logps/chosen": -196.35723876953125, + "logps/rejected": -424.75140380859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.736919403076172, + "rewards/margins": 9.826345443725586, + "rewards/rejected": -16.56326675415039, + "step": 10091 + }, + { + "epoch": 1.57, + "learning_rate": 6.745452564967062e-06, + "logits/chosen": -2.770636558532715, + "logits/rejected": -2.645490884780884, + "logps/chosen": -199.86758422851562, + "logps/rejected": -302.8896484375, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0776615142822266, + "rewards/margins": 7.36429500579834, + "rewards/rejected": -10.441956520080566, + "step": 10092 + }, + { + "epoch": 1.57, + "learning_rate": 6.744719124435915e-06, + "logits/chosen": -2.3600857257843018, + "logits/rejected": -2.684173822402954, + "logps/chosen": -167.53001403808594, + "logps/rejected": -392.193359375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.987143516540527, + "rewards/margins": 6.675816059112549, + "rewards/rejected": -11.662960052490234, + "step": 10093 + }, + { + "epoch": 1.57, + "learning_rate": 6.743985683904767e-06, + "logits/chosen": -1.625847339630127, + "logits/rejected": -2.865445613861084, + "logps/chosen": -39.42563247680664, + "logps/rejected": -255.441650390625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.319460153579712, + "rewards/margins": 6.588150978088379, + "rewards/rejected": -9.907611846923828, + "step": 10094 + }, + { + "epoch": 1.57, + "learning_rate": 6.7432522433736186e-06, + "logits/chosen": -2.9678995609283447, + "logits/rejected": -2.5452699661254883, + "logps/chosen": -238.3935089111328, + "logps/rejected": -198.94091796875, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.925103187561035, + "rewards/margins": 5.02836799621582, + "rewards/rejected": -10.953471183776855, + "step": 10095 + }, + { + "epoch": 1.57, + "learning_rate": 6.742518802842471e-06, + "logits/chosen": -1.4964717626571655, + "logits/rejected": -2.837709665298462, + "logps/chosen": -143.2021484375, + "logps/rejected": -168.62522888183594, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.431333065032959, + "rewards/margins": 4.424886226654053, + "rewards/rejected": -8.856219291687012, + "step": 10096 + }, + { + "epoch": 1.57, + "learning_rate": 6.741785362311324e-06, + "logits/chosen": -2.6668708324432373, + "logits/rejected": -1.2578636407852173, + "logps/chosen": -280.5108642578125, + "logps/rejected": -135.42823791503906, + "loss": 0.2051, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.369807720184326, + "rewards/margins": 2.2851781845092773, + "rewards/rejected": -6.654985427856445, + "step": 10097 + }, + { + "epoch": 1.57, + "learning_rate": 6.741051921780176e-06, + "logits/chosen": -2.8619518280029297, + "logits/rejected": -2.492713451385498, + "logps/chosen": -258.10931396484375, + "logps/rejected": -329.16607666015625, + "loss": 3.2671, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.017374992370605, + "rewards/margins": 1.3276989459991455, + "rewards/rejected": -10.345074653625488, + "step": 10098 + }, + { + "epoch": 1.57, + "learning_rate": 6.740318481249028e-06, + "logits/chosen": -2.552473783493042, + "logits/rejected": -2.5897655487060547, + "logps/chosen": -258.9372253417969, + "logps/rejected": -216.83380126953125, + "loss": 0.3987, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.220314979553223, + "rewards/margins": 3.571399688720703, + "rewards/rejected": -7.791714668273926, + "step": 10099 + }, + { + "epoch": 1.57, + "learning_rate": 6.73958504071788e-06, + "logits/chosen": -2.6821794509887695, + "logits/rejected": -3.1256203651428223, + "logps/chosen": -96.3373031616211, + "logps/rejected": -288.77618408203125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.08114767074585, + "rewards/margins": 6.71047306060791, + "rewards/rejected": -10.791620254516602, + "step": 10100 + }, + { + "epoch": 1.57, + "learning_rate": 6.7388516001867315e-06, + "logits/chosen": -1.0857335329055786, + "logits/rejected": -2.574978828430176, + "logps/chosen": -69.68171691894531, + "logps/rejected": -274.4794616699219, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.181451320648193, + "rewards/margins": 7.333124160766602, + "rewards/rejected": -12.514575004577637, + "step": 10101 + }, + { + "epoch": 1.57, + "learning_rate": 6.738118159655584e-06, + "logits/chosen": -2.6530535221099854, + "logits/rejected": -2.762390375137329, + "logps/chosen": -463.0082092285156, + "logps/rejected": -439.42303466796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.503891468048096, + "rewards/margins": 9.950579643249512, + "rewards/rejected": -14.454471588134766, + "step": 10102 + }, + { + "epoch": 1.57, + "learning_rate": 6.737384719124436e-06, + "logits/chosen": -2.6325643062591553, + "logits/rejected": -3.120696783065796, + "logps/chosen": -106.84712219238281, + "logps/rejected": -346.6947021484375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7510037422180176, + "rewards/margins": 8.332796096801758, + "rewards/rejected": -12.083799362182617, + "step": 10103 + }, + { + "epoch": 1.57, + "learning_rate": 6.736651278593288e-06, + "logits/chosen": -2.2996535301208496, + "logits/rejected": -2.8555126190185547, + "logps/chosen": -258.158203125, + "logps/rejected": -290.059814453125, + "loss": 0.0438, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.033091068267822, + "rewards/margins": 6.359654426574707, + "rewards/rejected": -10.392745971679688, + "step": 10104 + }, + { + "epoch": 1.57, + "learning_rate": 6.73591783806214e-06, + "logits/chosen": -2.882063627243042, + "logits/rejected": -2.918421983718872, + "logps/chosen": -171.90673828125, + "logps/rejected": -170.48202514648438, + "loss": 0.2435, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.756898880004883, + "rewards/margins": 2.7900192737579346, + "rewards/rejected": -9.546917915344238, + "step": 10105 + }, + { + "epoch": 1.57, + "learning_rate": 6.735184397530993e-06, + "logits/chosen": -2.800051689147949, + "logits/rejected": -1.8538720607757568, + "logps/chosen": -249.47274780273438, + "logps/rejected": -250.50546264648438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.688769340515137, + "rewards/margins": 8.458520889282227, + "rewards/rejected": -13.147290229797363, + "step": 10106 + }, + { + "epoch": 1.57, + "learning_rate": 6.7344509569998445e-06, + "logits/chosen": -1.5628479719161987, + "logits/rejected": -2.899048089981079, + "logps/chosen": -293.7561340332031, + "logps/rejected": -408.6548767089844, + "loss": 0.1285, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.060526371002197, + "rewards/margins": 4.286412239074707, + "rewards/rejected": -8.346939086914062, + "step": 10107 + }, + { + "epoch": 1.57, + "learning_rate": 6.733717516468696e-06, + "logits/chosen": -2.7121336460113525, + "logits/rejected": -2.8035101890563965, + "logps/chosen": -65.88606262207031, + "logps/rejected": -348.0211181640625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.469830513000488, + "rewards/margins": 6.475856781005859, + "rewards/rejected": -10.945687294006348, + "step": 10108 + }, + { + "epoch": 1.57, + "learning_rate": 6.732984075937548e-06, + "logits/chosen": -2.688312292098999, + "logits/rejected": -2.499340295791626, + "logps/chosen": -430.89044189453125, + "logps/rejected": -520.3607177734375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5793795585632324, + "rewards/margins": 7.411872863769531, + "rewards/rejected": -10.991252899169922, + "step": 10109 + }, + { + "epoch": 1.57, + "learning_rate": 6.7322506354064e-06, + "logits/chosen": -2.4098737239837646, + "logits/rejected": -3.047131299972534, + "logps/chosen": -584.0089111328125, + "logps/rejected": -495.22125244140625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.950177192687988, + "rewards/margins": 7.976634979248047, + "rewards/rejected": -12.926811218261719, + "step": 10110 + }, + { + "epoch": 1.57, + "learning_rate": 6.731517194875253e-06, + "logits/chosen": -2.5765044689178467, + "logits/rejected": -2.935643434524536, + "logps/chosen": -303.39727783203125, + "logps/rejected": -505.2124938964844, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3369736671447754, + "rewards/margins": 7.504561901092529, + "rewards/rejected": -10.841535568237305, + "step": 10111 + }, + { + "epoch": 1.57, + "learning_rate": 6.730783754344105e-06, + "logits/chosen": -2.2062129974365234, + "logits/rejected": -2.7213001251220703, + "logps/chosen": -705.1439208984375, + "logps/rejected": -740.290283203125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.501398086547852, + "rewards/margins": 6.261213302612305, + "rewards/rejected": -10.762611389160156, + "step": 10112 + }, + { + "epoch": 1.57, + "learning_rate": 6.7300503138129575e-06, + "logits/chosen": -3.0107855796813965, + "logits/rejected": -2.8923683166503906, + "logps/chosen": -291.31182861328125, + "logps/rejected": -378.95306396484375, + "loss": 1.3857, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.826906204223633, + "rewards/margins": 2.7580907344818115, + "rewards/rejected": -7.584996700286865, + "step": 10113 + }, + { + "epoch": 1.57, + "learning_rate": 6.729316873281809e-06, + "logits/chosen": -1.9263999462127686, + "logits/rejected": -2.712930917739868, + "logps/chosen": -269.17510986328125, + "logps/rejected": -342.51824951171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9134483337402344, + "rewards/margins": 8.258556365966797, + "rewards/rejected": -11.172004699707031, + "step": 10114 + }, + { + "epoch": 1.57, + "learning_rate": 6.728583432750662e-06, + "logits/chosen": -1.6311908960342407, + "logits/rejected": -2.62345814704895, + "logps/chosen": -146.28225708007812, + "logps/rejected": -390.1356201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3449642658233643, + "rewards/margins": 10.11732292175293, + "rewards/rejected": -13.462287902832031, + "step": 10115 + }, + { + "epoch": 1.57, + "learning_rate": 6.727849992219514e-06, + "logits/chosen": -3.005828857421875, + "logits/rejected": -3.0110483169555664, + "logps/chosen": -206.62240600585938, + "logps/rejected": -238.97393798828125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.663956642150879, + "rewards/margins": 6.5882954597473145, + "rewards/rejected": -14.252252578735352, + "step": 10116 + }, + { + "epoch": 1.57, + "learning_rate": 6.727116551688366e-06, + "logits/chosen": -1.812038540840149, + "logits/rejected": -2.8137195110321045, + "logps/chosen": -289.4219970703125, + "logps/rejected": -506.63067626953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.232086181640625, + "rewards/margins": 9.359965324401855, + "rewards/rejected": -12.59205150604248, + "step": 10117 + }, + { + "epoch": 1.57, + "learning_rate": 6.726383111157218e-06, + "logits/chosen": -2.304684638977051, + "logits/rejected": -3.0174801349639893, + "logps/chosen": -233.24888610839844, + "logps/rejected": -450.75, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.291005849838257, + "rewards/margins": 6.951508522033691, + "rewards/rejected": -10.242513656616211, + "step": 10118 + }, + { + "epoch": 1.57, + "learning_rate": 6.72564967062607e-06, + "logits/chosen": -2.746983528137207, + "logits/rejected": -3.121664047241211, + "logps/chosen": -295.7442626953125, + "logps/rejected": -395.7291564941406, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1554107666015625, + "rewards/margins": 9.120650291442871, + "rewards/rejected": -13.27606201171875, + "step": 10119 + }, + { + "epoch": 1.57, + "learning_rate": 6.724916230094922e-06, + "logits/chosen": -2.8112235069274902, + "logits/rejected": -2.9799375534057617, + "logps/chosen": -153.9811553955078, + "logps/rejected": -282.35906982421875, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.833630084991455, + "rewards/margins": 7.659895420074463, + "rewards/rejected": -9.493525505065918, + "step": 10120 + }, + { + "epoch": 1.57, + "learning_rate": 6.724182789563774e-06, + "logits/chosen": -3.0212624073028564, + "logits/rejected": -3.0986251831054688, + "logps/chosen": -88.61820983886719, + "logps/rejected": -181.55792236328125, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.502876043319702, + "rewards/margins": 7.449566841125488, + "rewards/rejected": -9.952442169189453, + "step": 10121 + }, + { + "epoch": 1.57, + "learning_rate": 6.723449349032626e-06, + "logits/chosen": -2.781397819519043, + "logits/rejected": -3.082085371017456, + "logps/chosen": -182.82247924804688, + "logps/rejected": -269.9361572265625, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4272804260253906, + "rewards/margins": 7.8564653396606445, + "rewards/rejected": -11.283745765686035, + "step": 10122 + }, + { + "epoch": 1.57, + "learning_rate": 6.722715908501478e-06, + "logits/chosen": -2.944096088409424, + "logits/rejected": -3.014507293701172, + "logps/chosen": -547.7692260742188, + "logps/rejected": -468.70782470703125, + "loss": 0.0614, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.942600965499878, + "rewards/margins": 3.6618072986602783, + "rewards/rejected": -7.604408264160156, + "step": 10123 + }, + { + "epoch": 1.57, + "learning_rate": 6.721982467970331e-06, + "logits/chosen": -2.282540798187256, + "logits/rejected": -2.9778695106506348, + "logps/chosen": -350.3684387207031, + "logps/rejected": -636.5992431640625, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.862902641296387, + "rewards/margins": 5.019783973693848, + "rewards/rejected": -10.882686614990234, + "step": 10124 + }, + { + "epoch": 1.57, + "learning_rate": 6.7212490274391825e-06, + "logits/chosen": -2.1955485343933105, + "logits/rejected": -2.976253032684326, + "logps/chosen": -487.154052734375, + "logps/rejected": -588.2327270507812, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.578838348388672, + "rewards/margins": 4.517973899841309, + "rewards/rejected": -10.096813201904297, + "step": 10125 + }, + { + "epoch": 1.57, + "learning_rate": 6.7205155869080344e-06, + "logits/chosen": -2.4540443420410156, + "logits/rejected": -2.740605592727661, + "logps/chosen": -211.5487060546875, + "logps/rejected": -320.1526184082031, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8222289085388184, + "rewards/margins": 7.1481218338012695, + "rewards/rejected": -9.97035026550293, + "step": 10126 + }, + { + "epoch": 1.57, + "learning_rate": 6.719782146376886e-06, + "logits/chosen": -2.410717487335205, + "logits/rejected": -2.5401129722595215, + "logps/chosen": -163.09539794921875, + "logps/rejected": -375.77227783203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6797382831573486, + "rewards/margins": 8.612829208374023, + "rewards/rejected": -12.292566299438477, + "step": 10127 + }, + { + "epoch": 1.58, + "learning_rate": 6.719048705845738e-06, + "logits/chosen": -2.4911699295043945, + "logits/rejected": -2.6442949771881104, + "logps/chosen": -215.4654541015625, + "logps/rejected": -395.0322265625, + "loss": 0.0488, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.594000816345215, + "rewards/margins": 6.18281364440918, + "rewards/rejected": -11.776814460754395, + "step": 10128 + }, + { + "epoch": 1.58, + "learning_rate": 6.718315265314591e-06, + "logits/chosen": -2.2203097343444824, + "logits/rejected": -3.0606014728546143, + "logps/chosen": -142.61419677734375, + "logps/rejected": -258.0782470703125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7308943271636963, + "rewards/margins": 6.483729362487793, + "rewards/rejected": -10.21462345123291, + "step": 10129 + }, + { + "epoch": 1.58, + "learning_rate": 6.717581824783444e-06, + "logits/chosen": -2.961261034011841, + "logits/rejected": -2.9345312118530273, + "logps/chosen": -892.3925170898438, + "logps/rejected": -529.8378295898438, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.277783393859863, + "rewards/margins": 7.268548965454102, + "rewards/rejected": -13.546331405639648, + "step": 10130 + }, + { + "epoch": 1.58, + "learning_rate": 6.7168483842522955e-06, + "logits/chosen": -2.970244884490967, + "logits/rejected": -3.06878662109375, + "logps/chosen": -268.23870849609375, + "logps/rejected": -226.228515625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.08055305480957, + "rewards/margins": 7.458181381225586, + "rewards/rejected": -11.538734436035156, + "step": 10131 + }, + { + "epoch": 1.58, + "learning_rate": 6.716114943721147e-06, + "logits/chosen": -2.214301586151123, + "logits/rejected": -2.9904019832611084, + "logps/chosen": -1006.500244140625, + "logps/rejected": -940.2931518554688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.054492473602295, + "rewards/margins": 11.088708877563477, + "rewards/rejected": -15.14320182800293, + "step": 10132 + }, + { + "epoch": 1.58, + "learning_rate": 6.71538150319e-06, + "logits/chosen": -2.7497470378875732, + "logits/rejected": -3.0579612255096436, + "logps/chosen": -742.888427734375, + "logps/rejected": -697.08837890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.006722927093506, + "rewards/margins": 8.331521987915039, + "rewards/rejected": -12.338245391845703, + "step": 10133 + }, + { + "epoch": 1.58, + "learning_rate": 6.714648062658852e-06, + "logits/chosen": -2.783813953399658, + "logits/rejected": -2.894641637802124, + "logps/chosen": -662.607177734375, + "logps/rejected": -703.1798706054688, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6895751953125, + "rewards/margins": 7.695634841918945, + "rewards/rejected": -11.385210037231445, + "step": 10134 + }, + { + "epoch": 1.58, + "learning_rate": 6.713914622127704e-06, + "logits/chosen": -1.2863714694976807, + "logits/rejected": -2.739042043685913, + "logps/chosen": -53.56730651855469, + "logps/rejected": -496.1009521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.678864002227783, + "rewards/margins": 12.137406349182129, + "rewards/rejected": -15.81627082824707, + "step": 10135 + }, + { + "epoch": 1.58, + "learning_rate": 6.713181181596556e-06, + "logits/chosen": -2.567624807357788, + "logits/rejected": -2.479264736175537, + "logps/chosen": -260.36798095703125, + "logps/rejected": -358.66802978515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.28420352935791, + "rewards/margins": 9.863913536071777, + "rewards/rejected": -15.148117065429688, + "step": 10136 + }, + { + "epoch": 1.58, + "learning_rate": 6.712447741065408e-06, + "logits/chosen": -2.237394094467163, + "logits/rejected": -2.9251186847686768, + "logps/chosen": -115.02677917480469, + "logps/rejected": -353.9397888183594, + "loss": 0.0539, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.797289848327637, + "rewards/margins": 6.154665946960449, + "rewards/rejected": -10.951955795288086, + "step": 10137 + }, + { + "epoch": 1.58, + "learning_rate": 6.71171430053426e-06, + "logits/chosen": -2.980084180831909, + "logits/rejected": -2.304593086242676, + "logps/chosen": -397.3999938964844, + "logps/rejected": -377.64849853515625, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.103060722351074, + "rewards/margins": 6.176692962646484, + "rewards/rejected": -10.279754638671875, + "step": 10138 + }, + { + "epoch": 1.58, + "learning_rate": 6.710980860003112e-06, + "logits/chosen": -2.7195208072662354, + "logits/rejected": -2.91764760017395, + "logps/chosen": -368.9559326171875, + "logps/rejected": -489.8267517089844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.488439559936523, + "rewards/margins": 10.277141571044922, + "rewards/rejected": -14.765581130981445, + "step": 10139 + }, + { + "epoch": 1.58, + "learning_rate": 6.710247419471964e-06, + "logits/chosen": -2.9991421699523926, + "logits/rejected": -2.365980625152588, + "logps/chosen": -731.502685546875, + "logps/rejected": -463.2274169921875, + "loss": 1.0625, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.115145206451416, + "rewards/margins": 0.24035906791687012, + "rewards/rejected": -6.355504035949707, + "step": 10140 + }, + { + "epoch": 1.58, + "learning_rate": 6.709513978940816e-06, + "logits/chosen": -2.9844236373901367, + "logits/rejected": -2.101854085922241, + "logps/chosen": -182.47662353515625, + "logps/rejected": -219.58456420898438, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4814834594726562, + "rewards/margins": 7.167316913604736, + "rewards/rejected": -10.648799896240234, + "step": 10141 + }, + { + "epoch": 1.58, + "learning_rate": 6.708780538409669e-06, + "logits/chosen": -2.4517245292663574, + "logits/rejected": -2.888463258743286, + "logps/chosen": -187.90354919433594, + "logps/rejected": -324.2451171875, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.186253547668457, + "rewards/margins": 5.929676055908203, + "rewards/rejected": -10.11592960357666, + "step": 10142 + }, + { + "epoch": 1.58, + "learning_rate": 6.708047097878521e-06, + "logits/chosen": -2.2509899139404297, + "logits/rejected": -2.876887559890747, + "logps/chosen": -249.8626708984375, + "logps/rejected": -343.6170959472656, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7116057872772217, + "rewards/margins": 7.315505504608154, + "rewards/rejected": -11.027111053466797, + "step": 10143 + }, + { + "epoch": 1.58, + "learning_rate": 6.7073136573473725e-06, + "logits/chosen": -1.8775322437286377, + "logits/rejected": -2.856294631958008, + "logps/chosen": -116.23788452148438, + "logps/rejected": -322.0182800292969, + "loss": 0.5296, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.7535834312438965, + "rewards/margins": 5.335306644439697, + "rewards/rejected": -10.088890075683594, + "step": 10144 + }, + { + "epoch": 1.58, + "learning_rate": 6.706580216816224e-06, + "logits/chosen": -2.895371913909912, + "logits/rejected": -3.052844762802124, + "logps/chosen": -173.2434844970703, + "logps/rejected": -228.33218383789062, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.462450981140137, + "rewards/margins": 4.371411323547363, + "rewards/rejected": -8.8338623046875, + "step": 10145 + }, + { + "epoch": 1.58, + "learning_rate": 6.705846776285077e-06, + "logits/chosen": -1.9169905185699463, + "logits/rejected": -2.6985647678375244, + "logps/chosen": -209.99801635742188, + "logps/rejected": -494.7478942871094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.787753105163574, + "rewards/margins": 10.195323944091797, + "rewards/rejected": -12.983076095581055, + "step": 10146 + }, + { + "epoch": 1.58, + "learning_rate": 6.70511333575393e-06, + "logits/chosen": -2.1987111568450928, + "logits/rejected": -1.997782588005066, + "logps/chosen": -471.928955078125, + "logps/rejected": -343.3494873046875, + "loss": 1.3074, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.689844131469727, + "rewards/margins": 2.1381008625030518, + "rewards/rejected": -9.8279447555542, + "step": 10147 + }, + { + "epoch": 1.58, + "learning_rate": 6.704379895222782e-06, + "logits/chosen": -1.9762629270553589, + "logits/rejected": -2.844900608062744, + "logps/chosen": -258.1458740234375, + "logps/rejected": -450.7682189941406, + "loss": 0.8653, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.209012031555176, + "rewards/margins": 4.573132038116455, + "rewards/rejected": -11.782144546508789, + "step": 10148 + }, + { + "epoch": 1.58, + "learning_rate": 6.7036464546916336e-06, + "logits/chosen": -2.8074681758880615, + "logits/rejected": -2.944685220718384, + "logps/chosen": -172.4335479736328, + "logps/rejected": -228.4722442626953, + "loss": 0.0618, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.967367649078369, + "rewards/margins": 4.162914752960205, + "rewards/rejected": -7.130282402038574, + "step": 10149 + }, + { + "epoch": 1.58, + "learning_rate": 6.7029130141604854e-06, + "logits/chosen": -1.9137382507324219, + "logits/rejected": -2.910832166671753, + "logps/chosen": -235.26864624023438, + "logps/rejected": -304.8774108886719, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.308667182922363, + "rewards/margins": 5.4700469970703125, + "rewards/rejected": -11.778714179992676, + "step": 10150 + }, + { + "epoch": 1.58, + "learning_rate": 6.702179573629338e-06, + "logits/chosen": -2.702864646911621, + "logits/rejected": -2.3705155849456787, + "logps/chosen": -469.6234436035156, + "logps/rejected": -555.9163208007812, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.036954641342163, + "rewards/margins": 8.606996536254883, + "rewards/rejected": -11.643951416015625, + "step": 10151 + }, + { + "epoch": 1.58, + "learning_rate": 6.70144613309819e-06, + "logits/chosen": -2.7651309967041016, + "logits/rejected": -3.065680742263794, + "logps/chosen": -154.27593994140625, + "logps/rejected": -401.5819091796875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.28881311416626, + "rewards/margins": 7.654109001159668, + "rewards/rejected": -11.94292163848877, + "step": 10152 + }, + { + "epoch": 1.58, + "learning_rate": 6.700712692567042e-06, + "logits/chosen": -1.7255011796951294, + "logits/rejected": -2.8867509365081787, + "logps/chosen": -160.4423828125, + "logps/rejected": -451.3134460449219, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3574187755584717, + "rewards/margins": 9.30817699432373, + "rewards/rejected": -12.665596008300781, + "step": 10153 + }, + { + "epoch": 1.58, + "learning_rate": 6.699979252035894e-06, + "logits/chosen": -2.4414689540863037, + "logits/rejected": -3.1104798316955566, + "logps/chosen": -184.10952758789062, + "logps/rejected": -506.9406433105469, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.37021541595459, + "rewards/margins": 8.355182647705078, + "rewards/rejected": -12.725399017333984, + "step": 10154 + }, + { + "epoch": 1.58, + "learning_rate": 6.6992458115047465e-06, + "logits/chosen": -2.632124900817871, + "logits/rejected": -3.0818583965301514, + "logps/chosen": -293.0132751464844, + "logps/rejected": -495.08642578125, + "loss": 0.2153, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.582362174987793, + "rewards/margins": 1.9803581237792969, + "rewards/rejected": -10.56272029876709, + "step": 10155 + }, + { + "epoch": 1.58, + "learning_rate": 6.698512370973598e-06, + "logits/chosen": -2.454307794570923, + "logits/rejected": -2.9755280017852783, + "logps/chosen": -46.91363525390625, + "logps/rejected": -328.82977294921875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8044772148132324, + "rewards/margins": 7.33489990234375, + "rewards/rejected": -11.13937759399414, + "step": 10156 + }, + { + "epoch": 1.58, + "learning_rate": 6.69777893044245e-06, + "logits/chosen": -2.707275390625, + "logits/rejected": -3.1005771160125732, + "logps/chosen": -272.7660827636719, + "logps/rejected": -299.7889404296875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7820472717285156, + "rewards/margins": 7.299270153045654, + "rewards/rejected": -9.081317901611328, + "step": 10157 + }, + { + "epoch": 1.58, + "learning_rate": 6.697045489911302e-06, + "logits/chosen": -2.910843849182129, + "logits/rejected": -1.8306835889816284, + "logps/chosen": -498.082763671875, + "logps/rejected": -315.4006042480469, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.271238327026367, + "rewards/margins": 6.561772346496582, + "rewards/rejected": -10.83301067352295, + "step": 10158 + }, + { + "epoch": 1.58, + "learning_rate": 6.696312049380154e-06, + "logits/chosen": -1.953719139099121, + "logits/rejected": -2.841015338897705, + "logps/chosen": -107.82617950439453, + "logps/rejected": -484.44854736328125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9557929039001465, + "rewards/margins": 10.331202507019043, + "rewards/rejected": -15.286994934082031, + "step": 10159 + }, + { + "epoch": 1.58, + "learning_rate": 6.695578608849007e-06, + "logits/chosen": -2.4500181674957275, + "logits/rejected": -2.8332715034484863, + "logps/chosen": -195.0931396484375, + "logps/rejected": -240.4114532470703, + "loss": 1.1789, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.221244812011719, + "rewards/margins": 2.624760866165161, + "rewards/rejected": -8.8460054397583, + "step": 10160 + }, + { + "epoch": 1.58, + "learning_rate": 6.694845168317859e-06, + "logits/chosen": -3.0996036529541016, + "logits/rejected": -3.052607297897339, + "logps/chosen": -84.43147277832031, + "logps/rejected": -196.42288208007812, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4448084831237793, + "rewards/margins": 6.241909980773926, + "rewards/rejected": -9.686717987060547, + "step": 10161 + }, + { + "epoch": 1.58, + "learning_rate": 6.6941117277867105e-06, + "logits/chosen": -2.6343047618865967, + "logits/rejected": -3.031557083129883, + "logps/chosen": -1182.374267578125, + "logps/rejected": -791.461669921875, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.326510429382324, + "rewards/margins": 5.269731521606445, + "rewards/rejected": -10.59624195098877, + "step": 10162 + }, + { + "epoch": 1.58, + "learning_rate": 6.693378287255563e-06, + "logits/chosen": -2.151303291320801, + "logits/rejected": -2.8718607425689697, + "logps/chosen": -184.5688018798828, + "logps/rejected": -389.5796203613281, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.414669513702393, + "rewards/margins": 8.158729553222656, + "rewards/rejected": -12.573399543762207, + "step": 10163 + }, + { + "epoch": 1.58, + "learning_rate": 6.692644846724416e-06, + "logits/chosen": -2.307206630706787, + "logits/rejected": -2.8421387672424316, + "logps/chosen": -344.0435791015625, + "logps/rejected": -422.20361328125, + "loss": 0.2613, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.648214817047119, + "rewards/margins": 4.1704912185668945, + "rewards/rejected": -8.818705558776855, + "step": 10164 + }, + { + "epoch": 1.58, + "learning_rate": 6.691911406193268e-06, + "logits/chosen": -2.190340757369995, + "logits/rejected": -2.941194534301758, + "logps/chosen": -163.34378051757812, + "logps/rejected": -437.13934326171875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.963045597076416, + "rewards/margins": 7.092984199523926, + "rewards/rejected": -10.056029319763184, + "step": 10165 + }, + { + "epoch": 1.58, + "learning_rate": 6.69117796566212e-06, + "logits/chosen": -3.0059595108032227, + "logits/rejected": -3.226691961288452, + "logps/chosen": -213.45974731445312, + "logps/rejected": -343.2545166015625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.605346202850342, + "rewards/margins": 5.983879089355469, + "rewards/rejected": -10.589225769042969, + "step": 10166 + }, + { + "epoch": 1.58, + "learning_rate": 6.690444525130972e-06, + "logits/chosen": -2.0181140899658203, + "logits/rejected": -2.8750267028808594, + "logps/chosen": -296.1454162597656, + "logps/rejected": -435.0559997558594, + "loss": 0.0251, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.714591026306152, + "rewards/margins": 3.9125702381134033, + "rewards/rejected": -10.627161026000977, + "step": 10167 + }, + { + "epoch": 1.58, + "learning_rate": 6.6897110845998235e-06, + "logits/chosen": -3.0771279335021973, + "logits/rejected": -2.9270966053009033, + "logps/chosen": -125.32781982421875, + "logps/rejected": -160.9171600341797, + "loss": 2.1516, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.473302364349365, + "rewards/margins": 2.0949625968933105, + "rewards/rejected": -7.568264961242676, + "step": 10168 + }, + { + "epoch": 1.58, + "learning_rate": 6.688977644068676e-06, + "logits/chosen": -2.4872260093688965, + "logits/rejected": -2.5666258335113525, + "logps/chosen": -179.50515747070312, + "logps/rejected": -274.92095947265625, + "loss": 1.1115, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.260189533233643, + "rewards/margins": 6.243793964385986, + "rewards/rejected": -12.503983497619629, + "step": 10169 + }, + { + "epoch": 1.58, + "learning_rate": 6.688244203537528e-06, + "logits/chosen": -2.3082897663116455, + "logits/rejected": -2.8121209144592285, + "logps/chosen": -325.85076904296875, + "logps/rejected": -465.0615234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.126865386962891, + "rewards/margins": 10.96061897277832, + "rewards/rejected": -15.087484359741211, + "step": 10170 + }, + { + "epoch": 1.58, + "learning_rate": 6.68751076300638e-06, + "logits/chosen": -1.5126296281814575, + "logits/rejected": -1.8703914880752563, + "logps/chosen": -229.9713897705078, + "logps/rejected": -524.454833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8919119834899902, + "rewards/margins": 11.745292663574219, + "rewards/rejected": -15.63720417022705, + "step": 10171 + }, + { + "epoch": 1.58, + "learning_rate": 6.686777322475232e-06, + "logits/chosen": -2.9479713439941406, + "logits/rejected": -2.498295783996582, + "logps/chosen": -133.14920043945312, + "logps/rejected": -184.22349548339844, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.267786502838135, + "rewards/margins": 5.57785177230835, + "rewards/rejected": -9.845638275146484, + "step": 10172 + }, + { + "epoch": 1.58, + "learning_rate": 6.6860438819440846e-06, + "logits/chosen": -2.8710381984710693, + "logits/rejected": -1.7688870429992676, + "logps/chosen": -313.9090576171875, + "logps/rejected": -259.9357604980469, + "loss": 0.5621, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.063605308532715, + "rewards/margins": 2.953066110610962, + "rewards/rejected": -8.016671180725098, + "step": 10173 + }, + { + "epoch": 1.58, + "learning_rate": 6.6853104414129364e-06, + "logits/chosen": -2.374326705932617, + "logits/rejected": -3.129084587097168, + "logps/chosen": -83.73269653320312, + "logps/rejected": -269.0060729980469, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.627686023712158, + "rewards/margins": 6.244499683380127, + "rewards/rejected": -9.872185707092285, + "step": 10174 + }, + { + "epoch": 1.58, + "learning_rate": 6.684577000881788e-06, + "logits/chosen": -1.7100666761398315, + "logits/rejected": -1.0827478170394897, + "logps/chosen": -430.73065185546875, + "logps/rejected": -697.9912109375, + "loss": 0.1478, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.120642185211182, + "rewards/margins": 2.344294548034668, + "rewards/rejected": -8.464937210083008, + "step": 10175 + }, + { + "epoch": 1.58, + "learning_rate": 6.68384356035064e-06, + "logits/chosen": -2.9165921211242676, + "logits/rejected": -2.287803888320923, + "logps/chosen": -591.2393188476562, + "logps/rejected": -432.5303649902344, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7780046463012695, + "rewards/margins": 6.678412437438965, + "rewards/rejected": -11.456417083740234, + "step": 10176 + }, + { + "epoch": 1.58, + "learning_rate": 6.683110119819492e-06, + "logits/chosen": -1.8339399099349976, + "logits/rejected": -2.2994773387908936, + "logps/chosen": -147.25286865234375, + "logps/rejected": -292.5250549316406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.742499828338623, + "rewards/margins": 9.285569190979004, + "rewards/rejected": -14.028069496154785, + "step": 10177 + }, + { + "epoch": 1.58, + "learning_rate": 6.682376679288345e-06, + "logits/chosen": -2.97865629196167, + "logits/rejected": -2.698126792907715, + "logps/chosen": -529.1080932617188, + "logps/rejected": -539.7504272460938, + "loss": 0.6189, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.714822769165039, + "rewards/margins": 1.1943023204803467, + "rewards/rejected": -6.909124851226807, + "step": 10178 + }, + { + "epoch": 1.58, + "learning_rate": 6.681643238757197e-06, + "logits/chosen": -2.730673313140869, + "logits/rejected": -2.94710111618042, + "logps/chosen": -87.54975128173828, + "logps/rejected": -177.64259338378906, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.751278877258301, + "rewards/margins": 5.73175048828125, + "rewards/rejected": -9.483030319213867, + "step": 10179 + }, + { + "epoch": 1.58, + "learning_rate": 6.680909798226049e-06, + "logits/chosen": -2.752175807952881, + "logits/rejected": -1.9729217290878296, + "logps/chosen": -213.98828125, + "logps/rejected": -368.64324951171875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.879772424697876, + "rewards/margins": 6.978594779968262, + "rewards/rejected": -9.858367919921875, + "step": 10180 + }, + { + "epoch": 1.58, + "learning_rate": 6.680176357694901e-06, + "logits/chosen": -3.093653917312622, + "logits/rejected": -2.741203784942627, + "logps/chosen": -628.6590576171875, + "logps/rejected": -625.287353515625, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.768246650695801, + "rewards/margins": 4.456995964050293, + "rewards/rejected": -10.225242614746094, + "step": 10181 + }, + { + "epoch": 1.58, + "learning_rate": 6.679442917163754e-06, + "logits/chosen": -1.4100868701934814, + "logits/rejected": -2.7543163299560547, + "logps/chosen": -180.69920349121094, + "logps/rejected": -536.9712524414062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6791257858276367, + "rewards/margins": 11.055230140686035, + "rewards/rejected": -14.734355926513672, + "step": 10182 + }, + { + "epoch": 1.58, + "learning_rate": 6.678709476632606e-06, + "logits/chosen": -1.6248154640197754, + "logits/rejected": -1.688109040260315, + "logps/chosen": -284.02752685546875, + "logps/rejected": -488.12396240234375, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.595640659332275, + "rewards/margins": 7.6887712478637695, + "rewards/rejected": -14.284412384033203, + "step": 10183 + }, + { + "epoch": 1.58, + "learning_rate": 6.677976036101458e-06, + "logits/chosen": -2.5457096099853516, + "logits/rejected": -2.972867012023926, + "logps/chosen": -252.564697265625, + "logps/rejected": -327.33270263671875, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.582128047943115, + "rewards/margins": 5.095773696899414, + "rewards/rejected": -9.677902221679688, + "step": 10184 + }, + { + "epoch": 1.58, + "learning_rate": 6.67724259557031e-06, + "logits/chosen": -2.8641345500946045, + "logits/rejected": -3.08272123336792, + "logps/chosen": -532.3328247070312, + "logps/rejected": -410.19927978515625, + "loss": 0.6625, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.564784526824951, + "rewards/margins": 3.897996425628662, + "rewards/rejected": -11.462780952453613, + "step": 10185 + }, + { + "epoch": 1.58, + "learning_rate": 6.6765091550391615e-06, + "logits/chosen": -2.979201078414917, + "logits/rejected": -1.9969686269760132, + "logps/chosen": -416.81597900390625, + "logps/rejected": -341.53375244140625, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4175667762756348, + "rewards/margins": 6.712945938110352, + "rewards/rejected": -10.130512237548828, + "step": 10186 + }, + { + "epoch": 1.58, + "learning_rate": 6.675775714508014e-06, + "logits/chosen": -1.8514665365219116, + "logits/rejected": -2.7581615447998047, + "logps/chosen": -234.51515197753906, + "logps/rejected": -506.2965393066406, + "loss": 0.0618, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.539088726043701, + "rewards/margins": 7.513256072998047, + "rewards/rejected": -14.052345275878906, + "step": 10187 + }, + { + "epoch": 1.58, + "learning_rate": 6.675042273976866e-06, + "logits/chosen": -2.406362533569336, + "logits/rejected": -2.8909647464752197, + "logps/chosen": -167.24330139160156, + "logps/rejected": -330.450927734375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6013922691345215, + "rewards/margins": 6.111152648925781, + "rewards/rejected": -10.712545394897461, + "step": 10188 + }, + { + "epoch": 1.58, + "learning_rate": 6.674308833445718e-06, + "logits/chosen": -2.5849008560180664, + "logits/rejected": -3.1869089603424072, + "logps/chosen": -597.7183227539062, + "logps/rejected": -688.5912475585938, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.287997245788574, + "rewards/margins": 5.5101704597473145, + "rewards/rejected": -9.79816722869873, + "step": 10189 + }, + { + "epoch": 1.58, + "learning_rate": 6.67357539291457e-06, + "logits/chosen": -2.6866281032562256, + "logits/rejected": -2.979262351989746, + "logps/chosen": -335.3568115234375, + "logps/rejected": -477.9421691894531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.313859462738037, + "rewards/margins": 11.98633861541748, + "rewards/rejected": -15.30019760131836, + "step": 10190 + }, + { + "epoch": 1.58, + "learning_rate": 6.672841952383423e-06, + "logits/chosen": -2.558985948562622, + "logits/rejected": -3.051905870437622, + "logps/chosen": -264.7919921875, + "logps/rejected": -237.25119018554688, + "loss": 0.6864, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.365782737731934, + "rewards/margins": 2.2444629669189453, + "rewards/rejected": -8.610245704650879, + "step": 10191 + }, + { + "epoch": 1.59, + "learning_rate": 6.6721085118522745e-06, + "logits/chosen": -2.6204922199249268, + "logits/rejected": -3.223054885864258, + "logps/chosen": -42.994239807128906, + "logps/rejected": -308.67828369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3428051471710205, + "rewards/margins": 10.54406452178955, + "rewards/rejected": -13.886869430541992, + "step": 10192 + }, + { + "epoch": 1.59, + "learning_rate": 6.671375071321126e-06, + "logits/chosen": -2.828688383102417, + "logits/rejected": -3.0155436992645264, + "logps/chosen": -209.07879638671875, + "logps/rejected": -242.77447509765625, + "loss": 0.7719, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.63683557510376, + "rewards/margins": 2.5068812370300293, + "rewards/rejected": -9.143716812133789, + "step": 10193 + }, + { + "epoch": 1.59, + "learning_rate": 6.670641630789978e-06, + "logits/chosen": -2.8014349937438965, + "logits/rejected": -2.9971704483032227, + "logps/chosen": -111.53850555419922, + "logps/rejected": -209.6728973388672, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8291726112365723, + "rewards/margins": 8.10845947265625, + "rewards/rejected": -9.937631607055664, + "step": 10194 + }, + { + "epoch": 1.59, + "learning_rate": 6.669908190258831e-06, + "logits/chosen": -2.239109992980957, + "logits/rejected": -3.0050911903381348, + "logps/chosen": -54.875423431396484, + "logps/rejected": -199.77996826171875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.359855651855469, + "rewards/margins": 6.152151107788086, + "rewards/rejected": -10.512006759643555, + "step": 10195 + }, + { + "epoch": 1.59, + "learning_rate": 6.669174749727683e-06, + "logits/chosen": -2.8998301029205322, + "logits/rejected": -2.268876075744629, + "logps/chosen": -274.9407653808594, + "logps/rejected": -285.70599365234375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.521548271179199, + "rewards/margins": 5.808159828186035, + "rewards/rejected": -10.329708099365234, + "step": 10196 + }, + { + "epoch": 1.59, + "learning_rate": 6.6684413091965356e-06, + "logits/chosen": -2.381671905517578, + "logits/rejected": -2.7911407947540283, + "logps/chosen": -87.72373962402344, + "logps/rejected": -392.0054931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5060880184173584, + "rewards/margins": 11.057241439819336, + "rewards/rejected": -14.563329696655273, + "step": 10197 + }, + { + "epoch": 1.59, + "learning_rate": 6.6677078686653874e-06, + "logits/chosen": -1.951930046081543, + "logits/rejected": -2.712282657623291, + "logps/chosen": -106.0491714477539, + "logps/rejected": -338.2346496582031, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.88683557510376, + "rewards/margins": 8.98858642578125, + "rewards/rejected": -14.875421524047852, + "step": 10198 + }, + { + "epoch": 1.59, + "learning_rate": 6.666974428134239e-06, + "logits/chosen": -2.2512896060943604, + "logits/rejected": -2.9836559295654297, + "logps/chosen": -161.23007202148438, + "logps/rejected": -343.0322265625, + "loss": 0.1436, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.539656639099121, + "rewards/margins": 6.39079475402832, + "rewards/rejected": -10.930451393127441, + "step": 10199 + }, + { + "epoch": 1.59, + "learning_rate": 6.666240987603092e-06, + "logits/chosen": -1.7581491470336914, + "logits/rejected": -3.083740472793579, + "logps/chosen": -268.81976318359375, + "logps/rejected": -441.9934997558594, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.080690383911133, + "rewards/margins": 5.409631729125977, + "rewards/rejected": -11.49032211303711, + "step": 10200 + }, + { + "epoch": 1.59, + "learning_rate": 6.665507547071944e-06, + "logits/chosen": -3.014163017272949, + "logits/rejected": -2.863116979598999, + "logps/chosen": -109.37361145019531, + "logps/rejected": -181.5916748046875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.213194847106934, + "rewards/margins": 6.172462463378906, + "rewards/rejected": -10.38565731048584, + "step": 10201 + }, + { + "epoch": 1.59, + "learning_rate": 6.664774106540796e-06, + "logits/chosen": -2.6297810077667236, + "logits/rejected": -3.1240153312683105, + "logps/chosen": -548.97607421875, + "logps/rejected": -587.826171875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8034958839416504, + "rewards/margins": 8.301260948181152, + "rewards/rejected": -12.104757308959961, + "step": 10202 + }, + { + "epoch": 1.59, + "learning_rate": 6.664040666009648e-06, + "logits/chosen": -2.8281664848327637, + "logits/rejected": -1.181413173675537, + "logps/chosen": -316.4970703125, + "logps/rejected": -268.0998840332031, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.416296005249023, + "rewards/margins": 5.861403465270996, + "rewards/rejected": -11.27769947052002, + "step": 10203 + }, + { + "epoch": 1.59, + "learning_rate": 6.6633072254785e-06, + "logits/chosen": -2.9600610733032227, + "logits/rejected": -3.1469390392303467, + "logps/chosen": -572.5922241210938, + "logps/rejected": -324.84539794921875, + "loss": 1.5837, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.437165260314941, + "rewards/margins": 2.7729403972625732, + "rewards/rejected": -12.210105895996094, + "step": 10204 + }, + { + "epoch": 1.59, + "learning_rate": 6.662573784947352e-06, + "logits/chosen": -1.9631171226501465, + "logits/rejected": -2.7574126720428467, + "logps/chosen": -167.1219024658203, + "logps/rejected": -438.6326904296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.448195457458496, + "rewards/margins": 9.646425247192383, + "rewards/rejected": -14.094621658325195, + "step": 10205 + }, + { + "epoch": 1.59, + "learning_rate": 6.661840344416204e-06, + "logits/chosen": -2.040187358856201, + "logits/rejected": -2.919919967651367, + "logps/chosen": -73.51882934570312, + "logps/rejected": -287.65435791015625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1546950340271, + "rewards/margins": 5.700362205505371, + "rewards/rejected": -11.855056762695312, + "step": 10206 + }, + { + "epoch": 1.59, + "learning_rate": 6.661106903885056e-06, + "logits/chosen": -2.6771013736724854, + "logits/rejected": -2.7964277267456055, + "logps/chosen": -444.1202087402344, + "logps/rejected": -429.2271728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.136407494544983, + "rewards/margins": 10.52977466583252, + "rewards/rejected": -11.666181564331055, + "step": 10207 + }, + { + "epoch": 1.59, + "learning_rate": 6.660373463353908e-06, + "logits/chosen": -3.0262057781219482, + "logits/rejected": -3.0088412761688232, + "logps/chosen": -240.33868408203125, + "logps/rejected": -260.5694580078125, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.249380588531494, + "rewards/margins": 7.424983978271484, + "rewards/rejected": -12.67436408996582, + "step": 10208 + }, + { + "epoch": 1.59, + "learning_rate": 6.659640022822761e-06, + "logits/chosen": -3.0058977603912354, + "logits/rejected": -2.1848838329315186, + "logps/chosen": -372.7655029296875, + "logps/rejected": -197.07537841796875, + "loss": 0.158, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2461259365081787, + "rewards/margins": 3.183535575866699, + "rewards/rejected": -5.429661750793457, + "step": 10209 + }, + { + "epoch": 1.59, + "learning_rate": 6.6589065822916125e-06, + "logits/chosen": -2.8101389408111572, + "logits/rejected": -2.462272882461548, + "logps/chosen": -272.25469970703125, + "logps/rejected": -327.572021484375, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.358286142349243, + "rewards/margins": 5.011081218719482, + "rewards/rejected": -8.369367599487305, + "step": 10210 + }, + { + "epoch": 1.59, + "learning_rate": 6.658173141760464e-06, + "logits/chosen": -2.6906676292419434, + "logits/rejected": -2.073835849761963, + "logps/chosen": -185.85047912597656, + "logps/rejected": -203.10031127929688, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.539205551147461, + "rewards/margins": 5.1024627685546875, + "rewards/rejected": -9.641668319702148, + "step": 10211 + }, + { + "epoch": 1.59, + "learning_rate": 6.657439701229316e-06, + "logits/chosen": -1.9467785358428955, + "logits/rejected": -2.588256359100342, + "logps/chosen": -201.2301483154297, + "logps/rejected": -319.0964660644531, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.21608829498291, + "rewards/margins": 6.51134729385376, + "rewards/rejected": -11.727436065673828, + "step": 10212 + }, + { + "epoch": 1.59, + "learning_rate": 6.656706260698169e-06, + "logits/chosen": -2.047842502593994, + "logits/rejected": -3.002591133117676, + "logps/chosen": -152.99575805664062, + "logps/rejected": -318.22100830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3954148292541504, + "rewards/margins": 8.978647232055664, + "rewards/rejected": -12.374061584472656, + "step": 10213 + }, + { + "epoch": 1.59, + "learning_rate": 6.655972820167022e-06, + "logits/chosen": -3.175501585006714, + "logits/rejected": -3.223994016647339, + "logps/chosen": -123.85914611816406, + "logps/rejected": -220.1362762451172, + "loss": 0.1247, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.22023868560791, + "rewards/margins": 4.250545501708984, + "rewards/rejected": -9.470784187316895, + "step": 10214 + }, + { + "epoch": 1.59, + "learning_rate": 6.655239379635874e-06, + "logits/chosen": -2.201878309249878, + "logits/rejected": -2.828726053237915, + "logps/chosen": -305.22613525390625, + "logps/rejected": -433.1005859375, + "loss": 0.262, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.455284118652344, + "rewards/margins": 6.049466609954834, + "rewards/rejected": -12.504751205444336, + "step": 10215 + }, + { + "epoch": 1.59, + "learning_rate": 6.6545059391047255e-06, + "logits/chosen": -2.937849283218384, + "logits/rejected": -2.185631036758423, + "logps/chosen": -332.258544921875, + "logps/rejected": -378.06378173828125, + "loss": 0.5769, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.449516296386719, + "rewards/margins": 4.51031494140625, + "rewards/rejected": -8.959831237792969, + "step": 10216 + }, + { + "epoch": 1.59, + "learning_rate": 6.653772498573577e-06, + "logits/chosen": -3.0240402221679688, + "logits/rejected": -2.861220359802246, + "logps/chosen": -690.6400756835938, + "logps/rejected": -499.64459228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.984161376953125, + "rewards/margins": 12.71107292175293, + "rewards/rejected": -15.695234298706055, + "step": 10217 + }, + { + "epoch": 1.59, + "learning_rate": 6.65303905804243e-06, + "logits/chosen": -3.0248448848724365, + "logits/rejected": -3.1142702102661133, + "logps/chosen": -70.61418914794922, + "logps/rejected": -206.4131622314453, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.676472544670105, + "rewards/margins": 8.402530670166016, + "rewards/rejected": -10.079002380371094, + "step": 10218 + }, + { + "epoch": 1.59, + "learning_rate": 6.652305617511282e-06, + "logits/chosen": -2.651567220687866, + "logits/rejected": -3.090601921081543, + "logps/chosen": -89.61308288574219, + "logps/rejected": -235.79530334472656, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6478142738342285, + "rewards/margins": 6.645824432373047, + "rewards/rejected": -11.293639183044434, + "step": 10219 + }, + { + "epoch": 1.59, + "learning_rate": 6.651572176980134e-06, + "logits/chosen": -2.8096165657043457, + "logits/rejected": -2.6027233600616455, + "logps/chosen": -532.6544799804688, + "logps/rejected": -734.335693359375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2544503211975098, + "rewards/margins": 7.407774925231934, + "rewards/rejected": -9.662225723266602, + "step": 10220 + }, + { + "epoch": 1.59, + "learning_rate": 6.650838736448986e-06, + "logits/chosen": -2.6733267307281494, + "logits/rejected": -2.5560901165008545, + "logps/chosen": -149.9342041015625, + "logps/rejected": -211.95240783691406, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.522846698760986, + "rewards/margins": 5.700535297393799, + "rewards/rejected": -12.223381996154785, + "step": 10221 + }, + { + "epoch": 1.59, + "learning_rate": 6.6501052959178385e-06, + "logits/chosen": -2.893906593322754, + "logits/rejected": -2.5501139163970947, + "logps/chosen": -455.6490783691406, + "logps/rejected": -352.734619140625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.778210639953613, + "rewards/margins": 6.637279510498047, + "rewards/rejected": -11.415491104125977, + "step": 10222 + }, + { + "epoch": 1.59, + "learning_rate": 6.64937185538669e-06, + "logits/chosen": -3.0905306339263916, + "logits/rejected": -3.162572145462036, + "logps/chosen": -189.5438995361328, + "logps/rejected": -353.64141845703125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.447083950042725, + "rewards/margins": 7.157830715179443, + "rewards/rejected": -11.604914665222168, + "step": 10223 + }, + { + "epoch": 1.59, + "learning_rate": 6.648638414855542e-06, + "logits/chosen": -3.062176465988159, + "logits/rejected": -2.572253942489624, + "logps/chosen": -388.40997314453125, + "logps/rejected": -215.9842529296875, + "loss": 2.7328, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.752761840820312, + "rewards/margins": -0.5954174995422363, + "rewards/rejected": -8.157344818115234, + "step": 10224 + }, + { + "epoch": 1.59, + "learning_rate": 6.647904974324394e-06, + "logits/chosen": -2.8729612827301025, + "logits/rejected": -1.7344995737075806, + "logps/chosen": -651.6325073242188, + "logps/rejected": -387.130615234375, + "loss": 0.304, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023263931274414, + "rewards/margins": 4.105660438537598, + "rewards/rejected": -10.128923416137695, + "step": 10225 + }, + { + "epoch": 1.59, + "learning_rate": 6.647171533793246e-06, + "logits/chosen": -3.011897563934326, + "logits/rejected": -2.672494888305664, + "logps/chosen": -160.43736267089844, + "logps/rejected": -115.67750549316406, + "loss": 0.3035, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9781494140625, + "rewards/margins": 3.2338662147521973, + "rewards/rejected": -9.212015151977539, + "step": 10226 + }, + { + "epoch": 1.59, + "learning_rate": 6.646438093262099e-06, + "logits/chosen": -3.00026798248291, + "logits/rejected": -3.068605422973633, + "logps/chosen": -457.7694091796875, + "logps/rejected": -595.55078125, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.096551418304443, + "rewards/margins": 7.245144844055176, + "rewards/rejected": -11.341695785522461, + "step": 10227 + }, + { + "epoch": 1.59, + "learning_rate": 6.6457046527309506e-06, + "logits/chosen": -2.7383804321289062, + "logits/rejected": -1.3766887187957764, + "logps/chosen": -154.36865234375, + "logps/rejected": -144.5294189453125, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.429384231567383, + "rewards/margins": 3.469367027282715, + "rewards/rejected": -9.898751258850098, + "step": 10228 + }, + { + "epoch": 1.59, + "learning_rate": 6.6449712121998025e-06, + "logits/chosen": -2.526689291000366, + "logits/rejected": -3.0995657444000244, + "logps/chosen": -113.63679504394531, + "logps/rejected": -292.8222961425781, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.730849266052246, + "rewards/margins": 6.7997002601623535, + "rewards/rejected": -10.530550003051758, + "step": 10229 + }, + { + "epoch": 1.59, + "learning_rate": 6.644237771668655e-06, + "logits/chosen": -3.1070921421051025, + "logits/rejected": -2.774294376373291, + "logps/chosen": -176.75074768066406, + "logps/rejected": -358.6383361816406, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6311235427856445, + "rewards/margins": 7.345951080322266, + "rewards/rejected": -11.977073669433594, + "step": 10230 + }, + { + "epoch": 1.59, + "learning_rate": 6.643504331137508e-06, + "logits/chosen": -2.963914155960083, + "logits/rejected": -2.408869981765747, + "logps/chosen": -155.92071533203125, + "logps/rejected": -210.68521118164062, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.430578231811523, + "rewards/margins": 4.708970546722412, + "rewards/rejected": -11.139549255371094, + "step": 10231 + }, + { + "epoch": 1.59, + "learning_rate": 6.64277089060636e-06, + "logits/chosen": -1.875914216041565, + "logits/rejected": -2.836069345474243, + "logps/chosen": -172.28585815429688, + "logps/rejected": -500.3944091796875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9210140705108643, + "rewards/margins": 7.562999725341797, + "rewards/rejected": -11.484013557434082, + "step": 10232 + }, + { + "epoch": 1.59, + "learning_rate": 6.642037450075212e-06, + "logits/chosen": -2.4638500213623047, + "logits/rejected": -2.8819568157196045, + "logps/chosen": -130.5026092529297, + "logps/rejected": -241.72695922851562, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.46033239364624, + "rewards/margins": 5.965709686279297, + "rewards/rejected": -10.426042556762695, + "step": 10233 + }, + { + "epoch": 1.59, + "learning_rate": 6.6413040095440635e-06, + "logits/chosen": -2.8692715167999268, + "logits/rejected": -2.108698606491089, + "logps/chosen": -228.49493408203125, + "logps/rejected": -155.16424560546875, + "loss": 2.5978, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.967321395874023, + "rewards/margins": -2.229896068572998, + "rewards/rejected": -5.737425327301025, + "step": 10234 + }, + { + "epoch": 1.59, + "learning_rate": 6.640570569012916e-06, + "logits/chosen": -0.8360739946365356, + "logits/rejected": -2.970365285873413, + "logps/chosen": -158.75827026367188, + "logps/rejected": -615.8768920898438, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.994044303894043, + "rewards/margins": 5.188148498535156, + "rewards/rejected": -11.1821928024292, + "step": 10235 + }, + { + "epoch": 1.59, + "learning_rate": 6.639837128481768e-06, + "logits/chosen": -2.761700391769409, + "logits/rejected": -2.857621908187866, + "logps/chosen": -210.92074584960938, + "logps/rejected": -280.5250549316406, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5420374870300293, + "rewards/margins": 7.142441272735596, + "rewards/rejected": -10.684478759765625, + "step": 10236 + }, + { + "epoch": 1.59, + "learning_rate": 6.63910368795062e-06, + "logits/chosen": -1.0113534927368164, + "logits/rejected": -2.3632936477661133, + "logps/chosen": -322.7747497558594, + "logps/rejected": -703.9598388671875, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.380901336669922, + "rewards/margins": 7.021851539611816, + "rewards/rejected": -13.402752876281738, + "step": 10237 + }, + { + "epoch": 1.59, + "learning_rate": 6.638370247419472e-06, + "logits/chosen": -3.1191141605377197, + "logits/rejected": -3.011760711669922, + "logps/chosen": -125.29545593261719, + "logps/rejected": -192.82981872558594, + "loss": 1.1067, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.933781147003174, + "rewards/margins": 0.7451107501983643, + "rewards/rejected": -7.678892135620117, + "step": 10238 + }, + { + "epoch": 1.59, + "learning_rate": 6.637636806888324e-06, + "logits/chosen": -2.8882551193237305, + "logits/rejected": -2.3869729042053223, + "logps/chosen": -786.1744995117188, + "logps/rejected": -546.4099731445312, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2282443046569824, + "rewards/margins": 5.668554306030273, + "rewards/rejected": -8.896798133850098, + "step": 10239 + }, + { + "epoch": 1.59, + "learning_rate": 6.6369033663571765e-06, + "logits/chosen": -2.952359199523926, + "logits/rejected": -3.0058112144470215, + "logps/chosen": -255.38873291015625, + "logps/rejected": -380.0332336425781, + "loss": 0.6455, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.620942115783691, + "rewards/margins": 3.3475394248962402, + "rewards/rejected": -8.96848201751709, + "step": 10240 + }, + { + "epoch": 1.59, + "learning_rate": 6.636169925826028e-06, + "logits/chosen": -2.4070606231689453, + "logits/rejected": -3.019777774810791, + "logps/chosen": -77.71629333496094, + "logps/rejected": -422.4065246582031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.606557846069336, + "rewards/margins": 9.277992248535156, + "rewards/rejected": -12.884550094604492, + "step": 10241 + }, + { + "epoch": 1.59, + "learning_rate": 6.63543648529488e-06, + "logits/chosen": -2.2569191455841064, + "logits/rejected": -1.2608258724212646, + "logps/chosen": -451.366943359375, + "logps/rejected": -264.3765563964844, + "loss": 0.4003, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.748739719390869, + "rewards/margins": 3.8504648208618164, + "rewards/rejected": -8.599205017089844, + "step": 10242 + }, + { + "epoch": 1.59, + "learning_rate": 6.634703044763732e-06, + "logits/chosen": -2.588916540145874, + "logits/rejected": -2.340886116027832, + "logps/chosen": -133.9376220703125, + "logps/rejected": -215.30914306640625, + "loss": 0.7066, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.976429462432861, + "rewards/margins": 1.6018552780151367, + "rewards/rejected": -8.578285217285156, + "step": 10243 + }, + { + "epoch": 1.59, + "learning_rate": 6.633969604232585e-06, + "logits/chosen": -2.8902571201324463, + "logits/rejected": -3.077819347381592, + "logps/chosen": -83.96405792236328, + "logps/rejected": -242.92901611328125, + "loss": 0.0854, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.674049377441406, + "rewards/margins": 2.457517623901367, + "rewards/rejected": -8.131567001342773, + "step": 10244 + }, + { + "epoch": 1.59, + "learning_rate": 6.633236163701437e-06, + "logits/chosen": -2.017936944961548, + "logits/rejected": -2.7582571506500244, + "logps/chosen": -203.61819458007812, + "logps/rejected": -268.86712646484375, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.710860252380371, + "rewards/margins": 5.725480556488037, + "rewards/rejected": -12.43634033203125, + "step": 10245 + }, + { + "epoch": 1.59, + "learning_rate": 6.632502723170289e-06, + "logits/chosen": -2.711639642715454, + "logits/rejected": -2.8142731189727783, + "logps/chosen": -418.04962158203125, + "logps/rejected": -437.20098876953125, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.468088150024414, + "rewards/margins": 4.403429985046387, + "rewards/rejected": -9.8715181350708, + "step": 10246 + }, + { + "epoch": 1.59, + "learning_rate": 6.631769282639141e-06, + "logits/chosen": -1.875393033027649, + "logits/rejected": -2.7683422565460205, + "logps/chosen": -259.237548828125, + "logps/rejected": -672.536865234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.319317817687988, + "rewards/margins": 10.078838348388672, + "rewards/rejected": -14.398157119750977, + "step": 10247 + }, + { + "epoch": 1.59, + "learning_rate": 6.631035842107993e-06, + "logits/chosen": -2.474146604537964, + "logits/rejected": -2.993297576904297, + "logps/chosen": -200.49099731445312, + "logps/rejected": -235.8082275390625, + "loss": 3.1063, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.535888671875, + "rewards/margins": -0.34529948234558105, + "rewards/rejected": -7.19058895111084, + "step": 10248 + }, + { + "epoch": 1.59, + "learning_rate": 6.630302401576846e-06, + "logits/chosen": -2.963594436645508, + "logits/rejected": -1.634173035621643, + "logps/chosen": -478.9579162597656, + "logps/rejected": -320.8751220703125, + "loss": 0.1635, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5028395652771, + "rewards/margins": 3.4567790031433105, + "rewards/rejected": -10.95961856842041, + "step": 10249 + }, + { + "epoch": 1.59, + "learning_rate": 6.629568961045698e-06, + "logits/chosen": -1.4467726945877075, + "logits/rejected": -2.800142526626587, + "logps/chosen": -78.59066772460938, + "logps/rejected": -507.58001708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.153807163238525, + "rewards/margins": 13.164167404174805, + "rewards/rejected": -17.317974090576172, + "step": 10250 + }, + { + "epoch": 1.59, + "learning_rate": 6.62883552051455e-06, + "logits/chosen": -2.9413692951202393, + "logits/rejected": -2.6613776683807373, + "logps/chosen": -400.87646484375, + "logps/rejected": -348.6854248046875, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4891037940979, + "rewards/margins": 3.9268784523010254, + "rewards/rejected": -11.415982246398926, + "step": 10251 + }, + { + "epoch": 1.59, + "learning_rate": 6.628102079983402e-06, + "logits/chosen": -2.8602819442749023, + "logits/rejected": -1.6561857461929321, + "logps/chosen": -227.2892608642578, + "logps/rejected": -203.2313232421875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.305007457733154, + "rewards/margins": 5.9358367919921875, + "rewards/rejected": -10.2408447265625, + "step": 10252 + }, + { + "epoch": 1.59, + "learning_rate": 6.627368639452254e-06, + "logits/chosen": -1.9260839223861694, + "logits/rejected": -2.84423565864563, + "logps/chosen": -136.88369750976562, + "logps/rejected": -366.96624755859375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6679184436798096, + "rewards/margins": 8.320184707641602, + "rewards/rejected": -10.988102912902832, + "step": 10253 + }, + { + "epoch": 1.59, + "learning_rate": 6.626635198921106e-06, + "logits/chosen": -2.1655850410461426, + "logits/rejected": -2.972691059112549, + "logps/chosen": -168.99124145507812, + "logps/rejected": -377.47650146484375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.422989845275879, + "rewards/margins": 7.206334114074707, + "rewards/rejected": -12.629323959350586, + "step": 10254 + }, + { + "epoch": 1.59, + "learning_rate": 6.625901758389958e-06, + "logits/chosen": -2.7615389823913574, + "logits/rejected": -3.0546491146087646, + "logps/chosen": -612.0028076171875, + "logps/rejected": -630.049072265625, + "loss": 0.0218, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6460371017456055, + "rewards/margins": 3.885868787765503, + "rewards/rejected": -10.531906127929688, + "step": 10255 + }, + { + "epoch": 1.6, + "learning_rate": 6.62516831785881e-06, + "logits/chosen": -2.1610045433044434, + "logits/rejected": -2.9466257095336914, + "logps/chosen": -199.70388793945312, + "logps/rejected": -314.3394470214844, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1271109580993652, + "rewards/margins": 6.934669017791748, + "rewards/rejected": -10.061779975891113, + "step": 10256 + }, + { + "epoch": 1.6, + "learning_rate": 6.624434877327662e-06, + "logits/chosen": -2.9096434116363525, + "logits/rejected": -2.3435206413269043, + "logps/chosen": -116.10971069335938, + "logps/rejected": -305.5489501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.238767147064209, + "rewards/margins": 10.095309257507324, + "rewards/rejected": -13.334075927734375, + "step": 10257 + }, + { + "epoch": 1.6, + "learning_rate": 6.6237014367965145e-06, + "logits/chosen": -1.4213755130767822, + "logits/rejected": -2.6795129776000977, + "logps/chosen": -164.2622528076172, + "logps/rejected": -390.640625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.157434463500977, + "rewards/margins": 7.011218070983887, + "rewards/rejected": -12.168652534484863, + "step": 10258 + }, + { + "epoch": 1.6, + "learning_rate": 6.6229679962653664e-06, + "logits/chosen": -3.0671491622924805, + "logits/rejected": -2.8859031200408936, + "logps/chosen": -181.17494201660156, + "logps/rejected": -336.6540222167969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.495521545410156, + "rewards/margins": 9.504169464111328, + "rewards/rejected": -13.999691009521484, + "step": 10259 + }, + { + "epoch": 1.6, + "learning_rate": 6.622234555734218e-06, + "logits/chosen": -2.9259750843048096, + "logits/rejected": -1.8853120803833008, + "logps/chosen": -320.1415710449219, + "logps/rejected": -214.53733825683594, + "loss": 1.1266, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.52640438079834, + "rewards/margins": 0.009430646896362305, + "rewards/rejected": -6.535834789276123, + "step": 10260 + }, + { + "epoch": 1.6, + "learning_rate": 6.62150111520307e-06, + "logits/chosen": -2.08174204826355, + "logits/rejected": -2.521533489227295, + "logps/chosen": -387.5037536621094, + "logps/rejected": -470.71038818359375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.384477615356445, + "rewards/margins": 6.067802429199219, + "rewards/rejected": -14.452280044555664, + "step": 10261 + }, + { + "epoch": 1.6, + "learning_rate": 6.620767674671923e-06, + "logits/chosen": -2.2429704666137695, + "logits/rejected": -2.6020655632019043, + "logps/chosen": -121.90940856933594, + "logps/rejected": -236.02244567871094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.775120735168457, + "rewards/margins": 9.195093154907227, + "rewards/rejected": -12.97021484375, + "step": 10262 + }, + { + "epoch": 1.6, + "learning_rate": 6.620034234140775e-06, + "logits/chosen": -1.4931275844573975, + "logits/rejected": -2.5164268016815186, + "logps/chosen": -163.24032592773438, + "logps/rejected": -315.6715087890625, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.28085470199585, + "rewards/margins": 6.010153293609619, + "rewards/rejected": -10.291007995605469, + "step": 10263 + }, + { + "epoch": 1.6, + "learning_rate": 6.6193007936096275e-06, + "logits/chosen": -2.9253437519073486, + "logits/rejected": -2.2665629386901855, + "logps/chosen": -157.4365997314453, + "logps/rejected": -335.5504150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.336574554443359, + "rewards/margins": 11.167413711547852, + "rewards/rejected": -15.503988265991211, + "step": 10264 + }, + { + "epoch": 1.6, + "learning_rate": 6.618567353078479e-06, + "logits/chosen": -1.6756352186203003, + "logits/rejected": -2.381089210510254, + "logps/chosen": -241.69732666015625, + "logps/rejected": -263.4895324707031, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.629993438720703, + "rewards/margins": 6.19207763671875, + "rewards/rejected": -9.822071075439453, + "step": 10265 + }, + { + "epoch": 1.6, + "learning_rate": 6.617833912547331e-06, + "logits/chosen": -2.8629250526428223, + "logits/rejected": -2.507962942123413, + "logps/chosen": -460.828369140625, + "logps/rejected": -503.30010986328125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8749895095825195, + "rewards/margins": 7.843859672546387, + "rewards/rejected": -13.718849182128906, + "step": 10266 + }, + { + "epoch": 1.6, + "learning_rate": 6.617100472016184e-06, + "logits/chosen": -2.3370559215545654, + "logits/rejected": -2.7875468730926514, + "logps/chosen": -375.3794250488281, + "logps/rejected": -632.2487182617188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.221203804016113, + "rewards/margins": 10.04426383972168, + "rewards/rejected": -15.26546859741211, + "step": 10267 + }, + { + "epoch": 1.6, + "learning_rate": 6.616367031485036e-06, + "logits/chosen": -2.406590700149536, + "logits/rejected": -2.756204843521118, + "logps/chosen": -253.03411865234375, + "logps/rejected": -406.51202392578125, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.666964054107666, + "rewards/margins": 8.32295036315918, + "rewards/rejected": -12.989914894104004, + "step": 10268 + }, + { + "epoch": 1.6, + "learning_rate": 6.615633590953888e-06, + "logits/chosen": -2.99289870262146, + "logits/rejected": -2.7917447090148926, + "logps/chosen": -521.4302368164062, + "logps/rejected": -470.7301025390625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.169985771179199, + "rewards/margins": 6.686488151550293, + "rewards/rejected": -11.856473922729492, + "step": 10269 + }, + { + "epoch": 1.6, + "learning_rate": 6.61490015042274e-06, + "logits/chosen": -3.1565401554107666, + "logits/rejected": -2.9817183017730713, + "logps/chosen": -627.8652954101562, + "logps/rejected": -530.4986572265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.129446029663086, + "rewards/margins": 8.295296669006348, + "rewards/rejected": -13.424741744995117, + "step": 10270 + }, + { + "epoch": 1.6, + "learning_rate": 6.614166709891592e-06, + "logits/chosen": -2.3129539489746094, + "logits/rejected": -3.0558135509490967, + "logps/chosen": -555.4649658203125, + "logps/rejected": -612.4266967773438, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.892401933670044, + "rewards/margins": 7.019827842712402, + "rewards/rejected": -10.912229537963867, + "step": 10271 + }, + { + "epoch": 1.6, + "learning_rate": 6.613433269360444e-06, + "logits/chosen": -2.011364221572876, + "logits/rejected": -2.941434144973755, + "logps/chosen": -527.44921875, + "logps/rejected": -779.534912109375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.417623996734619, + "rewards/margins": 6.899405002593994, + "rewards/rejected": -12.317028999328613, + "step": 10272 + }, + { + "epoch": 1.6, + "learning_rate": 6.612699828829296e-06, + "logits/chosen": -3.0705955028533936, + "logits/rejected": -2.5847043991088867, + "logps/chosen": -384.3398132324219, + "logps/rejected": -266.95703125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.226762294769287, + "rewards/margins": 8.011171340942383, + "rewards/rejected": -11.237934112548828, + "step": 10273 + }, + { + "epoch": 1.6, + "learning_rate": 6.611966388298148e-06, + "logits/chosen": -1.2331516742706299, + "logits/rejected": -2.7148306369781494, + "logps/chosen": -111.69453430175781, + "logps/rejected": -265.40509033203125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.728679180145264, + "rewards/margins": 6.794371128082275, + "rewards/rejected": -11.523050308227539, + "step": 10274 + }, + { + "epoch": 1.6, + "learning_rate": 6.611232947767e-06, + "logits/chosen": -1.5444947481155396, + "logits/rejected": -2.8291914463043213, + "logps/chosen": -194.8699188232422, + "logps/rejected": -344.0339050292969, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.372134208679199, + "rewards/margins": 7.143251419067383, + "rewards/rejected": -13.515386581420898, + "step": 10275 + }, + { + "epoch": 1.6, + "learning_rate": 6.610499507235853e-06, + "logits/chosen": -2.7258663177490234, + "logits/rejected": -2.998365640640259, + "logps/chosen": -171.21519470214844, + "logps/rejected": -280.84490966796875, + "loss": 1.0001, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.398219108581543, + "rewards/margins": 3.3900179862976074, + "rewards/rejected": -12.788236618041992, + "step": 10276 + }, + { + "epoch": 1.6, + "learning_rate": 6.6097660667047045e-06, + "logits/chosen": -2.300887107849121, + "logits/rejected": -2.783271551132202, + "logps/chosen": -165.55270385742188, + "logps/rejected": -252.22116088867188, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.083742141723633, + "rewards/margins": 6.223202705383301, + "rewards/rejected": -13.30694580078125, + "step": 10277 + }, + { + "epoch": 1.6, + "learning_rate": 6.609032626173556e-06, + "logits/chosen": -1.3185343742370605, + "logits/rejected": -2.9914960861206055, + "logps/chosen": -445.146484375, + "logps/rejected": -885.0303955078125, + "loss": 0.0287, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.848958492279053, + "rewards/margins": 3.5653252601623535, + "rewards/rejected": -10.414283752441406, + "step": 10278 + }, + { + "epoch": 1.6, + "learning_rate": 6.608299185642408e-06, + "logits/chosen": -2.2213006019592285, + "logits/rejected": -2.299146890640259, + "logps/chosen": -109.28272247314453, + "logps/rejected": -348.61798095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8207731246948242, + "rewards/margins": 14.116124153137207, + "rewards/rejected": -15.936897277832031, + "step": 10279 + }, + { + "epoch": 1.6, + "learning_rate": 6.607565745111261e-06, + "logits/chosen": -2.141139507293701, + "logits/rejected": -3.068005084991455, + "logps/chosen": -358.5833740234375, + "logps/rejected": -406.20733642578125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6767873764038086, + "rewards/margins": 7.101001739501953, + "rewards/rejected": -10.777789115905762, + "step": 10280 + }, + { + "epoch": 1.6, + "learning_rate": 6.606832304580114e-06, + "logits/chosen": -1.4460369348526, + "logits/rejected": -3.058627128601074, + "logps/chosen": -119.41764068603516, + "logps/rejected": -382.35223388671875, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.299246788024902, + "rewards/margins": 5.395011901855469, + "rewards/rejected": -10.694258689880371, + "step": 10281 + }, + { + "epoch": 1.6, + "learning_rate": 6.6060988640489656e-06, + "logits/chosen": -1.5430562496185303, + "logits/rejected": -2.700169086456299, + "logps/chosen": -155.96182250976562, + "logps/rejected": -312.0333251953125, + "loss": 0.0501, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.002530574798584, + "rewards/margins": 4.471351146697998, + "rewards/rejected": -9.473881721496582, + "step": 10282 + }, + { + "epoch": 1.6, + "learning_rate": 6.6053654235178174e-06, + "logits/chosen": -2.491062879562378, + "logits/rejected": -3.114229440689087, + "logps/chosen": -80.91798400878906, + "logps/rejected": -229.78257751464844, + "loss": 0.0814, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9437408447265625, + "rewards/margins": 5.014750957489014, + "rewards/rejected": -11.958491325378418, + "step": 10283 + }, + { + "epoch": 1.6, + "learning_rate": 6.60463198298667e-06, + "logits/chosen": -2.8050451278686523, + "logits/rejected": -2.460874080657959, + "logps/chosen": -280.4253234863281, + "logps/rejected": -219.67794799804688, + "loss": 0.1161, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.829190254211426, + "rewards/margins": 3.363560438156128, + "rewards/rejected": -11.192750930786133, + "step": 10284 + }, + { + "epoch": 1.6, + "learning_rate": 6.603898542455522e-06, + "logits/chosen": -2.5359108448028564, + "logits/rejected": -3.1271092891693115, + "logps/chosen": -223.15805053710938, + "logps/rejected": -294.31988525390625, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.022946834564209, + "rewards/margins": 3.995291233062744, + "rewards/rejected": -11.018238067626953, + "step": 10285 + }, + { + "epoch": 1.6, + "learning_rate": 6.603165101924374e-06, + "logits/chosen": -2.4652934074401855, + "logits/rejected": -2.7140491008758545, + "logps/chosen": -117.84732055664062, + "logps/rejected": -218.82498168945312, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9246091842651367, + "rewards/margins": 5.602237701416016, + "rewards/rejected": -9.526845932006836, + "step": 10286 + }, + { + "epoch": 1.6, + "learning_rate": 6.602431661393226e-06, + "logits/chosen": -1.9812813997268677, + "logits/rejected": -2.660040855407715, + "logps/chosen": -294.1167297363281, + "logps/rejected": -532.0616455078125, + "loss": 0.0614, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.036871433258057, + "rewards/margins": 4.241943359375, + "rewards/rejected": -11.278814315795898, + "step": 10287 + }, + { + "epoch": 1.6, + "learning_rate": 6.601698220862078e-06, + "logits/chosen": -3.180756092071533, + "logits/rejected": -2.697016477584839, + "logps/chosen": -520.8521118164062, + "logps/rejected": -301.6468505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1621460914611816, + "rewards/margins": 9.164411544799805, + "rewards/rejected": -11.326558113098145, + "step": 10288 + }, + { + "epoch": 1.6, + "learning_rate": 6.60096478033093e-06, + "logits/chosen": -2.2899422645568848, + "logits/rejected": -2.7036139965057373, + "logps/chosen": -120.94197845458984, + "logps/rejected": -330.7659912109375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.126911163330078, + "rewards/margins": 7.971644878387451, + "rewards/rejected": -11.098556518554688, + "step": 10289 + }, + { + "epoch": 1.6, + "learning_rate": 6.600231339799782e-06, + "logits/chosen": -2.5070762634277344, + "logits/rejected": -3.126852035522461, + "logps/chosen": -97.94341278076172, + "logps/rejected": -328.2420654296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6143932342529297, + "rewards/margins": 8.68814754486084, + "rewards/rejected": -12.30254077911377, + "step": 10290 + }, + { + "epoch": 1.6, + "learning_rate": 6.599497899268634e-06, + "logits/chosen": -1.715097427368164, + "logits/rejected": -3.108901023864746, + "logps/chosen": -169.25775146484375, + "logps/rejected": -543.0618896484375, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.416121482849121, + "rewards/margins": 7.513976573944092, + "rewards/rejected": -14.930097579956055, + "step": 10291 + }, + { + "epoch": 1.6, + "learning_rate": 6.598764458737486e-06, + "logits/chosen": -3.0760679244995117, + "logits/rejected": -2.274641990661621, + "logps/chosen": -227.65589904785156, + "logps/rejected": -289.768310546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.486787796020508, + "rewards/margins": 8.796378135681152, + "rewards/rejected": -13.28316593170166, + "step": 10292 + }, + { + "epoch": 1.6, + "learning_rate": 6.598031018206339e-06, + "logits/chosen": -2.5128040313720703, + "logits/rejected": -3.0363667011260986, + "logps/chosen": -467.3757019042969, + "logps/rejected": -471.3280029296875, + "loss": 0.1842, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.152578830718994, + "rewards/margins": 6.148815155029297, + "rewards/rejected": -11.301393508911133, + "step": 10293 + }, + { + "epoch": 1.6, + "learning_rate": 6.597297577675191e-06, + "logits/chosen": -1.7465336322784424, + "logits/rejected": -2.4312338829040527, + "logps/chosen": -125.91594696044922, + "logps/rejected": -330.589111328125, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.741569995880127, + "rewards/margins": 6.848459243774414, + "rewards/rejected": -11.590028762817383, + "step": 10294 + }, + { + "epoch": 1.6, + "learning_rate": 6.5965641371440425e-06, + "logits/chosen": -3.0009026527404785, + "logits/rejected": -2.9329986572265625, + "logps/chosen": -889.8873291015625, + "logps/rejected": -621.2401123046875, + "loss": 0.1349, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.237240791320801, + "rewards/margins": 2.644437074661255, + "rewards/rejected": -9.881677627563477, + "step": 10295 + }, + { + "epoch": 1.6, + "learning_rate": 6.595830696612894e-06, + "logits/chosen": -1.247855544090271, + "logits/rejected": -2.551926612854004, + "logps/chosen": -186.82125854492188, + "logps/rejected": -226.6473388671875, + "loss": 0.0535, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.285735130310059, + "rewards/margins": 4.158509254455566, + "rewards/rejected": -10.444244384765625, + "step": 10296 + }, + { + "epoch": 1.6, + "learning_rate": 6.595097256081747e-06, + "logits/chosen": -2.944047451019287, + "logits/rejected": -2.6108858585357666, + "logps/chosen": -339.66998291015625, + "logps/rejected": -425.82861328125, + "loss": 0.2016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.759583473205566, + "rewards/margins": 4.07048225402832, + "rewards/rejected": -11.830065727233887, + "step": 10297 + }, + { + "epoch": 1.6, + "learning_rate": 6.5943638155506e-06, + "logits/chosen": -1.5903854370117188, + "logits/rejected": -2.787435531616211, + "logps/chosen": -123.22737121582031, + "logps/rejected": -359.77093505859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4921875, + "rewards/margins": 13.053581237792969, + "rewards/rejected": -14.545767784118652, + "step": 10298 + }, + { + "epoch": 1.6, + "learning_rate": 6.593630375019452e-06, + "logits/chosen": -2.523590326309204, + "logits/rejected": -2.0326087474823, + "logps/chosen": -515.6128540039062, + "logps/rejected": -491.6646423339844, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.092578887939453, + "rewards/margins": 7.704117298126221, + "rewards/rejected": -13.796695709228516, + "step": 10299 + }, + { + "epoch": 1.6, + "learning_rate": 6.592896934488304e-06, + "logits/chosen": -2.2366158962249756, + "logits/rejected": -2.9897818565368652, + "logps/chosen": -354.7257385253906, + "logps/rejected": -489.74267578125, + "loss": 0.0678, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6201844215393066, + "rewards/margins": 2.660688638687134, + "rewards/rejected": -6.2808732986450195, + "step": 10300 + }, + { + "epoch": 1.6, + "learning_rate": 6.5921634939571555e-06, + "logits/chosen": -2.596073627471924, + "logits/rejected": -2.5428645610809326, + "logps/chosen": -153.87509155273438, + "logps/rejected": -338.8009033203125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.050538063049316, + "rewards/margins": 7.003037452697754, + "rewards/rejected": -12.05357551574707, + "step": 10301 + }, + { + "epoch": 1.6, + "learning_rate": 6.591430053426008e-06, + "logits/chosen": -2.030120849609375, + "logits/rejected": -2.3975775241851807, + "logps/chosen": -179.9284210205078, + "logps/rejected": -472.238525390625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.958591461181641, + "rewards/margins": 9.181621551513672, + "rewards/rejected": -16.140213012695312, + "step": 10302 + }, + { + "epoch": 1.6, + "learning_rate": 6.59069661289486e-06, + "logits/chosen": -1.7262383699417114, + "logits/rejected": -3.125826835632324, + "logps/chosen": -84.86245727539062, + "logps/rejected": -307.5975341796875, + "loss": 0.893, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.592851161956787, + "rewards/margins": 4.309296607971191, + "rewards/rejected": -8.90214729309082, + "step": 10303 + }, + { + "epoch": 1.6, + "learning_rate": 6.589963172363712e-06, + "logits/chosen": -1.9976648092269897, + "logits/rejected": -2.847013473510742, + "logps/chosen": -466.8646545410156, + "logps/rejected": -438.2672424316406, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.177255630493164, + "rewards/margins": 8.089251518249512, + "rewards/rejected": -14.266507148742676, + "step": 10304 + }, + { + "epoch": 1.6, + "learning_rate": 6.589229731832564e-06, + "logits/chosen": -1.698858380317688, + "logits/rejected": -2.940742015838623, + "logps/chosen": -271.81048583984375, + "logps/rejected": -519.986083984375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.826914310455322, + "rewards/margins": 7.16903829574585, + "rewards/rejected": -12.995952606201172, + "step": 10305 + }, + { + "epoch": 1.6, + "learning_rate": 6.588496291301416e-06, + "logits/chosen": -2.966492176055908, + "logits/rejected": -2.3060097694396973, + "logps/chosen": -400.3431701660156, + "logps/rejected": -533.6685791015625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.252660751342773, + "rewards/margins": 7.317070007324219, + "rewards/rejected": -12.569730758666992, + "step": 10306 + }, + { + "epoch": 1.6, + "learning_rate": 6.5877628507702684e-06, + "logits/chosen": -2.511741876602173, + "logits/rejected": -3.0303261280059814, + "logps/chosen": -718.4246826171875, + "logps/rejected": -831.4892578125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.573117256164551, + "rewards/margins": 10.638398170471191, + "rewards/rejected": -14.211515426635742, + "step": 10307 + }, + { + "epoch": 1.6, + "learning_rate": 6.58702941023912e-06, + "logits/chosen": -2.7628440856933594, + "logits/rejected": -3.0723929405212402, + "logps/chosen": -122.05590057373047, + "logps/rejected": -498.6593017578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.715565204620361, + "rewards/margins": 9.897506713867188, + "rewards/rejected": -16.61307144165039, + "step": 10308 + }, + { + "epoch": 1.6, + "learning_rate": 6.586295969707972e-06, + "logits/chosen": -2.710777521133423, + "logits/rejected": -2.947669744491577, + "logps/chosen": -164.40399169921875, + "logps/rejected": -290.55535888671875, + "loss": 0.2081, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.938076019287109, + "rewards/margins": 1.693042278289795, + "rewards/rejected": -9.631118774414062, + "step": 10309 + }, + { + "epoch": 1.6, + "learning_rate": 6.585562529176824e-06, + "logits/chosen": -2.88364839553833, + "logits/rejected": -1.7318037748336792, + "logps/chosen": -339.9677734375, + "logps/rejected": -297.75, + "loss": 1.7179, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.657454490661621, + "rewards/margins": 2.276564121246338, + "rewards/rejected": -6.934019088745117, + "step": 10310 + }, + { + "epoch": 1.6, + "learning_rate": 6.584829088645677e-06, + "logits/chosen": -2.534071922302246, + "logits/rejected": -3.0198724269866943, + "logps/chosen": -304.8254089355469, + "logps/rejected": -587.394775390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1311774253845215, + "rewards/margins": 8.57829761505127, + "rewards/rejected": -13.709474563598633, + "step": 10311 + }, + { + "epoch": 1.6, + "learning_rate": 6.584095648114529e-06, + "logits/chosen": -1.9571574926376343, + "logits/rejected": -2.8479208946228027, + "logps/chosen": -352.0294494628906, + "logps/rejected": -360.10589599609375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2409043312072754, + "rewards/margins": 10.22970962524414, + "rewards/rejected": -12.470613479614258, + "step": 10312 + }, + { + "epoch": 1.6, + "learning_rate": 6.5833622075833806e-06, + "logits/chosen": -2.2208919525146484, + "logits/rejected": -2.663177967071533, + "logps/chosen": -138.34310913085938, + "logps/rejected": -306.2449951171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.974410057067871, + "rewards/margins": 10.378252983093262, + "rewards/rejected": -15.352663040161133, + "step": 10313 + }, + { + "epoch": 1.6, + "learning_rate": 6.582628767052233e-06, + "logits/chosen": -2.9394149780273438, + "logits/rejected": -2.9032275676727295, + "logps/chosen": -150.4061737060547, + "logps/rejected": -179.60699462890625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.270630359649658, + "rewards/margins": 6.44807767868042, + "rewards/rejected": -9.718708038330078, + "step": 10314 + }, + { + "epoch": 1.6, + "learning_rate": 6.581895326521085e-06, + "logits/chosen": -2.817625045776367, + "logits/rejected": -2.590059995651245, + "logps/chosen": -107.6004867553711, + "logps/rejected": -322.9281005859375, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8271169662475586, + "rewards/margins": 8.830116271972656, + "rewards/rejected": -12.657234191894531, + "step": 10315 + }, + { + "epoch": 1.6, + "learning_rate": 6.581161885989938e-06, + "logits/chosen": -2.945547580718994, + "logits/rejected": -2.8655316829681396, + "logps/chosen": -372.3143005371094, + "logps/rejected": -332.0107421875, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.941139221191406, + "rewards/margins": 5.066099166870117, + "rewards/rejected": -12.007238388061523, + "step": 10316 + }, + { + "epoch": 1.6, + "learning_rate": 6.58042844545879e-06, + "logits/chosen": -2.6604416370391846, + "logits/rejected": -2.7434513568878174, + "logps/chosen": -387.8465881347656, + "logps/rejected": -594.5023193359375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.132396697998047, + "rewards/margins": 9.706733703613281, + "rewards/rejected": -12.839130401611328, + "step": 10317 + }, + { + "epoch": 1.6, + "learning_rate": 6.579695004927642e-06, + "logits/chosen": -1.8825697898864746, + "logits/rejected": -2.244729518890381, + "logps/chosen": -358.0455017089844, + "logps/rejected": -417.0462646484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.978099346160889, + "rewards/margins": 11.452898979187012, + "rewards/rejected": -17.430997848510742, + "step": 10318 + }, + { + "epoch": 1.6, + "learning_rate": 6.5789615643964935e-06, + "logits/chosen": -2.286670684814453, + "logits/rejected": -2.740973711013794, + "logps/chosen": -297.7672119140625, + "logps/rejected": -339.8045654296875, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3580403327941895, + "rewards/margins": 5.571766376495361, + "rewards/rejected": -12.92980670928955, + "step": 10319 + }, + { + "epoch": 1.6, + "learning_rate": 6.578228123865346e-06, + "logits/chosen": -2.942873001098633, + "logits/rejected": -2.9531538486480713, + "logps/chosen": -355.3443908691406, + "logps/rejected": -342.1262512207031, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.967343330383301, + "rewards/margins": 5.133672714233398, + "rewards/rejected": -10.101015090942383, + "step": 10320 + }, + { + "epoch": 1.61, + "learning_rate": 6.577494683334198e-06, + "logits/chosen": -2.820788860321045, + "logits/rejected": -3.035249948501587, + "logps/chosen": -128.28428649902344, + "logps/rejected": -334.32794189453125, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.469728946685791, + "rewards/margins": 8.3308687210083, + "rewards/rejected": -12.80059814453125, + "step": 10321 + }, + { + "epoch": 1.61, + "learning_rate": 6.57676124280305e-06, + "logits/chosen": -1.7778918743133545, + "logits/rejected": -2.863833427429199, + "logps/chosen": -250.6957550048828, + "logps/rejected": -472.90869140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.441056251525879, + "rewards/margins": 10.403644561767578, + "rewards/rejected": -13.844700813293457, + "step": 10322 + }, + { + "epoch": 1.61, + "learning_rate": 6.576027802271902e-06, + "logits/chosen": -3.1555020809173584, + "logits/rejected": -3.106733560562134, + "logps/chosen": -150.35459899902344, + "logps/rejected": -284.9664306640625, + "loss": 0.0865, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.926919460296631, + "rewards/margins": 6.336123466491699, + "rewards/rejected": -12.263042449951172, + "step": 10323 + }, + { + "epoch": 1.61, + "learning_rate": 6.575294361740754e-06, + "logits/chosen": -2.7030787467956543, + "logits/rejected": -2.6921918392181396, + "logps/chosen": -290.5210266113281, + "logps/rejected": -222.55038452148438, + "loss": 1.5978, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.518693447113037, + "rewards/margins": 2.369903564453125, + "rewards/rejected": -7.88859748840332, + "step": 10324 + }, + { + "epoch": 1.61, + "learning_rate": 6.5745609212096065e-06, + "logits/chosen": -2.5256757736206055, + "logits/rejected": -2.9627106189727783, + "logps/chosen": -164.62721252441406, + "logps/rejected": -509.7470703125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.701947212219238, + "rewards/margins": 8.032798767089844, + "rewards/rejected": -12.734745979309082, + "step": 10325 + }, + { + "epoch": 1.61, + "learning_rate": 6.573827480678458e-06, + "logits/chosen": -1.818818211555481, + "logits/rejected": -2.6868245601654053, + "logps/chosen": -362.15496826171875, + "logps/rejected": -673.4253540039062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.311442852020264, + "rewards/margins": 12.254100799560547, + "rewards/rejected": -16.56554412841797, + "step": 10326 + }, + { + "epoch": 1.61, + "learning_rate": 6.57309404014731e-06, + "logits/chosen": -2.0846447944641113, + "logits/rejected": -2.6286401748657227, + "logps/chosen": -332.0240173339844, + "logps/rejected": -365.1963806152344, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8636116981506348, + "rewards/margins": 6.171951770782471, + "rewards/rejected": -9.035563468933105, + "step": 10327 + }, + { + "epoch": 1.61, + "learning_rate": 6.572360599616162e-06, + "logits/chosen": -3.1243202686309814, + "logits/rejected": -2.0849361419677734, + "logps/chosen": -689.802001953125, + "logps/rejected": -498.2635192871094, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.603145122528076, + "rewards/margins": 7.108221530914307, + "rewards/rejected": -11.711366653442383, + "step": 10328 + }, + { + "epoch": 1.61, + "learning_rate": 6.571627159085015e-06, + "logits/chosen": -2.0956196784973145, + "logits/rejected": -2.3599274158477783, + "logps/chosen": -139.5832977294922, + "logps/rejected": -416.27874755859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.540043354034424, + "rewards/margins": 9.68846321105957, + "rewards/rejected": -13.228506088256836, + "step": 10329 + }, + { + "epoch": 1.61, + "learning_rate": 6.570893718553867e-06, + "logits/chosen": -1.9517172574996948, + "logits/rejected": -2.320218324661255, + "logps/chosen": -158.94200134277344, + "logps/rejected": -350.55877685546875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.249293327331543, + "rewards/margins": 8.54470443725586, + "rewards/rejected": -12.793997764587402, + "step": 10330 + }, + { + "epoch": 1.61, + "learning_rate": 6.5701602780227194e-06, + "logits/chosen": -2.409515619277954, + "logits/rejected": -2.0867691040039062, + "logps/chosen": -450.72247314453125, + "logps/rejected": -447.13214111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7422876358032227, + "rewards/margins": 10.404874801635742, + "rewards/rejected": -13.147161483764648, + "step": 10331 + }, + { + "epoch": 1.61, + "learning_rate": 6.569426837491571e-06, + "logits/chosen": -2.767874240875244, + "logits/rejected": -3.0088558197021484, + "logps/chosen": -86.302734375, + "logps/rejected": -362.32818603515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.070157527923584, + "rewards/margins": 9.616687774658203, + "rewards/rejected": -14.686845779418945, + "step": 10332 + }, + { + "epoch": 1.61, + "learning_rate": 6.568693396960424e-06, + "logits/chosen": -1.5493345260620117, + "logits/rejected": -2.8210484981536865, + "logps/chosen": -132.07298278808594, + "logps/rejected": -589.9055786132812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.521157741546631, + "rewards/margins": 9.872367858886719, + "rewards/rejected": -14.393525123596191, + "step": 10333 + }, + { + "epoch": 1.61, + "learning_rate": 6.567959956429276e-06, + "logits/chosen": -2.2950351238250732, + "logits/rejected": -2.482393264770508, + "logps/chosen": -151.5189971923828, + "logps/rejected": -349.548095703125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.451664447784424, + "rewards/margins": 6.999048233032227, + "rewards/rejected": -12.450713157653809, + "step": 10334 + }, + { + "epoch": 1.61, + "learning_rate": 6.567226515898128e-06, + "logits/chosen": -1.2151979207992554, + "logits/rejected": -2.618715524673462, + "logps/chosen": -112.41995239257812, + "logps/rejected": -514.8721313476562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.14353084564209, + "rewards/margins": 11.896324157714844, + "rewards/rejected": -17.03985595703125, + "step": 10335 + }, + { + "epoch": 1.61, + "learning_rate": 6.56649307536698e-06, + "logits/chosen": -1.6706125736236572, + "logits/rejected": -2.8143222332000732, + "logps/chosen": -273.21539306640625, + "logps/rejected": -494.1829528808594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5489044189453125, + "rewards/margins": 10.231626510620117, + "rewards/rejected": -15.78053092956543, + "step": 10336 + }, + { + "epoch": 1.61, + "learning_rate": 6.5657596348358316e-06, + "logits/chosen": -2.699265956878662, + "logits/rejected": -3.0264549255371094, + "logps/chosen": -85.34423828125, + "logps/rejected": -216.1288604736328, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.022435665130615, + "rewards/margins": 5.962451934814453, + "rewards/rejected": -10.984888076782227, + "step": 10337 + }, + { + "epoch": 1.61, + "learning_rate": 6.565026194304684e-06, + "logits/chosen": -2.807108163833618, + "logits/rejected": -2.1025590896606445, + "logps/chosen": -419.66070556640625, + "logps/rejected": -493.8399353027344, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.072965145111084, + "rewards/margins": 7.776764869689941, + "rewards/rejected": -11.849729537963867, + "step": 10338 + }, + { + "epoch": 1.61, + "learning_rate": 6.564292753773536e-06, + "logits/chosen": -2.398757219314575, + "logits/rejected": -2.7293882369995117, + "logps/chosen": -137.69281005859375, + "logps/rejected": -423.8448486328125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.247757911682129, + "rewards/margins": 11.951101303100586, + "rewards/rejected": -17.1988582611084, + "step": 10339 + }, + { + "epoch": 1.61, + "learning_rate": 6.563559313242388e-06, + "logits/chosen": -1.4918911457061768, + "logits/rejected": -2.850614547729492, + "logps/chosen": -549.3798828125, + "logps/rejected": -524.011474609375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.894017219543457, + "rewards/margins": 7.748003959655762, + "rewards/rejected": -13.642021179199219, + "step": 10340 + }, + { + "epoch": 1.61, + "learning_rate": 6.56282587271124e-06, + "logits/chosen": -2.8230221271514893, + "logits/rejected": -2.639523506164551, + "logps/chosen": -269.53857421875, + "logps/rejected": -334.98876953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4294052124023438, + "rewards/margins": 8.585260391235352, + "rewards/rejected": -12.014665603637695, + "step": 10341 + }, + { + "epoch": 1.61, + "learning_rate": 6.562092432180093e-06, + "logits/chosen": -2.628535270690918, + "logits/rejected": -2.9234023094177246, + "logps/chosen": -149.8699951171875, + "logps/rejected": -319.5286865234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.591909885406494, + "rewards/margins": 8.172218322753906, + "rewards/rejected": -10.764127731323242, + "step": 10342 + }, + { + "epoch": 1.61, + "learning_rate": 6.5613589916489445e-06, + "logits/chosen": -2.7792420387268066, + "logits/rejected": -2.5402510166168213, + "logps/chosen": -339.93695068359375, + "logps/rejected": -291.1520690917969, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7912750244140625, + "rewards/margins": 4.691720962524414, + "rewards/rejected": -10.482995986938477, + "step": 10343 + }, + { + "epoch": 1.61, + "learning_rate": 6.560625551117796e-06, + "logits/chosen": -2.835831642150879, + "logits/rejected": -2.3529791831970215, + "logps/chosen": -169.18727111816406, + "logps/rejected": -300.89105224609375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.516310691833496, + "rewards/margins": 7.869728088378906, + "rewards/rejected": -13.386038780212402, + "step": 10344 + }, + { + "epoch": 1.61, + "learning_rate": 6.559892110586648e-06, + "logits/chosen": -2.8602523803710938, + "logits/rejected": -2.3430652618408203, + "logps/chosen": -382.20770263671875, + "logps/rejected": -588.4739990234375, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.465262413024902, + "rewards/margins": 5.6412458419799805, + "rewards/rejected": -12.106508255004883, + "step": 10345 + }, + { + "epoch": 1.61, + "learning_rate": 6.5591586700555e-06, + "logits/chosen": -2.1746249198913574, + "logits/rejected": -2.97080135345459, + "logps/chosen": -189.2799072265625, + "logps/rejected": -383.38214111328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6595346927642822, + "rewards/margins": 9.346145629882812, + "rewards/rejected": -13.005680084228516, + "step": 10346 + }, + { + "epoch": 1.61, + "learning_rate": 6.558425229524353e-06, + "logits/chosen": -3.0349559783935547, + "logits/rejected": -2.083240270614624, + "logps/chosen": -866.79150390625, + "logps/rejected": -567.1226806640625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.276190757751465, + "rewards/margins": 8.297099113464355, + "rewards/rejected": -13.57328987121582, + "step": 10347 + }, + { + "epoch": 1.61, + "learning_rate": 6.557691788993206e-06, + "logits/chosen": -3.011049270629883, + "logits/rejected": -1.8256207704544067, + "logps/chosen": -392.54376220703125, + "logps/rejected": -299.23895263671875, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.636114597320557, + "rewards/margins": 5.116021156311035, + "rewards/rejected": -10.75213623046875, + "step": 10348 + }, + { + "epoch": 1.61, + "learning_rate": 6.5569583484620575e-06, + "logits/chosen": -2.781710386276245, + "logits/rejected": -2.157306432723999, + "logps/chosen": -214.9735565185547, + "logps/rejected": -226.18829345703125, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.117817401885986, + "rewards/margins": 4.794837951660156, + "rewards/rejected": -9.912654876708984, + "step": 10349 + }, + { + "epoch": 1.61, + "learning_rate": 6.556224907930909e-06, + "logits/chosen": -1.7195827960968018, + "logits/rejected": -2.86751651763916, + "logps/chosen": -419.6370849609375, + "logps/rejected": -479.6461181640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.520038604736328, + "rewards/margins": 10.680548667907715, + "rewards/rejected": -18.20058822631836, + "step": 10350 + }, + { + "epoch": 1.61, + "learning_rate": 6.555491467399762e-06, + "logits/chosen": -1.6749070882797241, + "logits/rejected": -3.0007262229919434, + "logps/chosen": -389.939208984375, + "logps/rejected": -577.04638671875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.978177070617676, + "rewards/margins": 7.256648063659668, + "rewards/rejected": -14.234825134277344, + "step": 10351 + }, + { + "epoch": 1.61, + "learning_rate": 6.554758026868614e-06, + "logits/chosen": -2.246624231338501, + "logits/rejected": -2.581226348876953, + "logps/chosen": -331.53985595703125, + "logps/rejected": -480.29144287109375, + "loss": 0.0609, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.571382522583008, + "rewards/margins": 7.683964729309082, + "rewards/rejected": -16.255346298217773, + "step": 10352 + }, + { + "epoch": 1.61, + "learning_rate": 6.554024586337466e-06, + "logits/chosen": -1.7900856733322144, + "logits/rejected": -2.638676166534424, + "logps/chosen": -160.7517852783203, + "logps/rejected": -543.4569702148438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.89627742767334, + "rewards/margins": 14.194262504577637, + "rewards/rejected": -19.090539932250977, + "step": 10353 + }, + { + "epoch": 1.61, + "learning_rate": 6.553291145806318e-06, + "logits/chosen": -2.3780248165130615, + "logits/rejected": -3.03330659866333, + "logps/chosen": -279.9324951171875, + "logps/rejected": -383.01507568359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7886276245117188, + "rewards/margins": 9.134140014648438, + "rewards/rejected": -12.922767639160156, + "step": 10354 + }, + { + "epoch": 1.61, + "learning_rate": 6.55255770527517e-06, + "logits/chosen": -2.3543760776519775, + "logits/rejected": -2.7555882930755615, + "logps/chosen": -110.22508239746094, + "logps/rejected": -173.11093139648438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.724785804748535, + "rewards/margins": 8.664873123168945, + "rewards/rejected": -11.38965892791748, + "step": 10355 + }, + { + "epoch": 1.61, + "learning_rate": 6.551824264744022e-06, + "logits/chosen": -2.7338483333587646, + "logits/rejected": -2.824519157409668, + "logps/chosen": -162.04385375976562, + "logps/rejected": -352.49066162109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4268527030944824, + "rewards/margins": 8.252487182617188, + "rewards/rejected": -10.679340362548828, + "step": 10356 + }, + { + "epoch": 1.61, + "learning_rate": 6.551090824212874e-06, + "logits/chosen": -2.281010866165161, + "logits/rejected": -2.830756187438965, + "logps/chosen": -134.55010986328125, + "logps/rejected": -318.6180725097656, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.329154014587402, + "rewards/margins": 6.919563293457031, + "rewards/rejected": -11.248717308044434, + "step": 10357 + }, + { + "epoch": 1.61, + "learning_rate": 6.550357383681726e-06, + "logits/chosen": -1.7092045545578003, + "logits/rejected": -2.7471494674682617, + "logps/chosen": -163.69888305664062, + "logps/rejected": -368.6160583496094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.959963798522949, + "rewards/margins": 8.504804611206055, + "rewards/rejected": -13.46476936340332, + "step": 10358 + }, + { + "epoch": 1.61, + "learning_rate": 6.549623943150578e-06, + "logits/chosen": -2.6385498046875, + "logits/rejected": -2.533599853515625, + "logps/chosen": -194.8692626953125, + "logps/rejected": -241.70999145507812, + "loss": 1.3136, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.672418594360352, + "rewards/margins": 3.0459482669830322, + "rewards/rejected": -9.718366622924805, + "step": 10359 + }, + { + "epoch": 1.61, + "learning_rate": 6.548890502619431e-06, + "logits/chosen": -2.896151065826416, + "logits/rejected": -2.21561336517334, + "logps/chosen": -159.14328002929688, + "logps/rejected": -291.78448486328125, + "loss": 1.6966, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.184139251708984, + "rewards/margins": 3.635636568069458, + "rewards/rejected": -9.81977653503418, + "step": 10360 + }, + { + "epoch": 1.61, + "learning_rate": 6.5481570620882826e-06, + "logits/chosen": -1.7854793071746826, + "logits/rejected": -2.4996554851531982, + "logps/chosen": -148.56419372558594, + "logps/rejected": -317.42413330078125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.214964866638184, + "rewards/margins": 6.231600761413574, + "rewards/rejected": -11.446565628051758, + "step": 10361 + }, + { + "epoch": 1.61, + "learning_rate": 6.5474236215571345e-06, + "logits/chosen": -1.935196876525879, + "logits/rejected": -3.021838426589966, + "logps/chosen": -158.40740966796875, + "logps/rejected": -285.4297790527344, + "loss": 0.7029, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.488266468048096, + "rewards/margins": 3.4301252365112305, + "rewards/rejected": -10.918392181396484, + "step": 10362 + }, + { + "epoch": 1.61, + "learning_rate": 6.546690181025986e-06, + "logits/chosen": -0.9886391758918762, + "logits/rejected": -2.9143717288970947, + "logps/chosen": -128.93711853027344, + "logps/rejected": -345.3730163574219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.45742130279541, + "rewards/margins": 8.810855865478516, + "rewards/rejected": -13.268278121948242, + "step": 10363 + }, + { + "epoch": 1.61, + "learning_rate": 6.545956740494839e-06, + "logits/chosen": -2.936683416366577, + "logits/rejected": -2.4532570838928223, + "logps/chosen": -419.68402099609375, + "logps/rejected": -500.9742431640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.808878421783447, + "rewards/margins": 8.586828231811523, + "rewards/rejected": -13.395706176757812, + "step": 10364 + }, + { + "epoch": 1.61, + "learning_rate": 6.545223299963692e-06, + "logits/chosen": -2.085613489151001, + "logits/rejected": -2.4689807891845703, + "logps/chosen": -149.73956298828125, + "logps/rejected": -369.3631591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4234840869903564, + "rewards/margins": 12.116872787475586, + "rewards/rejected": -14.54035758972168, + "step": 10365 + }, + { + "epoch": 1.61, + "learning_rate": 6.544489859432544e-06, + "logits/chosen": -1.827589511871338, + "logits/rejected": -2.6882405281066895, + "logps/chosen": -241.74215698242188, + "logps/rejected": -519.6958618164062, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.980927467346191, + "rewards/margins": 9.499164581298828, + "rewards/rejected": -15.480093002319336, + "step": 10366 + }, + { + "epoch": 1.61, + "learning_rate": 6.5437564189013955e-06, + "logits/chosen": -1.7198047637939453, + "logits/rejected": -2.127284288406372, + "logps/chosen": -304.8312683105469, + "logps/rejected": -374.75537109375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.071937084197998, + "rewards/margins": 8.014948844909668, + "rewards/rejected": -12.086886405944824, + "step": 10367 + }, + { + "epoch": 1.61, + "learning_rate": 6.543022978370247e-06, + "logits/chosen": -2.901341438293457, + "logits/rejected": -2.637418270111084, + "logps/chosen": -515.9953002929688, + "logps/rejected": -716.58349609375, + "loss": 0.1561, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.987518310546875, + "rewards/margins": 4.1954345703125, + "rewards/rejected": -9.182952880859375, + "step": 10368 + }, + { + "epoch": 1.61, + "learning_rate": 6.5422895378391e-06, + "logits/chosen": -2.7049691677093506, + "logits/rejected": -3.086219310760498, + "logps/chosen": -91.64920043945312, + "logps/rejected": -336.841796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6818361282348633, + "rewards/margins": 9.748922348022461, + "rewards/rejected": -12.43075942993164, + "step": 10369 + }, + { + "epoch": 1.61, + "learning_rate": 6.541556097307952e-06, + "logits/chosen": -2.766010284423828, + "logits/rejected": -2.6235718727111816, + "logps/chosen": -232.27581787109375, + "logps/rejected": -202.62857055664062, + "loss": 3.9047, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.931303977966309, + "rewards/margins": -1.2117173671722412, + "rewards/rejected": -7.7195868492126465, + "step": 10370 + }, + { + "epoch": 1.61, + "learning_rate": 6.540822656776804e-06, + "logits/chosen": -3.0046374797821045, + "logits/rejected": -1.7215498685836792, + "logps/chosen": -259.9100646972656, + "logps/rejected": -194.45477294921875, + "loss": 1.026, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.4932050704956055, + "rewards/margins": 3.2594127655029297, + "rewards/rejected": -8.752617835998535, + "step": 10371 + }, + { + "epoch": 1.61, + "learning_rate": 6.540089216245656e-06, + "logits/chosen": -2.0961246490478516, + "logits/rejected": -2.381737470626831, + "logps/chosen": -396.31097412109375, + "logps/rejected": -421.45184326171875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.044120788574219, + "rewards/margins": 7.0268754959106445, + "rewards/rejected": -14.07099723815918, + "step": 10372 + }, + { + "epoch": 1.61, + "learning_rate": 6.539355775714508e-06, + "logits/chosen": -0.9371110796928406, + "logits/rejected": -2.8433127403259277, + "logps/chosen": -120.12494659423828, + "logps/rejected": -737.8575439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1975555419921875, + "rewards/margins": 14.850801467895508, + "rewards/rejected": -20.048357009887695, + "step": 10373 + }, + { + "epoch": 1.61, + "learning_rate": 6.53862233518336e-06, + "logits/chosen": -2.764556646347046, + "logits/rejected": -2.9066860675811768, + "logps/chosen": -225.9401092529297, + "logps/rejected": -445.055908203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.05775785446167, + "rewards/margins": 9.382375717163086, + "rewards/rejected": -12.440134048461914, + "step": 10374 + }, + { + "epoch": 1.61, + "learning_rate": 6.537888894652212e-06, + "logits/chosen": -1.863145112991333, + "logits/rejected": -2.7429730892181396, + "logps/chosen": -149.60208129882812, + "logps/rejected": -366.5173034667969, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.33974027633667, + "rewards/margins": 9.019184112548828, + "rewards/rejected": -16.358924865722656, + "step": 10375 + }, + { + "epoch": 1.61, + "learning_rate": 6.537155454121064e-06, + "logits/chosen": -2.9091484546661377, + "logits/rejected": -2.169248104095459, + "logps/chosen": -283.88116455078125, + "logps/rejected": -253.24752807617188, + "loss": 0.377, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.677523136138916, + "rewards/margins": 5.579223155975342, + "rewards/rejected": -10.256746292114258, + "step": 10376 + }, + { + "epoch": 1.61, + "learning_rate": 6.536422013589916e-06, + "logits/chosen": -2.901177406311035, + "logits/rejected": -2.4038193225860596, + "logps/chosen": -177.85897827148438, + "logps/rejected": -204.84841918945312, + "loss": 2.2648, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.6396026611328125, + "rewards/margins": 1.4170761108398438, + "rewards/rejected": -8.056678771972656, + "step": 10377 + }, + { + "epoch": 1.61, + "learning_rate": 6.535688573058769e-06, + "logits/chosen": -2.8886821269989014, + "logits/rejected": -2.337470054626465, + "logps/chosen": -357.1461181640625, + "logps/rejected": -367.354248046875, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.388990879058838, + "rewards/margins": 5.749821662902832, + "rewards/rejected": -10.138813018798828, + "step": 10378 + }, + { + "epoch": 1.61, + "learning_rate": 6.534955132527621e-06, + "logits/chosen": -2.1668739318847656, + "logits/rejected": -2.9904391765594482, + "logps/chosen": -126.66695404052734, + "logps/rejected": -532.2597045898438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7361984252929688, + "rewards/margins": 10.948246002197266, + "rewards/rejected": -14.684444427490234, + "step": 10379 + }, + { + "epoch": 1.61, + "learning_rate": 6.5342216919964725e-06, + "logits/chosen": -2.6892921924591064, + "logits/rejected": -2.7257282733917236, + "logps/chosen": -377.877685546875, + "logps/rejected": -353.698486328125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.763885021209717, + "rewards/margins": 7.82861852645874, + "rewards/rejected": -12.592503547668457, + "step": 10380 + }, + { + "epoch": 1.61, + "learning_rate": 6.533488251465325e-06, + "logits/chosen": -2.2797152996063232, + "logits/rejected": -2.7205710411071777, + "logps/chosen": -206.70028686523438, + "logps/rejected": -236.2569122314453, + "loss": 2.8883, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.767245292663574, + "rewards/margins": 0.7130365371704102, + "rewards/rejected": -8.480281829833984, + "step": 10381 + }, + { + "epoch": 1.61, + "learning_rate": 6.532754810934178e-06, + "logits/chosen": -2.944182872772217, + "logits/rejected": -2.5718677043914795, + "logps/chosen": -441.8223571777344, + "logps/rejected": -542.119140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.001564025878906, + "rewards/margins": 8.861574172973633, + "rewards/rejected": -13.863138198852539, + "step": 10382 + }, + { + "epoch": 1.61, + "learning_rate": 6.53202137040303e-06, + "logits/chosen": -2.2703187465667725, + "logits/rejected": -3.065105438232422, + "logps/chosen": -225.64337158203125, + "logps/rejected": -346.20635986328125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.011261940002441, + "rewards/margins": 7.397907257080078, + "rewards/rejected": -12.409168243408203, + "step": 10383 + }, + { + "epoch": 1.61, + "learning_rate": 6.531287929871882e-06, + "logits/chosen": -2.2612340450286865, + "logits/rejected": -2.4449095726013184, + "logps/chosen": -248.5012664794922, + "logps/rejected": -371.0554504394531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.267845869064331, + "rewards/margins": 10.74443244934082, + "rewards/rejected": -13.01227855682373, + "step": 10384 + }, + { + "epoch": 1.62, + "learning_rate": 6.530554489340734e-06, + "logits/chosen": -2.7424204349517822, + "logits/rejected": -2.697601079940796, + "logps/chosen": -169.2764892578125, + "logps/rejected": -256.6717834472656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.032071590423584, + "rewards/margins": 7.827958106994629, + "rewards/rejected": -11.860030174255371, + "step": 10385 + }, + { + "epoch": 1.62, + "learning_rate": 6.5298210488095855e-06, + "logits/chosen": -2.7237460613250732, + "logits/rejected": -2.6016435623168945, + "logps/chosen": -259.4619140625, + "logps/rejected": -303.2659912109375, + "loss": 1.7925, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.902231693267822, + "rewards/margins": 4.160676002502441, + "rewards/rejected": -11.062908172607422, + "step": 10386 + }, + { + "epoch": 1.62, + "learning_rate": 6.529087608278438e-06, + "logits/chosen": -1.4471757411956787, + "logits/rejected": -2.7820510864257812, + "logps/chosen": -103.41340637207031, + "logps/rejected": -318.6488952636719, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1886091232299805, + "rewards/margins": 7.8600921630859375, + "rewards/rejected": -13.048702239990234, + "step": 10387 + }, + { + "epoch": 1.62, + "learning_rate": 6.52835416774729e-06, + "logits/chosen": -2.9800972938537598, + "logits/rejected": -0.7602849006652832, + "logps/chosen": -514.67431640625, + "logps/rejected": -239.1138916015625, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.142307281494141, + "rewards/margins": 5.607372283935547, + "rewards/rejected": -10.749679565429688, + "step": 10388 + }, + { + "epoch": 1.62, + "learning_rate": 6.527620727216142e-06, + "logits/chosen": -2.537177801132202, + "logits/rejected": -2.9450366497039795, + "logps/chosen": -89.02255249023438, + "logps/rejected": -120.72918701171875, + "loss": 0.6163, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.518418788909912, + "rewards/margins": 3.2736642360687256, + "rewards/rejected": -7.792082786560059, + "step": 10389 + }, + { + "epoch": 1.62, + "learning_rate": 6.526887286684994e-06, + "logits/chosen": -2.851155996322632, + "logits/rejected": -2.7842891216278076, + "logps/chosen": -182.92239379882812, + "logps/rejected": -249.55935668945312, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.732312202453613, + "rewards/margins": 5.744075775146484, + "rewards/rejected": -11.476387023925781, + "step": 10390 + }, + { + "epoch": 1.62, + "learning_rate": 6.5261538461538465e-06, + "logits/chosen": -2.1263623237609863, + "logits/rejected": -3.099292516708374, + "logps/chosen": -634.3171997070312, + "logps/rejected": -737.2988891601562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.858381748199463, + "rewards/margins": 9.84758472442627, + "rewards/rejected": -14.705965995788574, + "step": 10391 + }, + { + "epoch": 1.62, + "learning_rate": 6.5254204056226984e-06, + "logits/chosen": -2.8802640438079834, + "logits/rejected": -2.462508201599121, + "logps/chosen": -201.2611083984375, + "logps/rejected": -300.45379638671875, + "loss": 0.2114, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.287939071655273, + "rewards/margins": 3.2802560329437256, + "rewards/rejected": -9.568195343017578, + "step": 10392 + }, + { + "epoch": 1.62, + "learning_rate": 6.52468696509155e-06, + "logits/chosen": -2.9139106273651123, + "logits/rejected": -2.9760243892669678, + "logps/chosen": -129.1015167236328, + "logps/rejected": -234.3900146484375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.943899393081665, + "rewards/margins": 7.626666069030762, + "rewards/rejected": -11.570566177368164, + "step": 10393 + }, + { + "epoch": 1.62, + "learning_rate": 6.523953524560402e-06, + "logits/chosen": -3.0314576625823975, + "logits/rejected": -3.0278096199035645, + "logps/chosen": -83.49806213378906, + "logps/rejected": -162.5965118408203, + "loss": 0.029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.829683780670166, + "rewards/margins": 4.170111179351807, + "rewards/rejected": -7.999794960021973, + "step": 10394 + }, + { + "epoch": 1.62, + "learning_rate": 6.523220084029254e-06, + "logits/chosen": -2.480668544769287, + "logits/rejected": -2.890700340270996, + "logps/chosen": -98.53883361816406, + "logps/rejected": -299.94903564453125, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.790367603302002, + "rewards/margins": 8.340230941772461, + "rewards/rejected": -12.130599021911621, + "step": 10395 + }, + { + "epoch": 1.62, + "learning_rate": 6.522486643498107e-06, + "logits/chosen": -2.832486629486084, + "logits/rejected": -2.6497509479522705, + "logps/chosen": -428.8797607421875, + "logps/rejected": -510.972412109375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2450737953186035, + "rewards/margins": 7.888065338134766, + "rewards/rejected": -12.133138656616211, + "step": 10396 + }, + { + "epoch": 1.62, + "learning_rate": 6.521753202966959e-06, + "logits/chosen": -3.036135196685791, + "logits/rejected": -3.0334606170654297, + "logps/chosen": -142.1604766845703, + "logps/rejected": -228.6962432861328, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9823384284973145, + "rewards/margins": 4.244557857513428, + "rewards/rejected": -10.226896286010742, + "step": 10397 + }, + { + "epoch": 1.62, + "learning_rate": 6.521019762435811e-06, + "logits/chosen": -2.691894054412842, + "logits/rejected": -2.79521107673645, + "logps/chosen": -141.00892639160156, + "logps/rejected": -321.1101379394531, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3871898651123047, + "rewards/margins": 6.543587684631348, + "rewards/rejected": -9.930777549743652, + "step": 10398 + }, + { + "epoch": 1.62, + "learning_rate": 6.520286321904663e-06, + "logits/chosen": -1.639413595199585, + "logits/rejected": -2.7778778076171875, + "logps/chosen": -155.70953369140625, + "logps/rejected": -506.8952941894531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.299389362335205, + "rewards/margins": 9.529178619384766, + "rewards/rejected": -13.828567504882812, + "step": 10399 + }, + { + "epoch": 1.62, + "learning_rate": 6.519552881373516e-06, + "logits/chosen": -2.2805874347686768, + "logits/rejected": -2.7044825553894043, + "logps/chosen": -360.51849365234375, + "logps/rejected": -508.32086181640625, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.979086875915527, + "rewards/margins": 4.78985595703125, + "rewards/rejected": -9.768941879272461, + "step": 10400 + }, + { + "epoch": 1.62, + "learning_rate": 6.518819440842368e-06, + "logits/chosen": -2.8688619136810303, + "logits/rejected": -3.024768829345703, + "logps/chosen": -144.00660705566406, + "logps/rejected": -335.5089111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6367275714874268, + "rewards/margins": 12.298273086547852, + "rewards/rejected": -14.935001373291016, + "step": 10401 + }, + { + "epoch": 1.62, + "learning_rate": 6.51808600031122e-06, + "logits/chosen": -1.8435757160186768, + "logits/rejected": -2.814765691757202, + "logps/chosen": -71.19548797607422, + "logps/rejected": -335.9036865234375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.162537097930908, + "rewards/margins": 7.716500282287598, + "rewards/rejected": -13.879036903381348, + "step": 10402 + }, + { + "epoch": 1.62, + "learning_rate": 6.517352559780072e-06, + "logits/chosen": -1.5911743640899658, + "logits/rejected": -2.953415632247925, + "logps/chosen": -132.407470703125, + "logps/rejected": -317.0964050292969, + "loss": 1.1687, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.2072367668151855, + "rewards/margins": 3.2543106079101562, + "rewards/rejected": -10.4615478515625, + "step": 10403 + }, + { + "epoch": 1.62, + "learning_rate": 6.5166191192489235e-06, + "logits/chosen": -2.968622922897339, + "logits/rejected": -2.1684157848358154, + "logps/chosen": -348.0520324707031, + "logps/rejected": -361.4486999511719, + "loss": 0.0375, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7998552322387695, + "rewards/margins": 5.492847919464111, + "rewards/rejected": -10.292703628540039, + "step": 10404 + }, + { + "epoch": 1.62, + "learning_rate": 6.515885678717776e-06, + "logits/chosen": -2.724093198776245, + "logits/rejected": -3.0150811672210693, + "logps/chosen": -103.34925079345703, + "logps/rejected": -159.87887573242188, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.538737773895264, + "rewards/margins": 3.8722453117370605, + "rewards/rejected": -10.410983085632324, + "step": 10405 + }, + { + "epoch": 1.62, + "learning_rate": 6.515152238186628e-06, + "logits/chosen": -2.7026519775390625, + "logits/rejected": -2.9662246704101562, + "logps/chosen": -561.3746948242188, + "logps/rejected": -561.9596557617188, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.906683921813965, + "rewards/margins": 10.262836456298828, + "rewards/rejected": -13.169519424438477, + "step": 10406 + }, + { + "epoch": 1.62, + "learning_rate": 6.51441879765548e-06, + "logits/chosen": -2.8644022941589355, + "logits/rejected": -2.408522605895996, + "logps/chosen": -243.9189453125, + "logps/rejected": -225.48785400390625, + "loss": 2.3861, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.094161033630371, + "rewards/margins": 1.178290605545044, + "rewards/rejected": -8.272451400756836, + "step": 10407 + }, + { + "epoch": 1.62, + "learning_rate": 6.513685357124332e-06, + "logits/chosen": -1.3940759897232056, + "logits/rejected": -2.425821304321289, + "logps/chosen": -218.25228881835938, + "logps/rejected": -494.7397766113281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.917174339294434, + "rewards/margins": 8.723857879638672, + "rewards/rejected": -13.641032218933105, + "step": 10408 + }, + { + "epoch": 1.62, + "learning_rate": 6.512951916593185e-06, + "logits/chosen": -2.998246431350708, + "logits/rejected": -1.9967384338378906, + "logps/chosen": -441.96905517578125, + "logps/rejected": -302.0709533691406, + "loss": 0.0546, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.182694435119629, + "rewards/margins": 6.18618106842041, + "rewards/rejected": -10.368875503540039, + "step": 10409 + }, + { + "epoch": 1.62, + "learning_rate": 6.5122184760620365e-06, + "logits/chosen": -1.9480135440826416, + "logits/rejected": -3.0236656665802, + "logps/chosen": -112.22000122070312, + "logps/rejected": -325.238037109375, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.448256492614746, + "rewards/margins": 5.833335876464844, + "rewards/rejected": -14.281593322753906, + "step": 10410 + }, + { + "epoch": 1.62, + "learning_rate": 6.511485035530888e-06, + "logits/chosen": -2.8798530101776123, + "logits/rejected": -2.596161365509033, + "logps/chosen": -444.1539611816406, + "logps/rejected": -382.8513488769531, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.761016845703125, + "rewards/margins": 7.509124755859375, + "rewards/rejected": -12.2701416015625, + "step": 10411 + }, + { + "epoch": 1.62, + "learning_rate": 6.51075159499974e-06, + "logits/chosen": -2.784757375717163, + "logits/rejected": -1.9926371574401855, + "logps/chosen": -427.23675537109375, + "logps/rejected": -296.9547424316406, + "loss": 0.3474, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.818422317504883, + "rewards/margins": 3.212850332260132, + "rewards/rejected": -11.031272888183594, + "step": 10412 + }, + { + "epoch": 1.62, + "learning_rate": 6.510018154468592e-06, + "logits/chosen": -2.364063024520874, + "logits/rejected": -2.7600908279418945, + "logps/chosen": -177.0959930419922, + "logps/rejected": -469.6994934082031, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.226988792419434, + "rewards/margins": 6.8713178634643555, + "rewards/rejected": -14.098306655883789, + "step": 10413 + }, + { + "epoch": 1.62, + "learning_rate": 6.509284713937445e-06, + "logits/chosen": -2.8401710987091064, + "logits/rejected": -2.944521427154541, + "logps/chosen": -191.35873413085938, + "logps/rejected": -320.08197021484375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.487738847732544, + "rewards/margins": 7.786372184753418, + "rewards/rejected": -11.274110794067383, + "step": 10414 + }, + { + "epoch": 1.62, + "learning_rate": 6.508551273406297e-06, + "logits/chosen": -3.1685614585876465, + "logits/rejected": -3.0472452640533447, + "logps/chosen": -207.52862548828125, + "logps/rejected": -331.7890319824219, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.309330940246582, + "rewards/margins": 7.852490425109863, + "rewards/rejected": -13.161821365356445, + "step": 10415 + }, + { + "epoch": 1.62, + "learning_rate": 6.5078178328751494e-06, + "logits/chosen": -1.8289541006088257, + "logits/rejected": -2.855952262878418, + "logps/chosen": -150.822509765625, + "logps/rejected": -386.58294677734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6019515991210938, + "rewards/margins": 8.72594165802002, + "rewards/rejected": -12.327893257141113, + "step": 10416 + }, + { + "epoch": 1.62, + "learning_rate": 6.507084392344001e-06, + "logits/chosen": -2.55002760887146, + "logits/rejected": -2.930023670196533, + "logps/chosen": -503.45263671875, + "logps/rejected": -538.8267211914062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.33998441696167, + "rewards/margins": 11.50665283203125, + "rewards/rejected": -14.846637725830078, + "step": 10417 + }, + { + "epoch": 1.62, + "learning_rate": 6.506350951812854e-06, + "logits/chosen": -2.094743490219116, + "logits/rejected": -3.0083978176116943, + "logps/chosen": -473.3688049316406, + "logps/rejected": -533.91943359375, + "loss": 0.9092, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.610363960266113, + "rewards/margins": 2.803119659423828, + "rewards/rejected": -9.413483619689941, + "step": 10418 + }, + { + "epoch": 1.62, + "learning_rate": 6.505617511281706e-06, + "logits/chosen": -2.887324333190918, + "logits/rejected": -2.491476058959961, + "logps/chosen": -310.89990234375, + "logps/rejected": -374.38812255859375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.238187789916992, + "rewards/margins": 7.4901442527771, + "rewards/rejected": -12.72833251953125, + "step": 10419 + }, + { + "epoch": 1.62, + "learning_rate": 6.504884070750558e-06, + "logits/chosen": -2.7486014366149902, + "logits/rejected": -2.816791296005249, + "logps/chosen": -339.3349914550781, + "logps/rejected": -336.97528076171875, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.086734771728516, + "rewards/margins": 6.956021308898926, + "rewards/rejected": -11.042756080627441, + "step": 10420 + }, + { + "epoch": 1.62, + "learning_rate": 6.50415063021941e-06, + "logits/chosen": -2.6491448879241943, + "logits/rejected": -2.548532247543335, + "logps/chosen": -176.053955078125, + "logps/rejected": -358.74395751953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.640618324279785, + "rewards/margins": 9.556365966796875, + "rewards/rejected": -13.19698429107666, + "step": 10421 + }, + { + "epoch": 1.62, + "learning_rate": 6.503417189688262e-06, + "logits/chosen": -2.7499163150787354, + "logits/rejected": -1.7055706977844238, + "logps/chosen": -402.0221862792969, + "logps/rejected": -231.8534393310547, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.59506893157959, + "rewards/margins": 4.880466461181641, + "rewards/rejected": -12.47553539276123, + "step": 10422 + }, + { + "epoch": 1.62, + "learning_rate": 6.502683749157114e-06, + "logits/chosen": -1.983400583267212, + "logits/rejected": -2.744155168533325, + "logps/chosen": -206.70306396484375, + "logps/rejected": -312.89276123046875, + "loss": 0.9116, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.929309844970703, + "rewards/margins": 3.3255109786987305, + "rewards/rejected": -11.25482177734375, + "step": 10423 + }, + { + "epoch": 1.62, + "learning_rate": 6.501950308625966e-06, + "logits/chosen": -2.8949475288391113, + "logits/rejected": -1.876151204109192, + "logps/chosen": -234.5824737548828, + "logps/rejected": -285.930908203125, + "loss": 1.5021, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.840295791625977, + "rewards/margins": 6.08571720123291, + "rewards/rejected": -12.926012992858887, + "step": 10424 + }, + { + "epoch": 1.62, + "learning_rate": 6.501216868094818e-06, + "logits/chosen": -2.434596538543701, + "logits/rejected": -2.7261385917663574, + "logps/chosen": -141.09739685058594, + "logps/rejected": -354.15057373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.786970138549805, + "rewards/margins": 10.421348571777344, + "rewards/rejected": -16.20832061767578, + "step": 10425 + }, + { + "epoch": 1.62, + "learning_rate": 6.50048342756367e-06, + "logits/chosen": -1.4605746269226074, + "logits/rejected": -2.8643553256988525, + "logps/chosen": -113.55534362792969, + "logps/rejected": -373.95013427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.737854242324829, + "rewards/margins": 9.03128433227539, + "rewards/rejected": -12.769139289855957, + "step": 10426 + }, + { + "epoch": 1.62, + "learning_rate": 6.499749987032523e-06, + "logits/chosen": -0.8440957069396973, + "logits/rejected": -2.388920545578003, + "logps/chosen": -127.2120361328125, + "logps/rejected": -384.3698425292969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.399100303649902, + "rewards/margins": 10.427074432373047, + "rewards/rejected": -14.826173782348633, + "step": 10427 + }, + { + "epoch": 1.62, + "learning_rate": 6.4990165465013745e-06, + "logits/chosen": -2.318772315979004, + "logits/rejected": -2.5825886726379395, + "logps/chosen": -109.88839721679688, + "logps/rejected": -199.2149658203125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.958211898803711, + "rewards/margins": 7.513897895812988, + "rewards/rejected": -12.4721097946167, + "step": 10428 + }, + { + "epoch": 1.62, + "learning_rate": 6.498283105970226e-06, + "logits/chosen": -2.0728766918182373, + "logits/rejected": -2.7544314861297607, + "logps/chosen": -151.33242797851562, + "logps/rejected": -332.6889953613281, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.814953804016113, + "rewards/margins": 8.968265533447266, + "rewards/rejected": -13.783220291137695, + "step": 10429 + }, + { + "epoch": 1.62, + "learning_rate": 6.497549665439078e-06, + "logits/chosen": -2.5470499992370605, + "logits/rejected": -1.906402826309204, + "logps/chosen": -177.29644775390625, + "logps/rejected": -206.47970581054688, + "loss": 0.0779, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.502700328826904, + "rewards/margins": 6.735902786254883, + "rewards/rejected": -14.238603591918945, + "step": 10430 + }, + { + "epoch": 1.62, + "learning_rate": 6.496816224907931e-06, + "logits/chosen": -3.0346832275390625, + "logits/rejected": -1.821759581565857, + "logps/chosen": -442.1321105957031, + "logps/rejected": -302.0334777832031, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.174900531768799, + "rewards/margins": 8.756828308105469, + "rewards/rejected": -12.93172836303711, + "step": 10431 + }, + { + "epoch": 1.62, + "learning_rate": 6.496082784376783e-06, + "logits/chosen": -2.864197254180908, + "logits/rejected": -1.8723052740097046, + "logps/chosen": -525.9052734375, + "logps/rejected": -321.9703063964844, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.070833206176758, + "rewards/margins": 6.890328884124756, + "rewards/rejected": -13.961162567138672, + "step": 10432 + }, + { + "epoch": 1.62, + "learning_rate": 6.495349343845636e-06, + "logits/chosen": -2.5305051803588867, + "logits/rejected": -1.75583016872406, + "logps/chosen": -213.0496826171875, + "logps/rejected": -271.7129211425781, + "loss": 0.0536, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5401086807250977, + "rewards/margins": 6.235993385314941, + "rewards/rejected": -9.776102066040039, + "step": 10433 + }, + { + "epoch": 1.62, + "learning_rate": 6.4946159033144875e-06, + "logits/chosen": -2.547597885131836, + "logits/rejected": -2.936889410018921, + "logps/chosen": -374.9692077636719, + "logps/rejected": -493.725341796875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.048346042633057, + "rewards/margins": 7.862677574157715, + "rewards/rejected": -11.91102409362793, + "step": 10434 + }, + { + "epoch": 1.62, + "learning_rate": 6.493882462783339e-06, + "logits/chosen": -2.444847822189331, + "logits/rejected": -2.717745065689087, + "logps/chosen": -570.7874755859375, + "logps/rejected": -573.6163940429688, + "loss": 0.0783, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9885783195495605, + "rewards/margins": 5.574615478515625, + "rewards/rejected": -11.563194274902344, + "step": 10435 + }, + { + "epoch": 1.62, + "learning_rate": 6.493149022252192e-06, + "logits/chosen": -2.096508741378784, + "logits/rejected": -3.0897090435028076, + "logps/chosen": -87.42741394042969, + "logps/rejected": -398.29803466796875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1375532150268555, + "rewards/margins": 6.92426061630249, + "rewards/rejected": -11.061813354492188, + "step": 10436 + }, + { + "epoch": 1.62, + "learning_rate": 6.492415581721044e-06, + "logits/chosen": -1.1341160535812378, + "logits/rejected": -2.8256335258483887, + "logps/chosen": -223.9357147216797, + "logps/rejected": -566.513671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.169145584106445, + "rewards/margins": 8.783717155456543, + "rewards/rejected": -13.952862739562988, + "step": 10437 + }, + { + "epoch": 1.62, + "learning_rate": 6.491682141189896e-06, + "logits/chosen": -2.4603333473205566, + "logits/rejected": -1.9897862672805786, + "logps/chosen": -550.2225341796875, + "logps/rejected": -397.3186950683594, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9416894912719727, + "rewards/margins": 8.751233100891113, + "rewards/rejected": -12.692922592163086, + "step": 10438 + }, + { + "epoch": 1.62, + "learning_rate": 6.490948700658748e-06, + "logits/chosen": -2.227660894393921, + "logits/rejected": -3.076674699783325, + "logps/chosen": -112.71393585205078, + "logps/rejected": -286.54559326171875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9626283645629883, + "rewards/margins": 7.280493259429932, + "rewards/rejected": -10.243122100830078, + "step": 10439 + }, + { + "epoch": 1.62, + "learning_rate": 6.4902152601276004e-06, + "logits/chosen": -3.123840808868408, + "logits/rejected": -2.963226318359375, + "logps/chosen": -403.6528015136719, + "logps/rejected": -214.01612854003906, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.291240692138672, + "rewards/margins": 6.066107749938965, + "rewards/rejected": -11.357348442077637, + "step": 10440 + }, + { + "epoch": 1.62, + "learning_rate": 6.489481819596452e-06, + "logits/chosen": -2.5819337368011475, + "logits/rejected": -3.0774660110473633, + "logps/chosen": -389.9167785644531, + "logps/rejected": -446.377685546875, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.631725311279297, + "rewards/margins": 7.016106605529785, + "rewards/rejected": -12.647831916809082, + "step": 10441 + }, + { + "epoch": 1.62, + "learning_rate": 6.488748379065304e-06, + "logits/chosen": -2.8214149475097656, + "logits/rejected": -2.938793659210205, + "logps/chosen": -111.537841796875, + "logps/rejected": -205.03651428222656, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.347900390625, + "rewards/margins": 4.667738914489746, + "rewards/rejected": -9.015639305114746, + "step": 10442 + }, + { + "epoch": 1.62, + "learning_rate": 6.488014938534156e-06, + "logits/chosen": -2.7052738666534424, + "logits/rejected": -3.1113882064819336, + "logps/chosen": -79.57744598388672, + "logps/rejected": -229.16537475585938, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.131464958190918, + "rewards/margins": 6.321897029876709, + "rewards/rejected": -10.453361511230469, + "step": 10443 + }, + { + "epoch": 1.62, + "learning_rate": 6.487281498003008e-06, + "logits/chosen": -2.995622396469116, + "logits/rejected": -2.5824005603790283, + "logps/chosen": -246.84439086914062, + "logps/rejected": -266.3726501464844, + "loss": 0.0984, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.395321846008301, + "rewards/margins": 3.2773709297180176, + "rewards/rejected": -7.672692775726318, + "step": 10444 + }, + { + "epoch": 1.62, + "learning_rate": 6.486548057471861e-06, + "logits/chosen": -3.031648874282837, + "logits/rejected": -2.756274700164795, + "logps/chosen": -214.79766845703125, + "logps/rejected": -139.30789184570312, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.921219825744629, + "rewards/margins": 4.4917402267456055, + "rewards/rejected": -7.412960052490234, + "step": 10445 + }, + { + "epoch": 1.62, + "learning_rate": 6.4858146169407126e-06, + "logits/chosen": -2.1723639965057373, + "logits/rejected": -3.005953788757324, + "logps/chosen": -168.02490234375, + "logps/rejected": -341.3424377441406, + "loss": 0.2921, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.358311653137207, + "rewards/margins": 5.863153457641602, + "rewards/rejected": -11.221466064453125, + "step": 10446 + }, + { + "epoch": 1.62, + "learning_rate": 6.4850811764095644e-06, + "logits/chosen": -2.936469316482544, + "logits/rejected": -1.646477222442627, + "logps/chosen": -227.54261779785156, + "logps/rejected": -149.25177001953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.558677911758423, + "rewards/margins": 8.301187515258789, + "rewards/rejected": -10.859865188598633, + "step": 10447 + }, + { + "epoch": 1.62, + "learning_rate": 6.484347735878416e-06, + "logits/chosen": -2.627553939819336, + "logits/rejected": -2.9574010372161865, + "logps/chosen": -436.5087585449219, + "logps/rejected": -320.255615234375, + "loss": 0.1054, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.831851005554199, + "rewards/margins": 3.876222848892212, + "rewards/rejected": -9.708074569702148, + "step": 10448 + }, + { + "epoch": 1.63, + "learning_rate": 6.483614295347269e-06, + "logits/chosen": -1.77385675907135, + "logits/rejected": -3.015679359436035, + "logps/chosen": -97.89651489257812, + "logps/rejected": -314.27020263671875, + "loss": 0.0607, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.503430366516113, + "rewards/margins": 5.938608169555664, + "rewards/rejected": -10.442039489746094, + "step": 10449 + }, + { + "epoch": 1.63, + "learning_rate": 6.482880854816122e-06, + "logits/chosen": -2.815236806869507, + "logits/rejected": -2.0864570140838623, + "logps/chosen": -308.79498291015625, + "logps/rejected": -283.7227478027344, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.128337860107422, + "rewards/margins": 3.872980833053589, + "rewards/rejected": -10.00131893157959, + "step": 10450 + }, + { + "epoch": 1.63, + "learning_rate": 6.482147414284974e-06, + "logits/chosen": -3.1604506969451904, + "logits/rejected": -2.1034958362579346, + "logps/chosen": -962.572998046875, + "logps/rejected": -327.3497619628906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.041689395904541, + "rewards/margins": 9.892497062683105, + "rewards/rejected": -14.934186935424805, + "step": 10451 + }, + { + "epoch": 1.63, + "learning_rate": 6.4814139737538255e-06, + "logits/chosen": -2.009493827819824, + "logits/rejected": -2.708712100982666, + "logps/chosen": -135.19961547851562, + "logps/rejected": -334.43096923828125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.729863166809082, + "rewards/margins": 6.343986511230469, + "rewards/rejected": -12.07384967803955, + "step": 10452 + }, + { + "epoch": 1.63, + "learning_rate": 6.480680533222677e-06, + "logits/chosen": -2.1087543964385986, + "logits/rejected": -2.947021007537842, + "logps/chosen": -347.26934814453125, + "logps/rejected": -501.6858215332031, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.918428897857666, + "rewards/margins": 8.384462356567383, + "rewards/rejected": -12.302891731262207, + "step": 10453 + }, + { + "epoch": 1.63, + "learning_rate": 6.47994709269153e-06, + "logits/chosen": -2.636650800704956, + "logits/rejected": -3.1304197311401367, + "logps/chosen": -516.742431640625, + "logps/rejected": -565.7979736328125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.795495986938477, + "rewards/margins": 7.181435585021973, + "rewards/rejected": -11.976930618286133, + "step": 10454 + }, + { + "epoch": 1.63, + "learning_rate": 6.479213652160382e-06, + "logits/chosen": -2.8541955947875977, + "logits/rejected": -2.5703396797180176, + "logps/chosen": -915.3702392578125, + "logps/rejected": -662.0721435546875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.655363082885742, + "rewards/margins": 7.795825004577637, + "rewards/rejected": -12.451187133789062, + "step": 10455 + }, + { + "epoch": 1.63, + "learning_rate": 6.478480211629234e-06, + "logits/chosen": -1.0242829322814941, + "logits/rejected": -2.7606256008148193, + "logps/chosen": -227.9322509765625, + "logps/rejected": -302.42822265625, + "loss": 0.1512, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.673361778259277, + "rewards/margins": 2.50003981590271, + "rewards/rejected": -9.173401832580566, + "step": 10456 + }, + { + "epoch": 1.63, + "learning_rate": 6.477746771098086e-06, + "logits/chosen": -3.026049852371216, + "logits/rejected": -2.988849401473999, + "logps/chosen": -185.0377960205078, + "logps/rejected": -206.9102783203125, + "loss": 0.0607, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.056704521179199, + "rewards/margins": 5.637628555297852, + "rewards/rejected": -11.69433307647705, + "step": 10457 + }, + { + "epoch": 1.63, + "learning_rate": 6.4770133305669385e-06, + "logits/chosen": -2.3959758281707764, + "logits/rejected": -2.762345790863037, + "logps/chosen": -183.9272918701172, + "logps/rejected": -278.71826171875, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.37118911743164, + "rewards/margins": 4.61005163192749, + "rewards/rejected": -13.981240272521973, + "step": 10458 + }, + { + "epoch": 1.63, + "learning_rate": 6.47627989003579e-06, + "logits/chosen": -2.794919967651367, + "logits/rejected": -2.857081174850464, + "logps/chosen": -196.8993377685547, + "logps/rejected": -305.36480712890625, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8926897048950195, + "rewards/margins": 6.353168487548828, + "rewards/rejected": -11.245858192443848, + "step": 10459 + }, + { + "epoch": 1.63, + "learning_rate": 6.475546449504642e-06, + "logits/chosen": -1.8655093908309937, + "logits/rejected": -2.860222578048706, + "logps/chosen": -282.28436279296875, + "logps/rejected": -349.9581604003906, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.523224830627441, + "rewards/margins": 6.980364799499512, + "rewards/rejected": -11.503589630126953, + "step": 10460 + }, + { + "epoch": 1.63, + "learning_rate": 6.474813008973494e-06, + "logits/chosen": -2.829526662826538, + "logits/rejected": -2.96152400970459, + "logps/chosen": -123.47239685058594, + "logps/rejected": -245.09262084960938, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.120754241943359, + "rewards/margins": 7.614911079406738, + "rewards/rejected": -12.735664367675781, + "step": 10461 + }, + { + "epoch": 1.63, + "learning_rate": 6.474079568442346e-06, + "logits/chosen": -1.7799665927886963, + "logits/rejected": -2.7795798778533936, + "logps/chosen": -198.79336547851562, + "logps/rejected": -300.84124755859375, + "loss": 0.0698, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.232344150543213, + "rewards/margins": 3.855282783508301, + "rewards/rejected": -9.087627410888672, + "step": 10462 + }, + { + "epoch": 1.63, + "learning_rate": 6.473346127911199e-06, + "logits/chosen": -3.127742052078247, + "logits/rejected": -2.482694387435913, + "logps/chosen": -501.9020080566406, + "logps/rejected": -355.4759826660156, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.466379642486572, + "rewards/margins": 5.91147518157959, + "rewards/rejected": -10.37785530090332, + "step": 10463 + }, + { + "epoch": 1.63, + "learning_rate": 6.472612687380051e-06, + "logits/chosen": -2.6633219718933105, + "logits/rejected": -2.622288942337036, + "logps/chosen": -286.3838195800781, + "logps/rejected": -303.9954833984375, + "loss": 1.0278, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.171347141265869, + "rewards/margins": 2.258530378341675, + "rewards/rejected": -9.429877281188965, + "step": 10464 + }, + { + "epoch": 1.63, + "learning_rate": 6.4718792468489025e-06, + "logits/chosen": -2.272550582885742, + "logits/rejected": -3.166665554046631, + "logps/chosen": -69.8819580078125, + "logps/rejected": -272.5296325683594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.014334678649902, + "rewards/margins": 9.098207473754883, + "rewards/rejected": -13.112542152404785, + "step": 10465 + }, + { + "epoch": 1.63, + "learning_rate": 6.471145806317755e-06, + "logits/chosen": -1.8295586109161377, + "logits/rejected": -2.900217294692993, + "logps/chosen": -214.14834594726562, + "logps/rejected": -444.4873352050781, + "loss": 1.2578, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.090086936950684, + "rewards/margins": 3.25764799118042, + "rewards/rejected": -11.347734451293945, + "step": 10466 + }, + { + "epoch": 1.63, + "learning_rate": 6.470412365786608e-06, + "logits/chosen": -2.5927324295043945, + "logits/rejected": -1.8857535123825073, + "logps/chosen": -209.68634033203125, + "logps/rejected": -333.6269226074219, + "loss": 0.0305, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.404494285583496, + "rewards/margins": 4.272068023681641, + "rewards/rejected": -11.676563262939453, + "step": 10467 + }, + { + "epoch": 1.63, + "learning_rate": 6.46967892525546e-06, + "logits/chosen": -2.599299907684326, + "logits/rejected": -1.8085565567016602, + "logps/chosen": -226.7739715576172, + "logps/rejected": -215.3442840576172, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9032421112060547, + "rewards/margins": 4.965507507324219, + "rewards/rejected": -8.868749618530273, + "step": 10468 + }, + { + "epoch": 1.63, + "learning_rate": 6.468945484724312e-06, + "logits/chosen": -2.749690294265747, + "logits/rejected": -1.6910594701766968, + "logps/chosen": -227.85659790039062, + "logps/rejected": -319.6559143066406, + "loss": 0.1219, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4111976623535156, + "rewards/margins": 5.006364822387695, + "rewards/rejected": -8.417562484741211, + "step": 10469 + }, + { + "epoch": 1.63, + "learning_rate": 6.4682120441931636e-06, + "logits/chosen": -2.7554564476013184, + "logits/rejected": -2.9290144443511963, + "logps/chosen": -112.98448944091797, + "logps/rejected": -196.53366088867188, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.37685489654541, + "rewards/margins": 7.001335144042969, + "rewards/rejected": -11.378190040588379, + "step": 10470 + }, + { + "epoch": 1.63, + "learning_rate": 6.467478603662016e-06, + "logits/chosen": -2.7504122257232666, + "logits/rejected": -2.5468552112579346, + "logps/chosen": -411.0772705078125, + "logps/rejected": -264.0157775878906, + "loss": 2.0369, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.159814834594727, + "rewards/margins": 0.11084103584289551, + "rewards/rejected": -7.270655632019043, + "step": 10471 + }, + { + "epoch": 1.63, + "learning_rate": 6.466745163130868e-06, + "logits/chosen": -2.7801706790924072, + "logits/rejected": -2.9284350872039795, + "logps/chosen": -357.3056945800781, + "logps/rejected": -541.7279052734375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.338828086853027, + "rewards/margins": 8.301583290100098, + "rewards/rejected": -15.640411376953125, + "step": 10472 + }, + { + "epoch": 1.63, + "learning_rate": 6.46601172259972e-06, + "logits/chosen": -1.907117486000061, + "logits/rejected": -2.971522808074951, + "logps/chosen": -121.31979370117188, + "logps/rejected": -434.2970886230469, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.464641094207764, + "rewards/margins": 4.456357955932617, + "rewards/rejected": -8.920999526977539, + "step": 10473 + }, + { + "epoch": 1.63, + "learning_rate": 6.465278282068572e-06, + "logits/chosen": -2.0558347702026367, + "logits/rejected": -2.708752155303955, + "logps/chosen": -303.5577392578125, + "logps/rejected": -376.27423095703125, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1530914306640625, + "rewards/margins": 4.318853378295898, + "rewards/rejected": -10.471944808959961, + "step": 10474 + }, + { + "epoch": 1.63, + "learning_rate": 6.464544841537424e-06, + "logits/chosen": -2.794834613800049, + "logits/rejected": -2.9659008979797363, + "logps/chosen": -138.16458129882812, + "logps/rejected": -167.31527709960938, + "loss": 2.0977, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.107393264770508, + "rewards/margins": 1.5094408988952637, + "rewards/rejected": -9.61683464050293, + "step": 10475 + }, + { + "epoch": 1.63, + "learning_rate": 6.4638114010062765e-06, + "logits/chosen": -2.9281058311462402, + "logits/rejected": -1.8205674886703491, + "logps/chosen": -255.0415496826172, + "logps/rejected": -203.4716796875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.072909355163574, + "rewards/margins": 5.6913862228393555, + "rewards/rejected": -9.76429557800293, + "step": 10476 + }, + { + "epoch": 1.63, + "learning_rate": 6.463077960475128e-06, + "logits/chosen": -3.0727007389068604, + "logits/rejected": -3.1920900344848633, + "logps/chosen": -465.40789794921875, + "logps/rejected": -427.2287902832031, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.438920497894287, + "rewards/margins": 8.754310607910156, + "rewards/rejected": -11.193231582641602, + "step": 10477 + }, + { + "epoch": 1.63, + "learning_rate": 6.46234451994398e-06, + "logits/chosen": -2.1247646808624268, + "logits/rejected": -3.1344408988952637, + "logps/chosen": -71.38346862792969, + "logps/rejected": -420.4073486328125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.664151191711426, + "rewards/margins": 9.620010375976562, + "rewards/rejected": -14.284162521362305, + "step": 10478 + }, + { + "epoch": 1.63, + "learning_rate": 6.461611079412832e-06, + "logits/chosen": -2.5796549320220947, + "logits/rejected": -3.0282750129699707, + "logps/chosen": -493.01324462890625, + "logps/rejected": -545.62451171875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.842672824859619, + "rewards/margins": 7.068621635437012, + "rewards/rejected": -10.911294937133789, + "step": 10479 + }, + { + "epoch": 1.63, + "learning_rate": 6.460877638881685e-06, + "logits/chosen": -1.7753278017044067, + "logits/rejected": -2.7316417694091797, + "logps/chosen": -206.83322143554688, + "logps/rejected": -323.8729248046875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.762886047363281, + "rewards/margins": 7.334990978240967, + "rewards/rejected": -12.097877502441406, + "step": 10480 + }, + { + "epoch": 1.63, + "learning_rate": 6.460144198350537e-06, + "logits/chosen": -2.7424209117889404, + "logits/rejected": -2.848026752471924, + "logps/chosen": -187.1810760498047, + "logps/rejected": -312.8934326171875, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.543459415435791, + "rewards/margins": 6.171062469482422, + "rewards/rejected": -10.714521408081055, + "step": 10481 + }, + { + "epoch": 1.63, + "learning_rate": 6.459410757819389e-06, + "logits/chosen": -2.3117177486419678, + "logits/rejected": -1.7238467931747437, + "logps/chosen": -564.9862670898438, + "logps/rejected": -574.3136596679688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3547523021697998, + "rewards/margins": 17.3862361907959, + "rewards/rejected": -18.74098777770996, + "step": 10482 + }, + { + "epoch": 1.63, + "learning_rate": 6.458677317288241e-06, + "logits/chosen": -2.8278050422668457, + "logits/rejected": -1.724008560180664, + "logps/chosen": -282.40032958984375, + "logps/rejected": -158.41595458984375, + "loss": 0.1022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0592033863067627, + "rewards/margins": 4.399730682373047, + "rewards/rejected": -7.458934307098389, + "step": 10483 + }, + { + "epoch": 1.63, + "learning_rate": 6.457943876757093e-06, + "logits/chosen": -2.0759847164154053, + "logits/rejected": -3.0753440856933594, + "logps/chosen": -284.4231872558594, + "logps/rejected": -578.069580078125, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7989912033081055, + "rewards/margins": 5.075774192810059, + "rewards/rejected": -9.874765396118164, + "step": 10484 + }, + { + "epoch": 1.63, + "learning_rate": 6.457210436225946e-06, + "logits/chosen": -2.8296077251434326, + "logits/rejected": -2.2093286514282227, + "logps/chosen": -173.699462890625, + "logps/rejected": -374.0980224609375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.988399982452393, + "rewards/margins": 8.032735824584961, + "rewards/rejected": -13.021135330200195, + "step": 10485 + }, + { + "epoch": 1.63, + "learning_rate": 6.456476995694798e-06, + "logits/chosen": -2.1930763721466064, + "logits/rejected": -2.6516928672790527, + "logps/chosen": -183.4827880859375, + "logps/rejected": -362.299560546875, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3363990783691406, + "rewards/margins": 7.288695335388184, + "rewards/rejected": -10.625094413757324, + "step": 10486 + }, + { + "epoch": 1.63, + "learning_rate": 6.45574355516365e-06, + "logits/chosen": -1.434032917022705, + "logits/rejected": -2.5791125297546387, + "logps/chosen": -279.11865234375, + "logps/rejected": -499.17578125, + "loss": 0.4373, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.166962146759033, + "rewards/margins": 4.223747253417969, + "rewards/rejected": -10.390708923339844, + "step": 10487 + }, + { + "epoch": 1.63, + "learning_rate": 6.455010114632502e-06, + "logits/chosen": -3.0414772033691406, + "logits/rejected": -3.099820375442505, + "logps/chosen": -80.59402465820312, + "logps/rejected": -243.4849090576172, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.746401786804199, + "rewards/margins": 7.303412437438965, + "rewards/rejected": -12.049814224243164, + "step": 10488 + }, + { + "epoch": 1.63, + "learning_rate": 6.454276674101354e-06, + "logits/chosen": -2.362898111343384, + "logits/rejected": -2.901822090148926, + "logps/chosen": -113.86640930175781, + "logps/rejected": -326.1705322265625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.16064453125, + "rewards/margins": 8.2848482131958, + "rewards/rejected": -11.4454927444458, + "step": 10489 + }, + { + "epoch": 1.63, + "learning_rate": 6.453543233570206e-06, + "logits/chosen": -3.034872531890869, + "logits/rejected": -2.9511914253234863, + "logps/chosen": -424.550048828125, + "logps/rejected": -185.10263061523438, + "loss": 0.5562, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.998118877410889, + "rewards/margins": 0.2985198497772217, + "rewards/rejected": -8.296638488769531, + "step": 10490 + }, + { + "epoch": 1.63, + "learning_rate": 6.452809793039058e-06, + "logits/chosen": -2.792755365371704, + "logits/rejected": -2.522711992263794, + "logps/chosen": -166.56130981445312, + "logps/rejected": -239.52517700195312, + "loss": 0.0965, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.089781761169434, + "rewards/margins": 4.562410354614258, + "rewards/rejected": -10.652192115783691, + "step": 10491 + }, + { + "epoch": 1.63, + "learning_rate": 6.45207635250791e-06, + "logits/chosen": -2.834007978439331, + "logits/rejected": -3.0543034076690674, + "logps/chosen": -393.4095458984375, + "logps/rejected": -537.2572631835938, + "loss": 0.0654, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.906116485595703, + "rewards/margins": 5.2321271896362305, + "rewards/rejected": -11.138242721557617, + "step": 10492 + }, + { + "epoch": 1.63, + "learning_rate": 6.451342911976762e-06, + "logits/chosen": -2.1570544242858887, + "logits/rejected": -2.7263503074645996, + "logps/chosen": -102.24449157714844, + "logps/rejected": -404.6091613769531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.178304195404053, + "rewards/margins": 11.364365577697754, + "rewards/rejected": -17.54267120361328, + "step": 10493 + }, + { + "epoch": 1.63, + "learning_rate": 6.4506094714456146e-06, + "logits/chosen": -2.229083776473999, + "logits/rejected": -2.9866533279418945, + "logps/chosen": -111.500244140625, + "logps/rejected": -208.97018432617188, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.755462646484375, + "rewards/margins": 6.403522491455078, + "rewards/rejected": -12.158985137939453, + "step": 10494 + }, + { + "epoch": 1.63, + "learning_rate": 6.4498760309144665e-06, + "logits/chosen": -1.9208958148956299, + "logits/rejected": -2.7111704349517822, + "logps/chosen": -131.76370239257812, + "logps/rejected": -141.10403442382812, + "loss": 0.2342, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.979586124420166, + "rewards/margins": 1.6038291454315186, + "rewards/rejected": -7.5834150314331055, + "step": 10495 + }, + { + "epoch": 1.63, + "learning_rate": 6.449142590383318e-06, + "logits/chosen": -3.0925259590148926, + "logits/rejected": -1.8303277492523193, + "logps/chosen": -534.8971557617188, + "logps/rejected": -438.3828430175781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.498582363128662, + "rewards/margins": 10.639741897583008, + "rewards/rejected": -16.138324737548828, + "step": 10496 + }, + { + "epoch": 1.63, + "learning_rate": 6.44840914985217e-06, + "logits/chosen": -1.9946918487548828, + "logits/rejected": -2.7467288970947266, + "logps/chosen": -243.34573364257812, + "logps/rejected": -274.09002685546875, + "loss": 1.3314, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.120477199554443, + "rewards/margins": 4.4525346755981445, + "rewards/rejected": -10.57301139831543, + "step": 10497 + }, + { + "epoch": 1.63, + "learning_rate": 6.447675709321023e-06, + "logits/chosen": -1.4280498027801514, + "logits/rejected": -2.7843167781829834, + "logps/chosen": -122.42387390136719, + "logps/rejected": -330.4075927734375, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.337697982788086, + "rewards/margins": 6.254551887512207, + "rewards/rejected": -12.592249870300293, + "step": 10498 + }, + { + "epoch": 1.63, + "learning_rate": 6.446942268789875e-06, + "logits/chosen": -3.114912509918213, + "logits/rejected": -2.471799373626709, + "logps/chosen": -1167.84375, + "logps/rejected": -535.9329833984375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5087432861328125, + "rewards/margins": 9.811319351196289, + "rewards/rejected": -16.3200626373291, + "step": 10499 + }, + { + "epoch": 1.63, + "learning_rate": 6.4462088282587275e-06, + "logits/chosen": -1.3592671155929565, + "logits/rejected": -2.8166043758392334, + "logps/chosen": -107.27093505859375, + "logps/rejected": -296.08428955078125, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.894063949584961, + "rewards/margins": 4.654305458068848, + "rewards/rejected": -10.548368453979492, + "step": 10500 + }, + { + "epoch": 1.63, + "learning_rate": 6.445475387727579e-06, + "logits/chosen": -3.073714256286621, + "logits/rejected": -1.766762375831604, + "logps/chosen": -264.698486328125, + "logps/rejected": -225.19398498535156, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.544404983520508, + "rewards/margins": 5.472652435302734, + "rewards/rejected": -9.017057418823242, + "step": 10501 + }, + { + "epoch": 1.63, + "learning_rate": 6.444741947196431e-06, + "logits/chosen": -2.433572769165039, + "logits/rejected": -2.8909614086151123, + "logps/chosen": -342.9648132324219, + "logps/rejected": -376.88360595703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6865310668945312, + "rewards/margins": 10.359391212463379, + "rewards/rejected": -14.04592227935791, + "step": 10502 + }, + { + "epoch": 1.63, + "learning_rate": 6.444008506665284e-06, + "logits/chosen": -1.650302767753601, + "logits/rejected": -3.0643980503082275, + "logps/chosen": -201.7091522216797, + "logps/rejected": -380.72039794921875, + "loss": 0.7522, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.327712059020996, + "rewards/margins": 1.4688963890075684, + "rewards/rejected": -8.796607971191406, + "step": 10503 + }, + { + "epoch": 1.63, + "learning_rate": 6.443275066134136e-06, + "logits/chosen": -2.6085023880004883, + "logits/rejected": -2.951324462890625, + "logps/chosen": -205.08555603027344, + "logps/rejected": -333.39044189453125, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3414015769958496, + "rewards/margins": 6.527563571929932, + "rewards/rejected": -9.868965148925781, + "step": 10504 + }, + { + "epoch": 1.63, + "learning_rate": 6.442541625602988e-06, + "logits/chosen": -3.1066408157348633, + "logits/rejected": -3.0626418590545654, + "logps/chosen": -364.75213623046875, + "logps/rejected": -477.59356689453125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.15313196182251, + "rewards/margins": 6.8332319259643555, + "rewards/rejected": -13.986364364624023, + "step": 10505 + }, + { + "epoch": 1.63, + "learning_rate": 6.44180818507184e-06, + "logits/chosen": -2.863821268081665, + "logits/rejected": -1.7309623956680298, + "logps/chosen": -156.85311889648438, + "logps/rejected": -152.56109619140625, + "loss": 0.7158, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.4653425216674805, + "rewards/margins": 5.196124076843262, + "rewards/rejected": -10.661466598510742, + "step": 10506 + }, + { + "epoch": 1.63, + "learning_rate": 6.441074744540692e-06, + "logits/chosen": -2.9942257404327393, + "logits/rejected": -2.8054707050323486, + "logps/chosen": -348.7957763671875, + "logps/rejected": -329.44757080078125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.228621482849121, + "rewards/margins": 6.964444160461426, + "rewards/rejected": -10.193065643310547, + "step": 10507 + }, + { + "epoch": 1.63, + "learning_rate": 6.440341304009544e-06, + "logits/chosen": -1.8808315992355347, + "logits/rejected": -2.6026089191436768, + "logps/chosen": -296.85546875, + "logps/rejected": -627.3739013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5078842639923096, + "rewards/margins": 12.994718551635742, + "rewards/rejected": -15.502603530883789, + "step": 10508 + }, + { + "epoch": 1.63, + "learning_rate": 6.439607863478396e-06, + "logits/chosen": -0.9990997314453125, + "logits/rejected": -2.398707389831543, + "logps/chosen": -125.3318862915039, + "logps/rejected": -404.7261962890625, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.962247848510742, + "rewards/margins": 6.917438983917236, + "rewards/rejected": -17.87968635559082, + "step": 10509 + }, + { + "epoch": 1.63, + "learning_rate": 6.438874422947248e-06, + "logits/chosen": -3.0021886825561523, + "logits/rejected": -2.9969663619995117, + "logps/chosen": -262.2981872558594, + "logps/rejected": -404.6422119140625, + "loss": 0.3736, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.891146659851074, + "rewards/margins": 6.832639694213867, + "rewards/rejected": -12.723786354064941, + "step": 10510 + }, + { + "epoch": 1.63, + "learning_rate": 6.4381409824161e-06, + "logits/chosen": -2.9898929595947266, + "logits/rejected": -2.9381628036499023, + "logps/chosen": -137.53421020507812, + "logps/rejected": -260.30908203125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.718327045440674, + "rewards/margins": 6.769580364227295, + "rewards/rejected": -12.487907409667969, + "step": 10511 + }, + { + "epoch": 1.63, + "learning_rate": 6.437407541884953e-06, + "logits/chosen": -1.8234803676605225, + "logits/rejected": -2.617947578430176, + "logps/chosen": -106.29610443115234, + "logps/rejected": -243.24244689941406, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.590870380401611, + "rewards/margins": 4.326411247253418, + "rewards/rejected": -9.917282104492188, + "step": 10512 + }, + { + "epoch": 1.63, + "learning_rate": 6.4366741013538045e-06, + "logits/chosen": -1.6286553144454956, + "logits/rejected": -2.4951343536376953, + "logps/chosen": -200.249755859375, + "logps/rejected": -532.2450561523438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.225209951400757, + "rewards/margins": 14.43499755859375, + "rewards/rejected": -16.660205841064453, + "step": 10513 + }, + { + "epoch": 1.64, + "learning_rate": 6.435940660822656e-06, + "logits/chosen": -3.023499011993408, + "logits/rejected": -2.9298691749572754, + "logps/chosen": -278.11236572265625, + "logps/rejected": -221.2881317138672, + "loss": 0.0482, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.830809593200684, + "rewards/margins": 3.1129891872406006, + "rewards/rejected": -9.943799018859863, + "step": 10514 + }, + { + "epoch": 1.64, + "learning_rate": 6.435207220291508e-06, + "logits/chosen": -2.5064516067504883, + "logits/rejected": -3.076174259185791, + "logps/chosen": -176.51126098632812, + "logps/rejected": -406.4434814453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1681084632873535, + "rewards/margins": 10.065271377563477, + "rewards/rejected": -14.233379364013672, + "step": 10515 + }, + { + "epoch": 1.64, + "learning_rate": 6.434473779760361e-06, + "logits/chosen": -2.367356777191162, + "logits/rejected": -2.633309841156006, + "logps/chosen": -190.1434326171875, + "logps/rejected": -401.5423889160156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.608819007873535, + "rewards/margins": 9.185586929321289, + "rewards/rejected": -11.794404983520508, + "step": 10516 + }, + { + "epoch": 1.64, + "learning_rate": 6.433740339229214e-06, + "logits/chosen": -2.3134169578552246, + "logits/rejected": -2.7497546672821045, + "logps/chosen": -176.45559692382812, + "logps/rejected": -444.7590637207031, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.491701602935791, + "rewards/margins": 9.620716094970703, + "rewards/rejected": -13.112419128417969, + "step": 10517 + }, + { + "epoch": 1.64, + "learning_rate": 6.433006898698066e-06, + "logits/chosen": -2.694530963897705, + "logits/rejected": -3.0430450439453125, + "logps/chosen": -260.6466979980469, + "logps/rejected": -573.7025146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9916915893554688, + "rewards/margins": 10.78878402709961, + "rewards/rejected": -12.780475616455078, + "step": 10518 + }, + { + "epoch": 1.64, + "learning_rate": 6.4322734581669175e-06, + "logits/chosen": -2.5284667015075684, + "logits/rejected": -2.9586496353149414, + "logps/chosen": -275.41693115234375, + "logps/rejected": -317.869140625, + "loss": 0.5402, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.0307841300964355, + "rewards/margins": 3.9749441146850586, + "rewards/rejected": -10.005727767944336, + "step": 10519 + }, + { + "epoch": 1.64, + "learning_rate": 6.43154001763577e-06, + "logits/chosen": -2.905545711517334, + "logits/rejected": -2.344841480255127, + "logps/chosen": -773.63427734375, + "logps/rejected": -725.806884765625, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.25457239151001, + "rewards/margins": 7.568268775939941, + "rewards/rejected": -12.82284164428711, + "step": 10520 + }, + { + "epoch": 1.64, + "learning_rate": 6.430806577104622e-06, + "logits/chosen": -2.6778945922851562, + "logits/rejected": -2.9912259578704834, + "logps/chosen": -191.09744262695312, + "logps/rejected": -388.91876220703125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0213518142700195, + "rewards/margins": 6.754642963409424, + "rewards/rejected": -10.775995254516602, + "step": 10521 + }, + { + "epoch": 1.64, + "learning_rate": 6.430073136573474e-06, + "logits/chosen": -3.0783281326293945, + "logits/rejected": -2.094700813293457, + "logps/chosen": -557.4105224609375, + "logps/rejected": -499.7463073730469, + "loss": 0.0518, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.796600341796875, + "rewards/margins": 8.191215515136719, + "rewards/rejected": -9.987815856933594, + "step": 10522 + }, + { + "epoch": 1.64, + "learning_rate": 6.429339696042326e-06, + "logits/chosen": -2.4313714504241943, + "logits/rejected": -2.4974677562713623, + "logps/chosen": -572.1185302734375, + "logps/rejected": -652.8221435546875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6936378479003906, + "rewards/margins": 8.10140323638916, + "rewards/rejected": -11.79504108428955, + "step": 10523 + }, + { + "epoch": 1.64, + "learning_rate": 6.428606255511178e-06, + "logits/chosen": -2.8689804077148438, + "logits/rejected": -2.9729833602905273, + "logps/chosen": -348.58154296875, + "logps/rejected": -246.3661651611328, + "loss": 0.2925, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.60682487487793, + "rewards/margins": 3.2891557216644287, + "rewards/rejected": -7.895980358123779, + "step": 10524 + }, + { + "epoch": 1.64, + "learning_rate": 6.4278728149800304e-06, + "logits/chosen": -2.3605129718780518, + "logits/rejected": -2.9174411296844482, + "logps/chosen": -75.0863265991211, + "logps/rejected": -310.1016845703125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6800036430358887, + "rewards/margins": 7.126278400421143, + "rewards/rejected": -10.806282043457031, + "step": 10525 + }, + { + "epoch": 1.64, + "learning_rate": 6.427139374448882e-06, + "logits/chosen": -1.829584002494812, + "logits/rejected": -3.014127016067505, + "logps/chosen": -692.6910400390625, + "logps/rejected": -543.6080322265625, + "loss": 1.4859, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.473098754882812, + "rewards/margins": 5.27854585647583, + "rewards/rejected": -13.751644134521484, + "step": 10526 + }, + { + "epoch": 1.64, + "learning_rate": 6.426405933917734e-06, + "logits/chosen": -3.1015472412109375, + "logits/rejected": -2.7683675289154053, + "logps/chosen": -387.00897216796875, + "logps/rejected": -382.44024658203125, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.171225070953369, + "rewards/margins": 6.429922103881836, + "rewards/rejected": -9.601147651672363, + "step": 10527 + }, + { + "epoch": 1.64, + "learning_rate": 6.425672493386586e-06, + "logits/chosen": -2.5601208209991455, + "logits/rejected": -2.708385705947876, + "logps/chosen": -207.5228729248047, + "logps/rejected": -283.8479309082031, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.465968132019043, + "rewards/margins": 7.545341968536377, + "rewards/rejected": -11.011310577392578, + "step": 10528 + }, + { + "epoch": 1.64, + "learning_rate": 6.424939052855439e-06, + "logits/chosen": -1.7290962934494019, + "logits/rejected": -2.8052048683166504, + "logps/chosen": -103.49466705322266, + "logps/rejected": -417.8665771484375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.070841312408447, + "rewards/margins": 10.54715347290039, + "rewards/rejected": -15.617995262145996, + "step": 10529 + }, + { + "epoch": 1.64, + "learning_rate": 6.424205612324291e-06, + "logits/chosen": -1.6418160200119019, + "logits/rejected": -2.7749736309051514, + "logps/chosen": -119.81487274169922, + "logps/rejected": -412.1634826660156, + "loss": 1.3636, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.159049034118652, + "rewards/margins": 3.886402130126953, + "rewards/rejected": -12.045450210571289, + "step": 10530 + }, + { + "epoch": 1.64, + "learning_rate": 6.4234721717931425e-06, + "logits/chosen": -3.1619720458984375, + "logits/rejected": -2.5946645736694336, + "logps/chosen": -319.1688232421875, + "logps/rejected": -358.13006591796875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4665327072143555, + "rewards/margins": 5.888655662536621, + "rewards/rejected": -11.355188369750977, + "step": 10531 + }, + { + "epoch": 1.64, + "learning_rate": 6.422738731261994e-06, + "logits/chosen": -2.1779439449310303, + "logits/rejected": -2.6669883728027344, + "logps/chosen": -99.49972534179688, + "logps/rejected": -392.0389099121094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8905413150787354, + "rewards/margins": 10.851799011230469, + "rewards/rejected": -14.742340087890625, + "step": 10532 + }, + { + "epoch": 1.64, + "learning_rate": 6.422005290730847e-06, + "logits/chosen": -2.7643072605133057, + "logits/rejected": -2.8556172847747803, + "logps/chosen": -147.43115234375, + "logps/rejected": -197.71620178222656, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.275481224060059, + "rewards/margins": 4.517473220825195, + "rewards/rejected": -10.79295539855957, + "step": 10533 + }, + { + "epoch": 1.64, + "learning_rate": 6.4212718501997e-06, + "logits/chosen": -1.5306730270385742, + "logits/rejected": -2.996145009994507, + "logps/chosen": -84.66757202148438, + "logps/rejected": -368.6002502441406, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.811694622039795, + "rewards/margins": 5.805324554443359, + "rewards/rejected": -9.617019653320312, + "step": 10534 + }, + { + "epoch": 1.64, + "learning_rate": 6.420538409668552e-06, + "logits/chosen": -2.8825199604034424, + "logits/rejected": -2.381272315979004, + "logps/chosen": -270.2693786621094, + "logps/rejected": -312.7762451171875, + "loss": 0.0739, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.276315212249756, + "rewards/margins": 6.174554824829102, + "rewards/rejected": -10.450870513916016, + "step": 10535 + }, + { + "epoch": 1.64, + "learning_rate": 6.419804969137404e-06, + "logits/chosen": -2.659564971923828, + "logits/rejected": -2.94142746925354, + "logps/chosen": -177.81236267089844, + "logps/rejected": -323.1572265625, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.435154914855957, + "rewards/margins": 5.305646896362305, + "rewards/rejected": -8.740801811218262, + "step": 10536 + }, + { + "epoch": 1.64, + "learning_rate": 6.4190715286062555e-06, + "logits/chosen": -2.8470358848571777, + "logits/rejected": -2.0906896591186523, + "logps/chosen": -773.2577514648438, + "logps/rejected": -575.5992431640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5086870193481445, + "rewards/margins": 8.263586044311523, + "rewards/rejected": -13.772274017333984, + "step": 10537 + }, + { + "epoch": 1.64, + "learning_rate": 6.418338088075108e-06, + "logits/chosen": -3.0794894695281982, + "logits/rejected": -3.205034017562866, + "logps/chosen": -106.81089782714844, + "logps/rejected": -218.3038330078125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.182056188583374, + "rewards/margins": 6.273270606994629, + "rewards/rejected": -9.455327033996582, + "step": 10538 + }, + { + "epoch": 1.64, + "learning_rate": 6.41760464754396e-06, + "logits/chosen": -2.5739479064941406, + "logits/rejected": -2.0497260093688965, + "logps/chosen": -251.2369384765625, + "logps/rejected": -307.59075927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.305229187011719, + "rewards/margins": 10.105231285095215, + "rewards/rejected": -14.410459518432617, + "step": 10539 + }, + { + "epoch": 1.64, + "learning_rate": 6.416871207012812e-06, + "logits/chosen": -2.4951608180999756, + "logits/rejected": -3.1381261348724365, + "logps/chosen": -134.23342895507812, + "logps/rejected": -379.29052734375, + "loss": 3.4616, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.912845611572266, + "rewards/margins": 3.1532976627349854, + "rewards/rejected": -12.066143989562988, + "step": 10540 + }, + { + "epoch": 1.64, + "learning_rate": 6.416137766481664e-06, + "logits/chosen": -2.996835708618164, + "logits/rejected": -2.8371870517730713, + "logps/chosen": -366.18365478515625, + "logps/rejected": -273.5071716308594, + "loss": 0.0516, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3002610206604, + "rewards/margins": 4.513877868652344, + "rewards/rejected": -9.814139366149902, + "step": 10541 + }, + { + "epoch": 1.64, + "learning_rate": 6.415404325950516e-06, + "logits/chosen": -1.914841890335083, + "logits/rejected": -2.729855537414551, + "logps/chosen": -266.9378967285156, + "logps/rejected": -353.59228515625, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.115658760070801, + "rewards/margins": 6.711689472198486, + "rewards/rejected": -13.827348709106445, + "step": 10542 + }, + { + "epoch": 1.64, + "learning_rate": 6.4146708854193685e-06, + "logits/chosen": -2.690154790878296, + "logits/rejected": -2.8123581409454346, + "logps/chosen": -570.5743408203125, + "logps/rejected": -466.8565979003906, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.460623264312744, + "rewards/margins": 7.754789352416992, + "rewards/rejected": -12.215413093566895, + "step": 10543 + }, + { + "epoch": 1.64, + "learning_rate": 6.41393744488822e-06, + "logits/chosen": -1.8861044645309448, + "logits/rejected": -2.331439971923828, + "logps/chosen": -317.3397216796875, + "logps/rejected": -641.475341796875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9874649047851562, + "rewards/margins": 16.176708221435547, + "rewards/rejected": -18.164173126220703, + "step": 10544 + }, + { + "epoch": 1.64, + "learning_rate": 6.413204004357072e-06, + "logits/chosen": -2.959664821624756, + "logits/rejected": -2.68411922454834, + "logps/chosen": -684.502685546875, + "logps/rejected": -549.9295043945312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8240463733673096, + "rewards/margins": 11.08317756652832, + "rewards/rejected": -12.907224655151367, + "step": 10545 + }, + { + "epoch": 1.64, + "learning_rate": 6.412470563825924e-06, + "logits/chosen": -3.0200047492980957, + "logits/rejected": -3.0336577892303467, + "logps/chosen": -577.66650390625, + "logps/rejected": -537.6146240234375, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.990181922912598, + "rewards/margins": 4.45326042175293, + "rewards/rejected": -9.443442344665527, + "step": 10546 + }, + { + "epoch": 1.64, + "learning_rate": 6.411737123294777e-06, + "logits/chosen": -3.2947959899902344, + "logits/rejected": -2.5615718364715576, + "logps/chosen": -168.03517150878906, + "logps/rejected": -127.30874633789062, + "loss": 1.3915, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.996601104736328, + "rewards/margins": 2.5899808406829834, + "rewards/rejected": -8.58658218383789, + "step": 10547 + }, + { + "epoch": 1.64, + "learning_rate": 6.411003682763629e-06, + "logits/chosen": -2.1571576595306396, + "logits/rejected": -2.582350730895996, + "logps/chosen": -157.7501220703125, + "logps/rejected": -402.5614013671875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3803532123565674, + "rewards/margins": 8.785184860229492, + "rewards/rejected": -12.165538787841797, + "step": 10548 + }, + { + "epoch": 1.64, + "learning_rate": 6.410270242232481e-06, + "logits/chosen": -3.1288516521453857, + "logits/rejected": -2.445429563522339, + "logps/chosen": -383.03021240234375, + "logps/rejected": -423.35272216796875, + "loss": 0.396, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.120543003082275, + "rewards/margins": 5.288998603820801, + "rewards/rejected": -11.409542083740234, + "step": 10549 + }, + { + "epoch": 1.64, + "learning_rate": 6.409536801701333e-06, + "logits/chosen": -1.405483365058899, + "logits/rejected": -2.683380365371704, + "logps/chosen": -114.66736602783203, + "logps/rejected": -414.032470703125, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.720918655395508, + "rewards/margins": 8.536178588867188, + "rewards/rejected": -13.257097244262695, + "step": 10550 + }, + { + "epoch": 1.64, + "learning_rate": 6.408803361170185e-06, + "logits/chosen": -2.2568557262420654, + "logits/rejected": -2.9313929080963135, + "logps/chosen": -394.4751281738281, + "logps/rejected": -418.63848876953125, + "loss": 2.827, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.936553001403809, + "rewards/margins": -2.220541477203369, + "rewards/rejected": -10.716011047363281, + "step": 10551 + }, + { + "epoch": 1.64, + "learning_rate": 6.408069920639038e-06, + "logits/chosen": -1.7134596109390259, + "logits/rejected": -2.975277900695801, + "logps/chosen": -78.03738403320312, + "logps/rejected": -253.1754608154297, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3624396324157715, + "rewards/margins": 8.29115104675293, + "rewards/rejected": -12.65359115600586, + "step": 10552 + }, + { + "epoch": 1.64, + "learning_rate": 6.40733648010789e-06, + "logits/chosen": -2.4754936695098877, + "logits/rejected": -2.6836397647857666, + "logps/chosen": -463.4414367675781, + "logps/rejected": -682.3275756835938, + "loss": 0.4162, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.896395683288574, + "rewards/margins": 3.1786928176879883, + "rewards/rejected": -10.075088500976562, + "step": 10553 + }, + { + "epoch": 1.64, + "learning_rate": 6.406603039576742e-06, + "logits/chosen": -2.6002559661865234, + "logits/rejected": -2.496192455291748, + "logps/chosen": -170.82669067382812, + "logps/rejected": -258.82623291015625, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.009546756744385, + "rewards/margins": 6.319067478179932, + "rewards/rejected": -13.328614234924316, + "step": 10554 + }, + { + "epoch": 1.64, + "learning_rate": 6.4058695990455935e-06, + "logits/chosen": -2.036367416381836, + "logits/rejected": -2.688727378845215, + "logps/chosen": -236.77479553222656, + "logps/rejected": -436.0740661621094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.760313510894775, + "rewards/margins": 10.936901092529297, + "rewards/rejected": -16.697214126586914, + "step": 10555 + }, + { + "epoch": 1.64, + "learning_rate": 6.405136158514446e-06, + "logits/chosen": -1.8278069496154785, + "logits/rejected": -3.090761184692383, + "logps/chosen": -193.48460388183594, + "logps/rejected": -423.65301513671875, + "loss": 0.0894, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.067198753356934, + "rewards/margins": 4.125565052032471, + "rewards/rejected": -12.192764282226562, + "step": 10556 + }, + { + "epoch": 1.64, + "learning_rate": 6.404402717983298e-06, + "logits/chosen": -3.024327039718628, + "logits/rejected": -3.0833961963653564, + "logps/chosen": -93.02593231201172, + "logps/rejected": -221.06478881835938, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.150931358337402, + "rewards/margins": 5.269029140472412, + "rewards/rejected": -10.419960021972656, + "step": 10557 + }, + { + "epoch": 1.64, + "learning_rate": 6.40366927745215e-06, + "logits/chosen": -2.4355170726776123, + "logits/rejected": -3.101247787475586, + "logps/chosen": -120.07181549072266, + "logps/rejected": -291.43206787109375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.509331226348877, + "rewards/margins": 7.121878623962402, + "rewards/rejected": -9.631210327148438, + "step": 10558 + }, + { + "epoch": 1.64, + "learning_rate": 6.402935836921002e-06, + "logits/chosen": -3.175936222076416, + "logits/rejected": -3.1893398761749268, + "logps/chosen": -360.7517395019531, + "logps/rejected": -389.35186767578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.787726879119873, + "rewards/margins": 8.644750595092773, + "rewards/rejected": -12.432476997375488, + "step": 10559 + }, + { + "epoch": 1.64, + "learning_rate": 6.402202396389854e-06, + "logits/chosen": -2.621866226196289, + "logits/rejected": -2.8953657150268555, + "logps/chosen": -156.44326782226562, + "logps/rejected": -380.29766845703125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.398697853088379, + "rewards/margins": 8.376891136169434, + "rewards/rejected": -13.775588989257812, + "step": 10560 + }, + { + "epoch": 1.64, + "learning_rate": 6.4014689558587065e-06, + "logits/chosen": -2.0310654640197754, + "logits/rejected": -2.7913548946380615, + "logps/chosen": -178.78797912597656, + "logps/rejected": -519.1998291015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.347041130065918, + "rewards/margins": 9.986865997314453, + "rewards/rejected": -16.333908081054688, + "step": 10561 + }, + { + "epoch": 1.64, + "learning_rate": 6.400735515327558e-06, + "logits/chosen": -3.1080148220062256, + "logits/rejected": -3.223496437072754, + "logps/chosen": -159.9668426513672, + "logps/rejected": -284.5037841796875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.464797019958496, + "rewards/margins": 6.480320453643799, + "rewards/rejected": -9.945117950439453, + "step": 10562 + }, + { + "epoch": 1.64, + "learning_rate": 6.40000207479641e-06, + "logits/chosen": -2.8482000827789307, + "logits/rejected": -2.5509963035583496, + "logps/chosen": -485.9709777832031, + "logps/rejected": -467.6905517578125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.594795227050781, + "rewards/margins": 6.803493499755859, + "rewards/rejected": -13.39828872680664, + "step": 10563 + }, + { + "epoch": 1.64, + "learning_rate": 6.399268634265262e-06, + "logits/chosen": -1.6303187608718872, + "logits/rejected": -2.6366219520568848, + "logps/chosen": -121.79334259033203, + "logps/rejected": -311.16253662109375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.255200386047363, + "rewards/margins": 6.708758354187012, + "rewards/rejected": -11.963958740234375, + "step": 10564 + }, + { + "epoch": 1.64, + "learning_rate": 6.398535193734115e-06, + "logits/chosen": -2.8967010974884033, + "logits/rejected": -3.0960941314697266, + "logps/chosen": -129.27125549316406, + "logps/rejected": -338.2174072265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.375472068786621, + "rewards/margins": 10.753678321838379, + "rewards/rejected": -13.129150390625, + "step": 10565 + }, + { + "epoch": 1.64, + "learning_rate": 6.397801753202967e-06, + "logits/chosen": -2.2928876876831055, + "logits/rejected": -3.078887939453125, + "logps/chosen": -298.27001953125, + "logps/rejected": -480.7575988769531, + "loss": 0.0405, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0768537521362305, + "rewards/margins": 6.740635871887207, + "rewards/rejected": -13.817489624023438, + "step": 10566 + }, + { + "epoch": 1.64, + "learning_rate": 6.3970683126718195e-06, + "logits/chosen": -3.114128828048706, + "logits/rejected": -3.078350067138672, + "logps/chosen": -224.04345703125, + "logps/rejected": -123.23646545410156, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9458341598510742, + "rewards/margins": 7.2102580070495605, + "rewards/rejected": -9.156091690063477, + "step": 10567 + }, + { + "epoch": 1.64, + "learning_rate": 6.396334872140671e-06, + "logits/chosen": -2.8396778106689453, + "logits/rejected": -2.740144729614258, + "logps/chosen": -381.73089599609375, + "logps/rejected": -447.41168212890625, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.024725914001465, + "rewards/margins": 5.667792320251465, + "rewards/rejected": -12.69251823425293, + "step": 10568 + }, + { + "epoch": 1.64, + "learning_rate": 6.395601431609524e-06, + "logits/chosen": -1.9388331174850464, + "logits/rejected": -2.7296383380889893, + "logps/chosen": -203.16062927246094, + "logps/rejected": -297.75994873046875, + "loss": 0.2576, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.436341285705566, + "rewards/margins": 3.296884775161743, + "rewards/rejected": -9.73322582244873, + "step": 10569 + }, + { + "epoch": 1.64, + "learning_rate": 6.394867991078376e-06, + "logits/chosen": -2.5943655967712402, + "logits/rejected": -2.0925374031066895, + "logps/chosen": -259.9013366699219, + "logps/rejected": -311.61456298828125, + "loss": 2.0272, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.939010143280029, + "rewards/margins": 0.6516554355621338, + "rewards/rejected": -8.590665817260742, + "step": 10570 + }, + { + "epoch": 1.64, + "learning_rate": 6.394134550547228e-06, + "logits/chosen": -2.6685454845428467, + "logits/rejected": -2.771663188934326, + "logps/chosen": -289.09283447265625, + "logps/rejected": -520.523193359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.366269111633301, + "rewards/margins": 11.542571067810059, + "rewards/rejected": -16.90884017944336, + "step": 10571 + }, + { + "epoch": 1.64, + "learning_rate": 6.39340111001608e-06, + "logits/chosen": -1.7141776084899902, + "logits/rejected": -2.7622761726379395, + "logps/chosen": -280.2967834472656, + "logps/rejected": -475.337890625, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.132971286773682, + "rewards/margins": 8.320890426635742, + "rewards/rejected": -14.453862190246582, + "step": 10572 + }, + { + "epoch": 1.64, + "learning_rate": 6.392667669484932e-06, + "logits/chosen": -2.5293755531311035, + "logits/rejected": -3.017740249633789, + "logps/chosen": -54.718284606933594, + "logps/rejected": -173.17636108398438, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.067420482635498, + "rewards/margins": 6.419918060302734, + "rewards/rejected": -10.48733901977539, + "step": 10573 + }, + { + "epoch": 1.64, + "learning_rate": 6.391934228953784e-06, + "logits/chosen": -2.4841196537017822, + "logits/rejected": -3.0470662117004395, + "logps/chosen": -332.506103515625, + "logps/rejected": -384.8058776855469, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.682066917419434, + "rewards/margins": 7.030831336975098, + "rewards/rejected": -12.712898254394531, + "step": 10574 + }, + { + "epoch": 1.64, + "learning_rate": 6.391200788422636e-06, + "logits/chosen": -3.1408660411834717, + "logits/rejected": -2.9325685501098633, + "logps/chosen": -626.8287963867188, + "logps/rejected": -506.36279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.37451171875, + "rewards/margins": 11.857597351074219, + "rewards/rejected": -14.232109069824219, + "step": 10575 + }, + { + "epoch": 1.64, + "learning_rate": 6.390467347891488e-06, + "logits/chosen": -2.6729865074157715, + "logits/rejected": -3.2579405307769775, + "logps/chosen": -121.38543701171875, + "logps/rejected": -445.21142578125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.536221504211426, + "rewards/margins": 10.840039253234863, + "rewards/rejected": -15.376260757446289, + "step": 10576 + }, + { + "epoch": 1.64, + "learning_rate": 6.38973390736034e-06, + "logits/chosen": -2.342033624649048, + "logits/rejected": -3.109163284301758, + "logps/chosen": -181.7355499267578, + "logps/rejected": -379.36358642578125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.444833755493164, + "rewards/margins": 7.004359722137451, + "rewards/rejected": -12.449193954467773, + "step": 10577 + }, + { + "epoch": 1.65, + "learning_rate": 6.389000466829193e-06, + "logits/chosen": -2.977121591567993, + "logits/rejected": -2.5084428787231445, + "logps/chosen": -155.2234344482422, + "logps/rejected": -186.0811309814453, + "loss": 0.0257, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.78507137298584, + "rewards/margins": 4.630925178527832, + "rewards/rejected": -10.415996551513672, + "step": 10578 + }, + { + "epoch": 1.65, + "learning_rate": 6.3882670262980446e-06, + "logits/chosen": -1.735012412071228, + "logits/rejected": -2.9318013191223145, + "logps/chosen": -83.12141418457031, + "logps/rejected": -357.7426452636719, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.884125232696533, + "rewards/margins": 7.4578399658203125, + "rewards/rejected": -14.341964721679688, + "step": 10579 + }, + { + "epoch": 1.65, + "learning_rate": 6.3875335857668964e-06, + "logits/chosen": -2.928828477859497, + "logits/rejected": -2.9869744777679443, + "logps/chosen": -132.58282470703125, + "logps/rejected": -221.33749389648438, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.191678047180176, + "rewards/margins": 4.886605262756348, + "rewards/rejected": -11.078283309936523, + "step": 10580 + }, + { + "epoch": 1.65, + "learning_rate": 6.386800145235748e-06, + "logits/chosen": -1.8667467832565308, + "logits/rejected": -2.80183482170105, + "logps/chosen": -174.88107299804688, + "logps/rejected": -534.6300048828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.037977695465088, + "rewards/margins": 9.43646240234375, + "rewards/rejected": -14.47443962097168, + "step": 10581 + }, + { + "epoch": 1.65, + "learning_rate": 6.3860667047046e-06, + "logits/chosen": -2.206301689147949, + "logits/rejected": -3.0989482402801514, + "logps/chosen": -260.31903076171875, + "logps/rejected": -522.0296020507812, + "loss": 0.2777, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.383098602294922, + "rewards/margins": 4.930696487426758, + "rewards/rejected": -10.31379508972168, + "step": 10582 + }, + { + "epoch": 1.65, + "learning_rate": 6.385333264173453e-06, + "logits/chosen": -2.55305814743042, + "logits/rejected": -3.076059103012085, + "logps/chosen": -768.2967529296875, + "logps/rejected": -785.1126098632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9947266578674316, + "rewards/margins": 12.417045593261719, + "rewards/rejected": -14.411772727966309, + "step": 10583 + }, + { + "epoch": 1.65, + "learning_rate": 6.384599823642306e-06, + "logits/chosen": -2.938520908355713, + "logits/rejected": -2.2018604278564453, + "logps/chosen": -215.24557495117188, + "logps/rejected": -288.37841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5871132612228394, + "rewards/margins": 10.740530014038086, + "rewards/rejected": -12.327642440795898, + "step": 10584 + }, + { + "epoch": 1.65, + "learning_rate": 6.3838663831111575e-06, + "logits/chosen": -2.808068037033081, + "logits/rejected": -2.2414820194244385, + "logps/chosen": -735.448974609375, + "logps/rejected": -533.0269165039062, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.028007507324219, + "rewards/margins": 6.522401809692383, + "rewards/rejected": -15.550409317016602, + "step": 10585 + }, + { + "epoch": 1.65, + "learning_rate": 6.383132942580009e-06, + "logits/chosen": -2.2716450691223145, + "logits/rejected": -2.9195592403411865, + "logps/chosen": -626.0961303710938, + "logps/rejected": -531.9539794921875, + "loss": 2.1702, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.351963520050049, + "rewards/margins": 2.354482650756836, + "rewards/rejected": -7.706446170806885, + "step": 10586 + }, + { + "epoch": 1.65, + "learning_rate": 6.382399502048862e-06, + "logits/chosen": -2.7827959060668945, + "logits/rejected": -3.1413819789886475, + "logps/chosen": -141.26620483398438, + "logps/rejected": -284.7218017578125, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.06072998046875, + "rewards/margins": 5.340174198150635, + "rewards/rejected": -10.400903701782227, + "step": 10587 + }, + { + "epoch": 1.65, + "learning_rate": 6.381666061517714e-06, + "logits/chosen": -2.684992551803589, + "logits/rejected": -3.0492665767669678, + "logps/chosen": -626.1810913085938, + "logps/rejected": -678.8467407226562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9865341186523438, + "rewards/margins": 12.136990547180176, + "rewards/rejected": -15.12352466583252, + "step": 10588 + }, + { + "epoch": 1.65, + "learning_rate": 6.380932620986566e-06, + "logits/chosen": -2.5011119842529297, + "logits/rejected": -3.035062074661255, + "logps/chosen": -113.14757537841797, + "logps/rejected": -224.759033203125, + "loss": 0.0874, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.520090103149414, + "rewards/margins": 2.650535821914673, + "rewards/rejected": -9.170625686645508, + "step": 10589 + }, + { + "epoch": 1.65, + "learning_rate": 6.380199180455418e-06, + "logits/chosen": -1.413398265838623, + "logits/rejected": -2.503012180328369, + "logps/chosen": -189.64894104003906, + "logps/rejected": -421.58154296875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7185750007629395, + "rewards/margins": 9.685062408447266, + "rewards/rejected": -15.403636932373047, + "step": 10590 + }, + { + "epoch": 1.65, + "learning_rate": 6.37946573992427e-06, + "logits/chosen": -2.8021929264068604, + "logits/rejected": -2.827679395675659, + "logps/chosen": -220.94554138183594, + "logps/rejected": -391.091064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4417431354522705, + "rewards/margins": 12.641071319580078, + "rewards/rejected": -14.08281421661377, + "step": 10591 + }, + { + "epoch": 1.65, + "learning_rate": 6.378732299393122e-06, + "logits/chosen": -2.609665632247925, + "logits/rejected": -2.7974653244018555, + "logps/chosen": -248.09915161132812, + "logps/rejected": -252.1334991455078, + "loss": 0.7821, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.097407341003418, + "rewards/margins": 4.023186206817627, + "rewards/rejected": -11.120593070983887, + "step": 10592 + }, + { + "epoch": 1.65, + "learning_rate": 6.377998858861974e-06, + "logits/chosen": -3.114990234375, + "logits/rejected": -3.1962528228759766, + "logps/chosen": -83.71773529052734, + "logps/rejected": -262.4961242675781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.453976631164551, + "rewards/margins": 10.791189193725586, + "rewards/rejected": -15.24516487121582, + "step": 10593 + }, + { + "epoch": 1.65, + "learning_rate": 6.377265418330826e-06, + "logits/chosen": -2.9223434925079346, + "logits/rejected": -3.107304096221924, + "logps/chosen": -168.70733642578125, + "logps/rejected": -285.4234313964844, + "loss": 1.4233, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.627239227294922, + "rewards/margins": 5.227317810058594, + "rewards/rejected": -13.854557037353516, + "step": 10594 + }, + { + "epoch": 1.65, + "learning_rate": 6.376531977799678e-06, + "logits/chosen": -3.007524251937866, + "logits/rejected": -3.0575010776519775, + "logps/chosen": -70.27824401855469, + "logps/rejected": -159.02847290039062, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.166506767272949, + "rewards/margins": 7.333964824676514, + "rewards/rejected": -11.500471115112305, + "step": 10595 + }, + { + "epoch": 1.65, + "learning_rate": 6.375798537268531e-06, + "logits/chosen": -1.4903897047042847, + "logits/rejected": -2.5941648483276367, + "logps/chosen": -257.1518249511719, + "logps/rejected": -618.606689453125, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.200601100921631, + "rewards/margins": 6.6366987228393555, + "rewards/rejected": -13.837299346923828, + "step": 10596 + }, + { + "epoch": 1.65, + "learning_rate": 6.375065096737383e-06, + "logits/chosen": -2.731604814529419, + "logits/rejected": -1.5122720003128052, + "logps/chosen": -234.60598754882812, + "logps/rejected": -313.50927734375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0962324142456055, + "rewards/margins": 6.2217936515808105, + "rewards/rejected": -10.318025588989258, + "step": 10597 + }, + { + "epoch": 1.65, + "learning_rate": 6.3743316562062345e-06, + "logits/chosen": -2.7958381175994873, + "logits/rejected": -2.9348855018615723, + "logps/chosen": -301.2396545410156, + "logps/rejected": -359.9747314453125, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0133683681488037, + "rewards/margins": 7.50747537612915, + "rewards/rejected": -9.520843505859375, + "step": 10598 + }, + { + "epoch": 1.65, + "learning_rate": 6.373598215675086e-06, + "logits/chosen": -2.7453908920288086, + "logits/rejected": -1.1690601110458374, + "logps/chosen": -248.79368591308594, + "logps/rejected": -245.4235382080078, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.976006984710693, + "rewards/margins": 7.043475151062012, + "rewards/rejected": -13.019481658935547, + "step": 10599 + }, + { + "epoch": 1.65, + "learning_rate": 6.372864775143939e-06, + "logits/chosen": -1.339159607887268, + "logits/rejected": -2.8672988414764404, + "logps/chosen": -121.41102600097656, + "logps/rejected": -256.0426025390625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.720666885375977, + "rewards/margins": 7.6565375328063965, + "rewards/rejected": -12.377204895019531, + "step": 10600 + }, + { + "epoch": 1.65, + "learning_rate": 6.372131334612792e-06, + "logits/chosen": -1.9158475399017334, + "logits/rejected": -2.733358144760132, + "logps/chosen": -204.17681884765625, + "logps/rejected": -365.04754638671875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.530586242675781, + "rewards/margins": 8.073479652404785, + "rewards/rejected": -12.604065895080566, + "step": 10601 + }, + { + "epoch": 1.65, + "learning_rate": 6.371397894081644e-06, + "logits/chosen": -2.4606103897094727, + "logits/rejected": -2.9967451095581055, + "logps/chosen": -120.17407989501953, + "logps/rejected": -383.7525939941406, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7088799476623535, + "rewards/margins": 7.8186445236206055, + "rewards/rejected": -12.527524948120117, + "step": 10602 + }, + { + "epoch": 1.65, + "learning_rate": 6.3706644535504956e-06, + "logits/chosen": -2.3240585327148438, + "logits/rejected": -2.9116363525390625, + "logps/chosen": -166.84310913085938, + "logps/rejected": -620.5508422851562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.772029876708984, + "rewards/margins": 11.22006607055664, + "rewards/rejected": -16.992095947265625, + "step": 10603 + }, + { + "epoch": 1.65, + "learning_rate": 6.3699310130193474e-06, + "logits/chosen": -2.380382776260376, + "logits/rejected": -3.0405921936035156, + "logps/chosen": -106.04625701904297, + "logps/rejected": -430.80206298828125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.229467868804932, + "rewards/margins": 6.216460704803467, + "rewards/rejected": -11.445928573608398, + "step": 10604 + }, + { + "epoch": 1.65, + "learning_rate": 6.3691975724882e-06, + "logits/chosen": -1.1846470832824707, + "logits/rejected": -2.8122572898864746, + "logps/chosen": -98.61785888671875, + "logps/rejected": -653.268798828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.335689544677734, + "rewards/margins": 13.081329345703125, + "rewards/rejected": -18.41701889038086, + "step": 10605 + }, + { + "epoch": 1.65, + "learning_rate": 6.368464131957052e-06, + "logits/chosen": -2.3854987621307373, + "logits/rejected": -3.078862190246582, + "logps/chosen": -285.7109680175781, + "logps/rejected": -476.4130554199219, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.018315315246582, + "rewards/margins": 8.98396110534668, + "rewards/rejected": -13.002275466918945, + "step": 10606 + }, + { + "epoch": 1.65, + "learning_rate": 6.367730691425904e-06, + "logits/chosen": -2.793401002883911, + "logits/rejected": -1.7161544561386108, + "logps/chosen": -432.83819580078125, + "logps/rejected": -256.1017150878906, + "loss": 0.8072, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.829127311706543, + "rewards/margins": 0.11731576919555664, + "rewards/rejected": -6.946443557739258, + "step": 10607 + }, + { + "epoch": 1.65, + "learning_rate": 6.366997250894756e-06, + "logits/chosen": -2.0575315952301025, + "logits/rejected": -2.9600157737731934, + "logps/chosen": -215.5034637451172, + "logps/rejected": -220.5210723876953, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.382089614868164, + "rewards/margins": 5.271986484527588, + "rewards/rejected": -10.654075622558594, + "step": 10608 + }, + { + "epoch": 1.65, + "learning_rate": 6.3662638103636085e-06, + "logits/chosen": -2.677912950515747, + "logits/rejected": -2.9062321186065674, + "logps/chosen": -233.53477478027344, + "logps/rejected": -363.15228271484375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0975661277771, + "rewards/margins": 6.26552152633667, + "rewards/rejected": -10.36308765411377, + "step": 10609 + }, + { + "epoch": 1.65, + "learning_rate": 6.36553036983246e-06, + "logits/chosen": -1.897797703742981, + "logits/rejected": -3.0811173915863037, + "logps/chosen": -96.30412292480469, + "logps/rejected": -413.8723449707031, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7030601501464844, + "rewards/margins": 10.54126262664795, + "rewards/rejected": -14.244322776794434, + "step": 10610 + }, + { + "epoch": 1.65, + "learning_rate": 6.364796929301312e-06, + "logits/chosen": -2.824871778488159, + "logits/rejected": -3.053196668624878, + "logps/chosen": -82.54720306396484, + "logps/rejected": -277.9873352050781, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.512608051300049, + "rewards/margins": 7.822097301483154, + "rewards/rejected": -13.334705352783203, + "step": 10611 + }, + { + "epoch": 1.65, + "learning_rate": 6.364063488770164e-06, + "logits/chosen": -2.9419350624084473, + "logits/rejected": -2.4496238231658936, + "logps/chosen": -679.598388671875, + "logps/rejected": -540.0586547851562, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.574078559875488, + "rewards/margins": 8.98680305480957, + "rewards/rejected": -15.560881614685059, + "step": 10612 + }, + { + "epoch": 1.65, + "learning_rate": 6.363330048239016e-06, + "logits/chosen": -2.9245593547821045, + "logits/rejected": -2.1511712074279785, + "logps/chosen": -206.7991943359375, + "logps/rejected": -203.06985473632812, + "loss": 0.2057, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.511519432067871, + "rewards/margins": 3.7545037269592285, + "rewards/rejected": -11.266023635864258, + "step": 10613 + }, + { + "epoch": 1.65, + "learning_rate": 6.362596607707869e-06, + "logits/chosen": -2.9661808013916016, + "logits/rejected": -2.983309507369995, + "logps/chosen": -920.399658203125, + "logps/rejected": -866.2533569335938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8274383544921875, + "rewards/margins": 10.567163467407227, + "rewards/rejected": -15.394601821899414, + "step": 10614 + }, + { + "epoch": 1.65, + "learning_rate": 6.361863167176721e-06, + "logits/chosen": -2.9174702167510986, + "logits/rejected": -2.295039415359497, + "logps/chosen": -624.7825317382812, + "logps/rejected": -639.4481201171875, + "loss": 0.1081, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.357768535614014, + "rewards/margins": 6.203239440917969, + "rewards/rejected": -12.56100845336914, + "step": 10615 + }, + { + "epoch": 1.65, + "learning_rate": 6.3611297266455725e-06, + "logits/chosen": -2.936953067779541, + "logits/rejected": -2.9544572830200195, + "logps/chosen": -207.9768829345703, + "logps/rejected": -313.97576904296875, + "loss": 2.2447, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.650172710418701, + "rewards/margins": 2.7111289501190186, + "rewards/rejected": -10.36130142211914, + "step": 10616 + }, + { + "epoch": 1.65, + "learning_rate": 6.360396286114425e-06, + "logits/chosen": -2.778032064437866, + "logits/rejected": -2.9464786052703857, + "logps/chosen": -205.0181121826172, + "logps/rejected": -468.4357604980469, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.499913692474365, + "rewards/margins": 7.681171417236328, + "rewards/rejected": -12.181085586547852, + "step": 10617 + }, + { + "epoch": 1.65, + "learning_rate": 6.359662845583278e-06, + "logits/chosen": -0.44372186064720154, + "logits/rejected": -2.323301076889038, + "logps/chosen": -150.1864013671875, + "logps/rejected": -852.52001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.032903671264648, + "rewards/margins": 16.048742294311523, + "rewards/rejected": -22.081645965576172, + "step": 10618 + }, + { + "epoch": 1.65, + "learning_rate": 6.35892940505213e-06, + "logits/chosen": -0.5448821783065796, + "logits/rejected": -2.96035099029541, + "logps/chosen": -159.23422241210938, + "logps/rejected": -294.0526123046875, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.191946029663086, + "rewards/margins": 4.886070251464844, + "rewards/rejected": -11.07801628112793, + "step": 10619 + }, + { + "epoch": 1.65, + "learning_rate": 6.358195964520982e-06, + "logits/chosen": -2.8082008361816406, + "logits/rejected": -3.0854785442352295, + "logps/chosen": -406.2244567871094, + "logps/rejected": -457.56451416015625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.429841041564941, + "rewards/margins": 6.423487663269043, + "rewards/rejected": -11.853328704833984, + "step": 10620 + }, + { + "epoch": 1.65, + "learning_rate": 6.357462523989834e-06, + "logits/chosen": -1.6144211292266846, + "logits/rejected": -2.8346099853515625, + "logps/chosen": -158.72869873046875, + "logps/rejected": -357.53607177734375, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.488715171813965, + "rewards/margins": 4.829198837280273, + "rewards/rejected": -8.317914009094238, + "step": 10621 + }, + { + "epoch": 1.65, + "learning_rate": 6.3567290834586855e-06, + "logits/chosen": -1.951414942741394, + "logits/rejected": -2.858159303665161, + "logps/chosen": -174.27244567871094, + "logps/rejected": -281.2523193359375, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7354960441589355, + "rewards/margins": 6.9468488693237305, + "rewards/rejected": -11.682344436645508, + "step": 10622 + }, + { + "epoch": 1.65, + "learning_rate": 6.355995642927538e-06, + "logits/chosen": -2.497400999069214, + "logits/rejected": -1.990161657333374, + "logps/chosen": -115.28813171386719, + "logps/rejected": -224.5135955810547, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.211099624633789, + "rewards/margins": 8.149867057800293, + "rewards/rejected": -13.360966682434082, + "step": 10623 + }, + { + "epoch": 1.65, + "learning_rate": 6.35526220239639e-06, + "logits/chosen": -3.1231496334075928, + "logits/rejected": -3.1905765533447266, + "logps/chosen": -52.30385971069336, + "logps/rejected": -188.92962646484375, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0879011154174805, + "rewards/margins": 5.844124794006348, + "rewards/rejected": -9.932025909423828, + "step": 10624 + }, + { + "epoch": 1.65, + "learning_rate": 6.354528761865242e-06, + "logits/chosen": -2.225151777267456, + "logits/rejected": -3.0647265911102295, + "logps/chosen": -468.1790771484375, + "logps/rejected": -535.97314453125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6906418800354, + "rewards/margins": 6.185432434082031, + "rewards/rejected": -11.876073837280273, + "step": 10625 + }, + { + "epoch": 1.65, + "learning_rate": 6.353795321334094e-06, + "logits/chosen": -2.850768566131592, + "logits/rejected": -2.3107526302337646, + "logps/chosen": -567.627197265625, + "logps/rejected": -402.6002502441406, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.206035614013672, + "rewards/margins": 6.278473377227783, + "rewards/rejected": -9.484508514404297, + "step": 10626 + }, + { + "epoch": 1.65, + "learning_rate": 6.3530618808029466e-06, + "logits/chosen": -2.931978464126587, + "logits/rejected": -2.866098642349243, + "logps/chosen": -182.27633666992188, + "logps/rejected": -347.8790283203125, + "loss": 1.4717, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.357291221618652, + "rewards/margins": 3.639723300933838, + "rewards/rejected": -11.997014045715332, + "step": 10627 + }, + { + "epoch": 1.65, + "learning_rate": 6.3523284402717985e-06, + "logits/chosen": -2.8117928504943848, + "logits/rejected": -0.7566109299659729, + "logps/chosen": -426.9182434082031, + "logps/rejected": -297.46282958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.288495063781738, + "rewards/margins": 9.645261764526367, + "rewards/rejected": -14.933755874633789, + "step": 10628 + }, + { + "epoch": 1.65, + "learning_rate": 6.35159499974065e-06, + "logits/chosen": -2.531240463256836, + "logits/rejected": -2.908494472503662, + "logps/chosen": -602.858642578125, + "logps/rejected": -395.2125244140625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.472291469573975, + "rewards/margins": 8.283760070800781, + "rewards/rejected": -14.756051063537598, + "step": 10629 + }, + { + "epoch": 1.65, + "learning_rate": 6.350861559209502e-06, + "logits/chosen": -2.8009531497955322, + "logits/rejected": -3.1410341262817383, + "logps/chosen": -74.5656509399414, + "logps/rejected": -211.58169555664062, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.135042667388916, + "rewards/margins": 6.737548828125, + "rewards/rejected": -10.872591018676758, + "step": 10630 + }, + { + "epoch": 1.65, + "learning_rate": 6.350128118678354e-06, + "logits/chosen": -3.047058582305908, + "logits/rejected": -3.1319682598114014, + "logps/chosen": -604.3143920898438, + "logps/rejected": -525.8029174804688, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.897183418273926, + "rewards/margins": 6.189178466796875, + "rewards/rejected": -11.0863618850708, + "step": 10631 + }, + { + "epoch": 1.65, + "learning_rate": 6.349394678147207e-06, + "logits/chosen": -2.860341787338257, + "logits/rejected": -2.9547829627990723, + "logps/chosen": -706.4385375976562, + "logps/rejected": -945.5999755859375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.78717041015625, + "rewards/margins": 8.123588562011719, + "rewards/rejected": -11.910758972167969, + "step": 10632 + }, + { + "epoch": 1.65, + "learning_rate": 6.348661237616059e-06, + "logits/chosen": -2.492326498031616, + "logits/rejected": -3.0330018997192383, + "logps/chosen": -186.7477569580078, + "logps/rejected": -416.577392578125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.467486619949341, + "rewards/margins": 8.783699989318848, + "rewards/rejected": -12.25118637084961, + "step": 10633 + }, + { + "epoch": 1.65, + "learning_rate": 6.347927797084911e-06, + "logits/chosen": -1.7335243225097656, + "logits/rejected": -2.566223382949829, + "logps/chosen": -216.2554473876953, + "logps/rejected": -329.7992858886719, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.106313705444336, + "rewards/margins": 7.273568630218506, + "rewards/rejected": -12.3798828125, + "step": 10634 + }, + { + "epoch": 1.65, + "learning_rate": 6.347194356553763e-06, + "logits/chosen": -2.3500680923461914, + "logits/rejected": -2.954623222351074, + "logps/chosen": -62.59503173828125, + "logps/rejected": -313.2675476074219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8604230880737305, + "rewards/margins": 10.161189079284668, + "rewards/rejected": -15.021612167358398, + "step": 10635 + }, + { + "epoch": 1.65, + "learning_rate": 6.346460916022616e-06, + "logits/chosen": -2.9035000801086426, + "logits/rejected": -2.9714231491088867, + "logps/chosen": -174.41334533691406, + "logps/rejected": -408.822265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4486236572265625, + "rewards/margins": 10.650487899780273, + "rewards/rejected": -13.099111557006836, + "step": 10636 + }, + { + "epoch": 1.65, + "learning_rate": 6.345727475491468e-06, + "logits/chosen": -2.3737540245056152, + "logits/rejected": -2.825828790664673, + "logps/chosen": -131.55677795410156, + "logps/rejected": -258.32916259765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.17762565612793, + "rewards/margins": 9.118417739868164, + "rewards/rejected": -14.296043395996094, + "step": 10637 + }, + { + "epoch": 1.65, + "learning_rate": 6.34499403496032e-06, + "logits/chosen": -1.5061874389648438, + "logits/rejected": -1.6294078826904297, + "logps/chosen": -252.22760009765625, + "logps/rejected": -439.42803955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7747278213500977, + "rewards/margins": 11.204581260681152, + "rewards/rejected": -14.97930908203125, + "step": 10638 + }, + { + "epoch": 1.65, + "learning_rate": 6.344260594429172e-06, + "logits/chosen": -1.3627231121063232, + "logits/rejected": -2.665498733520508, + "logps/chosen": -184.87078857421875, + "logps/rejected": -414.7930908203125, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6766886711120605, + "rewards/margins": 11.303658485412598, + "rewards/rejected": -15.9803466796875, + "step": 10639 + }, + { + "epoch": 1.65, + "learning_rate": 6.3435271538980235e-06, + "logits/chosen": -2.657982110977173, + "logits/rejected": -2.960965871810913, + "logps/chosen": -177.9130401611328, + "logps/rejected": -377.15313720703125, + "loss": 0.0301, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.157162666320801, + "rewards/margins": 6.528384208679199, + "rewards/rejected": -13.685546875, + "step": 10640 + }, + { + "epoch": 1.65, + "learning_rate": 6.342793713366876e-06, + "logits/chosen": -2.941178798675537, + "logits/rejected": -1.8934659957885742, + "logps/chosen": -624.38427734375, + "logps/rejected": -612.71533203125, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.166952610015869, + "rewards/margins": 8.17746639251709, + "rewards/rejected": -15.344419479370117, + "step": 10641 + }, + { + "epoch": 1.66, + "learning_rate": 6.342060272835728e-06, + "logits/chosen": -1.371935248374939, + "logits/rejected": -3.0926620960235596, + "logps/chosen": -129.3164825439453, + "logps/rejected": -415.7974548339844, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.548209190368652, + "rewards/margins": 8.079964637756348, + "rewards/rejected": -12.628173828125, + "step": 10642 + }, + { + "epoch": 1.66, + "learning_rate": 6.34132683230458e-06, + "logits/chosen": -1.8708045482635498, + "logits/rejected": -2.828953504562378, + "logps/chosen": -220.201416015625, + "logps/rejected": -498.1670837402344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.021542549133301, + "rewards/margins": 8.955379486083984, + "rewards/rejected": -12.976922988891602, + "step": 10643 + }, + { + "epoch": 1.66, + "learning_rate": 6.340593391773432e-06, + "logits/chosen": -1.8388036489486694, + "logits/rejected": -2.934542417526245, + "logps/chosen": -190.1526336669922, + "logps/rejected": -395.75616455078125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3553056716918945, + "rewards/margins": 10.295348167419434, + "rewards/rejected": -15.650653839111328, + "step": 10644 + }, + { + "epoch": 1.66, + "learning_rate": 6.339859951242285e-06, + "logits/chosen": -1.9647619724273682, + "logits/rejected": -2.526198625564575, + "logps/chosen": -300.47222900390625, + "logps/rejected": -383.526123046875, + "loss": 0.0499, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.962118625640869, + "rewards/margins": 7.198704242706299, + "rewards/rejected": -13.160822868347168, + "step": 10645 + }, + { + "epoch": 1.66, + "learning_rate": 6.3391265107111365e-06, + "logits/chosen": -2.7929890155792236, + "logits/rejected": -1.9923614263534546, + "logps/chosen": -161.64682006835938, + "logps/rejected": -216.5816192626953, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.769927740097046, + "rewards/margins": 7.789790153503418, + "rewards/rejected": -11.559718132019043, + "step": 10646 + }, + { + "epoch": 1.66, + "learning_rate": 6.338393070179988e-06, + "logits/chosen": -2.764458417892456, + "logits/rejected": -2.830289602279663, + "logps/chosen": -164.92593383789062, + "logps/rejected": -445.23187255859375, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.783818244934082, + "rewards/margins": 8.75657844543457, + "rewards/rejected": -12.540396690368652, + "step": 10647 + }, + { + "epoch": 1.66, + "learning_rate": 6.33765962964884e-06, + "logits/chosen": -1.5212714672088623, + "logits/rejected": -2.8366146087646484, + "logps/chosen": -177.81930541992188, + "logps/rejected": -429.9677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4807381629943848, + "rewards/margins": 11.190383911132812, + "rewards/rejected": -14.671121597290039, + "step": 10648 + }, + { + "epoch": 1.66, + "learning_rate": 6.336926189117692e-06, + "logits/chosen": -2.862086057662964, + "logits/rejected": -2.49409556388855, + "logps/chosen": -269.9315185546875, + "logps/rejected": -345.7452697753906, + "loss": 0.0598, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.008387565612793, + "rewards/margins": 5.484871864318848, + "rewards/rejected": -11.49325942993164, + "step": 10649 + }, + { + "epoch": 1.66, + "learning_rate": 6.336192748586545e-06, + "logits/chosen": -2.257495403289795, + "logits/rejected": -2.77158784866333, + "logps/chosen": -236.75048828125, + "logps/rejected": -297.5788269042969, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.372694969177246, + "rewards/margins": 8.126579284667969, + "rewards/rejected": -15.499275207519531, + "step": 10650 + }, + { + "epoch": 1.66, + "learning_rate": 6.335459308055398e-06, + "logits/chosen": -2.9989821910858154, + "logits/rejected": -2.895707368850708, + "logps/chosen": -548.064697265625, + "logps/rejected": -505.28350830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8534669876098633, + "rewards/margins": 9.947198867797852, + "rewards/rejected": -13.800665855407715, + "step": 10651 + }, + { + "epoch": 1.66, + "learning_rate": 6.3347258675242495e-06, + "logits/chosen": -1.96576988697052, + "logits/rejected": -2.761021375656128, + "logps/chosen": -255.1883087158203, + "logps/rejected": -397.2892761230469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.138437747955322, + "rewards/margins": 11.7808837890625, + "rewards/rejected": -15.919321060180664, + "step": 10652 + }, + { + "epoch": 1.66, + "learning_rate": 6.333992426993101e-06, + "logits/chosen": -2.677295446395874, + "logits/rejected": -1.6573702096939087, + "logps/chosen": -386.5362548828125, + "logps/rejected": -262.21356201171875, + "loss": 2.4503, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.523048400878906, + "rewards/margins": 0.6862270832061768, + "rewards/rejected": -6.209275722503662, + "step": 10653 + }, + { + "epoch": 1.66, + "learning_rate": 6.333258986461954e-06, + "logits/chosen": -2.0496294498443604, + "logits/rejected": -2.914609670639038, + "logps/chosen": -307.99481201171875, + "logps/rejected": -615.1477661132812, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4105324745178223, + "rewards/margins": 8.964361190795898, + "rewards/rejected": -11.374893188476562, + "step": 10654 + }, + { + "epoch": 1.66, + "learning_rate": 6.332525545930806e-06, + "logits/chosen": -3.1302804946899414, + "logits/rejected": -1.6346148252487183, + "logps/chosen": -324.7608947753906, + "logps/rejected": -346.8188781738281, + "loss": 1.0502, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.247802734375, + "rewards/margins": 4.618554592132568, + "rewards/rejected": -10.866357803344727, + "step": 10655 + }, + { + "epoch": 1.66, + "learning_rate": 6.331792105399658e-06, + "logits/chosen": -1.744309663772583, + "logits/rejected": -2.6686809062957764, + "logps/chosen": -241.88209533691406, + "logps/rejected": -385.3441162109375, + "loss": 0.0352, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.429190158843994, + "rewards/margins": 6.3115434646606445, + "rewards/rejected": -12.740734100341797, + "step": 10656 + }, + { + "epoch": 1.66, + "learning_rate": 6.33105866486851e-06, + "logits/chosen": -2.339839220046997, + "logits/rejected": -2.906489133834839, + "logps/chosen": -177.98614501953125, + "logps/rejected": -432.11517333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7665393352508545, + "rewards/margins": 13.662275314331055, + "rewards/rejected": -17.428813934326172, + "step": 10657 + }, + { + "epoch": 1.66, + "learning_rate": 6.330325224337362e-06, + "logits/chosen": -2.507052421569824, + "logits/rejected": -2.979693651199341, + "logps/chosen": -289.3575134277344, + "logps/rejected": -369.1000671386719, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.104325771331787, + "rewards/margins": 5.532896995544434, + "rewards/rejected": -9.637222290039062, + "step": 10658 + }, + { + "epoch": 1.66, + "learning_rate": 6.329591783806214e-06, + "logits/chosen": -1.547912836074829, + "logits/rejected": -2.9033048152923584, + "logps/chosen": -537.3270263671875, + "logps/rejected": -658.0853271484375, + "loss": 0.0417, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.185720920562744, + "rewards/margins": 5.348081588745117, + "rewards/rejected": -11.533802032470703, + "step": 10659 + }, + { + "epoch": 1.66, + "learning_rate": 6.328858343275066e-06, + "logits/chosen": -2.988975763320923, + "logits/rejected": -2.420583963394165, + "logps/chosen": -513.6195068359375, + "logps/rejected": -602.4151611328125, + "loss": 0.1218, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.614147186279297, + "rewards/margins": 2.649794101715088, + "rewards/rejected": -11.263940811157227, + "step": 10660 + }, + { + "epoch": 1.66, + "learning_rate": 6.328124902743918e-06, + "logits/chosen": -2.0093166828155518, + "logits/rejected": -3.0833282470703125, + "logps/chosen": -258.4424133300781, + "logps/rejected": -653.1390380859375, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.332093238830566, + "rewards/margins": 5.252373218536377, + "rewards/rejected": -10.584466934204102, + "step": 10661 + }, + { + "epoch": 1.66, + "learning_rate": 6.32739146221277e-06, + "logits/chosen": -2.834613084793091, + "logits/rejected": -2.3813114166259766, + "logps/chosen": -487.787353515625, + "logps/rejected": -442.7587890625, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.264409065246582, + "rewards/margins": 8.420753479003906, + "rewards/rejected": -16.685161590576172, + "step": 10662 + }, + { + "epoch": 1.66, + "learning_rate": 6.326658021681623e-06, + "logits/chosen": -2.3560831546783447, + "logits/rejected": -2.823712110519409, + "logps/chosen": -188.60671997070312, + "logps/rejected": -290.419189453125, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.011274814605713, + "rewards/margins": 5.537506103515625, + "rewards/rejected": -9.54878044128418, + "step": 10663 + }, + { + "epoch": 1.66, + "learning_rate": 6.3259245811504745e-06, + "logits/chosen": -0.9411256909370422, + "logits/rejected": -2.621981143951416, + "logps/chosen": -113.76940155029297, + "logps/rejected": -507.7997741699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.159906387329102, + "rewards/margins": 11.077754974365234, + "rewards/rejected": -16.237661361694336, + "step": 10664 + }, + { + "epoch": 1.66, + "learning_rate": 6.325191140619326e-06, + "logits/chosen": -2.9592723846435547, + "logits/rejected": -2.7347805500030518, + "logps/chosen": -580.9069213867188, + "logps/rejected": -662.2862548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.508879661560059, + "rewards/margins": 10.282576560974121, + "rewards/rejected": -14.79145622253418, + "step": 10665 + }, + { + "epoch": 1.66, + "learning_rate": 6.324457700088178e-06, + "logits/chosen": -3.051208019256592, + "logits/rejected": -2.397855043411255, + "logps/chosen": -499.5621643066406, + "logps/rejected": -378.36639404296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.077093601226807, + "rewards/margins": 8.730937957763672, + "rewards/rejected": -13.80803108215332, + "step": 10666 + }, + { + "epoch": 1.66, + "learning_rate": 6.323724259557031e-06, + "logits/chosen": -2.8748559951782227, + "logits/rejected": -2.8763625621795654, + "logps/chosen": -61.83831024169922, + "logps/rejected": -147.92984008789062, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6472837924957275, + "rewards/margins": 5.015336990356445, + "rewards/rejected": -8.662620544433594, + "step": 10667 + }, + { + "epoch": 1.66, + "learning_rate": 6.322990819025884e-06, + "logits/chosen": -2.6647844314575195, + "logits/rejected": -3.1384403705596924, + "logps/chosen": -104.88319396972656, + "logps/rejected": -270.29583740234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9578022956848145, + "rewards/margins": 8.22513198852539, + "rewards/rejected": -11.182933807373047, + "step": 10668 + }, + { + "epoch": 1.66, + "learning_rate": 6.322257378494736e-06, + "logits/chosen": -2.035874843597412, + "logits/rejected": -2.8380844593048096, + "logps/chosen": -106.47018432617188, + "logps/rejected": -275.96728515625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.565889358520508, + "rewards/margins": 8.00711727142334, + "rewards/rejected": -12.573006629943848, + "step": 10669 + }, + { + "epoch": 1.66, + "learning_rate": 6.3215239379635875e-06, + "logits/chosen": -1.7317163944244385, + "logits/rejected": -2.589484930038452, + "logps/chosen": -111.0739974975586, + "logps/rejected": -218.841552734375, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.926545143127441, + "rewards/margins": 5.9379682540893555, + "rewards/rejected": -12.864513397216797, + "step": 10670 + }, + { + "epoch": 1.66, + "learning_rate": 6.320790497432439e-06, + "logits/chosen": -2.388511896133423, + "logits/rejected": -2.573040008544922, + "logps/chosen": -375.66864013671875, + "logps/rejected": -689.56591796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.59083890914917, + "rewards/margins": 9.817438125610352, + "rewards/rejected": -14.408276557922363, + "step": 10671 + }, + { + "epoch": 1.66, + "learning_rate": 6.320057056901292e-06, + "logits/chosen": -2.7713239192962646, + "logits/rejected": -1.7606098651885986, + "logps/chosen": -415.2834777832031, + "logps/rejected": -345.0496520996094, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.852458477020264, + "rewards/margins": 7.53642463684082, + "rewards/rejected": -12.388883590698242, + "step": 10672 + }, + { + "epoch": 1.66, + "learning_rate": 6.319323616370144e-06, + "logits/chosen": -2.897988796234131, + "logits/rejected": -3.1644270420074463, + "logps/chosen": -56.10169982910156, + "logps/rejected": -364.9154052734375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.485341548919678, + "rewards/margins": 8.202756881713867, + "rewards/rejected": -12.688097953796387, + "step": 10673 + }, + { + "epoch": 1.66, + "learning_rate": 6.318590175838996e-06, + "logits/chosen": -2.7229862213134766, + "logits/rejected": -2.8931171894073486, + "logps/chosen": -101.20733642578125, + "logps/rejected": -293.3416442871094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4611153602600098, + "rewards/margins": 8.969459533691406, + "rewards/rejected": -12.430574417114258, + "step": 10674 + }, + { + "epoch": 1.66, + "learning_rate": 6.317856735307848e-06, + "logits/chosen": -2.0051827430725098, + "logits/rejected": -2.795833110809326, + "logps/chosen": -202.20831298828125, + "logps/rejected": -247.51324462890625, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8930864334106445, + "rewards/margins": 6.443507671356201, + "rewards/rejected": -9.336593627929688, + "step": 10675 + }, + { + "epoch": 1.66, + "learning_rate": 6.3171232947767005e-06, + "logits/chosen": -2.9106738567352295, + "logits/rejected": -3.0689077377319336, + "logps/chosen": -95.72142028808594, + "logps/rejected": -208.098388671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4605937004089355, + "rewards/margins": 9.487041473388672, + "rewards/rejected": -13.947635650634766, + "step": 10676 + }, + { + "epoch": 1.66, + "learning_rate": 6.316389854245552e-06, + "logits/chosen": -3.0064258575439453, + "logits/rejected": -2.73531436920166, + "logps/chosen": -566.489013671875, + "logps/rejected": -684.5349731445312, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.603416442871094, + "rewards/margins": 7.896149635314941, + "rewards/rejected": -13.499565124511719, + "step": 10677 + }, + { + "epoch": 1.66, + "learning_rate": 6.315656413714404e-06, + "logits/chosen": -2.77056622505188, + "logits/rejected": -3.003405809402466, + "logps/chosen": -223.76144409179688, + "logps/rejected": -395.9308776855469, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.218104839324951, + "rewards/margins": 10.389707565307617, + "rewards/rejected": -15.607812881469727, + "step": 10678 + }, + { + "epoch": 1.66, + "learning_rate": 6.314922973183256e-06, + "logits/chosen": -1.7210662364959717, + "logits/rejected": -2.8936684131622314, + "logps/chosen": -332.9976806640625, + "logps/rejected": -469.9302978515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.010073661804199, + "rewards/margins": 10.2183837890625, + "rewards/rejected": -14.228458404541016, + "step": 10679 + }, + { + "epoch": 1.66, + "learning_rate": 6.314189532652108e-06, + "logits/chosen": -2.5249245166778564, + "logits/rejected": -2.901634454727173, + "logps/chosen": -132.778076171875, + "logps/rejected": -379.3069763183594, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.75837516784668, + "rewards/margins": 9.333414077758789, + "rewards/rejected": -14.091789245605469, + "step": 10680 + }, + { + "epoch": 1.66, + "learning_rate": 6.313456092120961e-06, + "logits/chosen": -3.01554536819458, + "logits/rejected": -2.7335128784179688, + "logps/chosen": -167.67349243164062, + "logps/rejected": -229.5874786376953, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.606663703918457, + "rewards/margins": 7.182779312133789, + "rewards/rejected": -12.789443016052246, + "step": 10681 + }, + { + "epoch": 1.66, + "learning_rate": 6.312722651589813e-06, + "logits/chosen": -2.1697559356689453, + "logits/rejected": -3.1163105964660645, + "logps/chosen": -104.5557861328125, + "logps/rejected": -285.9097900390625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.149830341339111, + "rewards/margins": 6.211049556732178, + "rewards/rejected": -12.360879898071289, + "step": 10682 + }, + { + "epoch": 1.66, + "learning_rate": 6.3119892110586645e-06, + "logits/chosen": -3.139950752258301, + "logits/rejected": -2.7446508407592773, + "logps/chosen": -196.18765258789062, + "logps/rejected": -227.0083465576172, + "loss": 0.0438, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.536187648773193, + "rewards/margins": 5.190008163452148, + "rewards/rejected": -9.7261962890625, + "step": 10683 + }, + { + "epoch": 1.66, + "learning_rate": 6.311255770527517e-06, + "logits/chosen": -1.3825703859329224, + "logits/rejected": -2.519685745239258, + "logps/chosen": -120.01852416992188, + "logps/rejected": -336.90753173828125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1736063957214355, + "rewards/margins": 8.571242332458496, + "rewards/rejected": -13.744848251342773, + "step": 10684 + }, + { + "epoch": 1.66, + "learning_rate": 6.31052232999637e-06, + "logits/chosen": -2.925381660461426, + "logits/rejected": -1.4028607606887817, + "logps/chosen": -520.58544921875, + "logps/rejected": -351.217041015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.951655387878418, + "rewards/margins": 10.239055633544922, + "rewards/rejected": -15.190711975097656, + "step": 10685 + }, + { + "epoch": 1.66, + "learning_rate": 6.309788889465222e-06, + "logits/chosen": -2.0608651638031006, + "logits/rejected": -2.921065092086792, + "logps/chosen": -282.8641052246094, + "logps/rejected": -447.87384033203125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.690860748291016, + "rewards/margins": 8.498833656311035, + "rewards/rejected": -15.189693450927734, + "step": 10686 + }, + { + "epoch": 1.66, + "learning_rate": 6.309055448934074e-06, + "logits/chosen": -1.5928006172180176, + "logits/rejected": -2.726383924484253, + "logps/chosen": -136.273193359375, + "logps/rejected": -246.3546600341797, + "loss": 0.0938, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.531649589538574, + "rewards/margins": 8.131092071533203, + "rewards/rejected": -12.662740707397461, + "step": 10687 + }, + { + "epoch": 1.66, + "learning_rate": 6.3083220084029255e-06, + "logits/chosen": -1.8174716234207153, + "logits/rejected": -3.048966407775879, + "logps/chosen": -113.45626831054688, + "logps/rejected": -318.8820495605469, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.492849826812744, + "rewards/margins": 7.799903869628906, + "rewards/rejected": -12.292753219604492, + "step": 10688 + }, + { + "epoch": 1.66, + "learning_rate": 6.3075885678717774e-06, + "logits/chosen": -2.219583034515381, + "logits/rejected": -2.9613184928894043, + "logps/chosen": -514.3038330078125, + "logps/rejected": -708.1694946289062, + "loss": 0.1693, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.623720169067383, + "rewards/margins": 3.275202989578247, + "rewards/rejected": -11.89892292022705, + "step": 10689 + }, + { + "epoch": 1.66, + "learning_rate": 6.30685512734063e-06, + "logits/chosen": -2.1520004272460938, + "logits/rejected": -2.8723857402801514, + "logps/chosen": -399.67156982421875, + "logps/rejected": -718.844970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.933516025543213, + "rewards/margins": 11.911700248718262, + "rewards/rejected": -16.845216751098633, + "step": 10690 + }, + { + "epoch": 1.66, + "learning_rate": 6.306121686809482e-06, + "logits/chosen": -2.1338863372802734, + "logits/rejected": -3.1713106632232666, + "logps/chosen": -179.11593627929688, + "logps/rejected": -491.1037902832031, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4636993408203125, + "rewards/margins": 6.206189155578613, + "rewards/rejected": -10.669888496398926, + "step": 10691 + }, + { + "epoch": 1.66, + "learning_rate": 6.305388246278334e-06, + "logits/chosen": -2.9568467140197754, + "logits/rejected": -2.4382688999176025, + "logps/chosen": -275.7393798828125, + "logps/rejected": -313.24591064453125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.981003761291504, + "rewards/margins": 8.882299423217773, + "rewards/rejected": -13.863302230834961, + "step": 10692 + }, + { + "epoch": 1.66, + "learning_rate": 6.304654805747186e-06, + "logits/chosen": -2.716996431350708, + "logits/rejected": -2.9049463272094727, + "logps/chosen": -121.2100601196289, + "logps/rejected": -227.4994659423828, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7764437198638916, + "rewards/margins": 7.3147125244140625, + "rewards/rejected": -11.091156005859375, + "step": 10693 + }, + { + "epoch": 1.66, + "learning_rate": 6.3039213652160385e-06, + "logits/chosen": -3.1471309661865234, + "logits/rejected": -2.633665084838867, + "logps/chosen": -317.165283203125, + "logps/rejected": -172.0594482421875, + "loss": 1.9536, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.902142524719238, + "rewards/margins": 1.5366311073303223, + "rewards/rejected": -7.4387736320495605, + "step": 10694 + }, + { + "epoch": 1.66, + "learning_rate": 6.30318792468489e-06, + "logits/chosen": -2.3280465602874756, + "logits/rejected": -2.76298189163208, + "logps/chosen": -321.2598571777344, + "logps/rejected": -415.39691162109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4475998878479, + "rewards/margins": 9.451997756958008, + "rewards/rejected": -14.899599075317383, + "step": 10695 + }, + { + "epoch": 1.66, + "learning_rate": 6.302454484153742e-06, + "logits/chosen": -2.8945703506469727, + "logits/rejected": -2.889106512069702, + "logps/chosen": -147.28805541992188, + "logps/rejected": -379.5835266113281, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.313941955566406, + "rewards/margins": 8.559806823730469, + "rewards/rejected": -12.873748779296875, + "step": 10696 + }, + { + "epoch": 1.66, + "learning_rate": 6.301721043622594e-06, + "logits/chosen": -1.9383530616760254, + "logits/rejected": -2.7601871490478516, + "logps/chosen": -109.47805786132812, + "logps/rejected": -436.21563720703125, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3950743675231934, + "rewards/margins": 12.141361236572266, + "rewards/rejected": -15.536436080932617, + "step": 10697 + }, + { + "epoch": 1.66, + "learning_rate": 6.300987603091446e-06, + "logits/chosen": -2.679875612258911, + "logits/rejected": -1.8695770502090454, + "logps/chosen": -202.9620819091797, + "logps/rejected": -278.97943115234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1865234375, + "rewards/margins": 9.418535232543945, + "rewards/rejected": -14.605058670043945, + "step": 10698 + }, + { + "epoch": 1.66, + "learning_rate": 6.300254162560299e-06, + "logits/chosen": -2.2745237350463867, + "logits/rejected": -2.688918352127075, + "logps/chosen": -89.9535903930664, + "logps/rejected": -270.6400146484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.499497890472412, + "rewards/margins": 10.14534854888916, + "rewards/rejected": -14.644845962524414, + "step": 10699 + }, + { + "epoch": 1.66, + "learning_rate": 6.299520722029151e-06, + "logits/chosen": -1.5136005878448486, + "logits/rejected": -2.8339526653289795, + "logps/chosen": -218.53359985351562, + "logps/rejected": -370.9459533691406, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.01589822769165, + "rewards/margins": 5.698190689086914, + "rewards/rejected": -10.714088439941406, + "step": 10700 + }, + { + "epoch": 1.66, + "learning_rate": 6.298787281498003e-06, + "logits/chosen": -2.0492281913757324, + "logits/rejected": -3.05916690826416, + "logps/chosen": -162.23974609375, + "logps/rejected": -479.81231689453125, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.690561294555664, + "rewards/margins": 8.762792587280273, + "rewards/rejected": -15.453353881835938, + "step": 10701 + }, + { + "epoch": 1.66, + "learning_rate": 6.298053840966855e-06, + "logits/chosen": -2.950331211090088, + "logits/rejected": -2.5120925903320312, + "logps/chosen": -430.11859130859375, + "logps/rejected": -458.86456298828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9334697723388672, + "rewards/margins": 9.469886779785156, + "rewards/rejected": -11.403356552124023, + "step": 10702 + }, + { + "epoch": 1.66, + "learning_rate": 6.297320400435708e-06, + "logits/chosen": -2.8121178150177, + "logits/rejected": -2.757031202316284, + "logps/chosen": -490.60528564453125, + "logps/rejected": -346.88482666015625, + "loss": 0.0593, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.448996067047119, + "rewards/margins": 5.024888515472412, + "rewards/rejected": -11.473884582519531, + "step": 10703 + }, + { + "epoch": 1.66, + "learning_rate": 6.29658695990456e-06, + "logits/chosen": -1.3196814060211182, + "logits/rejected": -2.704375982284546, + "logps/chosen": -144.36911010742188, + "logps/rejected": -357.8723449707031, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.584100723266602, + "rewards/margins": 7.677849769592285, + "rewards/rejected": -13.261951446533203, + "step": 10704 + }, + { + "epoch": 1.66, + "learning_rate": 6.295853519373412e-06, + "logits/chosen": -2.5500850677490234, + "logits/rejected": -1.8793317079544067, + "logps/chosen": -173.17861938476562, + "logps/rejected": -250.7931365966797, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.13112735748291, + "rewards/margins": 8.032417297363281, + "rewards/rejected": -15.163544654846191, + "step": 10705 + }, + { + "epoch": 1.67, + "learning_rate": 6.295120078842264e-06, + "logits/chosen": -2.079254150390625, + "logits/rejected": -3.0156641006469727, + "logps/chosen": -92.71543884277344, + "logps/rejected": -201.13809204101562, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.34644889831543, + "rewards/margins": 8.449344635009766, + "rewards/rejected": -12.795793533325195, + "step": 10706 + }, + { + "epoch": 1.67, + "learning_rate": 6.294386638311116e-06, + "logits/chosen": -3.005605697631836, + "logits/rejected": -1.7747381925582886, + "logps/chosen": -521.3919677734375, + "logps/rejected": -278.13232421875, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.907960891723633, + "rewards/margins": 5.31722354888916, + "rewards/rejected": -10.225183486938477, + "step": 10707 + }, + { + "epoch": 1.67, + "learning_rate": 6.293653197779968e-06, + "logits/chosen": -2.826209306716919, + "logits/rejected": -1.7481294870376587, + "logps/chosen": -954.519775390625, + "logps/rejected": -423.1839904785156, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.291749954223633, + "rewards/margins": 5.8488311767578125, + "rewards/rejected": -13.140581130981445, + "step": 10708 + }, + { + "epoch": 1.67, + "learning_rate": 6.29291975724882e-06, + "logits/chosen": -3.0531177520751953, + "logits/rejected": -1.951951503753662, + "logps/chosen": -471.9828796386719, + "logps/rejected": -426.1539001464844, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.453943729400635, + "rewards/margins": 6.641737461090088, + "rewards/rejected": -12.095681190490723, + "step": 10709 + }, + { + "epoch": 1.67, + "learning_rate": 6.292186316717672e-06, + "logits/chosen": -2.6846232414245605, + "logits/rejected": -1.9752936363220215, + "logps/chosen": -845.7360229492188, + "logps/rejected": -565.9722900390625, + "loss": 0.0715, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.887262344360352, + "rewards/margins": 2.603724718093872, + "rewards/rejected": -10.490986824035645, + "step": 10710 + }, + { + "epoch": 1.67, + "learning_rate": 6.291452876186524e-06, + "logits/chosen": -3.169619083404541, + "logits/rejected": -1.7333863973617554, + "logps/chosen": -389.5777587890625, + "logps/rejected": -137.63113403320312, + "loss": 0.1253, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.753125190734863, + "rewards/margins": 3.205096483230591, + "rewards/rejected": -7.958221435546875, + "step": 10711 + }, + { + "epoch": 1.67, + "learning_rate": 6.2907194356553766e-06, + "logits/chosen": -2.986905813217163, + "logits/rejected": -1.6711328029632568, + "logps/chosen": -430.4598693847656, + "logps/rejected": -315.5436706542969, + "loss": 0.031, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.117204666137695, + "rewards/margins": 4.033565521240234, + "rewards/rejected": -12.15077018737793, + "step": 10712 + }, + { + "epoch": 1.67, + "learning_rate": 6.2899859951242284e-06, + "logits/chosen": -1.7597261667251587, + "logits/rejected": -3.0024490356445312, + "logps/chosen": -129.47805786132812, + "logps/rejected": -288.7688903808594, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.118760108947754, + "rewards/margins": 6.840049743652344, + "rewards/rejected": -12.958809852600098, + "step": 10713 + }, + { + "epoch": 1.67, + "learning_rate": 6.28925255459308e-06, + "logits/chosen": -2.8669469356536865, + "logits/rejected": -1.4174144268035889, + "logps/chosen": -390.93280029296875, + "logps/rejected": -389.92767333984375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.754009246826172, + "rewards/margins": 10.368597030639648, + "rewards/rejected": -15.12260627746582, + "step": 10714 + }, + { + "epoch": 1.67, + "learning_rate": 6.288519114061932e-06, + "logits/chosen": -1.4774901866912842, + "logits/rejected": -2.4475743770599365, + "logps/chosen": -272.27752685546875, + "logps/rejected": -452.900634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2045392990112305, + "rewards/margins": 11.633955001831055, + "rewards/rejected": -15.838495254516602, + "step": 10715 + }, + { + "epoch": 1.67, + "learning_rate": 6.287785673530785e-06, + "logits/chosen": -2.7766003608703613, + "logits/rejected": -2.1957361698150635, + "logps/chosen": -227.97317504882812, + "logps/rejected": -255.9715118408203, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.222370624542236, + "rewards/margins": 8.006852149963379, + "rewards/rejected": -14.229223251342773, + "step": 10716 + }, + { + "epoch": 1.67, + "learning_rate": 6.287052232999637e-06, + "logits/chosen": -1.4885653257369995, + "logits/rejected": -2.9164726734161377, + "logps/chosen": -242.69833374023438, + "logps/rejected": -572.1845703125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5076587200164795, + "rewards/margins": 9.366814613342285, + "rewards/rejected": -12.874473571777344, + "step": 10717 + }, + { + "epoch": 1.67, + "learning_rate": 6.2863187924684895e-06, + "logits/chosen": -2.9862120151519775, + "logits/rejected": -2.061574697494507, + "logps/chosen": -480.02215576171875, + "logps/rejected": -409.6730651855469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.479037284851074, + "rewards/margins": 9.884603500366211, + "rewards/rejected": -14.363641738891602, + "step": 10718 + }, + { + "epoch": 1.67, + "learning_rate": 6.285585351937341e-06, + "logits/chosen": -1.7536557912826538, + "logits/rejected": -2.6120126247406006, + "logps/chosen": -150.42364501953125, + "logps/rejected": -311.09552001953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9700732231140137, + "rewards/margins": 9.568109512329102, + "rewards/rejected": -13.538182258605957, + "step": 10719 + }, + { + "epoch": 1.67, + "learning_rate": 6.284851911406193e-06, + "logits/chosen": -1.7313697338104248, + "logits/rejected": -2.6706273555755615, + "logps/chosen": -286.41375732421875, + "logps/rejected": -411.4972229003906, + "loss": 0.0335, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4871721267700195, + "rewards/margins": 7.500094890594482, + "rewards/rejected": -13.987266540527344, + "step": 10720 + }, + { + "epoch": 1.67, + "learning_rate": 6.284118470875046e-06, + "logits/chosen": -2.7128515243530273, + "logits/rejected": -3.0873732566833496, + "logps/chosen": -136.08319091796875, + "logps/rejected": -511.1853942871094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.008784294128418, + "rewards/margins": 12.647542953491211, + "rewards/rejected": -18.656326293945312, + "step": 10721 + }, + { + "epoch": 1.67, + "learning_rate": 6.283385030343898e-06, + "logits/chosen": -2.823486328125, + "logits/rejected": -3.0058295726776123, + "logps/chosen": -190.5166473388672, + "logps/rejected": -520.1290893554688, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.514759063720703, + "rewards/margins": 7.787632942199707, + "rewards/rejected": -11.30239200592041, + "step": 10722 + }, + { + "epoch": 1.67, + "learning_rate": 6.28265158981275e-06, + "logits/chosen": -2.37477970123291, + "logits/rejected": -2.8561463356018066, + "logps/chosen": -217.83633422851562, + "logps/rejected": -356.08807373046875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.497766494750977, + "rewards/margins": 6.294269561767578, + "rewards/rejected": -10.792036056518555, + "step": 10723 + }, + { + "epoch": 1.67, + "learning_rate": 6.281918149281602e-06, + "logits/chosen": -2.673098087310791, + "logits/rejected": -2.653935194015503, + "logps/chosen": -129.8616943359375, + "logps/rejected": -253.65570068359375, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.580743789672852, + "rewards/margins": 6.361170291900635, + "rewards/rejected": -12.941913604736328, + "step": 10724 + }, + { + "epoch": 1.67, + "learning_rate": 6.281184708750454e-06, + "logits/chosen": -2.8972160816192627, + "logits/rejected": -3.0720481872558594, + "logps/chosen": -235.91156005859375, + "logps/rejected": -357.7815246582031, + "loss": 1.0608, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.31664514541626, + "rewards/margins": 1.965574026107788, + "rewards/rejected": -7.282218933105469, + "step": 10725 + }, + { + "epoch": 1.67, + "learning_rate": 6.280451268219306e-06, + "logits/chosen": -2.9231951236724854, + "logits/rejected": -3.025480270385742, + "logps/chosen": -469.20782470703125, + "logps/rejected": -484.25018310546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.89211368560791, + "rewards/margins": 8.876087188720703, + "rewards/rejected": -11.76820182800293, + "step": 10726 + }, + { + "epoch": 1.67, + "learning_rate": 6.279717827688158e-06, + "logits/chosen": -3.037719488143921, + "logits/rejected": -2.275500774383545, + "logps/chosen": -615.10400390625, + "logps/rejected": -499.3003234863281, + "loss": 0.6339, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.397180557250977, + "rewards/margins": 0.22255897521972656, + "rewards/rejected": -7.619739532470703, + "step": 10727 + }, + { + "epoch": 1.67, + "learning_rate": 6.27898438715701e-06, + "logits/chosen": -2.315699338912964, + "logits/rejected": -2.3122596740722656, + "logps/chosen": -160.12808227539062, + "logps/rejected": -289.19610595703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6694672107696533, + "rewards/margins": 9.690877914428711, + "rewards/rejected": -13.360344886779785, + "step": 10728 + }, + { + "epoch": 1.67, + "learning_rate": 6.278250946625862e-06, + "logits/chosen": -2.421813726425171, + "logits/rejected": -3.123687744140625, + "logps/chosen": -203.0838165283203, + "logps/rejected": -642.013427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.03986930847168, + "rewards/margins": 10.63807487487793, + "rewards/rejected": -16.67794418334961, + "step": 10729 + }, + { + "epoch": 1.67, + "learning_rate": 6.277517506094715e-06, + "logits/chosen": -1.7812141180038452, + "logits/rejected": -2.9982235431671143, + "logps/chosen": -463.497314453125, + "logps/rejected": -485.3799133300781, + "loss": 0.1523, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.203424453735352, + "rewards/margins": 4.019886493682861, + "rewards/rejected": -13.223311424255371, + "step": 10730 + }, + { + "epoch": 1.67, + "learning_rate": 6.2767840655635665e-06, + "logits/chosen": -2.931872606277466, + "logits/rejected": -3.1205685138702393, + "logps/chosen": -54.98979949951172, + "logps/rejected": -293.8890380859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.521764039993286, + "rewards/margins": 8.867738723754883, + "rewards/rejected": -12.389503479003906, + "step": 10731 + }, + { + "epoch": 1.67, + "learning_rate": 6.276050625032418e-06, + "logits/chosen": -2.0320611000061035, + "logits/rejected": -2.132870674133301, + "logps/chosen": -276.6116638183594, + "logps/rejected": -582.9794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.166853904724121, + "rewards/margins": 18.065818786621094, + "rewards/rejected": -22.23267364501953, + "step": 10732 + }, + { + "epoch": 1.67, + "learning_rate": 6.27531718450127e-06, + "logits/chosen": -2.5252187252044678, + "logits/rejected": -3.00809383392334, + "logps/chosen": -303.4014892578125, + "logps/rejected": -411.70367431640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.714874744415283, + "rewards/margins": 9.04977035522461, + "rewards/rejected": -12.76464557647705, + "step": 10733 + }, + { + "epoch": 1.67, + "learning_rate": 6.274583743970123e-06, + "logits/chosen": -2.768732786178589, + "logits/rejected": -3.103837251663208, + "logps/chosen": -51.77944564819336, + "logps/rejected": -220.65524291992188, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5737757682800293, + "rewards/margins": 8.549613952636719, + "rewards/rejected": -12.123390197753906, + "step": 10734 + }, + { + "epoch": 1.67, + "learning_rate": 6.273850303438976e-06, + "logits/chosen": -2.070174217224121, + "logits/rejected": -2.688906192779541, + "logps/chosen": -365.7060546875, + "logps/rejected": -580.81005859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.733301162719727, + "rewards/margins": 11.207754135131836, + "rewards/rejected": -15.941055297851562, + "step": 10735 + }, + { + "epoch": 1.67, + "learning_rate": 6.2731168629078276e-06, + "logits/chosen": -2.359247922897339, + "logits/rejected": -2.815516471862793, + "logps/chosen": -406.16058349609375, + "logps/rejected": -572.5798950195312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.50431227684021, + "rewards/margins": 9.387310028076172, + "rewards/rejected": -12.891621589660645, + "step": 10736 + }, + { + "epoch": 1.67, + "learning_rate": 6.2723834223766794e-06, + "logits/chosen": -2.8458139896392822, + "logits/rejected": -2.7944986820220947, + "logps/chosen": -324.0813293457031, + "logps/rejected": -523.581787109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.945916175842285, + "rewards/margins": 8.509347915649414, + "rewards/rejected": -14.455265045166016, + "step": 10737 + }, + { + "epoch": 1.67, + "learning_rate": 6.271649981845531e-06, + "logits/chosen": -2.4477269649505615, + "logits/rejected": -2.755403518676758, + "logps/chosen": -396.3860168457031, + "logps/rejected": -434.6237487792969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.325456380844116, + "rewards/margins": 10.352846145629883, + "rewards/rejected": -13.678302764892578, + "step": 10738 + }, + { + "epoch": 1.67, + "learning_rate": 6.270916541314384e-06, + "logits/chosen": -1.8093928098678589, + "logits/rejected": -2.538085699081421, + "logps/chosen": -93.96393585205078, + "logps/rejected": -326.5185546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8906002044677734, + "rewards/margins": 11.348649978637695, + "rewards/rejected": -15.239250183105469, + "step": 10739 + }, + { + "epoch": 1.67, + "learning_rate": 6.270183100783236e-06, + "logits/chosen": -1.3897041082382202, + "logits/rejected": -2.7283506393432617, + "logps/chosen": -218.69271850585938, + "logps/rejected": -483.1126708984375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.559047698974609, + "rewards/margins": 7.577218055725098, + "rewards/rejected": -13.136265754699707, + "step": 10740 + }, + { + "epoch": 1.67, + "learning_rate": 6.269449660252088e-06, + "logits/chosen": -2.9979870319366455, + "logits/rejected": -1.6154729127883911, + "logps/chosen": -471.3325500488281, + "logps/rejected": -125.9158706665039, + "loss": 0.5666, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.060701370239258, + "rewards/margins": 0.664858341217041, + "rewards/rejected": -5.725559234619141, + "step": 10741 + }, + { + "epoch": 1.67, + "learning_rate": 6.26871621972094e-06, + "logits/chosen": -2.5611610412597656, + "logits/rejected": -2.9147047996520996, + "logps/chosen": -149.74691772460938, + "logps/rejected": -290.59716796875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.645015239715576, + "rewards/margins": 8.04453182220459, + "rewards/rejected": -11.689547538757324, + "step": 10742 + }, + { + "epoch": 1.67, + "learning_rate": 6.267982779189792e-06, + "logits/chosen": -2.768061876296997, + "logits/rejected": -2.495461940765381, + "logps/chosen": -418.6811828613281, + "logps/rejected": -566.8069458007812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4664745330810547, + "rewards/margins": 11.470952987670898, + "rewards/rejected": -13.937427520751953, + "step": 10743 + }, + { + "epoch": 1.67, + "learning_rate": 6.267249338658644e-06, + "logits/chosen": -2.2974259853363037, + "logits/rejected": -2.697218179702759, + "logps/chosen": -253.17532348632812, + "logps/rejected": -491.5196533203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.568751335144043, + "rewards/margins": 10.93669605255127, + "rewards/rejected": -16.505447387695312, + "step": 10744 + }, + { + "epoch": 1.67, + "learning_rate": 6.266515898127496e-06, + "logits/chosen": -1.8750165700912476, + "logits/rejected": -2.7339351177215576, + "logps/chosen": -174.00584411621094, + "logps/rejected": -277.8560791015625, + "loss": 0.6194, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.526067733764648, + "rewards/margins": 5.467512130737305, + "rewards/rejected": -10.993579864501953, + "step": 10745 + }, + { + "epoch": 1.67, + "learning_rate": 6.265782457596348e-06, + "logits/chosen": -1.3936874866485596, + "logits/rejected": -2.9029929637908936, + "logps/chosen": -73.2418212890625, + "logps/rejected": -299.931884765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9972569942474365, + "rewards/margins": 9.892423629760742, + "rewards/rejected": -12.889680862426758, + "step": 10746 + }, + { + "epoch": 1.67, + "learning_rate": 6.2650490170652e-06, + "logits/chosen": -1.0655218362808228, + "logits/rejected": -2.91477108001709, + "logps/chosen": -134.37362670898438, + "logps/rejected": -404.14581298828125, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.651544094085693, + "rewards/margins": 8.271883964538574, + "rewards/rejected": -15.92342758178711, + "step": 10747 + }, + { + "epoch": 1.67, + "learning_rate": 6.264315576534053e-06, + "logits/chosen": -2.9236032962799072, + "logits/rejected": -2.40338134765625, + "logps/chosen": -132.23886108398438, + "logps/rejected": -207.03421020507812, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.617377758026123, + "rewards/margins": 9.177827835083008, + "rewards/rejected": -14.795206069946289, + "step": 10748 + }, + { + "epoch": 1.67, + "learning_rate": 6.2635821360029045e-06, + "logits/chosen": -2.8438000679016113, + "logits/rejected": -3.1548149585723877, + "logps/chosen": -222.70574951171875, + "logps/rejected": -235.54769897460938, + "loss": 0.1526, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.79289436340332, + "rewards/margins": 5.9396514892578125, + "rewards/rejected": -10.732545852661133, + "step": 10749 + }, + { + "epoch": 1.67, + "learning_rate": 6.262848695471756e-06, + "logits/chosen": -2.7667078971862793, + "logits/rejected": -1.6849493980407715, + "logps/chosen": -192.037109375, + "logps/rejected": -170.4492950439453, + "loss": 1.4374, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.261270523071289, + "rewards/margins": 2.8598337173461914, + "rewards/rejected": -11.12110424041748, + "step": 10750 + }, + { + "epoch": 1.67, + "learning_rate": 6.262115254940609e-06, + "logits/chosen": -0.9095508456230164, + "logits/rejected": -2.3041696548461914, + "logps/chosen": -76.68860626220703, + "logps/rejected": -430.41552734375, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3352508544921875, + "rewards/margins": 9.733875274658203, + "rewards/rejected": -16.06912612915039, + "step": 10751 + }, + { + "epoch": 1.67, + "learning_rate": 6.261381814409462e-06, + "logits/chosen": -2.774599313735962, + "logits/rejected": -2.6789467334747314, + "logps/chosen": -346.98516845703125, + "logps/rejected": -452.270751953125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.444012403488159, + "rewards/margins": 10.506570816040039, + "rewards/rejected": -12.950583457946777, + "step": 10752 + }, + { + "epoch": 1.67, + "learning_rate": 6.260648373878314e-06, + "logits/chosen": -2.3790950775146484, + "logits/rejected": -2.9387471675872803, + "logps/chosen": -388.5559387207031, + "logps/rejected": -549.4238891601562, + "loss": 0.0696, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.570162773132324, + "rewards/margins": 3.950695276260376, + "rewards/rejected": -10.520857810974121, + "step": 10753 + }, + { + "epoch": 1.67, + "learning_rate": 6.259914933347166e-06, + "logits/chosen": -1.2773147821426392, + "logits/rejected": -2.597795248031616, + "logps/chosen": -133.99575805664062, + "logps/rejected": -381.9144592285156, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.61772346496582, + "rewards/margins": 9.896743774414062, + "rewards/rejected": -17.514467239379883, + "step": 10754 + }, + { + "epoch": 1.67, + "learning_rate": 6.2591814928160175e-06, + "logits/chosen": -1.739030122756958, + "logits/rejected": -1.8668590784072876, + "logps/chosen": -326.1932678222656, + "logps/rejected": -423.3884582519531, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3970184326171875, + "rewards/margins": 11.413846015930176, + "rewards/rejected": -15.810863494873047, + "step": 10755 + }, + { + "epoch": 1.67, + "learning_rate": 6.25844805228487e-06, + "logits/chosen": -1.867922067642212, + "logits/rejected": -2.6003029346466064, + "logps/chosen": -170.20936584472656, + "logps/rejected": -375.44110107421875, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.418683052062988, + "rewards/margins": 10.606578826904297, + "rewards/rejected": -15.025261878967285, + "step": 10756 + }, + { + "epoch": 1.67, + "learning_rate": 6.257714611753722e-06, + "logits/chosen": -2.377504348754883, + "logits/rejected": -3.051193952560425, + "logps/chosen": -72.13832092285156, + "logps/rejected": -245.0062713623047, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.201491355895996, + "rewards/margins": 7.84211540222168, + "rewards/rejected": -12.04360580444336, + "step": 10757 + }, + { + "epoch": 1.67, + "learning_rate": 6.256981171222574e-06, + "logits/chosen": -2.1079585552215576, + "logits/rejected": -2.7213423252105713, + "logps/chosen": -87.26564025878906, + "logps/rejected": -300.4037170410156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.177816867828369, + "rewards/margins": 8.871017456054688, + "rewards/rejected": -13.048833847045898, + "step": 10758 + }, + { + "epoch": 1.67, + "learning_rate": 6.256247730691426e-06, + "logits/chosen": -2.2226998805999756, + "logits/rejected": -2.454669237136841, + "logps/chosen": -251.666748046875, + "logps/rejected": -391.5710144042969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.784270286560059, + "rewards/margins": 10.201875686645508, + "rewards/rejected": -16.98614501953125, + "step": 10759 + }, + { + "epoch": 1.67, + "learning_rate": 6.255514290160278e-06, + "logits/chosen": -2.4849913120269775, + "logits/rejected": -2.8171803951263428, + "logps/chosen": -107.36863708496094, + "logps/rejected": -355.6167907714844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.889734745025635, + "rewards/margins": 9.972524642944336, + "rewards/rejected": -15.862258911132812, + "step": 10760 + }, + { + "epoch": 1.67, + "learning_rate": 6.2547808496291305e-06, + "logits/chosen": -2.559694290161133, + "logits/rejected": -2.9611265659332275, + "logps/chosen": -209.85174560546875, + "logps/rejected": -330.51409912109375, + "loss": 0.0669, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.959835052490234, + "rewards/margins": 6.340453147888184, + "rewards/rejected": -11.300288200378418, + "step": 10761 + }, + { + "epoch": 1.67, + "learning_rate": 6.254047409097982e-06, + "logits/chosen": -2.9336025714874268, + "logits/rejected": -1.3285962343215942, + "logps/chosen": -694.7318115234375, + "logps/rejected": -386.556640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3501415252685547, + "rewards/margins": 10.507923126220703, + "rewards/rejected": -13.858064651489258, + "step": 10762 + }, + { + "epoch": 1.67, + "learning_rate": 6.253313968566834e-06, + "logits/chosen": -2.959627151489258, + "logits/rejected": -3.0286340713500977, + "logps/chosen": -441.6409912109375, + "logps/rejected": -493.19287109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.227433204650879, + "rewards/margins": 8.489059448242188, + "rewards/rejected": -13.71649169921875, + "step": 10763 + }, + { + "epoch": 1.67, + "learning_rate": 6.252580528035686e-06, + "logits/chosen": -1.769277572631836, + "logits/rejected": -3.0236129760742188, + "logps/chosen": -159.47412109375, + "logps/rejected": -462.9598693847656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.459874153137207, + "rewards/margins": 8.793566703796387, + "rewards/rejected": -14.253440856933594, + "step": 10764 + }, + { + "epoch": 1.67, + "learning_rate": 6.251847087504539e-06, + "logits/chosen": -2.7128777503967285, + "logits/rejected": -2.841641664505005, + "logps/chosen": -143.5426788330078, + "logps/rejected": -247.91299438476562, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.125903606414795, + "rewards/margins": 4.889235496520996, + "rewards/rejected": -9.015138626098633, + "step": 10765 + }, + { + "epoch": 1.67, + "learning_rate": 6.251113646973391e-06, + "logits/chosen": -2.909952163696289, + "logits/rejected": -2.179161310195923, + "logps/chosen": -676.2470703125, + "logps/rejected": -524.3153076171875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.240114688873291, + "rewards/margins": 6.426313400268555, + "rewards/rejected": -11.666427612304688, + "step": 10766 + }, + { + "epoch": 1.67, + "learning_rate": 6.2503802064422426e-06, + "logits/chosen": -2.8938872814178467, + "logits/rejected": -2.5177931785583496, + "logps/chosen": -549.1076049804688, + "logps/rejected": -131.88430786132812, + "loss": 2.0434, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.417108535766602, + "rewards/margins": 1.5538709163665771, + "rewards/rejected": -9.970979690551758, + "step": 10767 + }, + { + "epoch": 1.67, + "learning_rate": 6.249646765911095e-06, + "logits/chosen": -3.138986110687256, + "logits/rejected": -2.9800827503204346, + "logps/chosen": -469.1798095703125, + "logps/rejected": -748.2047729492188, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.346362113952637, + "rewards/margins": 9.221925735473633, + "rewards/rejected": -16.568286895751953, + "step": 10768 + }, + { + "epoch": 1.67, + "learning_rate": 6.248913325379947e-06, + "logits/chosen": -1.8740333318710327, + "logits/rejected": -2.7273688316345215, + "logps/chosen": -247.5059051513672, + "logps/rejected": -557.776611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.917468070983887, + "rewards/margins": 11.813613891601562, + "rewards/rejected": -18.731082916259766, + "step": 10769 + }, + { + "epoch": 1.67, + "learning_rate": 6.2481798848488e-06, + "logits/chosen": -2.965090036392212, + "logits/rejected": -3.215078115463257, + "logps/chosen": -252.77845764160156, + "logps/rejected": -382.98162841796875, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.657965660095215, + "rewards/margins": 5.12617301940918, + "rewards/rejected": -9.784138679504395, + "step": 10770 + }, + { + "epoch": 1.68, + "learning_rate": 6.247446444317652e-06, + "logits/chosen": -1.3718323707580566, + "logits/rejected": -2.838761329650879, + "logps/chosen": -49.14337158203125, + "logps/rejected": -405.7675476074219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.901085615158081, + "rewards/margins": 10.25604248046875, + "rewards/rejected": -14.15712833404541, + "step": 10771 + }, + { + "epoch": 1.68, + "learning_rate": 6.246713003786504e-06, + "logits/chosen": -0.9548751711845398, + "logits/rejected": -3.0171356201171875, + "logps/chosen": -123.83230590820312, + "logps/rejected": -378.5756530761719, + "loss": 0.377, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.775463104248047, + "rewards/margins": 3.051450252532959, + "rewards/rejected": -11.826912879943848, + "step": 10772 + }, + { + "epoch": 1.68, + "learning_rate": 6.2459795632553555e-06, + "logits/chosen": -2.525012493133545, + "logits/rejected": -2.992295026779175, + "logps/chosen": -301.57208251953125, + "logps/rejected": -416.2511901855469, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.119692325592041, + "rewards/margins": 11.859258651733398, + "rewards/rejected": -17.97895050048828, + "step": 10773 + }, + { + "epoch": 1.68, + "learning_rate": 6.245246122724208e-06, + "logits/chosen": -2.351031541824341, + "logits/rejected": -2.8605856895446777, + "logps/chosen": -148.52801513671875, + "logps/rejected": -436.63739013671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.617276191711426, + "rewards/margins": 11.35521125793457, + "rewards/rejected": -15.972487449645996, + "step": 10774 + }, + { + "epoch": 1.68, + "learning_rate": 6.24451268219306e-06, + "logits/chosen": -2.302335023880005, + "logits/rejected": -1.2485136985778809, + "logps/chosen": -156.60379028320312, + "logps/rejected": -203.48129272460938, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9164533615112305, + "rewards/margins": 8.2833890914917, + "rewards/rejected": -13.19984245300293, + "step": 10775 + }, + { + "epoch": 1.68, + "learning_rate": 6.243779241661912e-06, + "logits/chosen": -2.817517042160034, + "logits/rejected": -1.6000486612319946, + "logps/chosen": -140.2838134765625, + "logps/rejected": -166.2912139892578, + "loss": 0.5286, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.885310649871826, + "rewards/margins": 4.956368923187256, + "rewards/rejected": -9.841679573059082, + "step": 10776 + }, + { + "epoch": 1.68, + "learning_rate": 6.243045801130764e-06, + "logits/chosen": -3.063699245452881, + "logits/rejected": -2.9182567596435547, + "logps/chosen": -760.5684204101562, + "logps/rejected": -752.5091552734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.239636421203613, + "rewards/margins": 8.658256530761719, + "rewards/rejected": -13.897892951965332, + "step": 10777 + }, + { + "epoch": 1.68, + "learning_rate": 6.242312360599616e-06, + "logits/chosen": -3.186810255050659, + "logits/rejected": -2.8751981258392334, + "logps/chosen": -204.39578247070312, + "logps/rejected": -271.360595703125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.047339916229248, + "rewards/margins": 7.249285697937012, + "rewards/rejected": -11.296625137329102, + "step": 10778 + }, + { + "epoch": 1.68, + "learning_rate": 6.2415789200684685e-06, + "logits/chosen": -2.036423683166504, + "logits/rejected": -2.9746196269989014, + "logps/chosen": -269.69195556640625, + "logps/rejected": -421.00616455078125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.240720510482788, + "rewards/margins": 8.829349517822266, + "rewards/rejected": -12.070070266723633, + "step": 10779 + }, + { + "epoch": 1.68, + "learning_rate": 6.24084547953732e-06, + "logits/chosen": -2.9693024158477783, + "logits/rejected": -2.759787082672119, + "logps/chosen": -162.0849609375, + "logps/rejected": -272.31640625, + "loss": 0.3231, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.509580612182617, + "rewards/margins": 3.443350076675415, + "rewards/rejected": -10.952930450439453, + "step": 10780 + }, + { + "epoch": 1.68, + "learning_rate": 6.240112039006172e-06, + "logits/chosen": -2.3614253997802734, + "logits/rejected": -2.818026304244995, + "logps/chosen": -348.83050537109375, + "logps/rejected": -460.6901550292969, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.779672622680664, + "rewards/margins": 6.3784332275390625, + "rewards/rejected": -12.158105850219727, + "step": 10781 + }, + { + "epoch": 1.68, + "learning_rate": 6.239378598475024e-06, + "logits/chosen": -2.514683723449707, + "logits/rejected": -2.6287424564361572, + "logps/chosen": -256.8570251464844, + "logps/rejected": -440.9840087890625, + "loss": 0.0782, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.85051965713501, + "rewards/margins": 10.332669258117676, + "rewards/rejected": -16.183189392089844, + "step": 10782 + }, + { + "epoch": 1.68, + "learning_rate": 6.238645157943877e-06, + "logits/chosen": -1.6523947715759277, + "logits/rejected": -2.744152784347534, + "logps/chosen": -86.57958221435547, + "logps/rejected": -196.8966522216797, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.393853187561035, + "rewards/margins": 4.0564374923706055, + "rewards/rejected": -11.45029067993164, + "step": 10783 + }, + { + "epoch": 1.68, + "learning_rate": 6.237911717412729e-06, + "logits/chosen": -2.9324910640716553, + "logits/rejected": -3.127314567565918, + "logps/chosen": -477.435546875, + "logps/rejected": -609.0948486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3467366695404053, + "rewards/margins": 11.557254791259766, + "rewards/rejected": -14.90399169921875, + "step": 10784 + }, + { + "epoch": 1.68, + "learning_rate": 6.237178276881581e-06, + "logits/chosen": -1.281492829322815, + "logits/rejected": -2.366762399673462, + "logps/chosen": -293.2963562011719, + "logps/rejected": -676.62548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.71271276473999, + "rewards/margins": 13.863208770751953, + "rewards/rejected": -19.5759220123291, + "step": 10785 + }, + { + "epoch": 1.68, + "learning_rate": 6.236444836350433e-06, + "logits/chosen": -2.954355239868164, + "logits/rejected": -3.0733416080474854, + "logps/chosen": -95.67318725585938, + "logps/rejected": -116.84141540527344, + "loss": 3.4808, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.919156074523926, + "rewards/margins": 0.7302520275115967, + "rewards/rejected": -7.649408340454102, + "step": 10786 + }, + { + "epoch": 1.68, + "learning_rate": 6.235711395819285e-06, + "logits/chosen": -2.514281988143921, + "logits/rejected": -1.9085044860839844, + "logps/chosen": -224.66590881347656, + "logps/rejected": -220.95852661132812, + "loss": 0.2758, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.09415340423584, + "rewards/margins": 3.4425771236419678, + "rewards/rejected": -11.53672981262207, + "step": 10787 + }, + { + "epoch": 1.68, + "learning_rate": 6.234977955288138e-06, + "logits/chosen": -2.8821115493774414, + "logits/rejected": -1.9938478469848633, + "logps/chosen": -844.366943359375, + "logps/rejected": -470.621337890625, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.070633888244629, + "rewards/margins": 4.738266944885254, + "rewards/rejected": -12.808900833129883, + "step": 10788 + }, + { + "epoch": 1.68, + "learning_rate": 6.23424451475699e-06, + "logits/chosen": -3.068192720413208, + "logits/rejected": -2.35863995552063, + "logps/chosen": -210.5887451171875, + "logps/rejected": -372.42559814453125, + "loss": 0.2367, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.427339553833008, + "rewards/margins": 6.102903366088867, + "rewards/rejected": -12.530242919921875, + "step": 10789 + }, + { + "epoch": 1.68, + "learning_rate": 6.233511074225842e-06, + "logits/chosen": -1.7599365711212158, + "logits/rejected": -2.8404030799865723, + "logps/chosen": -99.4341049194336, + "logps/rejected": -321.97088623046875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.426051139831543, + "rewards/margins": 6.133542060852051, + "rewards/rejected": -10.559593200683594, + "step": 10790 + }, + { + "epoch": 1.68, + "learning_rate": 6.2327776336946936e-06, + "logits/chosen": -1.8481789827346802, + "logits/rejected": -2.245777130126953, + "logps/chosen": -451.3468017578125, + "logps/rejected": -769.3670654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.968321800231934, + "rewards/margins": 13.74280834197998, + "rewards/rejected": -18.711130142211914, + "step": 10791 + }, + { + "epoch": 1.68, + "learning_rate": 6.232044193163546e-06, + "logits/chosen": -1.7603915929794312, + "logits/rejected": -2.5894908905029297, + "logps/chosen": -309.81561279296875, + "logps/rejected": -423.7444763183594, + "loss": 1.5659, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.139955520629883, + "rewards/margins": 3.8363771438598633, + "rewards/rejected": -13.976332664489746, + "step": 10792 + }, + { + "epoch": 1.68, + "learning_rate": 6.231310752632398e-06, + "logits/chosen": -1.8252490758895874, + "logits/rejected": -2.7982993125915527, + "logps/chosen": -204.25645446777344, + "logps/rejected": -432.9397277832031, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.871142625808716, + "rewards/margins": 7.113290786743164, + "rewards/rejected": -9.9844331741333, + "step": 10793 + }, + { + "epoch": 1.68, + "learning_rate": 6.23057731210125e-06, + "logits/chosen": -3.0482289791107178, + "logits/rejected": -3.251934051513672, + "logps/chosen": -80.61607360839844, + "logps/rejected": -231.56243896484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.436162233352661, + "rewards/margins": 9.216127395629883, + "rewards/rejected": -12.652290344238281, + "step": 10794 + }, + { + "epoch": 1.68, + "learning_rate": 6.229843871570102e-06, + "logits/chosen": -2.8301584720611572, + "logits/rejected": -3.058032512664795, + "logps/chosen": -82.38070678710938, + "logps/rejected": -308.095947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.913481712341309, + "rewards/margins": 9.98559856414795, + "rewards/rejected": -16.899080276489258, + "step": 10795 + }, + { + "epoch": 1.68, + "learning_rate": 6.229110431038955e-06, + "logits/chosen": -2.5985636711120605, + "logits/rejected": -1.6679511070251465, + "logps/chosen": -308.2825622558594, + "logps/rejected": -136.58499145507812, + "loss": 1.6699, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.684548377990723, + "rewards/margins": -1.3879849910736084, + "rewards/rejected": -7.296563148498535, + "step": 10796 + }, + { + "epoch": 1.68, + "learning_rate": 6.2283769905078065e-06, + "logits/chosen": -3.1376984119415283, + "logits/rejected": -3.0089328289031982, + "logps/chosen": -165.65809631347656, + "logps/rejected": -236.47177124023438, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.311765670776367, + "rewards/margins": 6.059858322143555, + "rewards/rejected": -10.371623992919922, + "step": 10797 + }, + { + "epoch": 1.68, + "learning_rate": 6.227643549976658e-06, + "logits/chosen": -2.4689598083496094, + "logits/rejected": -2.686875104904175, + "logps/chosen": -155.19461059570312, + "logps/rejected": -361.753173828125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.052062034606934, + "rewards/margins": 6.596942901611328, + "rewards/rejected": -12.649005889892578, + "step": 10798 + }, + { + "epoch": 1.68, + "learning_rate": 6.22691010944551e-06, + "logits/chosen": -1.6256639957427979, + "logits/rejected": -2.6642532348632812, + "logps/chosen": -64.89772033691406, + "logps/rejected": -391.6360778808594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.974172115325928, + "rewards/margins": 11.426274299621582, + "rewards/rejected": -16.400447845458984, + "step": 10799 + }, + { + "epoch": 1.68, + "learning_rate": 6.226176668914362e-06, + "logits/chosen": -1.5738000869750977, + "logits/rejected": -2.960299491882324, + "logps/chosen": -266.43505859375, + "logps/rejected": -557.1182861328125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2375969886779785, + "rewards/margins": 10.5272216796875, + "rewards/rejected": -15.764819145202637, + "step": 10800 + }, + { + "epoch": 1.68, + "learning_rate": 6.225443228383215e-06, + "logits/chosen": -0.7659547328948975, + "logits/rejected": -2.380990982055664, + "logps/chosen": -324.6671142578125, + "logps/rejected": -614.3934326171875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.828030586242676, + "rewards/margins": 6.767824172973633, + "rewards/rejected": -12.595853805541992, + "step": 10801 + }, + { + "epoch": 1.68, + "learning_rate": 6.224709787852067e-06, + "logits/chosen": -0.8068387508392334, + "logits/rejected": -2.871279239654541, + "logps/chosen": -134.5562286376953, + "logps/rejected": -507.5694885253906, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.326340675354004, + "rewards/margins": 5.639671325683594, + "rewards/rejected": -12.966012954711914, + "step": 10802 + }, + { + "epoch": 1.68, + "learning_rate": 6.2239763473209195e-06, + "logits/chosen": -2.9249866008758545, + "logits/rejected": -2.6614890098571777, + "logps/chosen": -601.9149169921875, + "logps/rejected": -569.640380859375, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.150759696960449, + "rewards/margins": 7.941662788391113, + "rewards/rejected": -12.092422485351562, + "step": 10803 + }, + { + "epoch": 1.68, + "learning_rate": 6.223242906789771e-06, + "logits/chosen": -2.7672154903411865, + "logits/rejected": -2.6005024909973145, + "logps/chosen": -206.9625244140625, + "logps/rejected": -213.82940673828125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.943226337432861, + "rewards/margins": 7.372160911560059, + "rewards/rejected": -13.315387725830078, + "step": 10804 + }, + { + "epoch": 1.68, + "learning_rate": 6.222509466258624e-06, + "logits/chosen": -2.838934898376465, + "logits/rejected": -3.037902593612671, + "logps/chosen": -64.5689697265625, + "logps/rejected": -184.65878295898438, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.734228134155273, + "rewards/margins": 4.951696872711182, + "rewards/rejected": -9.685924530029297, + "step": 10805 + }, + { + "epoch": 1.68, + "learning_rate": 6.221776025727476e-06, + "logits/chosen": -2.895681619644165, + "logits/rejected": -3.127983808517456, + "logps/chosen": -78.38606262207031, + "logps/rejected": -239.51930236816406, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.402082443237305, + "rewards/margins": 4.96668815612793, + "rewards/rejected": -11.368770599365234, + "step": 10806 + }, + { + "epoch": 1.68, + "learning_rate": 6.221042585196328e-06, + "logits/chosen": -2.450967788696289, + "logits/rejected": -2.0001235008239746, + "logps/chosen": -415.82586669921875, + "logps/rejected": -503.211181640625, + "loss": 2.86, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.306051731109619, + "rewards/margins": 6.584376335144043, + "rewards/rejected": -13.89042854309082, + "step": 10807 + }, + { + "epoch": 1.68, + "learning_rate": 6.22030914466518e-06, + "logits/chosen": -2.0690624713897705, + "logits/rejected": -2.569113254547119, + "logps/chosen": -234.45123291015625, + "logps/rejected": -373.7689208984375, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.399211883544922, + "rewards/margins": 5.546717166900635, + "rewards/rejected": -10.945928573608398, + "step": 10808 + }, + { + "epoch": 1.68, + "learning_rate": 6.219575704134032e-06, + "logits/chosen": -2.8020312786102295, + "logits/rejected": -1.911876916885376, + "logps/chosen": -351.8091125488281, + "logps/rejected": -290.0733337402344, + "loss": 0.6518, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.366899490356445, + "rewards/margins": 3.949331283569336, + "rewards/rejected": -14.316230773925781, + "step": 10809 + }, + { + "epoch": 1.68, + "learning_rate": 6.218842263602884e-06, + "logits/chosen": -1.2239350080490112, + "logits/rejected": -2.7392966747283936, + "logps/chosen": -131.9979705810547, + "logps/rejected": -309.5543518066406, + "loss": 0.7812, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.496725082397461, + "rewards/margins": 3.8696820735931396, + "rewards/rejected": -9.36640739440918, + "step": 10810 + }, + { + "epoch": 1.68, + "learning_rate": 6.218108823071736e-06, + "logits/chosen": -0.5443431735038757, + "logits/rejected": -2.562047243118286, + "logps/chosen": -93.74928283691406, + "logps/rejected": -435.5346374511719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.205997467041016, + "rewards/margins": 10.0753812789917, + "rewards/rejected": -15.281377792358398, + "step": 10811 + }, + { + "epoch": 1.68, + "learning_rate": 6.217375382540588e-06, + "logits/chosen": -2.1686625480651855, + "logits/rejected": -2.6777572631835938, + "logps/chosen": -193.98150634765625, + "logps/rejected": -337.29364013671875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.577178001403809, + "rewards/margins": 7.499943733215332, + "rewards/rejected": -12.07712173461914, + "step": 10812 + }, + { + "epoch": 1.68, + "learning_rate": 6.21664194200944e-06, + "logits/chosen": -2.8468315601348877, + "logits/rejected": -3.1874215602874756, + "logps/chosen": -78.1723861694336, + "logps/rejected": -291.4934387207031, + "loss": 0.1192, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.972193241119385, + "rewards/margins": 3.231907844543457, + "rewards/rejected": -10.2041015625, + "step": 10813 + }, + { + "epoch": 1.68, + "learning_rate": 6.215908501478293e-06, + "logits/chosen": -2.7350893020629883, + "logits/rejected": -2.3488404750823975, + "logps/chosen": -173.8700714111328, + "logps/rejected": -326.4113464355469, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1965184211730957, + "rewards/margins": 9.179515838623047, + "rewards/rejected": -12.3760347366333, + "step": 10814 + }, + { + "epoch": 1.68, + "learning_rate": 6.215175060947145e-06, + "logits/chosen": -2.8943216800689697, + "logits/rejected": -1.9317705631256104, + "logps/chosen": -524.3251342773438, + "logps/rejected": -492.41754150390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.280388355255127, + "rewards/margins": 8.085262298583984, + "rewards/rejected": -14.36565113067627, + "step": 10815 + }, + { + "epoch": 1.68, + "learning_rate": 6.2144416204159965e-06, + "logits/chosen": -2.833752155303955, + "logits/rejected": -2.2243754863739014, + "logps/chosen": -438.3521728515625, + "logps/rejected": -360.2693176269531, + "loss": 1.2255, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.6383771896362305, + "rewards/margins": 2.0512983798980713, + "rewards/rejected": -9.689676284790039, + "step": 10816 + }, + { + "epoch": 1.68, + "learning_rate": 6.213708179884848e-06, + "logits/chosen": -2.4394543170928955, + "logits/rejected": -2.9257445335388184, + "logps/chosen": -794.395751953125, + "logps/rejected": -716.5534057617188, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.377108573913574, + "rewards/margins": 7.8208465576171875, + "rewards/rejected": -13.197955131530762, + "step": 10817 + }, + { + "epoch": 1.68, + "learning_rate": 6.2129747393537e-06, + "logits/chosen": -2.8923966884613037, + "logits/rejected": -2.1833598613739014, + "logps/chosen": -132.2782745361328, + "logps/rejected": -205.9815673828125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0966691970825195, + "rewards/margins": 7.480419635772705, + "rewards/rejected": -12.577089309692383, + "step": 10818 + }, + { + "epoch": 1.68, + "learning_rate": 6.212241298822553e-06, + "logits/chosen": -2.7253499031066895, + "logits/rejected": -3.017603635787964, + "logps/chosen": -209.05967712402344, + "logps/rejected": -204.88540649414062, + "loss": 0.6237, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.078243255615234, + "rewards/margins": 2.420199155807495, + "rewards/rejected": -9.498441696166992, + "step": 10819 + }, + { + "epoch": 1.68, + "learning_rate": 6.211507858291406e-06, + "logits/chosen": -2.399998426437378, + "logits/rejected": -2.510960817337036, + "logps/chosen": -182.7914581298828, + "logps/rejected": -302.2563171386719, + "loss": 0.2495, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.934267520904541, + "rewards/margins": 4.572105884552002, + "rewards/rejected": -11.506373405456543, + "step": 10820 + }, + { + "epoch": 1.68, + "learning_rate": 6.2107744177602575e-06, + "logits/chosen": -2.0178945064544678, + "logits/rejected": -2.8199820518493652, + "logps/chosen": -301.708740234375, + "logps/rejected": -603.10888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.667543411254883, + "rewards/margins": 13.741390228271484, + "rewards/rejected": -16.408933639526367, + "step": 10821 + }, + { + "epoch": 1.68, + "learning_rate": 6.2100409772291094e-06, + "logits/chosen": -2.9004435539245605, + "logits/rejected": -2.542787790298462, + "logps/chosen": -430.5037536621094, + "logps/rejected": -283.65850830078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.643172264099121, + "rewards/margins": 8.144450187683105, + "rewards/rejected": -12.787622451782227, + "step": 10822 + }, + { + "epoch": 1.68, + "learning_rate": 6.209307536697962e-06, + "logits/chosen": -2.8120617866516113, + "logits/rejected": -1.6515426635742188, + "logps/chosen": -398.89117431640625, + "logps/rejected": -354.99298095703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.412389278411865, + "rewards/margins": 9.02727222442627, + "rewards/rejected": -13.439661026000977, + "step": 10823 + }, + { + "epoch": 1.68, + "learning_rate": 6.208574096166814e-06, + "logits/chosen": -2.362053155899048, + "logits/rejected": -2.9235646724700928, + "logps/chosen": -179.32333374023438, + "logps/rejected": -260.53277587890625, + "loss": 0.0251, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.209817886352539, + "rewards/margins": 4.173003196716309, + "rewards/rejected": -8.382820129394531, + "step": 10824 + }, + { + "epoch": 1.68, + "learning_rate": 6.207840655635666e-06, + "logits/chosen": -2.0669033527374268, + "logits/rejected": -3.0831615924835205, + "logps/chosen": -196.5427703857422, + "logps/rejected": -199.47186279296875, + "loss": 4.4349, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.910103797912598, + "rewards/margins": 0.16279888153076172, + "rewards/rejected": -9.07290267944336, + "step": 10825 + }, + { + "epoch": 1.68, + "learning_rate": 6.207107215104518e-06, + "logits/chosen": -1.3701800107955933, + "logits/rejected": -2.6951637268066406, + "logps/chosen": -188.49717712402344, + "logps/rejected": -367.6661071777344, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.66606330871582, + "rewards/margins": 6.560173988342285, + "rewards/rejected": -11.226237297058105, + "step": 10826 + }, + { + "epoch": 1.68, + "learning_rate": 6.20637377457337e-06, + "logits/chosen": -3.0649170875549316, + "logits/rejected": -2.4212286472320557, + "logps/chosen": -738.7463989257812, + "logps/rejected": -369.75347900390625, + "loss": 0.8003, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.587050437927246, + "rewards/margins": 1.0797374248504639, + "rewards/rejected": -8.666788101196289, + "step": 10827 + }, + { + "epoch": 1.68, + "learning_rate": 6.205640334042222e-06, + "logits/chosen": -2.8068771362304688, + "logits/rejected": -2.3783907890319824, + "logps/chosen": -538.7816162109375, + "logps/rejected": -473.61358642578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.505641460418701, + "rewards/margins": 8.332942008972168, + "rewards/rejected": -11.838582992553711, + "step": 10828 + }, + { + "epoch": 1.68, + "learning_rate": 6.204906893511074e-06, + "logits/chosen": -2.587895631790161, + "logits/rejected": -2.57576060295105, + "logps/chosen": -123.90419006347656, + "logps/rejected": -392.54449462890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9260573387146, + "rewards/margins": 11.145374298095703, + "rewards/rejected": -17.07143211364746, + "step": 10829 + }, + { + "epoch": 1.68, + "learning_rate": 6.204173452979926e-06, + "logits/chosen": -2.6555113792419434, + "logits/rejected": -2.5813097953796387, + "logps/chosen": -223.24778747558594, + "logps/rejected": -343.95965576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.301053762435913, + "rewards/margins": 10.358118057250977, + "rewards/rejected": -12.659172058105469, + "step": 10830 + }, + { + "epoch": 1.68, + "learning_rate": 6.203440012448778e-06, + "logits/chosen": -2.601320743560791, + "logits/rejected": -2.6097066402435303, + "logps/chosen": -204.21124267578125, + "logps/rejected": -493.8901062011719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.191006183624268, + "rewards/margins": 12.76681137084961, + "rewards/rejected": -16.95781898498535, + "step": 10831 + }, + { + "epoch": 1.68, + "learning_rate": 6.202706571917631e-06, + "logits/chosen": -3.1463730335235596, + "logits/rejected": -2.881011724472046, + "logps/chosen": -252.9616241455078, + "logps/rejected": -171.96669006347656, + "loss": 0.4989, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.786567687988281, + "rewards/margins": 4.558185577392578, + "rewards/rejected": -11.34475326538086, + "step": 10832 + }, + { + "epoch": 1.68, + "learning_rate": 6.201973131386483e-06, + "logits/chosen": -0.6139704585075378, + "logits/rejected": -1.5273901224136353, + "logps/chosen": -257.6929931640625, + "logps/rejected": -482.2957763671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.74755859375, + "rewards/margins": 8.784591674804688, + "rewards/rejected": -13.532150268554688, + "step": 10833 + }, + { + "epoch": 1.68, + "learning_rate": 6.2012396908553345e-06, + "logits/chosen": -2.562107801437378, + "logits/rejected": -3.074864149093628, + "logps/chosen": -76.17517852783203, + "logps/rejected": -311.4600524902344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.647017002105713, + "rewards/margins": 10.402828216552734, + "rewards/rejected": -15.049844741821289, + "step": 10834 + }, + { + "epoch": 1.69, + "learning_rate": 6.200506250324186e-06, + "logits/chosen": -2.703226327896118, + "logits/rejected": -2.733081340789795, + "logps/chosen": -147.86892700195312, + "logps/rejected": -243.478515625, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.236688613891602, + "rewards/margins": 5.829682350158691, + "rewards/rejected": -10.066370964050293, + "step": 10835 + }, + { + "epoch": 1.69, + "learning_rate": 6.199772809793039e-06, + "logits/chosen": -1.9948707818984985, + "logits/rejected": -2.9276137351989746, + "logps/chosen": -463.27337646484375, + "logps/rejected": -627.2805786132812, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9482955932617188, + "rewards/margins": 10.342924118041992, + "rewards/rejected": -14.291219711303711, + "step": 10836 + }, + { + "epoch": 1.69, + "learning_rate": 6.199039369261892e-06, + "logits/chosen": -1.187829613685608, + "logits/rejected": -2.6551427841186523, + "logps/chosen": -160.0547332763672, + "logps/rejected": -559.814208984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.910312652587891, + "rewards/margins": 12.954954147338867, + "rewards/rejected": -20.865264892578125, + "step": 10837 + }, + { + "epoch": 1.69, + "learning_rate": 6.198305928730744e-06, + "logits/chosen": -2.5420784950256348, + "logits/rejected": -2.9866902828216553, + "logps/chosen": -827.4480590820312, + "logps/rejected": -609.9234008789062, + "loss": 0.4002, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.298890590667725, + "rewards/margins": 4.403502464294434, + "rewards/rejected": -10.702392578125, + "step": 10838 + }, + { + "epoch": 1.69, + "learning_rate": 6.197572488199596e-06, + "logits/chosen": -2.7341268062591553, + "logits/rejected": -2.764227867126465, + "logps/chosen": -480.10906982421875, + "logps/rejected": -625.8084106445312, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.211817741394043, + "rewards/margins": 7.166879653930664, + "rewards/rejected": -14.378697395324707, + "step": 10839 + }, + { + "epoch": 1.69, + "learning_rate": 6.1968390476684475e-06, + "logits/chosen": -1.5233656167984009, + "logits/rejected": -2.9367055892944336, + "logps/chosen": -140.4309539794922, + "logps/rejected": -407.0628356933594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.86299467086792, + "rewards/margins": 12.431612014770508, + "rewards/rejected": -16.294607162475586, + "step": 10840 + }, + { + "epoch": 1.69, + "learning_rate": 6.1961056071373e-06, + "logits/chosen": -3.058811902999878, + "logits/rejected": -2.7590227127075195, + "logps/chosen": -252.6912841796875, + "logps/rejected": -332.633056640625, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8855957984924316, + "rewards/margins": 6.437593936920166, + "rewards/rejected": -10.323189735412598, + "step": 10841 + }, + { + "epoch": 1.69, + "learning_rate": 6.195372166606152e-06, + "logits/chosen": -2.162705659866333, + "logits/rejected": -2.9764626026153564, + "logps/chosen": -204.017578125, + "logps/rejected": -456.0030822753906, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.758474349975586, + "rewards/margins": 5.926266670227051, + "rewards/rejected": -12.68474006652832, + "step": 10842 + }, + { + "epoch": 1.69, + "learning_rate": 6.194638726075004e-06, + "logits/chosen": -2.8777599334716797, + "logits/rejected": -1.7524042129516602, + "logps/chosen": -385.529296875, + "logps/rejected": -310.191650390625, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.974491119384766, + "rewards/margins": 6.318230152130127, + "rewards/rejected": -12.292720794677734, + "step": 10843 + }, + { + "epoch": 1.69, + "learning_rate": 6.193905285543856e-06, + "logits/chosen": -2.2433888912200928, + "logits/rejected": -2.7582151889801025, + "logps/chosen": -339.7318115234375, + "logps/rejected": -299.6748046875, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.05795431137085, + "rewards/margins": 5.408960342407227, + "rewards/rejected": -9.466915130615234, + "step": 10844 + }, + { + "epoch": 1.69, + "learning_rate": 6.1931718450127086e-06, + "logits/chosen": -1.410078763961792, + "logits/rejected": -2.9226527214050293, + "logps/chosen": -112.70979309082031, + "logps/rejected": -532.2300415039062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.249577283859253, + "rewards/margins": 11.545299530029297, + "rewards/rejected": -14.794876098632812, + "step": 10845 + }, + { + "epoch": 1.69, + "learning_rate": 6.1924384044815604e-06, + "logits/chosen": -1.4479342699050903, + "logits/rejected": -2.9776203632354736, + "logps/chosen": -471.301513671875, + "logps/rejected": -687.354248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0958147048950195, + "rewards/margins": 11.95751953125, + "rewards/rejected": -16.053333282470703, + "step": 10846 + }, + { + "epoch": 1.69, + "learning_rate": 6.191704963950412e-06, + "logits/chosen": -2.329322576522827, + "logits/rejected": -3.002756118774414, + "logps/chosen": -78.99607849121094, + "logps/rejected": -314.3106689453125, + "loss": 0.0301, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0219621658325195, + "rewards/margins": 4.360415458679199, + "rewards/rejected": -9.382377624511719, + "step": 10847 + }, + { + "epoch": 1.69, + "learning_rate": 6.190971523419264e-06, + "logits/chosen": -2.7153589725494385, + "logits/rejected": -3.1263880729675293, + "logps/chosen": -235.74838256835938, + "logps/rejected": -280.2957458496094, + "loss": 0.269, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.729808807373047, + "rewards/margins": 3.6144769191741943, + "rewards/rejected": -10.34428596496582, + "step": 10848 + }, + { + "epoch": 1.69, + "learning_rate": 6.190238082888116e-06, + "logits/chosen": -1.9378464221954346, + "logits/rejected": -2.5873279571533203, + "logps/chosen": -139.46826171875, + "logps/rejected": -171.02191162109375, + "loss": 0.5806, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.595712184906006, + "rewards/margins": 4.435064315795898, + "rewards/rejected": -9.030776977539062, + "step": 10849 + }, + { + "epoch": 1.69, + "learning_rate": 6.189504642356969e-06, + "logits/chosen": -1.770270586013794, + "logits/rejected": -2.4776127338409424, + "logps/chosen": -185.71389770507812, + "logps/rejected": -362.9461669921875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.502279758453369, + "rewards/margins": 6.7534661293029785, + "rewards/rejected": -13.255745887756348, + "step": 10850 + }, + { + "epoch": 1.69, + "learning_rate": 6.188771201825821e-06, + "logits/chosen": -3.0176894664764404, + "logits/rejected": -1.9818958044052124, + "logps/chosen": -456.3582763671875, + "logps/rejected": -324.15289306640625, + "loss": 0.056, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.108464241027832, + "rewards/margins": 4.042663097381592, + "rewards/rejected": -9.151126861572266, + "step": 10851 + }, + { + "epoch": 1.69, + "learning_rate": 6.1880377612946726e-06, + "logits/chosen": -2.5062758922576904, + "logits/rejected": -3.1282718181610107, + "logps/chosen": -288.6109619140625, + "logps/rejected": -555.6947021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.91374135017395, + "rewards/margins": 10.627666473388672, + "rewards/rejected": -13.54140853881836, + "step": 10852 + }, + { + "epoch": 1.69, + "learning_rate": 6.187304320763525e-06, + "logits/chosen": -2.868256092071533, + "logits/rejected": -2.8862993717193604, + "logps/chosen": -216.49139404296875, + "logps/rejected": -340.6185302734375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0309367179870605, + "rewards/margins": 6.719176292419434, + "rewards/rejected": -13.750112533569336, + "step": 10853 + }, + { + "epoch": 1.69, + "learning_rate": 6.186570880232378e-06, + "logits/chosen": -2.2990622520446777, + "logits/rejected": -2.951281785964966, + "logps/chosen": -268.78448486328125, + "logps/rejected": -346.50830078125, + "loss": 0.4826, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.826457977294922, + "rewards/margins": 5.639410972595215, + "rewards/rejected": -10.465869903564453, + "step": 10854 + }, + { + "epoch": 1.69, + "learning_rate": 6.18583743970123e-06, + "logits/chosen": -2.1133224964141846, + "logits/rejected": -2.9293625354766846, + "logps/chosen": -821.5205688476562, + "logps/rejected": -777.625, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.43167781829834, + "rewards/margins": 5.000396728515625, + "rewards/rejected": -12.432073593139648, + "step": 10855 + }, + { + "epoch": 1.69, + "learning_rate": 6.185103999170082e-06, + "logits/chosen": -2.6994268894195557, + "logits/rejected": -3.1220543384552, + "logps/chosen": -121.98603057861328, + "logps/rejected": -256.2966613769531, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4733901023864746, + "rewards/margins": 7.194944381713867, + "rewards/rejected": -9.6683349609375, + "step": 10856 + }, + { + "epoch": 1.69, + "learning_rate": 6.184370558638934e-06, + "logits/chosen": -1.224411129951477, + "logits/rejected": -2.5656254291534424, + "logps/chosen": -123.7713851928711, + "logps/rejected": -381.8094482421875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.782879829406738, + "rewards/margins": 8.48421859741211, + "rewards/rejected": -16.267099380493164, + "step": 10857 + }, + { + "epoch": 1.69, + "learning_rate": 6.1836371181077855e-06, + "logits/chosen": -2.757268190383911, + "logits/rejected": -2.993987798690796, + "logps/chosen": -124.55989837646484, + "logps/rejected": -259.1806640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.783140182495117, + "rewards/margins": 8.041946411132812, + "rewards/rejected": -13.82508659362793, + "step": 10858 + }, + { + "epoch": 1.69, + "learning_rate": 6.182903677576638e-06, + "logits/chosen": -1.862962007522583, + "logits/rejected": -2.7215614318847656, + "logps/chosen": -246.89404296875, + "logps/rejected": -417.2010192871094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.210412979125977, + "rewards/margins": 9.111614227294922, + "rewards/rejected": -14.322027206420898, + "step": 10859 + }, + { + "epoch": 1.69, + "learning_rate": 6.18217023704549e-06, + "logits/chosen": -3.060314893722534, + "logits/rejected": -2.0076000690460205, + "logps/chosen": -304.42279052734375, + "logps/rejected": -131.7209014892578, + "loss": 0.3069, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9273505210876465, + "rewards/margins": 5.86406135559082, + "rewards/rejected": -9.791412353515625, + "step": 10860 + }, + { + "epoch": 1.69, + "learning_rate": 6.181436796514342e-06, + "logits/chosen": -2.968489646911621, + "logits/rejected": -1.6752820014953613, + "logps/chosen": -265.48431396484375, + "logps/rejected": -173.84730529785156, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.245726585388184, + "rewards/margins": 6.153572082519531, + "rewards/rejected": -11.399298667907715, + "step": 10861 + }, + { + "epoch": 1.69, + "learning_rate": 6.180703355983194e-06, + "logits/chosen": -2.090571880340576, + "logits/rejected": -2.683401107788086, + "logps/chosen": -132.3837890625, + "logps/rejected": -493.576416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.177910327911377, + "rewards/margins": 10.645990371704102, + "rewards/rejected": -17.82390022277832, + "step": 10862 + }, + { + "epoch": 1.69, + "learning_rate": 6.179969915452047e-06, + "logits/chosen": -2.409238338470459, + "logits/rejected": -2.719982385635376, + "logps/chosen": -153.2080535888672, + "logps/rejected": -266.03594970703125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9921393394470215, + "rewards/margins": 6.968210220336914, + "rewards/rejected": -11.960350036621094, + "step": 10863 + }, + { + "epoch": 1.69, + "learning_rate": 6.1792364749208985e-06, + "logits/chosen": -2.9093945026397705, + "logits/rejected": -1.0788031816482544, + "logps/chosen": -612.3316650390625, + "logps/rejected": -391.3834533691406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.624759674072266, + "rewards/margins": 10.894441604614258, + "rewards/rejected": -17.519201278686523, + "step": 10864 + }, + { + "epoch": 1.69, + "learning_rate": 6.17850303438975e-06, + "logits/chosen": -2.808487892150879, + "logits/rejected": -2.8780407905578613, + "logps/chosen": -483.05743408203125, + "logps/rejected": -501.5347900390625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.298840045928955, + "rewards/margins": 7.6747236251831055, + "rewards/rejected": -11.973564147949219, + "step": 10865 + }, + { + "epoch": 1.69, + "learning_rate": 6.177769593858602e-06, + "logits/chosen": -2.1192615032196045, + "logits/rejected": -2.8223206996917725, + "logps/chosen": -494.9986572265625, + "logps/rejected": -770.2606201171875, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.437850475311279, + "rewards/margins": 9.531243324279785, + "rewards/rejected": -15.969093322753906, + "step": 10866 + }, + { + "epoch": 1.69, + "learning_rate": 6.177036153327454e-06, + "logits/chosen": -1.4610003232955933, + "logits/rejected": -2.9808473587036133, + "logps/chosen": -204.96205139160156, + "logps/rejected": -616.7140502929688, + "loss": 0.2975, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.695895195007324, + "rewards/margins": 4.86500358581543, + "rewards/rejected": -12.560897827148438, + "step": 10867 + }, + { + "epoch": 1.69, + "learning_rate": 6.176302712796307e-06, + "logits/chosen": -2.098179340362549, + "logits/rejected": -2.9397964477539062, + "logps/chosen": -281.944091796875, + "logps/rejected": -346.8510437011719, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.544432163238525, + "rewards/margins": 6.164883613586426, + "rewards/rejected": -11.70931625366211, + "step": 10868 + }, + { + "epoch": 1.69, + "learning_rate": 6.175569272265159e-06, + "logits/chosen": -2.4996278285980225, + "logits/rejected": -3.0429556369781494, + "logps/chosen": -69.80366516113281, + "logps/rejected": -217.77005004882812, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.609258651733398, + "rewards/margins": 7.1803154945373535, + "rewards/rejected": -11.789573669433594, + "step": 10869 + }, + { + "epoch": 1.69, + "learning_rate": 6.1748358317340114e-06, + "logits/chosen": -1.5290226936340332, + "logits/rejected": -2.2909932136535645, + "logps/chosen": -511.36688232421875, + "logps/rejected": -525.2286376953125, + "loss": 0.7581, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.324942588806152, + "rewards/margins": 14.468873023986816, + "rewards/rejected": -20.79381561279297, + "step": 10870 + }, + { + "epoch": 1.69, + "learning_rate": 6.174102391202863e-06, + "logits/chosen": -1.5927926301956177, + "logits/rejected": -2.902630090713501, + "logps/chosen": -121.70597076416016, + "logps/rejected": -340.42767333984375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.160970687866211, + "rewards/margins": 6.343045234680176, + "rewards/rejected": -15.504015922546387, + "step": 10871 + }, + { + "epoch": 1.69, + "learning_rate": 6.173368950671716e-06, + "logits/chosen": -2.8763182163238525, + "logits/rejected": -2.740849018096924, + "logps/chosen": -215.46885681152344, + "logps/rejected": -310.6296081542969, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.553587436676025, + "rewards/margins": 7.535553932189941, + "rewards/rejected": -13.089141845703125, + "step": 10872 + }, + { + "epoch": 1.69, + "learning_rate": 6.172635510140568e-06, + "logits/chosen": -2.7659800052642822, + "logits/rejected": -2.2503199577331543, + "logps/chosen": -265.51904296875, + "logps/rejected": -395.1689758300781, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.67999267578125, + "rewards/margins": 8.010191917419434, + "rewards/rejected": -15.690183639526367, + "step": 10873 + }, + { + "epoch": 1.69, + "learning_rate": 6.17190206960942e-06, + "logits/chosen": -0.8153651356697083, + "logits/rejected": -2.6998164653778076, + "logps/chosen": -177.2884063720703, + "logps/rejected": -453.18414306640625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.020071029663086, + "rewards/margins": 11.019394874572754, + "rewards/rejected": -17.039464950561523, + "step": 10874 + }, + { + "epoch": 1.69, + "learning_rate": 6.171168629078272e-06, + "logits/chosen": -2.332530975341797, + "logits/rejected": -3.002866268157959, + "logps/chosen": -283.279541015625, + "logps/rejected": -697.462158203125, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.781068325042725, + "rewards/margins": 5.7559356689453125, + "rewards/rejected": -11.537004470825195, + "step": 10875 + }, + { + "epoch": 1.69, + "learning_rate": 6.1704351885471236e-06, + "logits/chosen": -2.4810619354248047, + "logits/rejected": -3.097107172012329, + "logps/chosen": -112.54910278320312, + "logps/rejected": -361.42279052734375, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.231766700744629, + "rewards/margins": 8.830041885375977, + "rewards/rejected": -15.061809539794922, + "step": 10876 + }, + { + "epoch": 1.69, + "learning_rate": 6.169701748015976e-06, + "logits/chosen": -2.2789297103881836, + "logits/rejected": -2.732877731323242, + "logps/chosen": -117.94462585449219, + "logps/rejected": -322.16131591796875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.156122207641602, + "rewards/margins": 6.441329002380371, + "rewards/rejected": -10.597451210021973, + "step": 10877 + }, + { + "epoch": 1.69, + "learning_rate": 6.168968307484828e-06, + "logits/chosen": -1.9154263734817505, + "logits/rejected": -2.8702080249786377, + "logps/chosen": -209.65707397460938, + "logps/rejected": -281.294677734375, + "loss": 0.6682, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.124704837799072, + "rewards/margins": 3.8054471015930176, + "rewards/rejected": -9.93015193939209, + "step": 10878 + }, + { + "epoch": 1.69, + "learning_rate": 6.16823486695368e-06, + "logits/chosen": -2.8008217811584473, + "logits/rejected": -2.3376002311706543, + "logps/chosen": -328.5116882324219, + "logps/rejected": -303.43896484375, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.717865943908691, + "rewards/margins": 5.856169700622559, + "rewards/rejected": -10.57403564453125, + "step": 10879 + }, + { + "epoch": 1.69, + "learning_rate": 6.167501426422532e-06, + "logits/chosen": -2.40042781829834, + "logits/rejected": -2.8136439323425293, + "logps/chosen": -235.95672607421875, + "logps/rejected": -383.95684814453125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.251702308654785, + "rewards/margins": 8.322559356689453, + "rewards/rejected": -13.574260711669922, + "step": 10880 + }, + { + "epoch": 1.69, + "learning_rate": 6.166767985891385e-06, + "logits/chosen": -2.618285894393921, + "logits/rejected": -3.07033371925354, + "logps/chosen": -106.9953842163086, + "logps/rejected": -309.53656005859375, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.038824558258057, + "rewards/margins": 4.521989345550537, + "rewards/rejected": -10.560813903808594, + "step": 10881 + }, + { + "epoch": 1.69, + "learning_rate": 6.1660345453602365e-06, + "logits/chosen": -3.0351431369781494, + "logits/rejected": -3.0000271797180176, + "logps/chosen": -176.39053344726562, + "logps/rejected": -461.63519287109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.149698257446289, + "rewards/margins": 10.504176139831543, + "rewards/rejected": -16.65387535095215, + "step": 10882 + }, + { + "epoch": 1.69, + "learning_rate": 6.165301104829088e-06, + "logits/chosen": -2.985245704650879, + "logits/rejected": -2.0798726081848145, + "logps/chosen": -349.8474426269531, + "logps/rejected": -284.16461181640625, + "loss": 0.2269, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2048563957214355, + "rewards/margins": 3.6428654193878174, + "rewards/rejected": -8.847722053527832, + "step": 10883 + }, + { + "epoch": 1.69, + "learning_rate": 6.16456766429794e-06, + "logits/chosen": -2.505082607269287, + "logits/rejected": -2.8878402709960938, + "logps/chosen": -319.6140441894531, + "logps/rejected": -286.677734375, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.728050231933594, + "rewards/margins": 5.658892631530762, + "rewards/rejected": -10.386942863464355, + "step": 10884 + }, + { + "epoch": 1.69, + "learning_rate": 6.163834223766792e-06, + "logits/chosen": -1.8999500274658203, + "logits/rejected": -3.163935422897339, + "logps/chosen": -348.1979064941406, + "logps/rejected": -443.59808349609375, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.735279083251953, + "rewards/margins": 5.116028785705566, + "rewards/rejected": -9.85130786895752, + "step": 10885 + }, + { + "epoch": 1.69, + "learning_rate": 6.163100783235645e-06, + "logits/chosen": -2.9201812744140625, + "logits/rejected": -2.685089588165283, + "logps/chosen": -502.68359375, + "logps/rejected": -489.821533203125, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1973443031311035, + "rewards/margins": 5.222258567810059, + "rewards/rejected": -10.41960334777832, + "step": 10886 + }, + { + "epoch": 1.69, + "learning_rate": 6.162367342704498e-06, + "logits/chosen": -2.207137107849121, + "logits/rejected": -3.0426735877990723, + "logps/chosen": -103.87557983398438, + "logps/rejected": -326.37445068359375, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1900787353515625, + "rewards/margins": 5.99819278717041, + "rewards/rejected": -11.188271522521973, + "step": 10887 + }, + { + "epoch": 1.69, + "learning_rate": 6.1616339021733495e-06, + "logits/chosen": -2.8511404991149902, + "logits/rejected": -2.8165440559387207, + "logps/chosen": -109.1700439453125, + "logps/rejected": -289.7546081542969, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.455148696899414, + "rewards/margins": 6.514948844909668, + "rewards/rejected": -11.970097541809082, + "step": 10888 + }, + { + "epoch": 1.69, + "learning_rate": 6.160900461642201e-06, + "logits/chosen": -2.999844551086426, + "logits/rejected": -2.0797622203826904, + "logps/chosen": -350.06683349609375, + "logps/rejected": -339.6745910644531, + "loss": 0.311, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.507317543029785, + "rewards/margins": 2.195268154144287, + "rewards/rejected": -10.702585220336914, + "step": 10889 + }, + { + "epoch": 1.69, + "learning_rate": 6.160167021111054e-06, + "logits/chosen": -2.164358139038086, + "logits/rejected": -2.802114486694336, + "logps/chosen": -139.8311004638672, + "logps/rejected": -184.8758087158203, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.932517051696777, + "rewards/margins": 5.809075355529785, + "rewards/rejected": -12.741592407226562, + "step": 10890 + }, + { + "epoch": 1.69, + "learning_rate": 6.159433580579906e-06, + "logits/chosen": -2.7067856788635254, + "logits/rejected": -3.0779783725738525, + "logps/chosen": -153.0583038330078, + "logps/rejected": -388.1315002441406, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3882951736450195, + "rewards/margins": 7.624218940734863, + "rewards/rejected": -14.012514114379883, + "step": 10891 + }, + { + "epoch": 1.69, + "learning_rate": 6.158700140048758e-06, + "logits/chosen": -2.8194215297698975, + "logits/rejected": -1.6099776029586792, + "logps/chosen": -209.1680145263672, + "logps/rejected": -253.59246826171875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.463912010192871, + "rewards/margins": 7.681357383728027, + "rewards/rejected": -13.145269393920898, + "step": 10892 + }, + { + "epoch": 1.69, + "learning_rate": 6.15796669951761e-06, + "logits/chosen": -2.626749277114868, + "logits/rejected": -3.026245355606079, + "logps/chosen": -286.20654296875, + "logps/rejected": -380.0133972167969, + "loss": 0.0889, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.916259288787842, + "rewards/margins": 5.806726455688477, + "rewards/rejected": -11.722986221313477, + "step": 10893 + }, + { + "epoch": 1.69, + "learning_rate": 6.1572332589864625e-06, + "logits/chosen": -3.082542896270752, + "logits/rejected": -3.137584686279297, + "logps/chosen": -155.9761199951172, + "logps/rejected": -387.37591552734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.891142845153809, + "rewards/margins": 9.257081985473633, + "rewards/rejected": -16.148223876953125, + "step": 10894 + }, + { + "epoch": 1.69, + "learning_rate": 6.156499818455314e-06, + "logits/chosen": -3.033864974975586, + "logits/rejected": -2.783057451248169, + "logps/chosen": -175.51222229003906, + "logps/rejected": -207.40658569335938, + "loss": 0.6751, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.346022129058838, + "rewards/margins": 3.936896562576294, + "rewards/rejected": -10.282918930053711, + "step": 10895 + }, + { + "epoch": 1.69, + "learning_rate": 6.155766377924166e-06, + "logits/chosen": -3.0489001274108887, + "logits/rejected": -2.5982167720794678, + "logps/chosen": -309.7098083496094, + "logps/rejected": -314.7576904296875, + "loss": 1.9609, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.506968975067139, + "rewards/margins": 3.036574363708496, + "rewards/rejected": -10.543542861938477, + "step": 10896 + }, + { + "epoch": 1.69, + "learning_rate": 6.155032937393018e-06, + "logits/chosen": -2.9565045833587646, + "logits/rejected": -2.921903133392334, + "logps/chosen": -423.30987548828125, + "logps/rejected": -352.18896484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7018463611602783, + "rewards/margins": 11.298015594482422, + "rewards/rejected": -12.999862670898438, + "step": 10897 + }, + { + "epoch": 1.69, + "learning_rate": 6.15429949686187e-06, + "logits/chosen": -2.3331985473632812, + "logits/rejected": -2.9555716514587402, + "logps/chosen": -163.3466339111328, + "logps/rejected": -317.9513244628906, + "loss": 0.052, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.652266502380371, + "rewards/margins": 7.079158306121826, + "rewards/rejected": -15.731424331665039, + "step": 10898 + }, + { + "epoch": 1.7, + "learning_rate": 6.153566056330723e-06, + "logits/chosen": -2.937469005584717, + "logits/rejected": -2.7498950958251953, + "logps/chosen": -338.45916748046875, + "logps/rejected": -383.57244873046875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1372761726379395, + "rewards/margins": 7.1298980712890625, + "rewards/rejected": -11.267173767089844, + "step": 10899 + }, + { + "epoch": 1.7, + "learning_rate": 6.1528326157995746e-06, + "logits/chosen": -2.82305908203125, + "logits/rejected": -2.149789333343506, + "logps/chosen": -607.5060424804688, + "logps/rejected": -466.76019287109375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.319241523742676, + "rewards/margins": 9.09060287475586, + "rewards/rejected": -13.409844398498535, + "step": 10900 + }, + { + "epoch": 1.7, + "learning_rate": 6.1520991752684264e-06, + "logits/chosen": -1.8683034181594849, + "logits/rejected": -2.952366590499878, + "logps/chosen": -152.37350463867188, + "logps/rejected": -328.4784240722656, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.222809791564941, + "rewards/margins": 8.577657699584961, + "rewards/rejected": -13.800467491149902, + "step": 10901 + }, + { + "epoch": 1.7, + "learning_rate": 6.151365734737278e-06, + "logits/chosen": -2.5426583290100098, + "logits/rejected": -3.047921657562256, + "logps/chosen": -128.64013671875, + "logps/rejected": -157.74937438964844, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.487163543701172, + "rewards/margins": 3.995504140853882, + "rewards/rejected": -9.482667922973633, + "step": 10902 + }, + { + "epoch": 1.7, + "learning_rate": 6.150632294206131e-06, + "logits/chosen": -2.776454448699951, + "logits/rejected": -2.8459627628326416, + "logps/chosen": -311.3812255859375, + "logps/rejected": -496.30352783203125, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.643751621246338, + "rewards/margins": 5.701321125030518, + "rewards/rejected": -9.345072746276855, + "step": 10903 + }, + { + "epoch": 1.7, + "learning_rate": 6.149898853674984e-06, + "logits/chosen": -2.5074524879455566, + "logits/rejected": -2.9401941299438477, + "logps/chosen": -79.057861328125, + "logps/rejected": -272.3590393066406, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.018168926239014, + "rewards/margins": 5.428807735443115, + "rewards/rejected": -11.446976661682129, + "step": 10904 + }, + { + "epoch": 1.7, + "learning_rate": 6.149165413143836e-06, + "logits/chosen": -2.8087174892425537, + "logits/rejected": -2.999044895172119, + "logps/chosen": -511.0712890625, + "logps/rejected": -698.6112060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.581458330154419, + "rewards/margins": 14.69371223449707, + "rewards/rejected": -15.275171279907227, + "step": 10905 + }, + { + "epoch": 1.7, + "learning_rate": 6.1484319726126875e-06, + "logits/chosen": -1.954178810119629, + "logits/rejected": -1.3426811695098877, + "logps/chosen": -425.3393859863281, + "logps/rejected": -479.10198974609375, + "loss": 2.448, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.502986907958984, + "rewards/margins": 0.9926896095275879, + "rewards/rejected": -9.495676040649414, + "step": 10906 + }, + { + "epoch": 1.7, + "learning_rate": 6.147698532081539e-06, + "logits/chosen": -2.559126138687134, + "logits/rejected": -2.9441962242126465, + "logps/chosen": -195.99563598632812, + "logps/rejected": -421.392333984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.395688533782959, + "rewards/margins": 10.87952995300293, + "rewards/rejected": -14.275218963623047, + "step": 10907 + }, + { + "epoch": 1.7, + "learning_rate": 6.146965091550392e-06, + "logits/chosen": -2.3572216033935547, + "logits/rejected": -2.6777706146240234, + "logps/chosen": -313.36602783203125, + "logps/rejected": -286.20550537109375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.252995491027832, + "rewards/margins": 7.733922004699707, + "rewards/rejected": -10.986917495727539, + "step": 10908 + }, + { + "epoch": 1.7, + "learning_rate": 6.146231651019244e-06, + "logits/chosen": -0.9548482894897461, + "logits/rejected": -2.8789448738098145, + "logps/chosen": -107.08485412597656, + "logps/rejected": -511.40032958984375, + "loss": 0.0304, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.212550163269043, + "rewards/margins": 5.363476753234863, + "rewards/rejected": -14.576026916503906, + "step": 10909 + }, + { + "epoch": 1.7, + "learning_rate": 6.145498210488096e-06, + "logits/chosen": -2.2360587120056152, + "logits/rejected": -2.8570261001586914, + "logps/chosen": -252.0281219482422, + "logps/rejected": -415.67462158203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.942351341247559, + "rewards/margins": 9.948083877563477, + "rewards/rejected": -14.890434265136719, + "step": 10910 + }, + { + "epoch": 1.7, + "learning_rate": 6.144764769956948e-06, + "logits/chosen": -2.0378365516662598, + "logits/rejected": -2.888125419616699, + "logps/chosen": -154.4286346435547, + "logps/rejected": -247.6793670654297, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.841317892074585, + "rewards/margins": 8.08304214477539, + "rewards/rejected": -11.924360275268555, + "step": 10911 + }, + { + "epoch": 1.7, + "learning_rate": 6.1440313294258005e-06, + "logits/chosen": -1.988621473312378, + "logits/rejected": -2.739100694656372, + "logps/chosen": -129.07247924804688, + "logps/rejected": -319.72161865234375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.145369529724121, + "rewards/margins": 6.126723766326904, + "rewards/rejected": -11.272092819213867, + "step": 10912 + }, + { + "epoch": 1.7, + "learning_rate": 6.143297888894652e-06, + "logits/chosen": -1.3338950872421265, + "logits/rejected": -2.8291335105895996, + "logps/chosen": -199.06996154785156, + "logps/rejected": -305.3890380859375, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.70937728881836, + "rewards/margins": 5.81640625, + "rewards/rejected": -14.52578353881836, + "step": 10913 + }, + { + "epoch": 1.7, + "learning_rate": 6.142564448363504e-06, + "logits/chosen": -2.2654330730438232, + "logits/rejected": -2.891129493713379, + "logps/chosen": -146.60089111328125, + "logps/rejected": -315.1905212402344, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7099385261535645, + "rewards/margins": 4.941684246063232, + "rewards/rejected": -11.651622772216797, + "step": 10914 + }, + { + "epoch": 1.7, + "learning_rate": 6.141831007832356e-06, + "logits/chosen": -2.360138177871704, + "logits/rejected": -2.7954037189483643, + "logps/chosen": -92.40623474121094, + "logps/rejected": -446.3214111328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.668134689331055, + "rewards/margins": 10.298892974853516, + "rewards/rejected": -16.96702766418457, + "step": 10915 + }, + { + "epoch": 1.7, + "learning_rate": 6.141097567301208e-06, + "logits/chosen": -2.897526979446411, + "logits/rejected": -1.7618606090545654, + "logps/chosen": -282.9498291015625, + "logps/rejected": -220.70054626464844, + "loss": 0.0651, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.981574058532715, + "rewards/margins": 2.7559590339660645, + "rewards/rejected": -8.737533569335938, + "step": 10916 + }, + { + "epoch": 1.7, + "learning_rate": 6.140364126770061e-06, + "logits/chosen": -2.94128155708313, + "logits/rejected": -2.407341957092285, + "logps/chosen": -785.0230102539062, + "logps/rejected": -420.5740966796875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4449663162231445, + "rewards/margins": 7.115665435791016, + "rewards/rejected": -14.560630798339844, + "step": 10917 + }, + { + "epoch": 1.7, + "learning_rate": 6.139630686238913e-06, + "logits/chosen": -2.1474010944366455, + "logits/rejected": -2.965130090713501, + "logps/chosen": -154.11471557617188, + "logps/rejected": -397.6907043457031, + "loss": 0.0321, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5302188396453857, + "rewards/margins": 7.591302871704102, + "rewards/rejected": -11.12152099609375, + "step": 10918 + }, + { + "epoch": 1.7, + "learning_rate": 6.1388972457077645e-06, + "logits/chosen": -2.709507465362549, + "logits/rejected": -2.8433053493499756, + "logps/chosen": -506.81317138671875, + "logps/rejected": -450.57269287109375, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.627551078796387, + "rewards/margins": 5.081112861633301, + "rewards/rejected": -11.708663940429688, + "step": 10919 + }, + { + "epoch": 1.7, + "learning_rate": 6.138163805176617e-06, + "logits/chosen": -2.020850658416748, + "logits/rejected": -2.7024624347686768, + "logps/chosen": -81.68081665039062, + "logps/rejected": -347.20550537109375, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.572266101837158, + "rewards/margins": 9.683279991149902, + "rewards/rejected": -15.255546569824219, + "step": 10920 + }, + { + "epoch": 1.7, + "learning_rate": 6.13743036464547e-06, + "logits/chosen": -2.8966784477233887, + "logits/rejected": -1.7123463153839111, + "logps/chosen": -207.27261352539062, + "logps/rejected": -155.93275451660156, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.082674026489258, + "rewards/margins": 6.326076030731201, + "rewards/rejected": -11.4087495803833, + "step": 10921 + }, + { + "epoch": 1.7, + "learning_rate": 6.136696924114322e-06, + "logits/chosen": -1.6587324142456055, + "logits/rejected": -2.8041317462921143, + "logps/chosen": -176.99612426757812, + "logps/rejected": -404.14715576171875, + "loss": 0.4951, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.746672630310059, + "rewards/margins": 5.9913225173950195, + "rewards/rejected": -12.737995147705078, + "step": 10922 + }, + { + "epoch": 1.7, + "learning_rate": 6.135963483583174e-06, + "logits/chosen": -2.8728435039520264, + "logits/rejected": -2.6352272033691406, + "logps/chosen": -593.33935546875, + "logps/rejected": -494.2214660644531, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.519986867904663, + "rewards/margins": 7.303975582122803, + "rewards/rejected": -10.823963165283203, + "step": 10923 + }, + { + "epoch": 1.7, + "learning_rate": 6.1352300430520256e-06, + "logits/chosen": -3.1025428771972656, + "logits/rejected": -2.9041197299957275, + "logps/chosen": -297.2671203613281, + "logps/rejected": -413.9768981933594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4859883785247803, + "rewards/margins": 13.175128936767578, + "rewards/rejected": -15.661117553710938, + "step": 10924 + }, + { + "epoch": 1.7, + "learning_rate": 6.1344966025208775e-06, + "logits/chosen": -2.2056140899658203, + "logits/rejected": -3.131420135498047, + "logps/chosen": -205.837890625, + "logps/rejected": -534.7373657226562, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.217129707336426, + "rewards/margins": 8.017207145690918, + "rewards/rejected": -14.234336853027344, + "step": 10925 + }, + { + "epoch": 1.7, + "learning_rate": 6.13376316198973e-06, + "logits/chosen": -2.4388325214385986, + "logits/rejected": -2.776733636856079, + "logps/chosen": -171.7384033203125, + "logps/rejected": -341.84271240234375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5344624519348145, + "rewards/margins": 7.696918964385986, + "rewards/rejected": -12.2313814163208, + "step": 10926 + }, + { + "epoch": 1.7, + "learning_rate": 6.133029721458582e-06, + "logits/chosen": -2.653080701828003, + "logits/rejected": -1.304979920387268, + "logps/chosen": -218.18919372558594, + "logps/rejected": -218.891357421875, + "loss": 0.0543, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.085558891296387, + "rewards/margins": 3.9266371726989746, + "rewards/rejected": -14.012195587158203, + "step": 10927 + }, + { + "epoch": 1.7, + "learning_rate": 6.132296280927434e-06, + "logits/chosen": -2.08388090133667, + "logits/rejected": -2.847158908843994, + "logps/chosen": -133.49887084960938, + "logps/rejected": -342.1532897949219, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4740777015686035, + "rewards/margins": 6.656409740447998, + "rewards/rejected": -13.130487442016602, + "step": 10928 + }, + { + "epoch": 1.7, + "learning_rate": 6.131562840396286e-06, + "logits/chosen": -2.7940409183502197, + "logits/rejected": -2.975689649581909, + "logps/chosen": -179.96127319335938, + "logps/rejected": -371.25189208984375, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0940470695495605, + "rewards/margins": 7.938765048980713, + "rewards/rejected": -11.032812118530273, + "step": 10929 + }, + { + "epoch": 1.7, + "learning_rate": 6.1308293998651385e-06, + "logits/chosen": -2.3993561267852783, + "logits/rejected": -2.905472755432129, + "logps/chosen": -217.3865966796875, + "logps/rejected": -436.680419921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.700210094451904, + "rewards/margins": 9.143932342529297, + "rewards/rejected": -13.84414291381836, + "step": 10930 + }, + { + "epoch": 1.7, + "learning_rate": 6.13009595933399e-06, + "logits/chosen": -3.0448997020721436, + "logits/rejected": -1.0452024936676025, + "logps/chosen": -409.751708984375, + "logps/rejected": -232.4908447265625, + "loss": 0.6552, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.895036697387695, + "rewards/margins": 2.0656731128692627, + "rewards/rejected": -8.960709571838379, + "step": 10931 + }, + { + "epoch": 1.7, + "learning_rate": 6.129362518802842e-06, + "logits/chosen": -1.237038254737854, + "logits/rejected": -2.7431235313415527, + "logps/chosen": -106.6419448852539, + "logps/rejected": -274.2899475097656, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.887041091918945, + "rewards/margins": 3.4231812953948975, + "rewards/rejected": -10.310222625732422, + "step": 10932 + }, + { + "epoch": 1.7, + "learning_rate": 6.128629078271694e-06, + "logits/chosen": -2.7642312049865723, + "logits/rejected": -2.4133217334747314, + "logps/chosen": -332.1436767578125, + "logps/rejected": -344.21881103515625, + "loss": 0.0556, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.189765930175781, + "rewards/margins": 3.925936222076416, + "rewards/rejected": -8.115701675415039, + "step": 10933 + }, + { + "epoch": 1.7, + "learning_rate": 6.127895637740546e-06, + "logits/chosen": -2.8234241008758545, + "logits/rejected": -2.9248268604278564, + "logps/chosen": -543.1863403320312, + "logps/rejected": -663.0433349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.478132247924805, + "rewards/margins": 11.011323928833008, + "rewards/rejected": -19.489456176757812, + "step": 10934 + }, + { + "epoch": 1.7, + "learning_rate": 6.127162197209399e-06, + "logits/chosen": -2.896257162094116, + "logits/rejected": -3.1046292781829834, + "logps/chosen": -114.73627471923828, + "logps/rejected": -205.40945434570312, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.924455165863037, + "rewards/margins": 5.550380706787109, + "rewards/rejected": -10.474836349487305, + "step": 10935 + }, + { + "epoch": 1.7, + "learning_rate": 6.126428756678251e-06, + "logits/chosen": -2.9378304481506348, + "logits/rejected": -1.9781266450881958, + "logps/chosen": -533.659423828125, + "logps/rejected": -441.5741271972656, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.332436561584473, + "rewards/margins": 7.826892852783203, + "rewards/rejected": -14.159329414367676, + "step": 10936 + }, + { + "epoch": 1.7, + "learning_rate": 6.125695316147103e-06, + "logits/chosen": -2.688403367996216, + "logits/rejected": -2.9953978061676025, + "logps/chosen": -156.95977783203125, + "logps/rejected": -350.7481689453125, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.11343240737915, + "rewards/margins": 8.37162971496582, + "rewards/rejected": -14.485061645507812, + "step": 10937 + }, + { + "epoch": 1.7, + "learning_rate": 6.124961875615955e-06, + "logits/chosen": -2.079794406890869, + "logits/rejected": -2.9588711261749268, + "logps/chosen": -128.08770751953125, + "logps/rejected": -354.6285400390625, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.59835147857666, + "rewards/margins": 6.004998207092285, + "rewards/rejected": -11.603349685668945, + "step": 10938 + }, + { + "epoch": 1.7, + "learning_rate": 6.124228435084808e-06, + "logits/chosen": -2.726071357727051, + "logits/rejected": -3.097869396209717, + "logps/chosen": -280.0343933105469, + "logps/rejected": -451.63714599609375, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0240654945373535, + "rewards/margins": 6.733617782592773, + "rewards/rejected": -11.757682800292969, + "step": 10939 + }, + { + "epoch": 1.7, + "learning_rate": 6.12349499455366e-06, + "logits/chosen": -2.2745444774627686, + "logits/rejected": -2.9881064891815186, + "logps/chosen": -749.1524658203125, + "logps/rejected": -674.5738525390625, + "loss": 1.4847, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.202834129333496, + "rewards/margins": 4.359635829925537, + "rewards/rejected": -13.562469482421875, + "step": 10940 + }, + { + "epoch": 1.7, + "learning_rate": 6.122761554022512e-06, + "logits/chosen": -2.5588624477386475, + "logits/rejected": -3.02243971824646, + "logps/chosen": -159.8920440673828, + "logps/rejected": -311.0788269042969, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2445549964904785, + "rewards/margins": 5.654539585113525, + "rewards/rejected": -11.899094581604004, + "step": 10941 + }, + { + "epoch": 1.7, + "learning_rate": 6.122028113491364e-06, + "logits/chosen": -1.565339207649231, + "logits/rejected": -3.0146644115448, + "logps/chosen": -122.31039428710938, + "logps/rejected": -325.1148681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.686317443847656, + "rewards/margins": 9.836179733276367, + "rewards/rejected": -14.522497177124023, + "step": 10942 + }, + { + "epoch": 1.7, + "learning_rate": 6.121294672960216e-06, + "logits/chosen": -2.096531391143799, + "logits/rejected": -3.167829751968384, + "logps/chosen": -303.08746337890625, + "logps/rejected": -583.1165771484375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.795870304107666, + "rewards/margins": 8.013711929321289, + "rewards/rejected": -13.809581756591797, + "step": 10943 + }, + { + "epoch": 1.7, + "learning_rate": 6.120561232429068e-06, + "logits/chosen": -2.618213653564453, + "logits/rejected": -2.900339365005493, + "logps/chosen": -169.54830932617188, + "logps/rejected": -291.02703857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.92678165435791, + "rewards/margins": 10.20045280456543, + "rewards/rejected": -16.127235412597656, + "step": 10944 + }, + { + "epoch": 1.7, + "learning_rate": 6.11982779189792e-06, + "logits/chosen": -1.3329874277114868, + "logits/rejected": -2.653521776199341, + "logps/chosen": -371.3131103515625, + "logps/rejected": -626.07470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.665267467498779, + "rewards/margins": 10.501368522644043, + "rewards/rejected": -17.166635513305664, + "step": 10945 + }, + { + "epoch": 1.7, + "learning_rate": 6.119094351366772e-06, + "logits/chosen": -2.865962266921997, + "logits/rejected": -3.0696446895599365, + "logps/chosen": -421.2205810546875, + "logps/rejected": -420.0006103515625, + "loss": 2.734, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.689178466796875, + "rewards/margins": 5.208800315856934, + "rewards/rejected": -13.897977828979492, + "step": 10946 + }, + { + "epoch": 1.7, + "learning_rate": 6.118360910835624e-06, + "logits/chosen": -2.180138349533081, + "logits/rejected": -2.744873523712158, + "logps/chosen": -291.20904541015625, + "logps/rejected": -370.0206298828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.636517524719238, + "rewards/margins": 7.679719924926758, + "rewards/rejected": -13.316237449645996, + "step": 10947 + }, + { + "epoch": 1.7, + "learning_rate": 6.117627470304477e-06, + "logits/chosen": -2.9935402870178223, + "logits/rejected": -2.830690383911133, + "logps/chosen": -228.2154541015625, + "logps/rejected": -329.81414794921875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2429094314575195, + "rewards/margins": 8.415449142456055, + "rewards/rejected": -14.65835952758789, + "step": 10948 + }, + { + "epoch": 1.7, + "learning_rate": 6.1168940297733285e-06, + "logits/chosen": -2.6039795875549316, + "logits/rejected": -3.0631539821624756, + "logps/chosen": -154.2041778564453, + "logps/rejected": -275.3204345703125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.811127662658691, + "rewards/margins": 5.717885971069336, + "rewards/rejected": -11.529013633728027, + "step": 10949 + }, + { + "epoch": 1.7, + "learning_rate": 6.11616058924218e-06, + "logits/chosen": -2.867959499359131, + "logits/rejected": -2.856720209121704, + "logps/chosen": -714.5952758789062, + "logps/rejected": -738.7901000976562, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.257426261901855, + "rewards/margins": 7.670618534088135, + "rewards/rejected": -16.92804527282715, + "step": 10950 + }, + { + "epoch": 1.7, + "learning_rate": 6.115427148711032e-06, + "logits/chosen": -2.4538509845733643, + "logits/rejected": -2.975511312484741, + "logps/chosen": -571.3262329101562, + "logps/rejected": -677.3876953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1098389625549316, + "rewards/margins": 8.091503143310547, + "rewards/rejected": -11.20134162902832, + "step": 10951 + }, + { + "epoch": 1.7, + "learning_rate": 6.114693708179885e-06, + "logits/chosen": -3.0954980850219727, + "logits/rejected": -3.0657317638397217, + "logps/chosen": -196.69549560546875, + "logps/rejected": -266.9981689453125, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6588737964630127, + "rewards/margins": 5.432692050933838, + "rewards/rejected": -8.09156608581543, + "step": 10952 + }, + { + "epoch": 1.7, + "learning_rate": 6.113960267648737e-06, + "logits/chosen": -2.9737303256988525, + "logits/rejected": -2.9066736698150635, + "logps/chosen": -571.548828125, + "logps/rejected": -552.5001831054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.497012615203857, + "rewards/margins": 11.471094131469727, + "rewards/rejected": -15.968106269836426, + "step": 10953 + }, + { + "epoch": 1.7, + "learning_rate": 6.1132268271175895e-06, + "logits/chosen": -2.667131185531616, + "logits/rejected": -3.088801383972168, + "logps/chosen": -173.72296142578125, + "logps/rejected": -337.1582946777344, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.96181583404541, + "rewards/margins": 9.145281791687012, + "rewards/rejected": -15.107097625732422, + "step": 10954 + }, + { + "epoch": 1.7, + "learning_rate": 6.1124933865864414e-06, + "logits/chosen": -2.3564674854278564, + "logits/rejected": -2.9442138671875, + "logps/chosen": -146.44415283203125, + "logps/rejected": -306.27471923828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7875285148620605, + "rewards/margins": 9.312185287475586, + "rewards/rejected": -14.099713325500488, + "step": 10955 + }, + { + "epoch": 1.7, + "learning_rate": 6.111759946055293e-06, + "logits/chosen": -1.2295196056365967, + "logits/rejected": -2.8092141151428223, + "logps/chosen": -234.80322265625, + "logps/rejected": -574.9603271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.315500259399414, + "rewards/margins": 10.9166259765625, + "rewards/rejected": -16.232126235961914, + "step": 10956 + }, + { + "epoch": 1.7, + "learning_rate": 6.111026505524146e-06, + "logits/chosen": -3.024156332015991, + "logits/rejected": -1.8055914640426636, + "logps/chosen": -465.88128662109375, + "logps/rejected": -387.0832824707031, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.571139335632324, + "rewards/margins": 7.394217491149902, + "rewards/rejected": -14.965356826782227, + "step": 10957 + }, + { + "epoch": 1.7, + "learning_rate": 6.110293064992998e-06, + "logits/chosen": -3.0509750843048096, + "logits/rejected": -3.0890953540802, + "logps/chosen": -347.6060791015625, + "logps/rejected": -342.44525146484375, + "loss": 2.6397, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.232343673706055, + "rewards/margins": 3.454845428466797, + "rewards/rejected": -11.687189102172852, + "step": 10958 + }, + { + "epoch": 1.7, + "learning_rate": 6.10955962446185e-06, + "logits/chosen": -2.238759756088257, + "logits/rejected": -2.6864116191864014, + "logps/chosen": -228.21450805664062, + "logps/rejected": -399.64019775390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.980104923248291, + "rewards/margins": 9.05251693725586, + "rewards/rejected": -16.032623291015625, + "step": 10959 + }, + { + "epoch": 1.7, + "learning_rate": 6.108826183930702e-06, + "logits/chosen": -2.551732063293457, + "logits/rejected": -2.7276217937469482, + "logps/chosen": -334.81732177734375, + "logps/rejected": -313.6534729003906, + "loss": 1.6597, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.672560214996338, + "rewards/margins": 1.5830628871917725, + "rewards/rejected": -9.255622863769531, + "step": 10960 + }, + { + "epoch": 1.7, + "learning_rate": 6.108092743399554e-06, + "logits/chosen": -2.8845832347869873, + "logits/rejected": -2.987985134124756, + "logps/chosen": -63.178985595703125, + "logps/rejected": -515.7945556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.696836471557617, + "rewards/margins": 16.462615966796875, + "rewards/rejected": -20.159452438354492, + "step": 10961 + }, + { + "epoch": 1.7, + "learning_rate": 6.107359302868406e-06, + "logits/chosen": -2.834969997406006, + "logits/rejected": -2.086862087249756, + "logps/chosen": -526.5899047851562, + "logps/rejected": -405.74456787109375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.939577102661133, + "rewards/margins": 8.25071907043457, + "rewards/rejected": -13.190296173095703, + "step": 10962 + }, + { + "epoch": 1.7, + "learning_rate": 6.106625862337258e-06, + "logits/chosen": -3.0088119506835938, + "logits/rejected": -2.8686978816986084, + "logps/chosen": -289.169677734375, + "logps/rejected": -294.65667724609375, + "loss": 0.6162, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.188648223876953, + "rewards/margins": 1.1014342308044434, + "rewards/rejected": -10.290082931518555, + "step": 10963 + }, + { + "epoch": 1.71, + "learning_rate": 6.10589242180611e-06, + "logits/chosen": -2.892911672592163, + "logits/rejected": -2.1423962116241455, + "logps/chosen": -348.0290222167969, + "logps/rejected": -163.97634887695312, + "loss": 0.4364, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.062815189361572, + "rewards/margins": 2.1268515586853027, + "rewards/rejected": -8.189666748046875, + "step": 10964 + }, + { + "epoch": 1.71, + "learning_rate": 6.105158981274962e-06, + "logits/chosen": -2.1758604049682617, + "logits/rejected": -2.82523775100708, + "logps/chosen": -127.58158111572266, + "logps/rejected": -311.71148681640625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9482007026672363, + "rewards/margins": 9.428559303283691, + "rewards/rejected": -13.37675952911377, + "step": 10965 + }, + { + "epoch": 1.71, + "learning_rate": 6.104425540743815e-06, + "logits/chosen": -0.7445047497749329, + "logits/rejected": -2.3272101879119873, + "logps/chosen": -230.50823974609375, + "logps/rejected": -449.6014709472656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.766949653625488, + "rewards/margins": 8.67576789855957, + "rewards/rejected": -14.442716598510742, + "step": 10966 + }, + { + "epoch": 1.71, + "learning_rate": 6.1036921002126665e-06, + "logits/chosen": -1.3072600364685059, + "logits/rejected": -2.993514060974121, + "logps/chosen": -158.05615234375, + "logps/rejected": -487.20550537109375, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.787687301635742, + "rewards/margins": 10.024887084960938, + "rewards/rejected": -17.81257438659668, + "step": 10967 + }, + { + "epoch": 1.71, + "learning_rate": 6.102958659681518e-06, + "logits/chosen": -2.8585801124572754, + "logits/rejected": -2.9795870780944824, + "logps/chosen": -99.94241333007812, + "logps/rejected": -184.46878051757812, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.693437576293945, + "rewards/margins": 5.624122619628906, + "rewards/rejected": -13.317560195922852, + "step": 10968 + }, + { + "epoch": 1.71, + "learning_rate": 6.10222521915037e-06, + "logits/chosen": -1.8742382526397705, + "logits/rejected": -2.6403861045837402, + "logps/chosen": -218.65570068359375, + "logps/rejected": -672.2430419921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.404325008392334, + "rewards/margins": 10.539926528930664, + "rewards/rejected": -15.94425106048584, + "step": 10969 + }, + { + "epoch": 1.71, + "learning_rate": 6.101491778619223e-06, + "logits/chosen": -2.7775380611419678, + "logits/rejected": -2.9826314449310303, + "logps/chosen": -84.14627075195312, + "logps/rejected": -179.7900390625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.937734603881836, + "rewards/margins": 6.828240394592285, + "rewards/rejected": -11.765974998474121, + "step": 10970 + }, + { + "epoch": 1.71, + "learning_rate": 6.100758338088076e-06, + "logits/chosen": -2.847656726837158, + "logits/rejected": -2.6845927238464355, + "logps/chosen": -397.0426940917969, + "logps/rejected": -584.517822265625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.275125980377197, + "rewards/margins": 6.750490665435791, + "rewards/rejected": -11.025616645812988, + "step": 10971 + }, + { + "epoch": 1.71, + "learning_rate": 6.100024897556928e-06, + "logits/chosen": -1.6483694314956665, + "logits/rejected": -2.8155324459075928, + "logps/chosen": -123.69710540771484, + "logps/rejected": -233.72772216796875, + "loss": 0.3865, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.196733474731445, + "rewards/margins": 2.1684231758117676, + "rewards/rejected": -11.365156173706055, + "step": 10972 + }, + { + "epoch": 1.71, + "learning_rate": 6.0992914570257795e-06, + "logits/chosen": -2.9650135040283203, + "logits/rejected": -2.2431702613830566, + "logps/chosen": -486.1344299316406, + "logps/rejected": -488.40936279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0978410243988037, + "rewards/margins": 14.064071655273438, + "rewards/rejected": -17.16191291809082, + "step": 10973 + }, + { + "epoch": 1.71, + "learning_rate": 6.098558016494631e-06, + "logits/chosen": -2.289893865585327, + "logits/rejected": -3.025678873062134, + "logps/chosen": -146.11984252929688, + "logps/rejected": -310.84521484375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9247612953186035, + "rewards/margins": 7.691084861755371, + "rewards/rejected": -12.615845680236816, + "step": 10974 + }, + { + "epoch": 1.71, + "learning_rate": 6.097824575963484e-06, + "logits/chosen": -2.6390345096588135, + "logits/rejected": -2.6249732971191406, + "logps/chosen": -193.1035919189453, + "logps/rejected": -228.10824584960938, + "loss": 0.0505, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.798894882202148, + "rewards/margins": 4.8048553466796875, + "rewards/rejected": -9.603750228881836, + "step": 10975 + }, + { + "epoch": 1.71, + "learning_rate": 6.097091135432336e-06, + "logits/chosen": -1.2522854804992676, + "logits/rejected": -2.9384806156158447, + "logps/chosen": -232.70272827148438, + "logps/rejected": -538.0686645507812, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.27894401550293, + "rewards/margins": 4.740956783294678, + "rewards/rejected": -10.01990032196045, + "step": 10976 + }, + { + "epoch": 1.71, + "learning_rate": 6.096357694901188e-06, + "logits/chosen": -2.961726188659668, + "logits/rejected": -2.313220739364624, + "logps/chosen": -490.1477355957031, + "logps/rejected": -472.944091796875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2804059982299805, + "rewards/margins": 9.94678783416748, + "rewards/rejected": -14.227193832397461, + "step": 10977 + }, + { + "epoch": 1.71, + "learning_rate": 6.09562425437004e-06, + "logits/chosen": -1.5974706411361694, + "logits/rejected": -2.857120990753174, + "logps/chosen": -169.50131225585938, + "logps/rejected": -390.67376708984375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.582037925720215, + "rewards/margins": 7.718189239501953, + "rewards/rejected": -13.300227165222168, + "step": 10978 + }, + { + "epoch": 1.71, + "learning_rate": 6.0948908138388924e-06, + "logits/chosen": -2.7378058433532715, + "logits/rejected": -3.0523569583892822, + "logps/chosen": -147.03175354003906, + "logps/rejected": -356.431884765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5552752017974854, + "rewards/margins": 9.749366760253906, + "rewards/rejected": -13.304641723632812, + "step": 10979 + }, + { + "epoch": 1.71, + "learning_rate": 6.094157373307744e-06, + "logits/chosen": -2.7942731380462646, + "logits/rejected": -1.9861226081848145, + "logps/chosen": -382.10272216796875, + "logps/rejected": -265.68939208984375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.047232151031494, + "rewards/margins": 6.35521936416626, + "rewards/rejected": -12.402451515197754, + "step": 10980 + }, + { + "epoch": 1.71, + "learning_rate": 6.093423932776596e-06, + "logits/chosen": -1.809980034828186, + "logits/rejected": -2.777061939239502, + "logps/chosen": -137.5487823486328, + "logps/rejected": -275.96710205078125, + "loss": 1.2691, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.609548568725586, + "rewards/margins": 2.091172218322754, + "rewards/rejected": -11.700721740722656, + "step": 10981 + }, + { + "epoch": 1.71, + "learning_rate": 6.092690492245448e-06, + "logits/chosen": -1.9322978258132935, + "logits/rejected": -2.6589770317077637, + "logps/chosen": -174.13124084472656, + "logps/rejected": -432.3865661621094, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.470220565795898, + "rewards/margins": 8.331480026245117, + "rewards/rejected": -14.801700592041016, + "step": 10982 + }, + { + "epoch": 1.71, + "learning_rate": 6.0919570517143e-06, + "logits/chosen": -2.0828285217285156, + "logits/rejected": -2.580061435699463, + "logps/chosen": -167.43128967285156, + "logps/rejected": -335.297119140625, + "loss": 0.3701, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.63920783996582, + "rewards/margins": 4.810582637786865, + "rewards/rejected": -12.449790954589844, + "step": 10983 + }, + { + "epoch": 1.71, + "learning_rate": 6.091223611183153e-06, + "logits/chosen": -2.8542208671569824, + "logits/rejected": -2.411588668823242, + "logps/chosen": -395.50616455078125, + "logps/rejected": -507.826904296875, + "loss": 0.0466, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.479344367980957, + "rewards/margins": 5.754368782043457, + "rewards/rejected": -12.233713150024414, + "step": 10984 + }, + { + "epoch": 1.71, + "learning_rate": 6.0904901706520046e-06, + "logits/chosen": -3.0186688899993896, + "logits/rejected": -2.753319501876831, + "logps/chosen": -193.680908203125, + "logps/rejected": -181.15814208984375, + "loss": 1.0443, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.144598007202148, + "rewards/margins": 4.055812835693359, + "rewards/rejected": -10.200410842895508, + "step": 10985 + }, + { + "epoch": 1.71, + "learning_rate": 6.0897567301208564e-06, + "logits/chosen": -2.188793182373047, + "logits/rejected": -2.936084747314453, + "logps/chosen": -98.95895385742188, + "logps/rejected": -255.8586883544922, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7061679363250732, + "rewards/margins": 8.33795166015625, + "rewards/rejected": -12.044120788574219, + "step": 10986 + }, + { + "epoch": 1.71, + "learning_rate": 6.089023289589709e-06, + "logits/chosen": -2.8206844329833984, + "logits/rejected": -1.9993116855621338, + "logps/chosen": -827.2080078125, + "logps/rejected": -553.0841064453125, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22373127937316895, + "rewards/margins": 9.68620777130127, + "rewards/rejected": -9.90993881225586, + "step": 10987 + }, + { + "epoch": 1.71, + "learning_rate": 6.088289849058562e-06, + "logits/chosen": -2.2020392417907715, + "logits/rejected": -2.8978066444396973, + "logps/chosen": -79.58586120605469, + "logps/rejected": -276.9303894042969, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9419732093811035, + "rewards/margins": 7.751832008361816, + "rewards/rejected": -14.693805694580078, + "step": 10988 + }, + { + "epoch": 1.71, + "learning_rate": 6.087556408527414e-06, + "logits/chosen": -2.5510141849517822, + "logits/rejected": -2.6101508140563965, + "logps/chosen": -208.3991241455078, + "logps/rejected": -422.41680908203125, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.689028739929199, + "rewards/margins": 7.813510894775391, + "rewards/rejected": -12.502540588378906, + "step": 10989 + }, + { + "epoch": 1.71, + "learning_rate": 6.086822967996266e-06, + "logits/chosen": -2.17887282371521, + "logits/rejected": -2.972783088684082, + "logps/chosen": -283.05352783203125, + "logps/rejected": -478.7701416015625, + "loss": 0.2161, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.752964019775391, + "rewards/margins": 3.029866933822632, + "rewards/rejected": -8.782831192016602, + "step": 10990 + }, + { + "epoch": 1.71, + "learning_rate": 6.0860895274651175e-06, + "logits/chosen": -2.230515718460083, + "logits/rejected": -2.7999818325042725, + "logps/chosen": -129.7464141845703, + "logps/rejected": -379.699462890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.874151229858398, + "rewards/margins": 9.58034610748291, + "rewards/rejected": -16.454498291015625, + "step": 10991 + }, + { + "epoch": 1.71, + "learning_rate": 6.08535608693397e-06, + "logits/chosen": -1.3823492527008057, + "logits/rejected": -3.0015294551849365, + "logps/chosen": -192.09884643554688, + "logps/rejected": -453.17498779296875, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.222148895263672, + "rewards/margins": 6.739953994750977, + "rewards/rejected": -14.962102890014648, + "step": 10992 + }, + { + "epoch": 1.71, + "learning_rate": 6.084622646402822e-06, + "logits/chosen": -3.015596389770508, + "logits/rejected": -2.263066530227661, + "logps/chosen": -337.04779052734375, + "logps/rejected": -317.5303649902344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4051177501678467, + "rewards/margins": 11.176860809326172, + "rewards/rejected": -11.581978797912598, + "step": 10993 + }, + { + "epoch": 1.71, + "learning_rate": 6.083889205871674e-06, + "logits/chosen": -2.55265736579895, + "logits/rejected": -2.9355945587158203, + "logps/chosen": -247.8359832763672, + "logps/rejected": -350.03790283203125, + "loss": 0.0885, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.586862564086914, + "rewards/margins": 7.375311851501465, + "rewards/rejected": -13.962174415588379, + "step": 10994 + }, + { + "epoch": 1.71, + "learning_rate": 6.083155765340526e-06, + "logits/chosen": -2.967778444290161, + "logits/rejected": -1.870091199874878, + "logps/chosen": -483.83062744140625, + "logps/rejected": -267.9300537109375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.147035598754883, + "rewards/margins": 7.920804023742676, + "rewards/rejected": -12.067838668823242, + "step": 10995 + }, + { + "epoch": 1.71, + "learning_rate": 6.082422324809378e-06, + "logits/chosen": -2.8203909397125244, + "logits/rejected": -2.342653512954712, + "logps/chosen": -217.5092010498047, + "logps/rejected": -308.0011901855469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.632601737976074, + "rewards/margins": 9.6165189743042, + "rewards/rejected": -17.249120712280273, + "step": 10996 + }, + { + "epoch": 1.71, + "learning_rate": 6.0816888842782305e-06, + "logits/chosen": -3.07277774810791, + "logits/rejected": -2.4034390449523926, + "logps/chosen": -252.4519805908203, + "logps/rejected": -125.49576568603516, + "loss": 0.2022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.627405643463135, + "rewards/margins": 3.9721083641052246, + "rewards/rejected": -8.59951400756836, + "step": 10997 + }, + { + "epoch": 1.71, + "learning_rate": 6.080955443747082e-06, + "logits/chosen": -1.6766633987426758, + "logits/rejected": -2.7575392723083496, + "logps/chosen": -114.81698608398438, + "logps/rejected": -196.0730438232422, + "loss": 1.0363, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.127001762390137, + "rewards/margins": 1.0943987369537354, + "rewards/rejected": -10.221400260925293, + "step": 10998 + }, + { + "epoch": 1.71, + "learning_rate": 6.080222003215934e-06, + "logits/chosen": -2.1949212551116943, + "logits/rejected": -2.754920482635498, + "logps/chosen": -133.90357971191406, + "logps/rejected": -310.0015563964844, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.721982002258301, + "rewards/margins": 6.908206939697266, + "rewards/rejected": -11.630188941955566, + "step": 10999 + }, + { + "epoch": 1.71, + "learning_rate": 6.079488562684786e-06, + "logits/chosen": -2.836423635482788, + "logits/rejected": -2.9197776317596436, + "logps/chosen": -921.9810180664062, + "logps/rejected": -783.8775024414062, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4867401123046875, + "rewards/margins": 9.217679023742676, + "rewards/rejected": -11.704419136047363, + "step": 11000 + }, + { + "epoch": 1.71, + "learning_rate": 6.078755122153639e-06, + "logits/chosen": -2.942185640335083, + "logits/rejected": -2.7920725345611572, + "logps/chosen": -194.70315551757812, + "logps/rejected": -270.0786437988281, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8337321281433105, + "rewards/margins": 7.324671745300293, + "rewards/rejected": -12.158403396606445, + "step": 11001 + }, + { + "epoch": 1.71, + "learning_rate": 6.078021681622491e-06, + "logits/chosen": -3.009397029876709, + "logits/rejected": -2.3726966381073, + "logps/chosen": -229.4053497314453, + "logps/rejected": -241.6834259033203, + "loss": 0.2763, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.366422176361084, + "rewards/margins": 4.672127723693848, + "rewards/rejected": -10.038549423217773, + "step": 11002 + }, + { + "epoch": 1.71, + "learning_rate": 6.077288241091343e-06, + "logits/chosen": -2.987780809402466, + "logits/rejected": -2.66660737991333, + "logps/chosen": -321.6500549316406, + "logps/rejected": -397.43865966796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.227079391479492, + "rewards/margins": 8.577978134155273, + "rewards/rejected": -14.805057525634766, + "step": 11003 + }, + { + "epoch": 1.71, + "learning_rate": 6.076554800560195e-06, + "logits/chosen": -2.8054039478302, + "logits/rejected": -2.700730323791504, + "logps/chosen": -75.6408462524414, + "logps/rejected": -211.75994873046875, + "loss": 0.0711, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.921728134155273, + "rewards/margins": 4.665571212768555, + "rewards/rejected": -10.587299346923828, + "step": 11004 + }, + { + "epoch": 1.71, + "learning_rate": 6.075821360029047e-06, + "logits/chosen": -2.0720925331115723, + "logits/rejected": -2.8430545330047607, + "logps/chosen": -136.7200927734375, + "logps/rejected": -263.6022033691406, + "loss": 2.3295, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.8742146492004395, + "rewards/margins": 2.66479229927063, + "rewards/rejected": -9.539007186889648, + "step": 11005 + }, + { + "epoch": 1.71, + "learning_rate": 6.0750879194979e-06, + "logits/chosen": -1.1346551179885864, + "logits/rejected": -2.7293508052825928, + "logps/chosen": -250.82742309570312, + "logps/rejected": -577.9998168945312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.76914644241333, + "rewards/margins": 11.221739768981934, + "rewards/rejected": -15.990886688232422, + "step": 11006 + }, + { + "epoch": 1.71, + "learning_rate": 6.074354478966752e-06, + "logits/chosen": -3.0318915843963623, + "logits/rejected": -3.1127538681030273, + "logps/chosen": -715.7190551757812, + "logps/rejected": -696.931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.465184211730957, + "rewards/margins": 10.567100524902344, + "rewards/rejected": -16.032285690307617, + "step": 11007 + }, + { + "epoch": 1.71, + "learning_rate": 6.073621038435604e-06, + "logits/chosen": -1.8376061916351318, + "logits/rejected": -2.930603504180908, + "logps/chosen": -232.77659606933594, + "logps/rejected": -339.517822265625, + "loss": 0.1296, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.018077373504639, + "rewards/margins": 4.650771141052246, + "rewards/rejected": -10.668848037719727, + "step": 11008 + }, + { + "epoch": 1.71, + "learning_rate": 6.0728875979044556e-06, + "logits/chosen": -3.008887529373169, + "logits/rejected": -2.134490489959717, + "logps/chosen": -548.0188598632812, + "logps/rejected": -213.21240234375, + "loss": 1.2696, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.723580837249756, + "rewards/margins": 1.0225000381469727, + "rewards/rejected": -6.7460808753967285, + "step": 11009 + }, + { + "epoch": 1.71, + "learning_rate": 6.072154157373308e-06, + "logits/chosen": -2.787768840789795, + "logits/rejected": -1.2972400188446045, + "logps/chosen": -526.9298706054688, + "logps/rejected": -351.6976318359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.931565046310425, + "rewards/margins": 9.428985595703125, + "rewards/rejected": -13.360549926757812, + "step": 11010 + }, + { + "epoch": 1.71, + "learning_rate": 6.07142071684216e-06, + "logits/chosen": -1.2330082654953003, + "logits/rejected": -2.963733673095703, + "logps/chosen": -244.14254760742188, + "logps/rejected": -351.0616760253906, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.396836280822754, + "rewards/margins": 8.400819778442383, + "rewards/rejected": -10.797657012939453, + "step": 11011 + }, + { + "epoch": 1.71, + "learning_rate": 6.070687276311012e-06, + "logits/chosen": -2.5334243774414062, + "logits/rejected": -2.783047914505005, + "logps/chosen": -88.67401885986328, + "logps/rejected": -382.15545654296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.502960205078125, + "rewards/margins": 11.990697860717773, + "rewards/rejected": -18.4936580657959, + "step": 11012 + }, + { + "epoch": 1.71, + "learning_rate": 6.069953835779864e-06, + "logits/chosen": -2.788229465484619, + "logits/rejected": -2.4525387287139893, + "logps/chosen": -412.2786865234375, + "logps/rejected": -465.2379150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2100114822387695, + "rewards/margins": 10.510924339294434, + "rewards/rejected": -14.720935821533203, + "step": 11013 + }, + { + "epoch": 1.71, + "learning_rate": 6.069220395248716e-06, + "logits/chosen": -2.564769744873047, + "logits/rejected": -2.6430180072784424, + "logps/chosen": -317.97381591796875, + "logps/rejected": -414.087890625, + "loss": 0.1934, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.541680335998535, + "rewards/margins": 1.7586445808410645, + "rewards/rejected": -12.300325393676758, + "step": 11014 + }, + { + "epoch": 1.71, + "learning_rate": 6.0684869547175685e-06, + "logits/chosen": -2.725667715072632, + "logits/rejected": -2.8495442867279053, + "logps/chosen": -426.7355651855469, + "logps/rejected": -575.11669921875, + "loss": 0.8335, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.043542861938477, + "rewards/margins": 3.564286708831787, + "rewards/rejected": -10.607829093933105, + "step": 11015 + }, + { + "epoch": 1.71, + "learning_rate": 6.06775351418642e-06, + "logits/chosen": -2.8589255809783936, + "logits/rejected": -2.996824026107788, + "logps/chosen": -493.7566833496094, + "logps/rejected": -524.5543823242188, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.484509468078613, + "rewards/margins": 8.504671096801758, + "rewards/rejected": -13.989181518554688, + "step": 11016 + }, + { + "epoch": 1.71, + "learning_rate": 6.067020073655272e-06, + "logits/chosen": -2.095087766647339, + "logits/rejected": -2.867992639541626, + "logps/chosen": -112.51818084716797, + "logps/rejected": -290.35565185546875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.822014808654785, + "rewards/margins": 7.58651065826416, + "rewards/rejected": -11.408525466918945, + "step": 11017 + }, + { + "epoch": 1.71, + "learning_rate": 6.066286633124124e-06, + "logits/chosen": -2.605211019515991, + "logits/rejected": -1.8304473161697388, + "logps/chosen": -324.33551025390625, + "logps/rejected": -261.22357177734375, + "loss": 2.0621, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.848190307617188, + "rewards/margins": 0.4614708423614502, + "rewards/rejected": -12.309661865234375, + "step": 11018 + }, + { + "epoch": 1.71, + "learning_rate": 6.065553192592977e-06, + "logits/chosen": -2.2301759719848633, + "logits/rejected": -2.9412789344787598, + "logps/chosen": -325.35028076171875, + "logps/rejected": -455.02716064453125, + "loss": 0.5205, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.008173942565918, + "rewards/margins": 2.8578808307647705, + "rewards/rejected": -9.86605453491211, + "step": 11019 + }, + { + "epoch": 1.71, + "learning_rate": 6.064819752061829e-06, + "logits/chosen": -2.5598723888397217, + "logits/rejected": -2.882178783416748, + "logps/chosen": -74.79657745361328, + "logps/rejected": -357.822021484375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.796833038330078, + "rewards/margins": 9.802413940429688, + "rewards/rejected": -13.599246978759766, + "step": 11020 + }, + { + "epoch": 1.71, + "learning_rate": 6.0640863115306815e-06, + "logits/chosen": -1.8382740020751953, + "logits/rejected": -2.8011257648468018, + "logps/chosen": -129.8665008544922, + "logps/rejected": -323.2283020019531, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3126091957092285, + "rewards/margins": 4.962921142578125, + "rewards/rejected": -9.275529861450195, + "step": 11021 + }, + { + "epoch": 1.71, + "learning_rate": 6.063352870999533e-06, + "logits/chosen": -2.595350980758667, + "logits/rejected": -1.2528890371322632, + "logps/chosen": -320.4605407714844, + "logps/rejected": -184.07920837402344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2748970985412598, + "rewards/margins": 8.142889976501465, + "rewards/rejected": -11.417787551879883, + "step": 11022 + }, + { + "epoch": 1.71, + "learning_rate": 6.062619430468385e-06, + "logits/chosen": -2.809276580810547, + "logits/rejected": -2.9631946086883545, + "logps/chosen": -96.00090026855469, + "logps/rejected": -185.58349609375, + "loss": 0.1155, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.007884979248047, + "rewards/margins": 3.639376401901245, + "rewards/rejected": -9.647261619567871, + "step": 11023 + }, + { + "epoch": 1.71, + "learning_rate": 6.061885989937238e-06, + "logits/chosen": -2.303854465484619, + "logits/rejected": -1.5987213850021362, + "logps/chosen": -525.0206298828125, + "logps/rejected": -452.6810302734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.958446502685547, + "rewards/margins": 8.39011001586914, + "rewards/rejected": -17.348556518554688, + "step": 11024 + }, + { + "epoch": 1.71, + "learning_rate": 6.06115254940609e-06, + "logits/chosen": -2.9566144943237305, + "logits/rejected": -2.3026633262634277, + "logps/chosen": -274.45355224609375, + "logps/rejected": -284.44281005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.702836513519287, + "rewards/margins": 13.210132598876953, + "rewards/rejected": -14.912969589233398, + "step": 11025 + }, + { + "epoch": 1.71, + "learning_rate": 6.060419108874942e-06, + "logits/chosen": -2.8157637119293213, + "logits/rejected": -2.8929669857025146, + "logps/chosen": -118.84231567382812, + "logps/rejected": -241.47506713867188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.006194829940796, + "rewards/margins": 9.417717933654785, + "rewards/rejected": -11.423912048339844, + "step": 11026 + }, + { + "epoch": 1.71, + "learning_rate": 6.059685668343794e-06, + "logits/chosen": -2.8004584312438965, + "logits/rejected": -2.9986867904663086, + "logps/chosen": -91.85186004638672, + "logps/rejected": -252.0731201171875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.737236499786377, + "rewards/margins": 5.660480499267578, + "rewards/rejected": -10.397716522216797, + "step": 11027 + }, + { + "epoch": 1.72, + "learning_rate": 6.058952227812646e-06, + "logits/chosen": -2.6116783618927, + "logits/rejected": -2.985273838043213, + "logps/chosen": -80.14822387695312, + "logps/rejected": -314.81524658203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.971536159515381, + "rewards/margins": 8.733476638793945, + "rewards/rejected": -14.705013275146484, + "step": 11028 + }, + { + "epoch": 1.72, + "learning_rate": 6.058218787281498e-06, + "logits/chosen": -2.9715864658355713, + "logits/rejected": -3.1855437755584717, + "logps/chosen": -187.93923950195312, + "logps/rejected": -227.80360412597656, + "loss": 0.235, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.399777412414551, + "rewards/margins": 3.8737969398498535, + "rewards/rejected": -8.273574829101562, + "step": 11029 + }, + { + "epoch": 1.72, + "learning_rate": 6.05748534675035e-06, + "logits/chosen": -2.626671075820923, + "logits/rejected": -3.206778049468994, + "logps/chosen": -135.626953125, + "logps/rejected": -217.0400390625, + "loss": 0.4719, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.206295013427734, + "rewards/margins": 1.2169508934020996, + "rewards/rejected": -8.423245429992676, + "step": 11030 + }, + { + "epoch": 1.72, + "learning_rate": 6.056751906219202e-06, + "logits/chosen": -2.0802032947540283, + "logits/rejected": -3.019280433654785, + "logps/chosen": -315.7910461425781, + "logps/rejected": -698.7330932617188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.180792331695557, + "rewards/margins": 12.140045166015625, + "rewards/rejected": -18.320838928222656, + "step": 11031 + }, + { + "epoch": 1.72, + "learning_rate": 6.056018465688055e-06, + "logits/chosen": -2.806917428970337, + "logits/rejected": -1.592279314994812, + "logps/chosen": -253.45523071289062, + "logps/rejected": -224.70542907714844, + "loss": 0.2361, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.422639846801758, + "rewards/margins": 3.1681747436523438, + "rewards/rejected": -10.590814590454102, + "step": 11032 + }, + { + "epoch": 1.72, + "learning_rate": 6.0552850251569066e-06, + "logits/chosen": -1.7998101711273193, + "logits/rejected": -2.612751007080078, + "logps/chosen": -125.85055541992188, + "logps/rejected": -214.46115112304688, + "loss": 0.2298, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8030548095703125, + "rewards/margins": 3.6059863567352295, + "rewards/rejected": -11.409040451049805, + "step": 11033 + }, + { + "epoch": 1.72, + "learning_rate": 6.0545515846257584e-06, + "logits/chosen": -2.989683151245117, + "logits/rejected": -2.6521172523498535, + "logps/chosen": -112.56327819824219, + "logps/rejected": -190.11932373046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9075429439544678, + "rewards/margins": 8.930710792541504, + "rewards/rejected": -12.83825397491455, + "step": 11034 + }, + { + "epoch": 1.72, + "learning_rate": 6.05381814409461e-06, + "logits/chosen": -2.888232707977295, + "logits/rejected": -3.1174824237823486, + "logps/chosen": -585.1283569335938, + "logps/rejected": -602.6903076171875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.973964691162109, + "rewards/margins": 6.780972003936768, + "rewards/rejected": -12.754936218261719, + "step": 11035 + }, + { + "epoch": 1.72, + "learning_rate": 6.053084703563462e-06, + "logits/chosen": -2.934903383255005, + "logits/rejected": -2.7481882572174072, + "logps/chosen": -460.551025390625, + "logps/rejected": -517.361572265625, + "loss": 0.2785, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.493711471557617, + "rewards/margins": 2.301297903060913, + "rewards/rejected": -9.79500961303711, + "step": 11036 + }, + { + "epoch": 1.72, + "learning_rate": 6.052351263032315e-06, + "logits/chosen": -2.232606887817383, + "logits/rejected": -2.977012872695923, + "logps/chosen": -336.57940673828125, + "logps/rejected": -458.6329345703125, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.321167945861816, + "rewards/margins": 6.938129425048828, + "rewards/rejected": -13.259297370910645, + "step": 11037 + }, + { + "epoch": 1.72, + "learning_rate": 6.051617822501168e-06, + "logits/chosen": -2.8370251655578613, + "logits/rejected": -2.515082597732544, + "logps/chosen": -346.9603271484375, + "logps/rejected": -360.7268981933594, + "loss": 3.618, + "rewards/accuracies": 0.0, + "rewards/chosen": -11.529535293579102, + "rewards/margins": -3.5861518383026123, + "rewards/rejected": -7.94338321685791, + "step": 11038 + }, + { + "epoch": 1.72, + "learning_rate": 6.0508843819700195e-06, + "logits/chosen": -2.501479387283325, + "logits/rejected": -2.9511938095092773, + "logps/chosen": -144.22488403320312, + "logps/rejected": -251.84121704101562, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.158263206481934, + "rewards/margins": 7.357306480407715, + "rewards/rejected": -13.515569686889648, + "step": 11039 + }, + { + "epoch": 1.72, + "learning_rate": 6.050150941438871e-06, + "logits/chosen": -3.1598994731903076, + "logits/rejected": -3.3266520500183105, + "logps/chosen": -110.93601989746094, + "logps/rejected": -149.76779174804688, + "loss": 1.1278, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.984163284301758, + "rewards/margins": 2.883118152618408, + "rewards/rejected": -7.867281913757324, + "step": 11040 + }, + { + "epoch": 1.72, + "learning_rate": 6.049417500907724e-06, + "logits/chosen": -2.7257580757141113, + "logits/rejected": -2.2946438789367676, + "logps/chosen": -468.8595886230469, + "logps/rejected": -452.52532958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.028957366943359, + "rewards/margins": 11.701443672180176, + "rewards/rejected": -15.730401992797852, + "step": 11041 + }, + { + "epoch": 1.72, + "learning_rate": 6.048684060376576e-06, + "logits/chosen": -3.094484806060791, + "logits/rejected": -3.115142345428467, + "logps/chosen": -644.5665893554688, + "logps/rejected": -622.177490234375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9661712646484375, + "rewards/margins": 10.618563652038574, + "rewards/rejected": -13.584734916687012, + "step": 11042 + }, + { + "epoch": 1.72, + "learning_rate": 6.047950619845428e-06, + "logits/chosen": -2.214932441711426, + "logits/rejected": -2.8671183586120605, + "logps/chosen": -270.7132568359375, + "logps/rejected": -328.9423828125, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.859065055847168, + "rewards/margins": 4.362362861633301, + "rewards/rejected": -10.221427917480469, + "step": 11043 + }, + { + "epoch": 1.72, + "learning_rate": 6.04721717931428e-06, + "logits/chosen": -2.9332826137542725, + "logits/rejected": -2.4075279235839844, + "logps/chosen": -499.8708190917969, + "logps/rejected": -357.6580810546875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.696277141571045, + "rewards/margins": 7.511591911315918, + "rewards/rejected": -11.207868576049805, + "step": 11044 + }, + { + "epoch": 1.72, + "learning_rate": 6.046483738783132e-06, + "logits/chosen": -2.7421367168426514, + "logits/rejected": -3.022371292114258, + "logps/chosen": -331.1851501464844, + "logps/rejected": -275.48095703125, + "loss": 0.5333, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.020133018493652, + "rewards/margins": 3.093683958053589, + "rewards/rejected": -10.11381721496582, + "step": 11045 + }, + { + "epoch": 1.72, + "learning_rate": 6.045750298251984e-06, + "logits/chosen": -3.0002121925354004, + "logits/rejected": -3.1057724952697754, + "logps/chosen": -203.65536499023438, + "logps/rejected": -295.3016052246094, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.816717147827148, + "rewards/margins": 5.527928352355957, + "rewards/rejected": -10.344645500183105, + "step": 11046 + }, + { + "epoch": 1.72, + "learning_rate": 6.045016857720836e-06, + "logits/chosen": -1.0813864469528198, + "logits/rejected": -2.9460809230804443, + "logps/chosen": -149.25167846679688, + "logps/rejected": -532.7785034179688, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.639931678771973, + "rewards/margins": 6.678556442260742, + "rewards/rejected": -12.318488121032715, + "step": 11047 + }, + { + "epoch": 1.72, + "learning_rate": 6.044283417189688e-06, + "logits/chosen": -2.410548686981201, + "logits/rejected": -2.87922739982605, + "logps/chosen": -189.3232879638672, + "logps/rejected": -286.5061950683594, + "loss": 0.0456, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.294978141784668, + "rewards/margins": 5.128806114196777, + "rewards/rejected": -10.423784255981445, + "step": 11048 + }, + { + "epoch": 1.72, + "learning_rate": 6.04354997665854e-06, + "logits/chosen": -2.110053062438965, + "logits/rejected": -2.9185423851013184, + "logps/chosen": -327.87164306640625, + "logps/rejected": -503.9933776855469, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3334832191467285, + "rewards/margins": 8.323274612426758, + "rewards/rejected": -13.656758308410645, + "step": 11049 + }, + { + "epoch": 1.72, + "learning_rate": 6.042816536127393e-06, + "logits/chosen": -3.0165326595306396, + "logits/rejected": -2.5622572898864746, + "logps/chosen": -218.38272094726562, + "logps/rejected": -303.7828674316406, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.168471336364746, + "rewards/margins": 5.616720199584961, + "rewards/rejected": -10.785192489624023, + "step": 11050 + }, + { + "epoch": 1.72, + "learning_rate": 6.042083095596245e-06, + "logits/chosen": -2.9744131565093994, + "logits/rejected": -2.734923839569092, + "logps/chosen": -122.66795349121094, + "logps/rejected": -220.06021118164062, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.981445789337158, + "rewards/margins": 10.088247299194336, + "rewards/rejected": -15.069692611694336, + "step": 11051 + }, + { + "epoch": 1.72, + "learning_rate": 6.0413496550650965e-06, + "logits/chosen": -2.4406113624572754, + "logits/rejected": -2.8656885623931885, + "logps/chosen": -98.29008483886719, + "logps/rejected": -228.9938201904297, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.172591686248779, + "rewards/margins": 6.2920732498168945, + "rewards/rejected": -11.464664459228516, + "step": 11052 + }, + { + "epoch": 1.72, + "learning_rate": 6.040616214533948e-06, + "logits/chosen": -2.2931876182556152, + "logits/rejected": -2.875044107437134, + "logps/chosen": -74.17316436767578, + "logps/rejected": -345.64630126953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6675877571105957, + "rewards/margins": 8.130298614501953, + "rewards/rejected": -11.797886848449707, + "step": 11053 + }, + { + "epoch": 1.72, + "learning_rate": 6.039882774002801e-06, + "logits/chosen": -2.81691575050354, + "logits/rejected": -2.846738338470459, + "logps/chosen": -92.52305603027344, + "logps/rejected": -175.38427734375, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.749577522277832, + "rewards/margins": 5.867971420288086, + "rewards/rejected": -12.617548942565918, + "step": 11054 + }, + { + "epoch": 1.72, + "learning_rate": 6.039149333471654e-06, + "logits/chosen": -2.396153450012207, + "logits/rejected": -3.0165438652038574, + "logps/chosen": -237.56634521484375, + "logps/rejected": -355.1891174316406, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.086109638214111, + "rewards/margins": 7.095696449279785, + "rewards/rejected": -13.181806564331055, + "step": 11055 + }, + { + "epoch": 1.72, + "learning_rate": 6.038415892940506e-06, + "logits/chosen": -2.5584187507629395, + "logits/rejected": -3.066864252090454, + "logps/chosen": -826.91015625, + "logps/rejected": -960.966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4656898975372314, + "rewards/margins": 11.590991973876953, + "rewards/rejected": -15.056680679321289, + "step": 11056 + }, + { + "epoch": 1.72, + "learning_rate": 6.0376824524093576e-06, + "logits/chosen": -2.79424786567688, + "logits/rejected": -2.347491502761841, + "logps/chosen": -199.79751586914062, + "logps/rejected": -287.2494812011719, + "loss": 0.3664, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.610716819763184, + "rewards/margins": 3.048734426498413, + "rewards/rejected": -8.659451484680176, + "step": 11057 + }, + { + "epoch": 1.72, + "learning_rate": 6.0369490118782095e-06, + "logits/chosen": -2.0491783618927, + "logits/rejected": -2.6977133750915527, + "logps/chosen": -99.08543395996094, + "logps/rejected": -279.1004638671875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.105600357055664, + "rewards/margins": 7.530764579772949, + "rewards/rejected": -12.636364936828613, + "step": 11058 + }, + { + "epoch": 1.72, + "learning_rate": 6.036215571347062e-06, + "logits/chosen": -2.4230291843414307, + "logits/rejected": -2.9381210803985596, + "logps/chosen": -527.2987670898438, + "logps/rejected": -696.25244140625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.885963439941406, + "rewards/margins": 9.220270156860352, + "rewards/rejected": -14.106233596801758, + "step": 11059 + }, + { + "epoch": 1.72, + "learning_rate": 6.035482130815914e-06, + "logits/chosen": -2.722635269165039, + "logits/rejected": -2.6923723220825195, + "logps/chosen": -358.40924072265625, + "logps/rejected": -397.08819580078125, + "loss": 0.0676, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.991583824157715, + "rewards/margins": 6.901086807250977, + "rewards/rejected": -12.892670631408691, + "step": 11060 + }, + { + "epoch": 1.72, + "learning_rate": 6.034748690284766e-06, + "logits/chosen": -2.805321455001831, + "logits/rejected": -2.5464706420898438, + "logps/chosen": -262.7283935546875, + "logps/rejected": -288.5405578613281, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.280844211578369, + "rewards/margins": 8.331165313720703, + "rewards/rejected": -12.612009048461914, + "step": 11061 + }, + { + "epoch": 1.72, + "learning_rate": 6.034015249753618e-06, + "logits/chosen": -2.3858489990234375, + "logits/rejected": -3.0555219650268555, + "logps/chosen": -400.70501708984375, + "logps/rejected": -493.3730163574219, + "loss": 1.0157, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.596396446228027, + "rewards/margins": 3.488032102584839, + "rewards/rejected": -10.084428787231445, + "step": 11062 + }, + { + "epoch": 1.72, + "learning_rate": 6.03328180922247e-06, + "logits/chosen": -2.462245464324951, + "logits/rejected": -2.992441415786743, + "logps/chosen": -100.00846862792969, + "logps/rejected": -301.9994201660156, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.720255851745605, + "rewards/margins": 5.2608537673950195, + "rewards/rejected": -13.981109619140625, + "step": 11063 + }, + { + "epoch": 1.72, + "learning_rate": 6.032548368691322e-06, + "logits/chosen": -1.308374047279358, + "logits/rejected": -2.7676470279693604, + "logps/chosen": -242.1475830078125, + "logps/rejected": -289.59503173828125, + "loss": 1.11, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.219892501831055, + "rewards/margins": 2.2014684677124023, + "rewards/rejected": -11.421361923217773, + "step": 11064 + }, + { + "epoch": 1.72, + "learning_rate": 6.031814928160174e-06, + "logits/chosen": -2.7124133110046387, + "logits/rejected": -3.0370659828186035, + "logps/chosen": -450.9515075683594, + "logps/rejected": -617.8514404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.658146858215332, + "rewards/margins": 10.062398910522461, + "rewards/rejected": -14.72054672241211, + "step": 11065 + }, + { + "epoch": 1.72, + "learning_rate": 6.031081487629026e-06, + "logits/chosen": -2.7305359840393066, + "logits/rejected": -2.5287718772888184, + "logps/chosen": -390.19732666015625, + "logps/rejected": -475.3321533203125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.452023506164551, + "rewards/margins": 10.367204666137695, + "rewards/rejected": -14.819228172302246, + "step": 11066 + }, + { + "epoch": 1.72, + "learning_rate": 6.030348047097878e-06, + "logits/chosen": -2.8221192359924316, + "logits/rejected": -1.707831859588623, + "logps/chosen": -467.76544189453125, + "logps/rejected": -290.5227355957031, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6158769130706787, + "rewards/margins": 6.375791549682617, + "rewards/rejected": -9.991668701171875, + "step": 11067 + }, + { + "epoch": 1.72, + "learning_rate": 6.029614606566731e-06, + "logits/chosen": -2.970811605453491, + "logits/rejected": -2.090437412261963, + "logps/chosen": -691.0694580078125, + "logps/rejected": -592.8372192382812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.94974684715271, + "rewards/margins": 10.18877124786377, + "rewards/rejected": -14.138518333435059, + "step": 11068 + }, + { + "epoch": 1.72, + "learning_rate": 6.028881166035583e-06, + "logits/chosen": -1.5935791730880737, + "logits/rejected": -2.7400832176208496, + "logps/chosen": -274.30572509765625, + "logps/rejected": -613.08642578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.225517272949219, + "rewards/margins": 9.884800910949707, + "rewards/rejected": -14.110319137573242, + "step": 11069 + }, + { + "epoch": 1.72, + "learning_rate": 6.0281477255044345e-06, + "logits/chosen": -2.970797538757324, + "logits/rejected": -2.9775054454803467, + "logps/chosen": -68.49701690673828, + "logps/rejected": -322.6907653808594, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.508133888244629, + "rewards/margins": 5.54176664352417, + "rewards/rejected": -11.04990005493164, + "step": 11070 + }, + { + "epoch": 1.72, + "learning_rate": 6.027414284973287e-06, + "logits/chosen": -2.618340253829956, + "logits/rejected": -2.936735153198242, + "logps/chosen": -82.86325073242188, + "logps/rejected": -187.33029174804688, + "loss": 0.3286, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8992390632629395, + "rewards/margins": 4.8010125160217285, + "rewards/rejected": -11.700251579284668, + "step": 11071 + }, + { + "epoch": 1.72, + "learning_rate": 6.026680844442139e-06, + "logits/chosen": -1.1412571668624878, + "logits/rejected": -2.8260488510131836, + "logps/chosen": -110.98239135742188, + "logps/rejected": -447.447265625, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.000836372375488, + "rewards/margins": 8.082769393920898, + "rewards/rejected": -14.083605766296387, + "step": 11072 + }, + { + "epoch": 1.72, + "learning_rate": 6.025947403910992e-06, + "logits/chosen": -2.6505160331726074, + "logits/rejected": -2.6739585399627686, + "logps/chosen": -254.90847778320312, + "logps/rejected": -253.52548217773438, + "loss": 1.7078, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.708690166473389, + "rewards/margins": 5.185915946960449, + "rewards/rejected": -11.89460563659668, + "step": 11073 + }, + { + "epoch": 1.72, + "learning_rate": 6.025213963379844e-06, + "logits/chosen": -2.8432977199554443, + "logits/rejected": -2.231574296951294, + "logps/chosen": -251.11444091796875, + "logps/rejected": -189.00558471679688, + "loss": 0.1451, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.835545539855957, + "rewards/margins": 4.681005477905273, + "rewards/rejected": -11.516550064086914, + "step": 11074 + }, + { + "epoch": 1.72, + "learning_rate": 6.024480522848696e-06, + "logits/chosen": -2.758479595184326, + "logits/rejected": -2.931915044784546, + "logps/chosen": -174.1030731201172, + "logps/rejected": -475.5184326171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.567805290222168, + "rewards/margins": 11.48702621459961, + "rewards/rejected": -17.054832458496094, + "step": 11075 + }, + { + "epoch": 1.72, + "learning_rate": 6.0237470823175475e-06, + "logits/chosen": -2.7429754734039307, + "logits/rejected": -2.008174180984497, + "logps/chosen": -277.8083801269531, + "logps/rejected": -316.20257568359375, + "loss": 0.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.057901382446289, + "rewards/margins": 3.0100388526916504, + "rewards/rejected": -11.067939758300781, + "step": 11076 + }, + { + "epoch": 1.72, + "learning_rate": 6.0230136417864e-06, + "logits/chosen": -2.143832206726074, + "logits/rejected": -2.8721463680267334, + "logps/chosen": -182.67904663085938, + "logps/rejected": -459.1098327636719, + "loss": 1.1549, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.7150421142578125, + "rewards/margins": 1.1548748016357422, + "rewards/rejected": -8.869916915893555, + "step": 11077 + }, + { + "epoch": 1.72, + "learning_rate": 6.022280201255252e-06, + "logits/chosen": -1.5384876728057861, + "logits/rejected": -2.941404342651367, + "logps/chosen": -476.5696716308594, + "logps/rejected": -686.3713989257812, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.670411586761475, + "rewards/margins": 9.475058555603027, + "rewards/rejected": -16.145469665527344, + "step": 11078 + }, + { + "epoch": 1.72, + "learning_rate": 6.021546760724104e-06, + "logits/chosen": -2.137024402618408, + "logits/rejected": -2.514192581176758, + "logps/chosen": -221.22311401367188, + "logps/rejected": -433.8056335449219, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.116464614868164, + "rewards/margins": 7.667013168334961, + "rewards/rejected": -13.783477783203125, + "step": 11079 + }, + { + "epoch": 1.72, + "learning_rate": 6.020813320192956e-06, + "logits/chosen": -2.374906539916992, + "logits/rejected": -2.917154312133789, + "logps/chosen": -152.0913543701172, + "logps/rejected": -280.5691223144531, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.712913990020752, + "rewards/margins": 6.477919101715088, + "rewards/rejected": -10.19083309173584, + "step": 11080 + }, + { + "epoch": 1.72, + "learning_rate": 6.020079879661809e-06, + "logits/chosen": -2.459254264831543, + "logits/rejected": -2.9441914558410645, + "logps/chosen": -91.06241607666016, + "logps/rejected": -316.3990173339844, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.087832927703857, + "rewards/margins": 7.49550724029541, + "rewards/rejected": -13.58333969116211, + "step": 11081 + }, + { + "epoch": 1.72, + "learning_rate": 6.0193464391306605e-06, + "logits/chosen": -3.0638809204101562, + "logits/rejected": -2.239839553833008, + "logps/chosen": -206.6881103515625, + "logps/rejected": -180.98208618164062, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8274383544921875, + "rewards/margins": 6.360594749450684, + "rewards/rejected": -10.188034057617188, + "step": 11082 + }, + { + "epoch": 1.72, + "learning_rate": 6.018612998599512e-06, + "logits/chosen": -2.8453636169433594, + "logits/rejected": -1.525778889656067, + "logps/chosen": -198.8455047607422, + "logps/rejected": -241.9199676513672, + "loss": 0.0154, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.462040901184082, + "rewards/margins": 4.962697505950928, + "rewards/rejected": -9.424738883972168, + "step": 11083 + }, + { + "epoch": 1.72, + "learning_rate": 6.017879558068364e-06, + "logits/chosen": -1.885762333869934, + "logits/rejected": -2.7287514209747314, + "logps/chosen": -275.91455078125, + "logps/rejected": -425.2195129394531, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.960080146789551, + "rewards/margins": 8.37307357788086, + "rewards/rejected": -13.333152770996094, + "step": 11084 + }, + { + "epoch": 1.72, + "learning_rate": 6.017146117537216e-06, + "logits/chosen": -2.243514060974121, + "logits/rejected": -2.4527320861816406, + "logps/chosen": -230.1070098876953, + "logps/rejected": -272.25543212890625, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.245437145233154, + "rewards/margins": 6.494171619415283, + "rewards/rejected": -11.739608764648438, + "step": 11085 + }, + { + "epoch": 1.72, + "learning_rate": 6.016412677006069e-06, + "logits/chosen": -1.9567407369613647, + "logits/rejected": -3.0261735916137695, + "logps/chosen": -277.3625793457031, + "logps/rejected": -478.7682800292969, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.159589767456055, + "rewards/margins": 4.751360893249512, + "rewards/rejected": -11.910951614379883, + "step": 11086 + }, + { + "epoch": 1.72, + "learning_rate": 6.015679236474921e-06, + "logits/chosen": -2.7818145751953125, + "logits/rejected": -2.979398250579834, + "logps/chosen": -198.12855529785156, + "logps/rejected": -186.16839599609375, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.072482109069824, + "rewards/margins": 5.5255327224731445, + "rewards/rejected": -9.598014831542969, + "step": 11087 + }, + { + "epoch": 1.72, + "learning_rate": 6.0149457959437734e-06, + "logits/chosen": -3.03265643119812, + "logits/rejected": -2.798177719116211, + "logps/chosen": -324.72930908203125, + "logps/rejected": -327.6794738769531, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.189205169677734, + "rewards/margins": 7.194774627685547, + "rewards/rejected": -11.383979797363281, + "step": 11088 + }, + { + "epoch": 1.72, + "learning_rate": 6.014212355412625e-06, + "logits/chosen": -2.5215442180633545, + "logits/rejected": -2.979069948196411, + "logps/chosen": -209.07981872558594, + "logps/rejected": -357.93023681640625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.284114837646484, + "rewards/margins": 8.915481567382812, + "rewards/rejected": -13.199596405029297, + "step": 11089 + }, + { + "epoch": 1.72, + "learning_rate": 6.013478914881478e-06, + "logits/chosen": -2.6328117847442627, + "logits/rejected": -3.1362953186035156, + "logps/chosen": -120.13245391845703, + "logps/rejected": -304.5353698730469, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.049991607666016, + "rewards/margins": 7.416640281677246, + "rewards/rejected": -12.466631889343262, + "step": 11090 + }, + { + "epoch": 1.72, + "learning_rate": 6.01274547435033e-06, + "logits/chosen": -2.948462724685669, + "logits/rejected": -1.9361106157302856, + "logps/chosen": -455.49420166015625, + "logps/rejected": -288.222412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0643327236175537, + "rewards/margins": 11.443399429321289, + "rewards/rejected": -14.507732391357422, + "step": 11091 + }, + { + "epoch": 1.73, + "learning_rate": 6.012012033819182e-06, + "logits/chosen": -3.062819004058838, + "logits/rejected": -2.307966470718384, + "logps/chosen": -625.3310546875, + "logps/rejected": -459.35650634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.626730442047119, + "rewards/margins": 11.422378540039062, + "rewards/rejected": -15.049110412597656, + "step": 11092 + }, + { + "epoch": 1.73, + "learning_rate": 6.011278593288034e-06, + "logits/chosen": -2.3491599559783936, + "logits/rejected": -2.5394721031188965, + "logps/chosen": -92.6427993774414, + "logps/rejected": -194.10134887695312, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.073447227478027, + "rewards/margins": 6.872654914855957, + "rewards/rejected": -10.946102142333984, + "step": 11093 + }, + { + "epoch": 1.73, + "learning_rate": 6.0105451527568855e-06, + "logits/chosen": -2.882044553756714, + "logits/rejected": -2.1992459297180176, + "logps/chosen": -647.593017578125, + "logps/rejected": -318.4326477050781, + "loss": 0.053, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.001355171203613, + "rewards/margins": 6.062206268310547, + "rewards/rejected": -13.06356143951416, + "step": 11094 + }, + { + "epoch": 1.73, + "learning_rate": 6.009811712225738e-06, + "logits/chosen": -1.6866061687469482, + "logits/rejected": -2.9342563152313232, + "logps/chosen": -104.69837951660156, + "logps/rejected": -477.3577880859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.154501914978027, + "rewards/margins": 10.188911437988281, + "rewards/rejected": -14.343412399291992, + "step": 11095 + }, + { + "epoch": 1.73, + "learning_rate": 6.00907827169459e-06, + "logits/chosen": -2.0723323822021484, + "logits/rejected": -2.8502724170684814, + "logps/chosen": -283.9303894042969, + "logps/rejected": -386.4593505859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6649842262268066, + "rewards/margins": 8.879003524780273, + "rewards/rejected": -12.543988227844238, + "step": 11096 + }, + { + "epoch": 1.73, + "learning_rate": 6.008344831163442e-06, + "logits/chosen": -1.7627973556518555, + "logits/rejected": -2.9914629459381104, + "logps/chosen": -107.80917358398438, + "logps/rejected": -607.5654907226562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.221964359283447, + "rewards/margins": 16.812698364257812, + "rewards/rejected": -22.034664154052734, + "step": 11097 + }, + { + "epoch": 1.73, + "learning_rate": 6.007611390632294e-06, + "logits/chosen": -2.4924206733703613, + "logits/rejected": -2.9404563903808594, + "logps/chosen": -171.03756713867188, + "logps/rejected": -318.1400146484375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.30556583404541, + "rewards/margins": 7.879188537597656, + "rewards/rejected": -11.184754371643066, + "step": 11098 + }, + { + "epoch": 1.73, + "learning_rate": 6.006877950101147e-06, + "logits/chosen": -3.0686538219451904, + "logits/rejected": -2.248244285583496, + "logps/chosen": -230.9320068359375, + "logps/rejected": -183.05909729003906, + "loss": 1.0844, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.781024932861328, + "rewards/margins": 1.2147161960601807, + "rewards/rejected": -7.995741367340088, + "step": 11099 + }, + { + "epoch": 1.73, + "learning_rate": 6.0061445095699985e-06, + "logits/chosen": -2.918485164642334, + "logits/rejected": -2.092095375061035, + "logps/chosen": -336.9871520996094, + "logps/rejected": -254.73873901367188, + "loss": 1.3319, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.553438663482666, + "rewards/margins": 1.5832850933074951, + "rewards/rejected": -8.136723518371582, + "step": 11100 + }, + { + "epoch": 1.73, + "learning_rate": 6.00541106903885e-06, + "logits/chosen": -2.612022876739502, + "logits/rejected": -3.1040403842926025, + "logps/chosen": -87.08805084228516, + "logps/rejected": -258.10211181640625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.406940937042236, + "rewards/margins": 6.126926422119141, + "rewards/rejected": -12.533866882324219, + "step": 11101 + }, + { + "epoch": 1.73, + "learning_rate": 6.004677628507702e-06, + "logits/chosen": -2.9583518505096436, + "logits/rejected": -2.4337220191955566, + "logps/chosen": -899.778076171875, + "logps/rejected": -657.6900634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8961212635040283, + "rewards/margins": 10.870975494384766, + "rewards/rejected": -12.767097473144531, + "step": 11102 + }, + { + "epoch": 1.73, + "learning_rate": 6.003944187976554e-06, + "logits/chosen": -2.580522060394287, + "logits/rejected": -2.4110147953033447, + "logps/chosen": -353.23785400390625, + "logps/rejected": -270.3262939453125, + "loss": 0.9624, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.90848445892334, + "rewards/margins": 5.733983993530273, + "rewards/rejected": -10.642468452453613, + "step": 11103 + }, + { + "epoch": 1.73, + "learning_rate": 6.003210747445407e-06, + "logits/chosen": -2.67970871925354, + "logits/rejected": -2.324251413345337, + "logps/chosen": -196.0738983154297, + "logps/rejected": -141.79786682128906, + "loss": 0.9448, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.395472526550293, + "rewards/margins": 2.4071357250213623, + "rewards/rejected": -8.802608489990234, + "step": 11104 + }, + { + "epoch": 1.73, + "learning_rate": 6.00247730691426e-06, + "logits/chosen": -3.0638270378112793, + "logits/rejected": -3.1104044914245605, + "logps/chosen": -135.3234405517578, + "logps/rejected": -355.309326171875, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.36937952041626, + "rewards/margins": 5.529963970184326, + "rewards/rejected": -11.899343490600586, + "step": 11105 + }, + { + "epoch": 1.73, + "learning_rate": 6.0017438663831115e-06, + "logits/chosen": -3.1183788776397705, + "logits/rejected": -1.897239089012146, + "logps/chosen": -544.2798461914062, + "logps/rejected": -341.8935546875, + "loss": 1.7626, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.437051773071289, + "rewards/margins": 1.587799072265625, + "rewards/rejected": -9.024850845336914, + "step": 11106 + }, + { + "epoch": 1.73, + "learning_rate": 6.001010425851963e-06, + "logits/chosen": -2.2851319313049316, + "logits/rejected": -2.444819211959839, + "logps/chosen": -170.19677734375, + "logps/rejected": -462.90802001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.550848007202148, + "rewards/margins": 15.20071029663086, + "rewards/rejected": -19.751558303833008, + "step": 11107 + }, + { + "epoch": 1.73, + "learning_rate": 6.000276985320816e-06, + "logits/chosen": -1.8875188827514648, + "logits/rejected": -2.657773494720459, + "logps/chosen": -384.329833984375, + "logps/rejected": -380.5335693359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.302391052246094, + "rewards/margins": 9.43565845489502, + "rewards/rejected": -13.738049507141113, + "step": 11108 + }, + { + "epoch": 1.73, + "learning_rate": 5.999543544789668e-06, + "logits/chosen": -2.5093488693237305, + "logits/rejected": -2.965080738067627, + "logps/chosen": -321.1034240722656, + "logps/rejected": -360.61419677734375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.37476110458374, + "rewards/margins": 6.322865009307861, + "rewards/rejected": -10.697626113891602, + "step": 11109 + }, + { + "epoch": 1.73, + "learning_rate": 5.99881010425852e-06, + "logits/chosen": -1.4493099451065063, + "logits/rejected": -2.430274248123169, + "logps/chosen": -137.897705078125, + "logps/rejected": -372.51165771484375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.722430229187012, + "rewards/margins": 7.97431755065918, + "rewards/rejected": -15.696747779846191, + "step": 11110 + }, + { + "epoch": 1.73, + "learning_rate": 5.998076663727372e-06, + "logits/chosen": -2.7414112091064453, + "logits/rejected": -2.984684705734253, + "logps/chosen": -254.35931396484375, + "logps/rejected": -377.36651611328125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.280839920043945, + "rewards/margins": 7.598287582397461, + "rewards/rejected": -11.879127502441406, + "step": 11111 + }, + { + "epoch": 1.73, + "learning_rate": 5.997343223196224e-06, + "logits/chosen": -2.335691213607788, + "logits/rejected": -2.6836345195770264, + "logps/chosen": -249.571533203125, + "logps/rejected": -414.11822509765625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.422325134277344, + "rewards/margins": 12.137534141540527, + "rewards/rejected": -16.559860229492188, + "step": 11112 + }, + { + "epoch": 1.73, + "learning_rate": 5.996609782665076e-06, + "logits/chosen": -3.0410614013671875, + "logits/rejected": -2.3344595432281494, + "logps/chosen": -139.2093963623047, + "logps/rejected": -163.93312072753906, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.01149320602417, + "rewards/margins": 7.2672553062438965, + "rewards/rejected": -10.278748512268066, + "step": 11113 + }, + { + "epoch": 1.73, + "learning_rate": 5.995876342133928e-06, + "logits/chosen": -2.780498743057251, + "logits/rejected": -2.87216854095459, + "logps/chosen": -525.2539672851562, + "logps/rejected": -809.1912841796875, + "loss": 0.4558, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.686800003051758, + "rewards/margins": 4.336508750915527, + "rewards/rejected": -10.023308753967285, + "step": 11114 + }, + { + "epoch": 1.73, + "learning_rate": 5.99514290160278e-06, + "logits/chosen": -0.6747778058052063, + "logits/rejected": -2.4878082275390625, + "logps/chosen": -162.1376190185547, + "logps/rejected": -725.729248046875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.895948886871338, + "rewards/margins": 15.124822616577148, + "rewards/rejected": -21.020771026611328, + "step": 11115 + }, + { + "epoch": 1.73, + "learning_rate": 5.994409461071632e-06, + "logits/chosen": -2.9597702026367188, + "logits/rejected": -1.8247684240341187, + "logps/chosen": -350.75372314453125, + "logps/rejected": -150.49874877929688, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2059037685394287, + "rewards/margins": 12.415184020996094, + "rewards/rejected": -12.209280014038086, + "step": 11116 + }, + { + "epoch": 1.73, + "learning_rate": 5.993676020540485e-06, + "logits/chosen": -2.8160877227783203, + "logits/rejected": -2.418242931365967, + "logps/chosen": -372.82025146484375, + "logps/rejected": -510.036376953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.754802227020264, + "rewards/margins": 8.122310638427734, + "rewards/rejected": -14.87711238861084, + "step": 11117 + }, + { + "epoch": 1.73, + "learning_rate": 5.9929425800093365e-06, + "logits/chosen": -2.2521350383758545, + "logits/rejected": -2.7276244163513184, + "logps/chosen": -110.13316345214844, + "logps/rejected": -386.14337158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.117140293121338, + "rewards/margins": 10.91425895690918, + "rewards/rejected": -14.03139877319336, + "step": 11118 + }, + { + "epoch": 1.73, + "learning_rate": 5.9922091394781884e-06, + "logits/chosen": -1.390926718711853, + "logits/rejected": -2.942079782485962, + "logps/chosen": -171.7775115966797, + "logps/rejected": -367.12646484375, + "loss": 0.1749, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.66787576675415, + "rewards/margins": 4.3176679611206055, + "rewards/rejected": -9.985544204711914, + "step": 11119 + }, + { + "epoch": 1.73, + "learning_rate": 5.99147569894704e-06, + "logits/chosen": -1.7634031772613525, + "logits/rejected": -2.612029790878296, + "logps/chosen": -138.33570861816406, + "logps/rejected": -477.6681823730469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.095000982284546, + "rewards/margins": 12.902056694030762, + "rewards/rejected": -15.99705696105957, + "step": 11120 + }, + { + "epoch": 1.73, + "learning_rate": 5.990742258415893e-06, + "logits/chosen": -2.9425852298736572, + "logits/rejected": -2.3687212467193604, + "logps/chosen": -254.709228515625, + "logps/rejected": -200.04489135742188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2955238819122314, + "rewards/margins": 9.467903137207031, + "rewards/rejected": -11.763427734375, + "step": 11121 + }, + { + "epoch": 1.73, + "learning_rate": 5.990008817884746e-06, + "logits/chosen": -2.8342535495758057, + "logits/rejected": -3.0102126598358154, + "logps/chosen": -523.73876953125, + "logps/rejected": -545.4954833984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.640796661376953, + "rewards/margins": 7.713708877563477, + "rewards/rejected": -12.35450553894043, + "step": 11122 + }, + { + "epoch": 1.73, + "learning_rate": 5.989275377353598e-06, + "logits/chosen": -3.1346287727355957, + "logits/rejected": -2.8459365367889404, + "logps/chosen": -147.4375, + "logps/rejected": -431.7373046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.111650466918945, + "rewards/margins": 12.214776039123535, + "rewards/rejected": -17.326427459716797, + "step": 11123 + }, + { + "epoch": 1.73, + "learning_rate": 5.9885419368224495e-06, + "logits/chosen": -2.7459843158721924, + "logits/rejected": -2.5286924839019775, + "logps/chosen": -473.28350830078125, + "logps/rejected": -484.68505859375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.750912666320801, + "rewards/margins": 8.065879821777344, + "rewards/rejected": -12.816792488098145, + "step": 11124 + }, + { + "epoch": 1.73, + "learning_rate": 5.987808496291301e-06, + "logits/chosen": -2.598923683166504, + "logits/rejected": -3.0315964221954346, + "logps/chosen": -284.18548583984375, + "logps/rejected": -270.04266357421875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.409945011138916, + "rewards/margins": 5.562828540802002, + "rewards/rejected": -10.972773551940918, + "step": 11125 + }, + { + "epoch": 1.73, + "learning_rate": 5.987075055760154e-06, + "logits/chosen": -2.176090717315674, + "logits/rejected": -2.9300730228424072, + "logps/chosen": -374.3503723144531, + "logps/rejected": -507.02880859375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.315110206604004, + "rewards/margins": 6.787120819091797, + "rewards/rejected": -13.1022310256958, + "step": 11126 + }, + { + "epoch": 1.73, + "learning_rate": 5.986341615229006e-06, + "logits/chosen": -2.7230522632598877, + "logits/rejected": -3.0492753982543945, + "logps/chosen": -136.31756591796875, + "logps/rejected": -397.84722900390625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.393162727355957, + "rewards/margins": 9.961126327514648, + "rewards/rejected": -13.354289054870605, + "step": 11127 + }, + { + "epoch": 1.73, + "learning_rate": 5.985608174697858e-06, + "logits/chosen": -2.5093727111816406, + "logits/rejected": -2.9949634075164795, + "logps/chosen": -387.8419189453125, + "logps/rejected": -421.110595703125, + "loss": 0.1248, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.258505344390869, + "rewards/margins": 7.45552396774292, + "rewards/rejected": -12.714029312133789, + "step": 11128 + }, + { + "epoch": 1.73, + "learning_rate": 5.98487473416671e-06, + "logits/chosen": -2.7630043029785156, + "logits/rejected": -3.0784406661987305, + "logps/chosen": -228.77548217773438, + "logps/rejected": -241.92431640625, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.502960205078125, + "rewards/margins": 6.835434913635254, + "rewards/rejected": -12.338395118713379, + "step": 11129 + }, + { + "epoch": 1.73, + "learning_rate": 5.9841412936355625e-06, + "logits/chosen": -2.9625656604766846, + "logits/rejected": -2.5171964168548584, + "logps/chosen": -554.288330078125, + "logps/rejected": -498.98931884765625, + "loss": 0.1148, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.758907318115234, + "rewards/margins": 5.9916863441467285, + "rewards/rejected": -10.750593185424805, + "step": 11130 + }, + { + "epoch": 1.73, + "learning_rate": 5.983407853104414e-06, + "logits/chosen": -2.4336302280426025, + "logits/rejected": -2.8068244457244873, + "logps/chosen": -140.51370239257812, + "logps/rejected": -303.24822998046875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.21978235244751, + "rewards/margins": 5.920365333557129, + "rewards/rejected": -10.14014720916748, + "step": 11131 + }, + { + "epoch": 1.73, + "learning_rate": 5.982674412573266e-06, + "logits/chosen": -3.043184995651245, + "logits/rejected": -2.6805102825164795, + "logps/chosen": -144.49880981445312, + "logps/rejected": -169.1918487548828, + "loss": 0.2268, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.335712432861328, + "rewards/margins": 4.136826515197754, + "rewards/rejected": -12.472539901733398, + "step": 11132 + }, + { + "epoch": 1.73, + "learning_rate": 5.981940972042118e-06, + "logits/chosen": -3.0211868286132812, + "logits/rejected": -2.981426239013672, + "logps/chosen": -87.85081481933594, + "logps/rejected": -243.526123046875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3534255027771, + "rewards/margins": 6.276000022888184, + "rewards/rejected": -10.629425048828125, + "step": 11133 + }, + { + "epoch": 1.73, + "learning_rate": 5.98120753151097e-06, + "logits/chosen": -2.146285057067871, + "logits/rejected": -2.971266508102417, + "logps/chosen": -472.56719970703125, + "logps/rejected": -515.1253662109375, + "loss": 1.5096, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.895310401916504, + "rewards/margins": 2.6382198333740234, + "rewards/rejected": -9.533530235290527, + "step": 11134 + }, + { + "epoch": 1.73, + "learning_rate": 5.980474090979823e-06, + "logits/chosen": -0.5741265416145325, + "logits/rejected": -2.716007709503174, + "logps/chosen": -147.32308959960938, + "logps/rejected": -453.28753662109375, + "loss": 1.257, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.483912467956543, + "rewards/margins": 2.7991995811462402, + "rewards/rejected": -12.283112525939941, + "step": 11135 + }, + { + "epoch": 1.73, + "learning_rate": 5.979740650448675e-06, + "logits/chosen": -2.334679365158081, + "logits/rejected": -2.895587682723999, + "logps/chosen": -186.10800170898438, + "logps/rejected": -317.2962646484375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.027154922485352, + "rewards/margins": 6.348820686340332, + "rewards/rejected": -12.375974655151367, + "step": 11136 + }, + { + "epoch": 1.73, + "learning_rate": 5.9790072099175265e-06, + "logits/chosen": -2.4351279735565186, + "logits/rejected": -2.850818157196045, + "logps/chosen": -269.3590393066406, + "logps/rejected": -315.09954833984375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.104445457458496, + "rewards/margins": 9.85736083984375, + "rewards/rejected": -16.961807250976562, + "step": 11137 + }, + { + "epoch": 1.73, + "learning_rate": 5.978273769386379e-06, + "logits/chosen": -2.267857313156128, + "logits/rejected": -2.565333127975464, + "logps/chosen": -340.27294921875, + "logps/rejected": -434.4675598144531, + "loss": 0.6278, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.385576248168945, + "rewards/margins": 8.586348533630371, + "rewards/rejected": -17.971923828125, + "step": 11138 + }, + { + "epoch": 1.73, + "learning_rate": 5.977540328855232e-06, + "logits/chosen": -2.811007022857666, + "logits/rejected": -2.9229111671447754, + "logps/chosen": -341.664306640625, + "logps/rejected": -334.5560302734375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.531094551086426, + "rewards/margins": 7.568236351013184, + "rewards/rejected": -12.09933090209961, + "step": 11139 + }, + { + "epoch": 1.73, + "learning_rate": 5.976806888324084e-06, + "logits/chosen": -3.0484073162078857, + "logits/rejected": -2.523167371749878, + "logps/chosen": -225.493408203125, + "logps/rejected": -243.8284912109375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.062602996826172, + "rewards/margins": 7.6878342628479, + "rewards/rejected": -10.750436782836914, + "step": 11140 + }, + { + "epoch": 1.73, + "learning_rate": 5.976073447792936e-06, + "logits/chosen": -0.8971444964408875, + "logits/rejected": -2.707883596420288, + "logps/chosen": -144.0210723876953, + "logps/rejected": -455.07354736328125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.017370223999023, + "rewards/margins": 8.816656112670898, + "rewards/rejected": -15.834025382995605, + "step": 11141 + }, + { + "epoch": 1.73, + "learning_rate": 5.9753400072617876e-06, + "logits/chosen": -2.218263864517212, + "logits/rejected": -2.802440643310547, + "logps/chosen": -369.76141357421875, + "logps/rejected": -409.4841613769531, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.067675828933716, + "rewards/margins": 7.555662155151367, + "rewards/rejected": -10.623337745666504, + "step": 11142 + }, + { + "epoch": 1.73, + "learning_rate": 5.9746065667306394e-06, + "logits/chosen": -2.4055869579315186, + "logits/rejected": -2.662052869796753, + "logps/chosen": -199.304443359375, + "logps/rejected": -285.14825439453125, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.625044345855713, + "rewards/margins": 3.9876580238342285, + "rewards/rejected": -8.612702369689941, + "step": 11143 + }, + { + "epoch": 1.73, + "learning_rate": 5.973873126199492e-06, + "logits/chosen": -2.3419101238250732, + "logits/rejected": -2.8012125492095947, + "logps/chosen": -186.55995178222656, + "logps/rejected": -221.83901977539062, + "loss": 0.3032, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.603841304779053, + "rewards/margins": 3.9014265537261963, + "rewards/rejected": -8.505268096923828, + "step": 11144 + }, + { + "epoch": 1.73, + "learning_rate": 5.973139685668344e-06, + "logits/chosen": -2.8380370140075684, + "logits/rejected": -3.0319128036499023, + "logps/chosen": -249.81390380859375, + "logps/rejected": -463.254638671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6797499656677246, + "rewards/margins": 10.59133529663086, + "rewards/rejected": -13.271084785461426, + "step": 11145 + }, + { + "epoch": 1.73, + "learning_rate": 5.972406245137196e-06, + "logits/chosen": -2.8813698291778564, + "logits/rejected": -1.4089922904968262, + "logps/chosen": -507.9084777832031, + "logps/rejected": -334.2091064453125, + "loss": 1.204, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.554120063781738, + "rewards/margins": 4.334482192993164, + "rewards/rejected": -10.888602256774902, + "step": 11146 + }, + { + "epoch": 1.73, + "learning_rate": 5.971672804606048e-06, + "logits/chosen": -2.63624906539917, + "logits/rejected": -2.830744981765747, + "logps/chosen": -403.98150634765625, + "logps/rejected": -541.6019897460938, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.128779649734497, + "rewards/margins": 8.475323677062988, + "rewards/rejected": -11.604103088378906, + "step": 11147 + }, + { + "epoch": 1.73, + "learning_rate": 5.9709393640749005e-06, + "logits/chosen": -2.493764877319336, + "logits/rejected": -2.699547290802002, + "logps/chosen": -170.89141845703125, + "logps/rejected": -236.6302032470703, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.351980686187744, + "rewards/margins": 5.914580821990967, + "rewards/rejected": -9.266561508178711, + "step": 11148 + }, + { + "epoch": 1.73, + "learning_rate": 5.970205923543752e-06, + "logits/chosen": -1.1742037534713745, + "logits/rejected": -1.4437092542648315, + "logps/chosen": -275.0599365234375, + "logps/rejected": -204.4364776611328, + "loss": 0.1206, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.696558475494385, + "rewards/margins": 2.057009220123291, + "rewards/rejected": -7.753567695617676, + "step": 11149 + }, + { + "epoch": 1.73, + "learning_rate": 5.969472483012604e-06, + "logits/chosen": -2.6490278244018555, + "logits/rejected": -1.6343045234680176, + "logps/chosen": -125.97491455078125, + "logps/rejected": -172.5664825439453, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.704945087432861, + "rewards/margins": 7.244030952453613, + "rewards/rejected": -13.948976516723633, + "step": 11150 + }, + { + "epoch": 1.73, + "learning_rate": 5.968739042481456e-06, + "logits/chosen": -1.2875406742095947, + "logits/rejected": -2.865919351577759, + "logps/chosen": -80.78999328613281, + "logps/rejected": -319.53680419921875, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.593207836151123, + "rewards/margins": 6.102208614349365, + "rewards/rejected": -10.695416450500488, + "step": 11151 + }, + { + "epoch": 1.73, + "learning_rate": 5.968005601950308e-06, + "logits/chosen": -1.4796773195266724, + "logits/rejected": -2.648191213607788, + "logps/chosen": -216.44158935546875, + "logps/rejected": -464.67657470703125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.047286033630371, + "rewards/margins": 8.445343017578125, + "rewards/rejected": -16.49262809753418, + "step": 11152 + }, + { + "epoch": 1.73, + "learning_rate": 5.967272161419161e-06, + "logits/chosen": -2.621920347213745, + "logits/rejected": -3.099148750305176, + "logps/chosen": -147.51226806640625, + "logps/rejected": -303.9171142578125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.963271141052246, + "rewards/margins": 6.140189170837402, + "rewards/rejected": -11.103460311889648, + "step": 11153 + }, + { + "epoch": 1.73, + "learning_rate": 5.966538720888013e-06, + "logits/chosen": -2.543179988861084, + "logits/rejected": -2.365553617477417, + "logps/chosen": -101.07077026367188, + "logps/rejected": -290.5845031738281, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.902350902557373, + "rewards/margins": 8.06550407409668, + "rewards/rejected": -11.967855453491211, + "step": 11154 + }, + { + "epoch": 1.73, + "learning_rate": 5.965805280356865e-06, + "logits/chosen": -2.5818817615509033, + "logits/rejected": -2.465074062347412, + "logps/chosen": -123.75337219238281, + "logps/rejected": -162.93829345703125, + "loss": 0.3312, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.462495803833008, + "rewards/margins": 2.4703025817871094, + "rewards/rejected": -9.932798385620117, + "step": 11155 + }, + { + "epoch": 1.73, + "learning_rate": 5.965071839825717e-06, + "logits/chosen": -2.856804847717285, + "logits/rejected": -2.2908997535705566, + "logps/chosen": -587.05322265625, + "logps/rejected": -494.13189697265625, + "loss": 0.0277, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.05928373336792, + "rewards/margins": 4.048875331878662, + "rewards/rejected": -11.108159065246582, + "step": 11156 + }, + { + "epoch": 1.74, + "learning_rate": 5.96433839929457e-06, + "logits/chosen": -1.4874199628829956, + "logits/rejected": -2.5280535221099854, + "logps/chosen": -142.79798889160156, + "logps/rejected": -342.4820556640625, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.03785514831543, + "rewards/margins": 5.81698751449585, + "rewards/rejected": -12.854843139648438, + "step": 11157 + }, + { + "epoch": 1.74, + "learning_rate": 5.963604958763422e-06, + "logits/chosen": -2.9120404720306396, + "logits/rejected": -2.750317335128784, + "logps/chosen": -423.00701904296875, + "logps/rejected": -422.0739440917969, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.025552272796631, + "rewards/margins": 6.69218111038208, + "rewards/rejected": -11.717733383178711, + "step": 11158 + }, + { + "epoch": 1.74, + "learning_rate": 5.962871518232274e-06, + "logits/chosen": -2.7807977199554443, + "logits/rejected": -3.0329041481018066, + "logps/chosen": -285.7655334472656, + "logps/rejected": -454.8191833496094, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2312827110290527, + "rewards/margins": 10.446019172668457, + "rewards/rejected": -12.677301406860352, + "step": 11159 + }, + { + "epoch": 1.74, + "learning_rate": 5.962138077701126e-06, + "logits/chosen": -2.1381123065948486, + "logits/rejected": -3.0819571018218994, + "logps/chosen": -174.18704223632812, + "logps/rejected": -672.5651245117188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6989641189575195, + "rewards/margins": 9.170714378356934, + "rewards/rejected": -15.869678497314453, + "step": 11160 + }, + { + "epoch": 1.74, + "learning_rate": 5.9614046371699775e-06, + "logits/chosen": -2.7514867782592773, + "logits/rejected": -2.426842451095581, + "logps/chosen": -230.83692932128906, + "logps/rejected": -313.7908020019531, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.698653221130371, + "rewards/margins": 6.069295883178711, + "rewards/rejected": -9.767949104309082, + "step": 11161 + }, + { + "epoch": 1.74, + "learning_rate": 5.96067119663883e-06, + "logits/chosen": -2.568964958190918, + "logits/rejected": -2.701019763946533, + "logps/chosen": -144.0795135498047, + "logps/rejected": -221.1492919921875, + "loss": 0.6255, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.94242525100708, + "rewards/margins": 4.298094749450684, + "rewards/rejected": -11.240520477294922, + "step": 11162 + }, + { + "epoch": 1.74, + "learning_rate": 5.959937756107682e-06, + "logits/chosen": -2.8440792560577393, + "logits/rejected": -2.758260726928711, + "logps/chosen": -428.0899658203125, + "logps/rejected": -484.01763916015625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8274431228637695, + "rewards/margins": 6.649444580078125, + "rewards/rejected": -10.476887702941895, + "step": 11163 + }, + { + "epoch": 1.74, + "learning_rate": 5.959204315576534e-06, + "logits/chosen": -3.133410930633545, + "logits/rejected": -2.8867976665496826, + "logps/chosen": -425.9695739746094, + "logps/rejected": -417.7713317871094, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2906570434570312, + "rewards/margins": 7.488434314727783, + "rewards/rejected": -10.779090881347656, + "step": 11164 + }, + { + "epoch": 1.74, + "learning_rate": 5.958470875045386e-06, + "logits/chosen": -2.764603853225708, + "logits/rejected": -3.018540859222412, + "logps/chosen": -181.54043579101562, + "logps/rejected": -267.4735107421875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.061392307281494, + "rewards/margins": 6.689379692077637, + "rewards/rejected": -10.750772476196289, + "step": 11165 + }, + { + "epoch": 1.74, + "learning_rate": 5.9577374345142386e-06, + "logits/chosen": -2.727682590484619, + "logits/rejected": -2.7048752307891846, + "logps/chosen": -153.3286590576172, + "logps/rejected": -259.0887145996094, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.940952301025391, + "rewards/margins": 5.174293518066406, + "rewards/rejected": -10.115245819091797, + "step": 11166 + }, + { + "epoch": 1.74, + "learning_rate": 5.9570039939830904e-06, + "logits/chosen": -2.2850332260131836, + "logits/rejected": -2.625248908996582, + "logps/chosen": -258.1236877441406, + "logps/rejected": -336.0421447753906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.286615014076233, + "rewards/margins": 12.953054428100586, + "rewards/rejected": -14.239668846130371, + "step": 11167 + }, + { + "epoch": 1.74, + "learning_rate": 5.956270553451942e-06, + "logits/chosen": -3.006865978240967, + "logits/rejected": -2.7121357917785645, + "logps/chosen": -198.64816284179688, + "logps/rejected": -187.0764923095703, + "loss": 0.8496, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.882070541381836, + "rewards/margins": 2.980855703353882, + "rewards/rejected": -8.862926483154297, + "step": 11168 + }, + { + "epoch": 1.74, + "learning_rate": 5.955537112920794e-06, + "logits/chosen": -2.9540092945098877, + "logits/rejected": -1.522202491760254, + "logps/chosen": -403.9021911621094, + "logps/rejected": -243.75247192382812, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0685224533081055, + "rewards/margins": 4.569190979003906, + "rewards/rejected": -10.637713432312012, + "step": 11169 + }, + { + "epoch": 1.74, + "learning_rate": 5.954803672389646e-06, + "logits/chosen": -3.151401996612549, + "logits/rejected": -2.0499918460845947, + "logps/chosen": -556.85546875, + "logps/rejected": -343.9067687988281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9823060035705566, + "rewards/margins": 9.74339771270752, + "rewards/rejected": -13.725703239440918, + "step": 11170 + }, + { + "epoch": 1.74, + "learning_rate": 5.954070231858499e-06, + "logits/chosen": -2.6964869499206543, + "logits/rejected": -2.83316707611084, + "logps/chosen": -468.9097900390625, + "logps/rejected": -527.7350463867188, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.734388828277588, + "rewards/margins": 8.245019912719727, + "rewards/rejected": -12.979408264160156, + "step": 11171 + }, + { + "epoch": 1.74, + "learning_rate": 5.953336791327351e-06, + "logits/chosen": -2.7276456356048584, + "logits/rejected": -2.9543633460998535, + "logps/chosen": -236.27499389648438, + "logps/rejected": -295.87603759765625, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.982439994812012, + "rewards/margins": 4.090402603149414, + "rewards/rejected": -10.072842597961426, + "step": 11172 + }, + { + "epoch": 1.74, + "learning_rate": 5.952603350796203e-06, + "logits/chosen": -1.8304249048233032, + "logits/rejected": -2.7153990268707275, + "logps/chosen": -114.2553939819336, + "logps/rejected": -411.5322570800781, + "loss": 0.096, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.868941307067871, + "rewards/margins": 4.920233726501465, + "rewards/rejected": -9.789175033569336, + "step": 11173 + }, + { + "epoch": 1.74, + "learning_rate": 5.951869910265055e-06, + "logits/chosen": -1.9568225145339966, + "logits/rejected": -2.7343180179595947, + "logps/chosen": -91.14530944824219, + "logps/rejected": -389.2677001953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.200831413269043, + "rewards/margins": 10.422836303710938, + "rewards/rejected": -13.623666763305664, + "step": 11174 + }, + { + "epoch": 1.74, + "learning_rate": 5.951136469733908e-06, + "logits/chosen": -2.9348061084747314, + "logits/rejected": -3.139554738998413, + "logps/chosen": -64.29048156738281, + "logps/rejected": -177.60302734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.869745254516602, + "rewards/margins": 7.6364641189575195, + "rewards/rejected": -12.506208419799805, + "step": 11175 + }, + { + "epoch": 1.74, + "learning_rate": 5.95040302920276e-06, + "logits/chosen": -2.6874935626983643, + "logits/rejected": -2.737062931060791, + "logps/chosen": -401.3232421875, + "logps/rejected": -407.3419189453125, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.345624923706055, + "rewards/margins": 4.944150924682617, + "rewards/rejected": -11.289775848388672, + "step": 11176 + }, + { + "epoch": 1.74, + "learning_rate": 5.949669588671612e-06, + "logits/chosen": -2.9453718662261963, + "logits/rejected": -2.847209930419922, + "logps/chosen": -120.34788513183594, + "logps/rejected": -137.83047485351562, + "loss": 0.1566, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.235128402709961, + "rewards/margins": 3.7304344177246094, + "rewards/rejected": -9.96556282043457, + "step": 11177 + }, + { + "epoch": 1.74, + "learning_rate": 5.948936148140464e-06, + "logits/chosen": -2.6924521923065186, + "logits/rejected": -3.0370943546295166, + "logps/chosen": -122.07267761230469, + "logps/rejected": -245.75933837890625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.475681304931641, + "rewards/margins": 6.448126792907715, + "rewards/rejected": -10.923807144165039, + "step": 11178 + }, + { + "epoch": 1.74, + "learning_rate": 5.948202707609316e-06, + "logits/chosen": -1.6894807815551758, + "logits/rejected": -2.8646230697631836, + "logps/chosen": -254.400390625, + "logps/rejected": -534.385498046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.166017532348633, + "rewards/margins": 8.585182189941406, + "rewards/rejected": -13.751199722290039, + "step": 11179 + }, + { + "epoch": 1.74, + "learning_rate": 5.947469267078168e-06, + "logits/chosen": -2.5693821907043457, + "logits/rejected": -2.690413475036621, + "logps/chosen": -208.37867736816406, + "logps/rejected": -240.09854125976562, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7895426750183105, + "rewards/margins": 6.746088981628418, + "rewards/rejected": -10.53563117980957, + "step": 11180 + }, + { + "epoch": 1.74, + "learning_rate": 5.94673582654702e-06, + "logits/chosen": -2.4972710609436035, + "logits/rejected": -2.521007537841797, + "logps/chosen": -118.56314086914062, + "logps/rejected": -541.3724365234375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.343835353851318, + "rewards/margins": 7.411227226257324, + "rewards/rejected": -13.7550630569458, + "step": 11181 + }, + { + "epoch": 1.74, + "learning_rate": 5.946002386015872e-06, + "logits/chosen": -2.726208448410034, + "logits/rejected": -0.8568766713142395, + "logps/chosen": -320.7012939453125, + "logps/rejected": -243.01080322265625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.198372840881348, + "rewards/margins": 6.364718437194824, + "rewards/rejected": -11.563091278076172, + "step": 11182 + }, + { + "epoch": 1.74, + "learning_rate": 5.945268945484724e-06, + "logits/chosen": -2.702730417251587, + "logits/rejected": -2.903254747390747, + "logps/chosen": -341.06072998046875, + "logps/rejected": -574.7321166992188, + "loss": 0.0815, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.751651763916016, + "rewards/margins": 2.4673044681549072, + "rewards/rejected": -9.218955993652344, + "step": 11183 + }, + { + "epoch": 1.74, + "learning_rate": 5.944535504953577e-06, + "logits/chosen": -3.080094337463379, + "logits/rejected": -2.8566880226135254, + "logps/chosen": -150.74896240234375, + "logps/rejected": -145.09515380859375, + "loss": 2.1041, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.248617649078369, + "rewards/margins": 1.649855136871338, + "rewards/rejected": -6.898472785949707, + "step": 11184 + }, + { + "epoch": 1.74, + "learning_rate": 5.9438020644224285e-06, + "logits/chosen": -2.8089683055877686, + "logits/rejected": -2.186904191970825, + "logps/chosen": -216.23670959472656, + "logps/rejected": -227.17352294921875, + "loss": 1.4163, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.991682529449463, + "rewards/margins": 3.1006112098693848, + "rewards/rejected": -9.092293739318848, + "step": 11185 + }, + { + "epoch": 1.74, + "learning_rate": 5.94306862389128e-06, + "logits/chosen": -2.044715642929077, + "logits/rejected": -3.0340640544891357, + "logps/chosen": -150.89146423339844, + "logps/rejected": -663.457763671875, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.369405746459961, + "rewards/margins": 9.14120864868164, + "rewards/rejected": -12.510614395141602, + "step": 11186 + }, + { + "epoch": 1.74, + "learning_rate": 5.942335183360132e-06, + "logits/chosen": -2.636498212814331, + "logits/rejected": -2.3654227256774902, + "logps/chosen": -256.92926025390625, + "logps/rejected": -397.6147155761719, + "loss": 1.7179, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.314896583557129, + "rewards/margins": 2.0483815670013428, + "rewards/rejected": -11.36327838897705, + "step": 11187 + }, + { + "epoch": 1.74, + "learning_rate": 5.941601742828985e-06, + "logits/chosen": -1.43170964717865, + "logits/rejected": -2.714207649230957, + "logps/chosen": -250.82362365722656, + "logps/rejected": -398.0379638671875, + "loss": 0.7069, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.241815567016602, + "rewards/margins": 3.7213337421417236, + "rewards/rejected": -8.963149070739746, + "step": 11188 + }, + { + "epoch": 1.74, + "learning_rate": 5.940868302297837e-06, + "logits/chosen": -2.5617666244506836, + "logits/rejected": -3.0106394290924072, + "logps/chosen": -66.93663024902344, + "logps/rejected": -191.9984130859375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.159818172454834, + "rewards/margins": 6.252893447875977, + "rewards/rejected": -10.412712097167969, + "step": 11189 + }, + { + "epoch": 1.74, + "learning_rate": 5.9401348617666896e-06, + "logits/chosen": -2.9587929248809814, + "logits/rejected": -2.1316471099853516, + "logps/chosen": -257.6514892578125, + "logps/rejected": -175.38490295410156, + "loss": 0.8229, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.671377658843994, + "rewards/margins": 1.5900993347167969, + "rewards/rejected": -7.261476993560791, + "step": 11190 + }, + { + "epoch": 1.74, + "learning_rate": 5.9394014212355415e-06, + "logits/chosen": -0.5733896493911743, + "logits/rejected": -2.0000457763671875, + "logps/chosen": -159.96514892578125, + "logps/rejected": -445.3504943847656, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4401702880859375, + "rewards/margins": 14.530925750732422, + "rewards/rejected": -19.97109603881836, + "step": 11191 + }, + { + "epoch": 1.74, + "learning_rate": 5.938667980704393e-06, + "logits/chosen": -2.5317494869232178, + "logits/rejected": -1.2891544103622437, + "logps/chosen": -324.8892822265625, + "logps/rejected": -296.15447998046875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.926397323608398, + "rewards/margins": 7.780116081237793, + "rewards/rejected": -12.706513404846191, + "step": 11192 + }, + { + "epoch": 1.74, + "learning_rate": 5.937934540173246e-06, + "logits/chosen": -2.366842269897461, + "logits/rejected": -2.7243130207061768, + "logps/chosen": -119.75407409667969, + "logps/rejected": -409.50433349609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.200867176055908, + "rewards/margins": 10.110803604125977, + "rewards/rejected": -13.311670303344727, + "step": 11193 + }, + { + "epoch": 1.74, + "learning_rate": 5.937201099642098e-06, + "logits/chosen": -3.0640597343444824, + "logits/rejected": -2.930821180343628, + "logps/chosen": -123.28832244873047, + "logps/rejected": -157.12002563476562, + "loss": 2.406, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.430522918701172, + "rewards/margins": 1.6354446411132812, + "rewards/rejected": -8.065966606140137, + "step": 11194 + }, + { + "epoch": 1.74, + "learning_rate": 5.93646765911095e-06, + "logits/chosen": -2.971017360687256, + "logits/rejected": -2.7307581901550293, + "logps/chosen": -148.27694702148438, + "logps/rejected": -220.5546875, + "loss": 0.3767, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.971234321594238, + "rewards/margins": 4.460879325866699, + "rewards/rejected": -9.432114601135254, + "step": 11195 + }, + { + "epoch": 1.74, + "learning_rate": 5.935734218579802e-06, + "logits/chosen": -2.5855231285095215, + "logits/rejected": -3.038283109664917, + "logps/chosen": -203.452392578125, + "logps/rejected": -425.4803771972656, + "loss": 0.0459, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.081841468811035, + "rewards/margins": 5.9406962394714355, + "rewards/rejected": -11.022537231445312, + "step": 11196 + }, + { + "epoch": 1.74, + "learning_rate": 5.935000778048654e-06, + "logits/chosen": -2.672062397003174, + "logits/rejected": -2.990051507949829, + "logps/chosen": -159.20855712890625, + "logps/rejected": -335.00872802734375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.874940395355225, + "rewards/margins": 7.7671918869018555, + "rewards/rejected": -14.642131805419922, + "step": 11197 + }, + { + "epoch": 1.74, + "learning_rate": 5.934267337517506e-06, + "logits/chosen": -2.347353458404541, + "logits/rejected": -3.1249582767486572, + "logps/chosen": -150.15419006347656, + "logps/rejected": -496.87445068359375, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.187448501586914, + "rewards/margins": 7.0570149421691895, + "rewards/rejected": -12.244462966918945, + "step": 11198 + }, + { + "epoch": 1.74, + "learning_rate": 5.933533896986358e-06, + "logits/chosen": -3.0314581394195557, + "logits/rejected": -2.9728810787200928, + "logps/chosen": -567.0110473632812, + "logps/rejected": -291.57861328125, + "loss": 1.4813, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.780845642089844, + "rewards/margins": 3.6108181476593018, + "rewards/rejected": -10.391664505004883, + "step": 11199 + }, + { + "epoch": 1.74, + "learning_rate": 5.93280045645521e-06, + "logits/chosen": -2.8635106086730957, + "logits/rejected": -2.047837495803833, + "logps/chosen": -403.9266357421875, + "logps/rejected": -321.55950927734375, + "loss": 1.5464, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.81949520111084, + "rewards/margins": -0.31796932220458984, + "rewards/rejected": -9.50152587890625, + "step": 11200 + }, + { + "epoch": 1.74, + "learning_rate": 5.932067015924062e-06, + "logits/chosen": -2.8416497707366943, + "logits/rejected": -2.9447762966156006, + "logps/chosen": -108.04019165039062, + "logps/rejected": -245.01197814941406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4578813314437866, + "rewards/margins": 8.453011512756348, + "rewards/rejected": -9.910892486572266, + "step": 11201 + }, + { + "epoch": 1.74, + "learning_rate": 5.931333575392915e-06, + "logits/chosen": -2.8963048458099365, + "logits/rejected": -3.1135149002075195, + "logps/chosen": -151.46456909179688, + "logps/rejected": -174.13916015625, + "loss": 2.3974, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.8599653244018555, + "rewards/margins": -0.650423526763916, + "rewards/rejected": -7.209542274475098, + "step": 11202 + }, + { + "epoch": 1.74, + "learning_rate": 5.9306001348617665e-06, + "logits/chosen": -2.1564764976501465, + "logits/rejected": -2.769298553466797, + "logps/chosen": -108.20700073242188, + "logps/rejected": -271.6817626953125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.223106384277344, + "rewards/margins": 7.413436412811279, + "rewards/rejected": -11.636543273925781, + "step": 11203 + }, + { + "epoch": 1.74, + "learning_rate": 5.929866694330618e-06, + "logits/chosen": -2.3432419300079346, + "logits/rejected": -2.6171209812164307, + "logps/chosen": -132.08419799804688, + "logps/rejected": -351.06060791015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.624178886413574, + "rewards/margins": 11.182422637939453, + "rewards/rejected": -16.806602478027344, + "step": 11204 + }, + { + "epoch": 1.74, + "learning_rate": 5.92913325379947e-06, + "logits/chosen": -1.537039041519165, + "logits/rejected": -2.79693341255188, + "logps/chosen": -467.15533447265625, + "logps/rejected": -730.1678466796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.811762809753418, + "rewards/margins": 10.209352493286133, + "rewards/rejected": -15.02111530303955, + "step": 11205 + }, + { + "epoch": 1.74, + "learning_rate": 5.928399813268323e-06, + "logits/chosen": -0.9221925139427185, + "logits/rejected": -2.3786840438842773, + "logps/chosen": -369.05755615234375, + "logps/rejected": -678.7251586914062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.732728481292725, + "rewards/margins": 11.078475952148438, + "rewards/rejected": -15.81120491027832, + "step": 11206 + }, + { + "epoch": 1.74, + "learning_rate": 5.927666372737176e-06, + "logits/chosen": -2.944683313369751, + "logits/rejected": -1.8505878448486328, + "logps/chosen": -380.0638122558594, + "logps/rejected": -293.8132629394531, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.126475811004639, + "rewards/margins": 5.257864952087402, + "rewards/rejected": -12.384340286254883, + "step": 11207 + }, + { + "epoch": 1.74, + "learning_rate": 5.926932932206028e-06, + "logits/chosen": -2.8360700607299805, + "logits/rejected": -3.1150927543640137, + "logps/chosen": -179.4269256591797, + "logps/rejected": -243.6800537109375, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.022162437438965, + "rewards/margins": 5.105998992919922, + "rewards/rejected": -9.128161430358887, + "step": 11208 + }, + { + "epoch": 1.74, + "learning_rate": 5.9261994916748795e-06, + "logits/chosen": -2.9131293296813965, + "logits/rejected": -2.199934482574463, + "logps/chosen": -448.6894836425781, + "logps/rejected": -320.921630859375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0854294300079346, + "rewards/margins": 7.622365951538086, + "rewards/rejected": -10.707795143127441, + "step": 11209 + }, + { + "epoch": 1.74, + "learning_rate": 5.925466051143731e-06, + "logits/chosen": -2.7084641456604004, + "logits/rejected": -1.3782883882522583, + "logps/chosen": -200.57191467285156, + "logps/rejected": -232.272216796875, + "loss": 1.173, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.342464447021484, + "rewards/margins": 3.590965747833252, + "rewards/rejected": -8.933429718017578, + "step": 11210 + }, + { + "epoch": 1.74, + "learning_rate": 5.924732610612584e-06, + "logits/chosen": -2.774691581726074, + "logits/rejected": -3.0421183109283447, + "logps/chosen": -197.1048126220703, + "logps/rejected": -359.28546142578125, + "loss": 0.0723, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.860692977905273, + "rewards/margins": 5.126702308654785, + "rewards/rejected": -9.987396240234375, + "step": 11211 + }, + { + "epoch": 1.74, + "learning_rate": 5.923999170081436e-06, + "logits/chosen": -3.0894250869750977, + "logits/rejected": -3.029176712036133, + "logps/chosen": -142.38377380371094, + "logps/rejected": -189.69747924804688, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.513844966888428, + "rewards/margins": 5.702859401702881, + "rewards/rejected": -11.216704368591309, + "step": 11212 + }, + { + "epoch": 1.74, + "learning_rate": 5.923265729550288e-06, + "logits/chosen": -3.0493648052215576, + "logits/rejected": -2.6231424808502197, + "logps/chosen": -541.6566162109375, + "logps/rejected": -255.78756713867188, + "loss": 0.7904, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.855569362640381, + "rewards/margins": 4.2136549949646, + "rewards/rejected": -10.06922435760498, + "step": 11213 + }, + { + "epoch": 1.74, + "learning_rate": 5.92253228901914e-06, + "logits/chosen": -2.4986822605133057, + "logits/rejected": -2.847968101501465, + "logps/chosen": -489.1942443847656, + "logps/rejected": -473.36505126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.498999834060669, + "rewards/margins": 11.875086784362793, + "rewards/rejected": -15.374086380004883, + "step": 11214 + }, + { + "epoch": 1.74, + "learning_rate": 5.9217988484879925e-06, + "logits/chosen": -2.9239892959594727, + "logits/rejected": -3.127650499343872, + "logps/chosen": -103.95552062988281, + "logps/rejected": -170.0493621826172, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.259875774383545, + "rewards/margins": 5.89048957824707, + "rewards/rejected": -9.150365829467773, + "step": 11215 + }, + { + "epoch": 1.74, + "learning_rate": 5.921065407956844e-06, + "logits/chosen": -1.5287762880325317, + "logits/rejected": -2.6952197551727295, + "logps/chosen": -170.88424682617188, + "logps/rejected": -315.10626220703125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.589259386062622, + "rewards/margins": 5.9896697998046875, + "rewards/rejected": -9.578929901123047, + "step": 11216 + }, + { + "epoch": 1.74, + "learning_rate": 5.920331967425696e-06, + "logits/chosen": -2.667834520339966, + "logits/rejected": -2.0057482719421387, + "logps/chosen": -251.55584716796875, + "logps/rejected": -205.70947265625, + "loss": 0.9531, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.2149658203125, + "rewards/margins": 1.442320704460144, + "rewards/rejected": -5.657286643981934, + "step": 11217 + }, + { + "epoch": 1.74, + "learning_rate": 5.919598526894548e-06, + "logits/chosen": -1.068591594696045, + "logits/rejected": -1.7402498722076416, + "logps/chosen": -352.11859130859375, + "logps/rejected": -517.0005493164062, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.663686752319336, + "rewards/margins": 8.093783378601074, + "rewards/rejected": -12.757469177246094, + "step": 11218 + }, + { + "epoch": 1.74, + "learning_rate": 5.918865086363401e-06, + "logits/chosen": -2.1482722759246826, + "logits/rejected": -2.7630269527435303, + "logps/chosen": -150.91065979003906, + "logps/rejected": -350.00567626953125, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.974867343902588, + "rewards/margins": 8.451473236083984, + "rewards/rejected": -12.426340103149414, + "step": 11219 + }, + { + "epoch": 1.74, + "learning_rate": 5.918131645832253e-06, + "logits/chosen": -2.672133207321167, + "logits/rejected": -2.73197078704834, + "logps/chosen": -272.1983337402344, + "logps/rejected": -304.64166259765625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.166290283203125, + "rewards/margins": 7.5439605712890625, + "rewards/rejected": -10.710250854492188, + "step": 11220 + }, + { + "epoch": 1.75, + "learning_rate": 5.917398205301105e-06, + "logits/chosen": -2.2393953800201416, + "logits/rejected": -2.779139995574951, + "logps/chosen": -412.83868408203125, + "logps/rejected": -389.7086181640625, + "loss": 0.3288, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9368391036987305, + "rewards/margins": 5.090322494506836, + "rewards/rejected": -10.027161598205566, + "step": 11221 + }, + { + "epoch": 1.75, + "learning_rate": 5.9166647647699565e-06, + "logits/chosen": -2.6576521396636963, + "logits/rejected": -2.896859884262085, + "logps/chosen": -720.2965087890625, + "logps/rejected": -509.0187683105469, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.339022636413574, + "rewards/margins": 7.642214775085449, + "rewards/rejected": -10.981237411499023, + "step": 11222 + }, + { + "epoch": 1.75, + "learning_rate": 5.915931324238809e-06, + "logits/chosen": -1.880699634552002, + "logits/rejected": -2.5816545486450195, + "logps/chosen": -226.9602508544922, + "logps/rejected": -259.0702819824219, + "loss": 0.0632, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.506380081176758, + "rewards/margins": 4.726414680480957, + "rewards/rejected": -7.232794761657715, + "step": 11223 + }, + { + "epoch": 1.75, + "learning_rate": 5.915197883707662e-06, + "logits/chosen": -3.0312843322753906, + "logits/rejected": -2.869117498397827, + "logps/chosen": -137.82852172851562, + "logps/rejected": -240.02523803710938, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.186014652252197, + "rewards/margins": 8.235512733459473, + "rewards/rejected": -12.421527862548828, + "step": 11224 + }, + { + "epoch": 1.75, + "learning_rate": 5.914464443176514e-06, + "logits/chosen": -3.0424654483795166, + "logits/rejected": -2.600799322128296, + "logps/chosen": -424.7306213378906, + "logps/rejected": -373.93170166015625, + "loss": 0.3257, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.359383583068848, + "rewards/margins": 3.64790940284729, + "rewards/rejected": -11.007292747497559, + "step": 11225 + }, + { + "epoch": 1.75, + "learning_rate": 5.913731002645366e-06, + "logits/chosen": -1.8917511701583862, + "logits/rejected": -2.8210411071777344, + "logps/chosen": -277.3162841796875, + "logps/rejected": -434.47662353515625, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.246258735656738, + "rewards/margins": 7.770310401916504, + "rewards/rejected": -15.016569137573242, + "step": 11226 + }, + { + "epoch": 1.75, + "learning_rate": 5.9129975621142175e-06, + "logits/chosen": -2.9219307899475098, + "logits/rejected": -2.3992316722869873, + "logps/chosen": -285.5257263183594, + "logps/rejected": -253.07017517089844, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.391822338104248, + "rewards/margins": 6.869285583496094, + "rewards/rejected": -12.2611083984375, + "step": 11227 + }, + { + "epoch": 1.75, + "learning_rate": 5.91226412158307e-06, + "logits/chosen": -2.1240322589874268, + "logits/rejected": -2.9279487133026123, + "logps/chosen": -179.45162963867188, + "logps/rejected": -297.013671875, + "loss": 0.0336, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.974761009216309, + "rewards/margins": 5.368407249450684, + "rewards/rejected": -10.343168258666992, + "step": 11228 + }, + { + "epoch": 1.75, + "learning_rate": 5.911530681051922e-06, + "logits/chosen": -3.060279607772827, + "logits/rejected": -2.9522104263305664, + "logps/chosen": -187.9391632080078, + "logps/rejected": -88.14837646484375, + "loss": 1.8016, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.156289100646973, + "rewards/margins": -0.17244887351989746, + "rewards/rejected": -5.983839988708496, + "step": 11229 + }, + { + "epoch": 1.75, + "learning_rate": 5.910797240520774e-06, + "logits/chosen": -2.9848556518554688, + "logits/rejected": -2.828946113586426, + "logps/chosen": -386.08880615234375, + "logps/rejected": -353.85015869140625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7531661987304688, + "rewards/margins": 7.419468879699707, + "rewards/rejected": -10.172635078430176, + "step": 11230 + }, + { + "epoch": 1.75, + "learning_rate": 5.910063799989626e-06, + "logits/chosen": -2.3054919242858887, + "logits/rejected": -2.7462432384490967, + "logps/chosen": -154.02435302734375, + "logps/rejected": -202.8278350830078, + "loss": 1.2124, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.37917947769165, + "rewards/margins": 0.7943820953369141, + "rewards/rejected": -7.1735615730285645, + "step": 11231 + }, + { + "epoch": 1.75, + "learning_rate": 5.909330359458478e-06, + "logits/chosen": -2.7487306594848633, + "logits/rejected": -3.000953197479248, + "logps/chosen": -432.6546936035156, + "logps/rejected": -465.5611572265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.293169975280762, + "rewards/margins": 7.914725303649902, + "rewards/rejected": -12.207895278930664, + "step": 11232 + }, + { + "epoch": 1.75, + "learning_rate": 5.9085969189273305e-06, + "logits/chosen": -2.4193174839019775, + "logits/rejected": -2.9280033111572266, + "logps/chosen": -413.45379638671875, + "logps/rejected": -469.1190490722656, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.685270309448242, + "rewards/margins": 7.1343865394592285, + "rewards/rejected": -12.819656372070312, + "step": 11233 + }, + { + "epoch": 1.75, + "learning_rate": 5.907863478396182e-06, + "logits/chosen": -1.8596265316009521, + "logits/rejected": -2.683548927307129, + "logps/chosen": -102.15963745117188, + "logps/rejected": -207.67691040039062, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.619088888168335, + "rewards/margins": 6.226059913635254, + "rewards/rejected": -9.845149040222168, + "step": 11234 + }, + { + "epoch": 1.75, + "learning_rate": 5.907130037865034e-06, + "logits/chosen": -2.193042516708374, + "logits/rejected": -2.8959696292877197, + "logps/chosen": -90.82374572753906, + "logps/rejected": -231.313232421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.889026165008545, + "rewards/margins": 8.409236907958984, + "rewards/rejected": -13.298263549804688, + "step": 11235 + }, + { + "epoch": 1.75, + "learning_rate": 5.906396597333886e-06, + "logits/chosen": -1.8288092613220215, + "logits/rejected": -2.784334182739258, + "logps/chosen": -168.97152709960938, + "logps/rejected": -405.8929138183594, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.106632232666016, + "rewards/margins": 3.773059844970703, + "rewards/rejected": -9.879692077636719, + "step": 11236 + }, + { + "epoch": 1.75, + "learning_rate": 5.905663156802739e-06, + "logits/chosen": -2.5475990772247314, + "logits/rejected": -2.8105201721191406, + "logps/chosen": -172.88262939453125, + "logps/rejected": -472.0694580078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.239137649536133, + "rewards/margins": 9.995185852050781, + "rewards/rejected": -15.234323501586914, + "step": 11237 + }, + { + "epoch": 1.75, + "learning_rate": 5.904929716271591e-06, + "logits/chosen": -1.3637940883636475, + "logits/rejected": -2.9342217445373535, + "logps/chosen": -188.3615264892578, + "logps/rejected": -382.2893371582031, + "loss": 2.2786, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.5238618850708, + "rewards/margins": 2.9084033966064453, + "rewards/rejected": -11.432265281677246, + "step": 11238 + }, + { + "epoch": 1.75, + "learning_rate": 5.904196275740443e-06, + "logits/chosen": -2.9826953411102295, + "logits/rejected": -3.11594557762146, + "logps/chosen": -64.69176483154297, + "logps/rejected": -295.98211669921875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.122173309326172, + "rewards/margins": 8.283571243286133, + "rewards/rejected": -12.405744552612305, + "step": 11239 + }, + { + "epoch": 1.75, + "learning_rate": 5.903462835209295e-06, + "logits/chosen": -1.8147494792938232, + "logits/rejected": -2.572451591491699, + "logps/chosen": -106.40140533447266, + "logps/rejected": -346.9288635253906, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3505144119262695, + "rewards/margins": 7.622006416320801, + "rewards/rejected": -12.97252082824707, + "step": 11240 + }, + { + "epoch": 1.75, + "learning_rate": 5.902729394678147e-06, + "logits/chosen": -1.0606716871261597, + "logits/rejected": -2.913404703140259, + "logps/chosen": -122.50250244140625, + "logps/rejected": -615.022705078125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.223264694213867, + "rewards/margins": 7.819870948791504, + "rewards/rejected": -14.043135643005371, + "step": 11241 + }, + { + "epoch": 1.75, + "learning_rate": 5.901995954147e-06, + "logits/chosen": -1.6638306379318237, + "logits/rejected": -3.183816909790039, + "logps/chosen": -157.21185302734375, + "logps/rejected": -489.3562927246094, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6487884521484375, + "rewards/margins": 6.426875114440918, + "rewards/rejected": -12.075663566589355, + "step": 11242 + }, + { + "epoch": 1.75, + "learning_rate": 5.901262513615852e-06, + "logits/chosen": -2.52451491355896, + "logits/rejected": -2.952650785446167, + "logps/chosen": -103.76611328125, + "logps/rejected": -290.1862487792969, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.895439147949219, + "rewards/margins": 5.404406547546387, + "rewards/rejected": -10.299845695495605, + "step": 11243 + }, + { + "epoch": 1.75, + "learning_rate": 5.900529073084704e-06, + "logits/chosen": -2.816295623779297, + "logits/rejected": -2.844093084335327, + "logps/chosen": -181.987548828125, + "logps/rejected": -276.1684875488281, + "loss": 0.0231, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.240095138549805, + "rewards/margins": 8.648568153381348, + "rewards/rejected": -12.888663291931152, + "step": 11244 + }, + { + "epoch": 1.75, + "learning_rate": 5.899795632553556e-06, + "logits/chosen": -0.8832107782363892, + "logits/rejected": -2.2149715423583984, + "logps/chosen": -142.06524658203125, + "logps/rejected": -517.8355712890625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.031754016876221, + "rewards/margins": 12.63729476928711, + "rewards/rejected": -17.669048309326172, + "step": 11245 + }, + { + "epoch": 1.75, + "learning_rate": 5.899062192022408e-06, + "logits/chosen": -2.7100203037261963, + "logits/rejected": -1.9940743446350098, + "logps/chosen": -288.4729309082031, + "logps/rejected": -210.47805786132812, + "loss": 0.5489, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.165950298309326, + "rewards/margins": 3.4271161556243896, + "rewards/rejected": -9.593066215515137, + "step": 11246 + }, + { + "epoch": 1.75, + "learning_rate": 5.89832875149126e-06, + "logits/chosen": -2.64565110206604, + "logits/rejected": -2.732501983642578, + "logps/chosen": -184.96157836914062, + "logps/rejected": -295.5138854980469, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.848188877105713, + "rewards/margins": 7.7433061599731445, + "rewards/rejected": -12.591495513916016, + "step": 11247 + }, + { + "epoch": 1.75, + "learning_rate": 5.897595310960112e-06, + "logits/chosen": -2.8417599201202393, + "logits/rejected": -2.324082851409912, + "logps/chosen": -422.63720703125, + "logps/rejected": -108.21006774902344, + "loss": 0.2362, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.077842712402344, + "rewards/margins": 1.4787843227386475, + "rewards/rejected": -8.55662727355957, + "step": 11248 + }, + { + "epoch": 1.75, + "learning_rate": 5.896861870428964e-06, + "logits/chosen": -2.5308685302734375, + "logits/rejected": -2.237039089202881, + "logps/chosen": -229.87213134765625, + "logps/rejected": -198.76507568359375, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.428293704986572, + "rewards/margins": 5.095273017883301, + "rewards/rejected": -9.523566246032715, + "step": 11249 + }, + { + "epoch": 1.75, + "learning_rate": 5.896128429897816e-06, + "logits/chosen": -1.7420377731323242, + "logits/rejected": -2.7679808139801025, + "logps/chosen": -283.7875671386719, + "logps/rejected": -181.7228240966797, + "loss": 0.0633, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.176569938659668, + "rewards/margins": 2.771390676498413, + "rewards/rejected": -8.94796085357666, + "step": 11250 + }, + { + "epoch": 1.75, + "learning_rate": 5.8953949893666685e-06, + "logits/chosen": -2.816927194595337, + "logits/rejected": -3.143697500228882, + "logps/chosen": -220.53842163085938, + "logps/rejected": -407.6350402832031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1532983779907227, + "rewards/margins": 8.895593643188477, + "rewards/rejected": -12.048891067504883, + "step": 11251 + }, + { + "epoch": 1.75, + "learning_rate": 5.8946615488355204e-06, + "logits/chosen": -2.7799742221832275, + "logits/rejected": -2.9340600967407227, + "logps/chosen": -142.61956787109375, + "logps/rejected": -283.1583251953125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.308990955352783, + "rewards/margins": 7.089458465576172, + "rewards/rejected": -13.398449897766113, + "step": 11252 + }, + { + "epoch": 1.75, + "learning_rate": 5.893928108304372e-06, + "logits/chosen": -2.8068864345550537, + "logits/rejected": -1.1462193727493286, + "logps/chosen": -210.91644287109375, + "logps/rejected": -251.2025146484375, + "loss": 0.6404, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.782098770141602, + "rewards/margins": 4.028214454650879, + "rewards/rejected": -9.81031322479248, + "step": 11253 + }, + { + "epoch": 1.75, + "learning_rate": 5.893194667773224e-06, + "logits/chosen": -2.9113004207611084, + "logits/rejected": -2.676405668258667, + "logps/chosen": -212.9070281982422, + "logps/rejected": -300.245361328125, + "loss": 0.107, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.87572717666626, + "rewards/margins": 4.632588863372803, + "rewards/rejected": -9.508316040039062, + "step": 11254 + }, + { + "epoch": 1.75, + "learning_rate": 5.892461227242077e-06, + "logits/chosen": -1.6994765996932983, + "logits/rejected": -2.8911728858947754, + "logps/chosen": -112.18907165527344, + "logps/rejected": -213.73739624023438, + "loss": 0.7262, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.951936721801758, + "rewards/margins": 0.584923267364502, + "rewards/rejected": -4.53685998916626, + "step": 11255 + }, + { + "epoch": 1.75, + "learning_rate": 5.891727786710929e-06, + "logits/chosen": -2.8770360946655273, + "logits/rejected": -3.0922982692718506, + "logps/chosen": -389.7236633300781, + "logps/rejected": -294.58245849609375, + "loss": 0.0673, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.027915954589844, + "rewards/margins": 3.359961986541748, + "rewards/rejected": -9.38787841796875, + "step": 11256 + }, + { + "epoch": 1.75, + "learning_rate": 5.8909943461797815e-06, + "logits/chosen": -1.138710379600525, + "logits/rejected": -2.5935933589935303, + "logps/chosen": -115.00531005859375, + "logps/rejected": -347.1335144042969, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.563657283782959, + "rewards/margins": 4.624077796936035, + "rewards/rejected": -12.187734603881836, + "step": 11257 + }, + { + "epoch": 1.75, + "learning_rate": 5.890260905648633e-06, + "logits/chosen": -2.1266634464263916, + "logits/rejected": -3.052777051925659, + "logps/chosen": -108.37554168701172, + "logps/rejected": -393.7390441894531, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7974774837493896, + "rewards/margins": 8.124129295349121, + "rewards/rejected": -11.921606063842773, + "step": 11258 + }, + { + "epoch": 1.75, + "learning_rate": 5.889527465117485e-06, + "logits/chosen": -3.014420509338379, + "logits/rejected": -3.256743907928467, + "logps/chosen": -52.140533447265625, + "logps/rejected": -152.5852813720703, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.237520694732666, + "rewards/margins": 5.410517692565918, + "rewards/rejected": -9.648038864135742, + "step": 11259 + }, + { + "epoch": 1.75, + "learning_rate": 5.888794024586338e-06, + "logits/chosen": -2.840341567993164, + "logits/rejected": -2.975424289703369, + "logps/chosen": -370.85198974609375, + "logps/rejected": -265.15960693359375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5931310653686523, + "rewards/margins": 8.782217025756836, + "rewards/rejected": -12.375348091125488, + "step": 11260 + }, + { + "epoch": 1.75, + "learning_rate": 5.88806058405519e-06, + "logits/chosen": -1.357027292251587, + "logits/rejected": -2.9219107627868652, + "logps/chosen": -242.52322387695312, + "logps/rejected": -328.3374938964844, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8202438354492188, + "rewards/margins": 7.4911274909973145, + "rewards/rejected": -11.311370849609375, + "step": 11261 + }, + { + "epoch": 1.75, + "learning_rate": 5.887327143524042e-06, + "logits/chosen": -2.591797113418579, + "logits/rejected": -2.902797222137451, + "logps/chosen": -205.49806213378906, + "logps/rejected": -250.9913330078125, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.795037746429443, + "rewards/margins": 7.187281608581543, + "rewards/rejected": -12.982319831848145, + "step": 11262 + }, + { + "epoch": 1.75, + "learning_rate": 5.886593702992894e-06, + "logits/chosen": -2.8848555088043213, + "logits/rejected": -2.206878662109375, + "logps/chosen": -277.24627685546875, + "logps/rejected": -374.9833984375, + "loss": 0.1465, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.679968357086182, + "rewards/margins": 5.663050651550293, + "rewards/rejected": -11.343019485473633, + "step": 11263 + }, + { + "epoch": 1.75, + "learning_rate": 5.885860262461746e-06, + "logits/chosen": -2.8036410808563232, + "logits/rejected": -3.0385377407073975, + "logps/chosen": -466.9361877441406, + "logps/rejected": -527.94970703125, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.575279712677002, + "rewards/margins": 5.512848854064941, + "rewards/rejected": -10.088129043579102, + "step": 11264 + }, + { + "epoch": 1.75, + "learning_rate": 5.885126821930598e-06, + "logits/chosen": -2.934342384338379, + "logits/rejected": -2.6717331409454346, + "logps/chosen": -396.500732421875, + "logps/rejected": -306.2774963378906, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.488126754760742, + "rewards/margins": 8.453510284423828, + "rewards/rejected": -12.94163703918457, + "step": 11265 + }, + { + "epoch": 1.75, + "learning_rate": 5.88439338139945e-06, + "logits/chosen": -2.66591215133667, + "logits/rejected": -3.119487762451172, + "logps/chosen": -142.9377899169922, + "logps/rejected": -347.589599609375, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.709481716156006, + "rewards/margins": 4.435630798339844, + "rewards/rejected": -8.145112991333008, + "step": 11266 + }, + { + "epoch": 1.75, + "learning_rate": 5.883659940868302e-06, + "logits/chosen": -2.5311460494995117, + "logits/rejected": -2.95906138420105, + "logps/chosen": -122.38308715820312, + "logps/rejected": -219.76339721679688, + "loss": 1.2191, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.209701061248779, + "rewards/margins": 2.4135425090789795, + "rewards/rejected": -8.62324333190918, + "step": 11267 + }, + { + "epoch": 1.75, + "learning_rate": 5.882926500337155e-06, + "logits/chosen": -2.8934385776519775, + "logits/rejected": -2.035625457763672, + "logps/chosen": -745.5510864257812, + "logps/rejected": -383.98028564453125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.265031814575195, + "rewards/margins": 8.136642456054688, + "rewards/rejected": -15.401674270629883, + "step": 11268 + }, + { + "epoch": 1.75, + "learning_rate": 5.882193059806007e-06, + "logits/chosen": -2.9866812229156494, + "logits/rejected": -2.5598928928375244, + "logps/chosen": -186.36062622070312, + "logps/rejected": -209.49862670898438, + "loss": 1.0398, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.106906890869141, + "rewards/margins": 2.2607593536376953, + "rewards/rejected": -9.367666244506836, + "step": 11269 + }, + { + "epoch": 1.75, + "learning_rate": 5.8814596192748585e-06, + "logits/chosen": -1.9600660800933838, + "logits/rejected": -1.665267825126648, + "logps/chosen": -453.68890380859375, + "logps/rejected": -420.368408203125, + "loss": 0.1912, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.966127395629883, + "rewards/margins": 6.411806106567383, + "rewards/rejected": -12.377933502197266, + "step": 11270 + }, + { + "epoch": 1.75, + "learning_rate": 5.88072617874371e-06, + "logits/chosen": -2.897473096847534, + "logits/rejected": -2.3221089839935303, + "logps/chosen": -643.3740234375, + "logps/rejected": -717.16357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.190457344055176, + "rewards/margins": 10.762533187866211, + "rewards/rejected": -14.952991485595703, + "step": 11271 + }, + { + "epoch": 1.75, + "learning_rate": 5.879992738212562e-06, + "logits/chosen": -2.914147138595581, + "logits/rejected": -1.743595838546753, + "logps/chosen": -974.692626953125, + "logps/rejected": -558.8967895507812, + "loss": 0.1181, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.233293533325195, + "rewards/margins": 4.688127517700195, + "rewards/rejected": -9.92142105102539, + "step": 11272 + }, + { + "epoch": 1.75, + "learning_rate": 5.879259297681415e-06, + "logits/chosen": -1.4559895992279053, + "logits/rejected": -2.4806764125823975, + "logps/chosen": -155.78900146484375, + "logps/rejected": -406.97320556640625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8981618881225586, + "rewards/margins": 8.383146286010742, + "rewards/rejected": -12.2813081741333, + "step": 11273 + }, + { + "epoch": 1.75, + "learning_rate": 5.878525857150268e-06, + "logits/chosen": -2.7072596549987793, + "logits/rejected": -2.8035223484039307, + "logps/chosen": -354.24749755859375, + "logps/rejected": -316.2884521484375, + "loss": 0.4017, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.36024284362793, + "rewards/margins": 5.5246686935424805, + "rewards/rejected": -11.88491153717041, + "step": 11274 + }, + { + "epoch": 1.75, + "learning_rate": 5.8777924166191196e-06, + "logits/chosen": -2.6508898735046387, + "logits/rejected": -2.5925192832946777, + "logps/chosen": -209.32058715820312, + "logps/rejected": -370.15594482421875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5402042865753174, + "rewards/margins": 7.8454132080078125, + "rewards/rejected": -11.38561725616455, + "step": 11275 + }, + { + "epoch": 1.75, + "learning_rate": 5.8770589760879714e-06, + "logits/chosen": -2.8759679794311523, + "logits/rejected": -3.0185937881469727, + "logps/chosen": -161.04884338378906, + "logps/rejected": -281.72210693359375, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.357145309448242, + "rewards/margins": 4.991131782531738, + "rewards/rejected": -9.348276138305664, + "step": 11276 + }, + { + "epoch": 1.75, + "learning_rate": 5.876325535556824e-06, + "logits/chosen": -3.0192694664001465, + "logits/rejected": -2.0673704147338867, + "logps/chosen": -1164.046875, + "logps/rejected": -684.6398315429688, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.579054355621338, + "rewards/margins": 5.659819602966309, + "rewards/rejected": -9.238873481750488, + "step": 11277 + }, + { + "epoch": 1.75, + "learning_rate": 5.875592095025676e-06, + "logits/chosen": -1.8007992506027222, + "logits/rejected": -2.7877657413482666, + "logps/chosen": -338.96636962890625, + "logps/rejected": -517.4849853515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5324602127075195, + "rewards/margins": 9.509562492370605, + "rewards/rejected": -15.042022705078125, + "step": 11278 + }, + { + "epoch": 1.75, + "learning_rate": 5.874858654494528e-06, + "logits/chosen": -3.0912539958953857, + "logits/rejected": -3.132779836654663, + "logps/chosen": -335.2948913574219, + "logps/rejected": -497.5655517578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1533620357513428, + "rewards/margins": 8.513101577758789, + "rewards/rejected": -11.666463851928711, + "step": 11279 + }, + { + "epoch": 1.75, + "learning_rate": 5.87412521396338e-06, + "logits/chosen": -2.572970151901245, + "logits/rejected": -2.816497325897217, + "logps/chosen": -312.7169494628906, + "logps/rejected": -513.5245971679688, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2819318771362305, + "rewards/margins": 8.955397605895996, + "rewards/rejected": -15.237329483032227, + "step": 11280 + }, + { + "epoch": 1.75, + "learning_rate": 5.873391773432232e-06, + "logits/chosen": -2.2939844131469727, + "logits/rejected": -2.980976104736328, + "logps/chosen": -107.65782165527344, + "logps/rejected": -374.6402893066406, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8804731369018555, + "rewards/margins": 8.703641891479492, + "rewards/rejected": -13.584115982055664, + "step": 11281 + }, + { + "epoch": 1.75, + "learning_rate": 5.872658332901084e-06, + "logits/chosen": -1.7279220819473267, + "logits/rejected": -2.845402717590332, + "logps/chosen": -181.13323974609375, + "logps/rejected": -319.0600891113281, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.674263954162598, + "rewards/margins": 6.454615592956543, + "rewards/rejected": -13.12887954711914, + "step": 11282 + }, + { + "epoch": 1.75, + "learning_rate": 5.871924892369936e-06, + "logits/chosen": -2.4420888423919678, + "logits/rejected": -2.995885133743286, + "logps/chosen": -186.12579345703125, + "logps/rejected": -348.7269287109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4353718757629395, + "rewards/margins": 8.975461959838867, + "rewards/rejected": -12.410833358764648, + "step": 11283 + }, + { + "epoch": 1.75, + "learning_rate": 5.871191451838788e-06, + "logits/chosen": -1.495448350906372, + "logits/rejected": -2.917860984802246, + "logps/chosen": -147.289794921875, + "logps/rejected": -496.46917724609375, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.302608489990234, + "rewards/margins": 4.708110809326172, + "rewards/rejected": -10.010719299316406, + "step": 11284 + }, + { + "epoch": 1.76, + "learning_rate": 5.87045801130764e-06, + "logits/chosen": -2.7313666343688965, + "logits/rejected": -2.9711718559265137, + "logps/chosen": -148.2659454345703, + "logps/rejected": -198.57772827148438, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1530895233154297, + "rewards/margins": 8.646171569824219, + "rewards/rejected": -11.799261093139648, + "step": 11285 + }, + { + "epoch": 1.76, + "learning_rate": 5.869724570776493e-06, + "logits/chosen": -2.8842647075653076, + "logits/rejected": -2.7266345024108887, + "logps/chosen": -197.51268005371094, + "logps/rejected": -270.38751220703125, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.035036087036133, + "rewards/margins": 6.450333595275879, + "rewards/rejected": -11.485368728637695, + "step": 11286 + }, + { + "epoch": 1.76, + "learning_rate": 5.868991130245345e-06, + "logits/chosen": -2.844480276107788, + "logits/rejected": -2.9828946590423584, + "logps/chosen": -376.02423095703125, + "logps/rejected": -412.9360656738281, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.096785068511963, + "rewards/margins": 10.610193252563477, + "rewards/rejected": -12.706977844238281, + "step": 11287 + }, + { + "epoch": 1.76, + "learning_rate": 5.8682576897141965e-06, + "logits/chosen": -2.611024856567383, + "logits/rejected": -3.0799756050109863, + "logps/chosen": -154.5015869140625, + "logps/rejected": -261.45111083984375, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.615179538726807, + "rewards/margins": 6.3385725021362305, + "rewards/rejected": -10.953752517700195, + "step": 11288 + }, + { + "epoch": 1.76, + "learning_rate": 5.867524249183048e-06, + "logits/chosen": -2.914109706878662, + "logits/rejected": -2.764275312423706, + "logps/chosen": -344.34881591796875, + "logps/rejected": -359.67254638671875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6887025833129883, + "rewards/margins": 5.500252723693848, + "rewards/rejected": -9.188955307006836, + "step": 11289 + }, + { + "epoch": 1.76, + "learning_rate": 5.866790808651901e-06, + "logits/chosen": -2.7341887950897217, + "logits/rejected": -3.1602084636688232, + "logps/chosen": -279.4544677734375, + "logps/rejected": -504.72698974609375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.397762298583984, + "rewards/margins": 8.074280738830566, + "rewards/rejected": -12.47204303741455, + "step": 11290 + }, + { + "epoch": 1.76, + "learning_rate": 5.866057368120754e-06, + "logits/chosen": -1.605885624885559, + "logits/rejected": -2.8261499404907227, + "logps/chosen": -99.42803955078125, + "logps/rejected": -599.119873046875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.601367950439453, + "rewards/margins": 7.240262985229492, + "rewards/rejected": -13.841630935668945, + "step": 11291 + }, + { + "epoch": 1.76, + "learning_rate": 5.865323927589606e-06, + "logits/chosen": -1.7880425453186035, + "logits/rejected": -2.916447639465332, + "logps/chosen": -337.7354736328125, + "logps/rejected": -609.14208984375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.897199630737305, + "rewards/margins": 7.898311614990234, + "rewards/rejected": -13.795511245727539, + "step": 11292 + }, + { + "epoch": 1.76, + "learning_rate": 5.864590487058458e-06, + "logits/chosen": -1.7558640241622925, + "logits/rejected": -3.05053448677063, + "logps/chosen": -89.71784973144531, + "logps/rejected": -353.51416015625, + "loss": 0.0646, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.829678058624268, + "rewards/margins": 6.181282997131348, + "rewards/rejected": -12.010960578918457, + "step": 11293 + }, + { + "epoch": 1.76, + "learning_rate": 5.8638570465273095e-06, + "logits/chosen": -2.915761947631836, + "logits/rejected": -3.1027371883392334, + "logps/chosen": -75.60868072509766, + "logps/rejected": -241.24337768554688, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.304347038269043, + "rewards/margins": 6.051191329956055, + "rewards/rejected": -12.355539321899414, + "step": 11294 + }, + { + "epoch": 1.76, + "learning_rate": 5.863123605996162e-06, + "logits/chosen": -2.3057429790496826, + "logits/rejected": -2.918919563293457, + "logps/chosen": -352.52691650390625, + "logps/rejected": -463.2597351074219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2181878089904785, + "rewards/margins": 9.903806686401367, + "rewards/rejected": -13.121994018554688, + "step": 11295 + }, + { + "epoch": 1.76, + "learning_rate": 5.862390165465014e-06, + "logits/chosen": -2.3086085319519043, + "logits/rejected": -2.743605375289917, + "logps/chosen": -199.9915771484375, + "logps/rejected": -309.4759521484375, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.231538772583008, + "rewards/margins": 7.549701690673828, + "rewards/rejected": -14.781240463256836, + "step": 11296 + }, + { + "epoch": 1.76, + "learning_rate": 5.861656724933866e-06, + "logits/chosen": -2.739553213119507, + "logits/rejected": -2.851794481277466, + "logps/chosen": -275.41552734375, + "logps/rejected": -264.21600341796875, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.46195125579834, + "rewards/margins": 6.607357978820801, + "rewards/rejected": -11.06930923461914, + "step": 11297 + }, + { + "epoch": 1.76, + "learning_rate": 5.860923284402718e-06, + "logits/chosen": -2.877690315246582, + "logits/rejected": -2.9575107097625732, + "logps/chosen": -260.4110412597656, + "logps/rejected": -395.1736755371094, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.415050506591797, + "rewards/margins": 8.765281677246094, + "rewards/rejected": -12.18033218383789, + "step": 11298 + }, + { + "epoch": 1.76, + "learning_rate": 5.86018984387157e-06, + "logits/chosen": -2.3078055381774902, + "logits/rejected": -2.8035807609558105, + "logps/chosen": -146.3428955078125, + "logps/rejected": -593.1116943359375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4212470054626465, + "rewards/margins": 9.943565368652344, + "rewards/rejected": -14.364811897277832, + "step": 11299 + }, + { + "epoch": 1.76, + "learning_rate": 5.8594564033404224e-06, + "logits/chosen": -2.772747039794922, + "logits/rejected": -2.4195151329040527, + "logps/chosen": -467.41375732421875, + "logps/rejected": -425.005615234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.083554267883301, + "rewards/margins": 8.751724243164062, + "rewards/rejected": -12.835277557373047, + "step": 11300 + }, + { + "epoch": 1.76, + "learning_rate": 5.858722962809274e-06, + "logits/chosen": -1.7522509098052979, + "logits/rejected": -2.0515098571777344, + "logps/chosen": -64.9737548828125, + "logps/rejected": -302.3483581542969, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.612948894500732, + "rewards/margins": 5.061723232269287, + "rewards/rejected": -10.67467212677002, + "step": 11301 + }, + { + "epoch": 1.76, + "learning_rate": 5.857989522278126e-06, + "logits/chosen": -3.034240484237671, + "logits/rejected": -3.021101951599121, + "logps/chosen": -141.8504180908203, + "logps/rejected": -289.4924011230469, + "loss": 0.5052, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.726428031921387, + "rewards/margins": 4.472330093383789, + "rewards/rejected": -10.198758125305176, + "step": 11302 + }, + { + "epoch": 1.76, + "learning_rate": 5.857256081746978e-06, + "logits/chosen": -2.9582362174987793, + "logits/rejected": -1.8081375360488892, + "logps/chosen": -457.2237548828125, + "logps/rejected": -229.7133026123047, + "loss": 0.8784, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.273088455200195, + "rewards/margins": 1.4929678440093994, + "rewards/rejected": -7.766056060791016, + "step": 11303 + }, + { + "epoch": 1.76, + "learning_rate": 5.856522641215831e-06, + "logits/chosen": -2.2968037128448486, + "logits/rejected": -2.9501991271972656, + "logps/chosen": -270.0607604980469, + "logps/rejected": -344.90301513671875, + "loss": 3.6232, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.848794460296631, + "rewards/margins": 1.901050329208374, + "rewards/rejected": -8.749844551086426, + "step": 11304 + }, + { + "epoch": 1.76, + "learning_rate": 5.855789200684683e-06, + "logits/chosen": -3.021801710128784, + "logits/rejected": -2.881882667541504, + "logps/chosen": -388.0122985839844, + "logps/rejected": -432.8734130859375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.005189895629883, + "rewards/margins": 7.2342963218688965, + "rewards/rejected": -12.239486694335938, + "step": 11305 + }, + { + "epoch": 1.76, + "learning_rate": 5.8550557601535346e-06, + "logits/chosen": -2.2363955974578857, + "logits/rejected": -2.8312063217163086, + "logps/chosen": -110.24901580810547, + "logps/rejected": -261.08648681640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.198833465576172, + "rewards/margins": 8.637001991271973, + "rewards/rejected": -13.835836410522461, + "step": 11306 + }, + { + "epoch": 1.76, + "learning_rate": 5.854322319622387e-06, + "logits/chosen": -2.5650789737701416, + "logits/rejected": -2.929857015609741, + "logps/chosen": -93.24150085449219, + "logps/rejected": -304.6455078125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6767373085021973, + "rewards/margins": 7.402209281921387, + "rewards/rejected": -11.078946113586426, + "step": 11307 + }, + { + "epoch": 1.76, + "learning_rate": 5.853588879091239e-06, + "logits/chosen": -1.583261489868164, + "logits/rejected": -2.3966898918151855, + "logps/chosen": -167.76022338867188, + "logps/rejected": -574.815673828125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.112628936767578, + "rewards/margins": 12.970748901367188, + "rewards/rejected": -18.083377838134766, + "step": 11308 + }, + { + "epoch": 1.76, + "learning_rate": 5.852855438560092e-06, + "logits/chosen": -2.249541759490967, + "logits/rejected": -2.858997106552124, + "logps/chosen": -329.261962890625, + "logps/rejected": -409.4166259765625, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.834451675415039, + "rewards/margins": 7.613167762756348, + "rewards/rejected": -13.447620391845703, + "step": 11309 + }, + { + "epoch": 1.76, + "learning_rate": 5.852121998028944e-06, + "logits/chosen": -1.8165814876556396, + "logits/rejected": -2.7859508991241455, + "logps/chosen": -217.111572265625, + "logps/rejected": -476.0751953125, + "loss": 0.1755, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.417156219482422, + "rewards/margins": 4.403010845184326, + "rewards/rejected": -13.82016658782959, + "step": 11310 + }, + { + "epoch": 1.76, + "learning_rate": 5.851388557497796e-06, + "logits/chosen": -1.382144808769226, + "logits/rejected": -2.8703012466430664, + "logps/chosen": -83.26101684570312, + "logps/rejected": -357.7333984375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.402775764465332, + "rewards/margins": 7.959843635559082, + "rewards/rejected": -13.362619400024414, + "step": 11311 + }, + { + "epoch": 1.76, + "learning_rate": 5.8506551169666475e-06, + "logits/chosen": -2.8296024799346924, + "logits/rejected": -2.4999747276306152, + "logps/chosen": -146.76951599121094, + "logps/rejected": -206.94915771484375, + "loss": 2.8028, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.613446235656738, + "rewards/margins": 1.6364915370941162, + "rewards/rejected": -10.249938011169434, + "step": 11312 + }, + { + "epoch": 1.76, + "learning_rate": 5.8499216764355e-06, + "logits/chosen": -3.0411555767059326, + "logits/rejected": -2.569793462753296, + "logps/chosen": -170.37835693359375, + "logps/rejected": -336.0800476074219, + "loss": 1.0789, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.798964500427246, + "rewards/margins": 4.262025833129883, + "rewards/rejected": -11.060990333557129, + "step": 11313 + }, + { + "epoch": 1.76, + "learning_rate": 5.849188235904352e-06, + "logits/chosen": -2.800304412841797, + "logits/rejected": -2.7909152507781982, + "logps/chosen": -302.28179931640625, + "logps/rejected": -484.87579345703125, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.474147796630859, + "rewards/margins": 7.937788963317871, + "rewards/rejected": -12.411937713623047, + "step": 11314 + }, + { + "epoch": 1.76, + "learning_rate": 5.848454795373204e-06, + "logits/chosen": -1.129472255706787, + "logits/rejected": -2.685056686401367, + "logps/chosen": -161.29843139648438, + "logps/rejected": -756.1903076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2005815505981445, + "rewards/margins": 13.001799583435059, + "rewards/rejected": -18.202381134033203, + "step": 11315 + }, + { + "epoch": 1.76, + "learning_rate": 5.847721354842056e-06, + "logits/chosen": -2.804307222366333, + "logits/rejected": -2.0814952850341797, + "logps/chosen": -451.7244873046875, + "logps/rejected": -367.4474182128906, + "loss": 0.0626, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.308363437652588, + "rewards/margins": 3.0302789211273193, + "rewards/rejected": -9.338642120361328, + "step": 11316 + }, + { + "epoch": 1.76, + "learning_rate": 5.846987914310909e-06, + "logits/chosen": -2.5046231746673584, + "logits/rejected": -3.158132314682007, + "logps/chosen": -175.12295532226562, + "logps/rejected": -208.78146362304688, + "loss": 0.087, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7409253120422363, + "rewards/margins": 4.213824272155762, + "rewards/rejected": -7.954749584197998, + "step": 11317 + }, + { + "epoch": 1.76, + "learning_rate": 5.8462544737797605e-06, + "logits/chosen": -3.013504981994629, + "logits/rejected": -2.5628416538238525, + "logps/chosen": -387.65069580078125, + "logps/rejected": -336.1628112792969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5978012681007385, + "rewards/margins": 10.132333755493164, + "rewards/rejected": -10.730134963989258, + "step": 11318 + }, + { + "epoch": 1.76, + "learning_rate": 5.845521033248612e-06, + "logits/chosen": -1.6196492910385132, + "logits/rejected": -2.4286160469055176, + "logps/chosen": -134.9503173828125, + "logps/rejected": -387.00555419921875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.477609634399414, + "rewards/margins": 8.320877075195312, + "rewards/rejected": -13.798486709594727, + "step": 11319 + }, + { + "epoch": 1.76, + "learning_rate": 5.844787592717464e-06, + "logits/chosen": -2.335749864578247, + "logits/rejected": -2.8926100730895996, + "logps/chosen": -229.9854736328125, + "logps/rejected": -334.607421875, + "loss": 0.8983, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.58172607421875, + "rewards/margins": 3.2341036796569824, + "rewards/rejected": -7.815829753875732, + "step": 11320 + }, + { + "epoch": 1.76, + "learning_rate": 5.844054152186316e-06, + "logits/chosen": -1.5693128108978271, + "logits/rejected": -2.6754133701324463, + "logps/chosen": -100.6514892578125, + "logps/rejected": -347.62310791015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8200011253356934, + "rewards/margins": 9.38961124420166, + "rewards/rejected": -13.209611892700195, + "step": 11321 + }, + { + "epoch": 1.76, + "learning_rate": 5.843320711655169e-06, + "logits/chosen": -2.5283026695251465, + "logits/rejected": -2.974968194961548, + "logps/chosen": -86.38087463378906, + "logps/rejected": -313.65045166015625, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6511149406433105, + "rewards/margins": 5.567196846008301, + "rewards/rejected": -10.218311309814453, + "step": 11322 + }, + { + "epoch": 1.76, + "learning_rate": 5.842587271124021e-06, + "logits/chosen": -0.8181435465812683, + "logits/rejected": -2.6772146224975586, + "logps/chosen": -150.98928833007812, + "logps/rejected": -488.51324462890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.854458808898926, + "rewards/margins": 14.957051277160645, + "rewards/rejected": -19.81151008605957, + "step": 11323 + }, + { + "epoch": 1.76, + "learning_rate": 5.8418538305928735e-06, + "logits/chosen": -1.279476284980774, + "logits/rejected": -2.891374349594116, + "logps/chosen": -123.17802429199219, + "logps/rejected": -362.1137390136719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.223259925842285, + "rewards/margins": 9.4981689453125, + "rewards/rejected": -13.721427917480469, + "step": 11324 + }, + { + "epoch": 1.76, + "learning_rate": 5.841120390061725e-06, + "logits/chosen": -2.662121534347534, + "logits/rejected": -2.7359983921051025, + "logps/chosen": -162.27415466308594, + "logps/rejected": -330.865966796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1473777294158936, + "rewards/margins": 10.010822296142578, + "rewards/rejected": -12.15820026397705, + "step": 11325 + }, + { + "epoch": 1.76, + "learning_rate": 5.840386949530578e-06, + "logits/chosen": -2.744865655899048, + "logits/rejected": -3.098787546157837, + "logps/chosen": -52.858802795410156, + "logps/rejected": -178.87818908691406, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.201031684875488, + "rewards/margins": 6.037687301635742, + "rewards/rejected": -10.23871898651123, + "step": 11326 + }, + { + "epoch": 1.76, + "learning_rate": 5.83965350899943e-06, + "logits/chosen": -2.985578775405884, + "logits/rejected": -2.901543140411377, + "logps/chosen": -225.18209838867188, + "logps/rejected": -275.8565673828125, + "loss": 0.0722, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.000439643859863, + "rewards/margins": 2.871586561203003, + "rewards/rejected": -10.872026443481445, + "step": 11327 + }, + { + "epoch": 1.76, + "learning_rate": 5.838920068468282e-06, + "logits/chosen": -2.7925655841827393, + "logits/rejected": -2.464649200439453, + "logps/chosen": -147.99078369140625, + "logps/rejected": -277.9042053222656, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.644199848175049, + "rewards/margins": 9.01335334777832, + "rewards/rejected": -11.657552719116211, + "step": 11328 + }, + { + "epoch": 1.76, + "learning_rate": 5.838186627937134e-06, + "logits/chosen": -2.4294190406799316, + "logits/rejected": -3.0206851959228516, + "logps/chosen": -419.98980712890625, + "logps/rejected": -542.37548828125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.597280979156494, + "rewards/margins": 6.5137176513671875, + "rewards/rejected": -12.110998153686523, + "step": 11329 + }, + { + "epoch": 1.76, + "learning_rate": 5.8374531874059856e-06, + "logits/chosen": -2.7289175987243652, + "logits/rejected": -2.9839649200439453, + "logps/chosen": -140.6420135498047, + "logps/rejected": -237.07167053222656, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.164217948913574, + "rewards/margins": 6.727969646453857, + "rewards/rejected": -10.892187118530273, + "step": 11330 + }, + { + "epoch": 1.76, + "learning_rate": 5.836719746874838e-06, + "logits/chosen": -2.8357818126678467, + "logits/rejected": -2.5530693531036377, + "logps/chosen": -570.230224609375, + "logps/rejected": -524.3253173828125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8782470226287842, + "rewards/margins": 8.027249336242676, + "rewards/rejected": -9.905496597290039, + "step": 11331 + }, + { + "epoch": 1.76, + "learning_rate": 5.83598630634369e-06, + "logits/chosen": -2.890087127685547, + "logits/rejected": -3.0801479816436768, + "logps/chosen": -124.7106704711914, + "logps/rejected": -268.33343505859375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9306423664093018, + "rewards/margins": 7.608951568603516, + "rewards/rejected": -10.539593696594238, + "step": 11332 + }, + { + "epoch": 1.76, + "learning_rate": 5.835252865812542e-06, + "logits/chosen": -1.63931405544281, + "logits/rejected": -2.9326648712158203, + "logps/chosen": -122.09364318847656, + "logps/rejected": -232.79751586914062, + "loss": 0.0692, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.352338790893555, + "rewards/margins": 8.126798629760742, + "rewards/rejected": -12.479137420654297, + "step": 11333 + }, + { + "epoch": 1.76, + "learning_rate": 5.834519425281394e-06, + "logits/chosen": -2.7897818088531494, + "logits/rejected": -3.141780138015747, + "logps/chosen": -104.38874816894531, + "logps/rejected": -149.33551025390625, + "loss": 0.0154, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.043966770172119, + "rewards/margins": 5.57395601272583, + "rewards/rejected": -8.61792278289795, + "step": 11334 + }, + { + "epoch": 1.76, + "learning_rate": 5.833785984750247e-06, + "logits/chosen": -2.274400234222412, + "logits/rejected": -2.656055212020874, + "logps/chosen": -117.80570983886719, + "logps/rejected": -209.93984985351562, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.712607383728027, + "rewards/margins": 5.8957109451293945, + "rewards/rejected": -11.608318328857422, + "step": 11335 + }, + { + "epoch": 1.76, + "learning_rate": 5.8330525442190985e-06, + "logits/chosen": -2.6991233825683594, + "logits/rejected": -3.031682014465332, + "logps/chosen": -173.33514404296875, + "logps/rejected": -281.61334228515625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6964879035949707, + "rewards/margins": 5.951932907104492, + "rewards/rejected": -9.648420333862305, + "step": 11336 + }, + { + "epoch": 1.76, + "learning_rate": 5.83231910368795e-06, + "logits/chosen": -2.331007957458496, + "logits/rejected": -3.0398948192596436, + "logps/chosen": -663.4124755859375, + "logps/rejected": -644.036376953125, + "loss": 0.4209, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.261314392089844, + "rewards/margins": 4.737970352172852, + "rewards/rejected": -9.999284744262695, + "step": 11337 + }, + { + "epoch": 1.76, + "learning_rate": 5.831585663156802e-06, + "logits/chosen": -2.559307813644409, + "logits/rejected": -3.051069736480713, + "logps/chosen": -355.7490234375, + "logps/rejected": -517.7464599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.199122905731201, + "rewards/margins": 13.243656158447266, + "rewards/rejected": -16.442777633666992, + "step": 11338 + }, + { + "epoch": 1.76, + "learning_rate": 5.830852222625654e-06, + "logits/chosen": -1.7709074020385742, + "logits/rejected": -2.821744203567505, + "logps/chosen": -158.68148803710938, + "logps/rejected": -437.436279296875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.016453742980957, + "rewards/margins": 9.023324966430664, + "rewards/rejected": -14.039778709411621, + "step": 11339 + }, + { + "epoch": 1.76, + "learning_rate": 5.830118782094507e-06, + "logits/chosen": -2.98010516166687, + "logits/rejected": -2.9497008323669434, + "logps/chosen": -138.92514038085938, + "logps/rejected": -203.42291259765625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.164709091186523, + "rewards/margins": 6.254487037658691, + "rewards/rejected": -11.419196128845215, + "step": 11340 + }, + { + "epoch": 1.76, + "learning_rate": 5.82938534156336e-06, + "logits/chosen": -2.073498487472534, + "logits/rejected": -3.1581830978393555, + "logps/chosen": -117.61471557617188, + "logps/rejected": -367.815185546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.426353931427002, + "rewards/margins": 9.07847785949707, + "rewards/rejected": -11.50483226776123, + "step": 11341 + }, + { + "epoch": 1.76, + "learning_rate": 5.8286519010322115e-06, + "logits/chosen": -3.090871572494507, + "logits/rejected": -3.1070079803466797, + "logps/chosen": -160.0068359375, + "logps/rejected": -355.063720703125, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.863232135772705, + "rewards/margins": 6.689940452575684, + "rewards/rejected": -12.553173065185547, + "step": 11342 + }, + { + "epoch": 1.76, + "learning_rate": 5.827918460501063e-06, + "logits/chosen": -2.8132545948028564, + "logits/rejected": -1.96028470993042, + "logps/chosen": -211.45814514160156, + "logps/rejected": -225.32785034179688, + "loss": 0.0356, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.646791458129883, + "rewards/margins": 8.589879989624023, + "rewards/rejected": -11.236671447753906, + "step": 11343 + }, + { + "epoch": 1.76, + "learning_rate": 5.827185019969916e-06, + "logits/chosen": -2.1456267833709717, + "logits/rejected": -3.023505687713623, + "logps/chosen": -322.3536071777344, + "logps/rejected": -434.57220458984375, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.463042259216309, + "rewards/margins": 5.281045913696289, + "rewards/rejected": -9.744089126586914, + "step": 11344 + }, + { + "epoch": 1.76, + "learning_rate": 5.826451579438768e-06, + "logits/chosen": -2.8601527214050293, + "logits/rejected": -2.2823851108551025, + "logps/chosen": -138.04049682617188, + "logps/rejected": -343.0148010253906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3262264728546143, + "rewards/margins": 9.054075241088867, + "rewards/rejected": -11.380301475524902, + "step": 11345 + }, + { + "epoch": 1.76, + "learning_rate": 5.82571813890762e-06, + "logits/chosen": -2.7658116817474365, + "logits/rejected": -2.9899911880493164, + "logps/chosen": -766.4222412109375, + "logps/rejected": -488.44366455078125, + "loss": 0.0367, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.545709609985352, + "rewards/margins": 3.331517219543457, + "rewards/rejected": -7.877226829528809, + "step": 11346 + }, + { + "epoch": 1.76, + "learning_rate": 5.824984698376472e-06, + "logits/chosen": -1.1581391096115112, + "logits/rejected": -2.728400468826294, + "logps/chosen": -73.74586486816406, + "logps/rejected": -318.02691650390625, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.214993476867676, + "rewards/margins": 4.454045295715332, + "rewards/rejected": -10.669038772583008, + "step": 11347 + }, + { + "epoch": 1.76, + "learning_rate": 5.824251257845324e-06, + "logits/chosen": -1.2030366659164429, + "logits/rejected": -2.39155912399292, + "logps/chosen": -168.43655395507812, + "logps/rejected": -505.211669921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.455691337585449, + "rewards/margins": 9.77484130859375, + "rewards/rejected": -14.230531692504883, + "step": 11348 + }, + { + "epoch": 1.77, + "learning_rate": 5.823517817314176e-06, + "logits/chosen": -2.2143895626068115, + "logits/rejected": -2.741868495941162, + "logps/chosen": -249.83230590820312, + "logps/rejected": -392.0377197265625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.375854015350342, + "rewards/margins": 9.903470039367676, + "rewards/rejected": -14.27932357788086, + "step": 11349 + }, + { + "epoch": 1.77, + "learning_rate": 5.822784376783028e-06, + "logits/chosen": -2.7729170322418213, + "logits/rejected": -2.3020870685577393, + "logps/chosen": -189.5186004638672, + "logps/rejected": -201.35067749023438, + "loss": 0.5751, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.3344011306762695, + "rewards/margins": 3.9940185546875, + "rewards/rejected": -10.32841968536377, + "step": 11350 + }, + { + "epoch": 1.77, + "learning_rate": 5.82205093625188e-06, + "logits/chosen": -2.6003668308258057, + "logits/rejected": -2.8504083156585693, + "logps/chosen": -110.31008911132812, + "logps/rejected": -239.6727294921875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7861266136169434, + "rewards/margins": 9.04851245880127, + "rewards/rejected": -11.834638595581055, + "step": 11351 + }, + { + "epoch": 1.77, + "learning_rate": 5.821317495720732e-06, + "logits/chosen": -2.6386680603027344, + "logits/rejected": -2.6917548179626465, + "logps/chosen": -309.1993713378906, + "logps/rejected": -531.4614868164062, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.767012596130371, + "rewards/margins": 6.933010578155518, + "rewards/rejected": -13.70002269744873, + "step": 11352 + }, + { + "epoch": 1.77, + "learning_rate": 5.820584055189585e-06, + "logits/chosen": -2.9959867000579834, + "logits/rejected": -2.355886697769165, + "logps/chosen": -432.2913513183594, + "logps/rejected": -362.0948181152344, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6470160484313965, + "rewards/margins": 6.687323093414307, + "rewards/rejected": -12.334339141845703, + "step": 11353 + }, + { + "epoch": 1.77, + "learning_rate": 5.819850614658437e-06, + "logits/chosen": -2.313523769378662, + "logits/rejected": -2.7998805046081543, + "logps/chosen": -236.4075927734375, + "logps/rejected": -271.2898254394531, + "loss": 0.0927, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.664038181304932, + "rewards/margins": 4.57895565032959, + "rewards/rejected": -9.24299430847168, + "step": 11354 + }, + { + "epoch": 1.77, + "learning_rate": 5.8191171741272885e-06, + "logits/chosen": -1.8011691570281982, + "logits/rejected": -2.9212889671325684, + "logps/chosen": -122.18766784667969, + "logps/rejected": -218.56790161132812, + "loss": 1.2003, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9585866928100586, + "rewards/margins": 3.9457194805145264, + "rewards/rejected": -7.904305934906006, + "step": 11355 + }, + { + "epoch": 1.77, + "learning_rate": 5.81838373359614e-06, + "logits/chosen": -2.3390142917633057, + "logits/rejected": -2.768251657485962, + "logps/chosen": -101.39962768554688, + "logps/rejected": -310.4202880859375, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.644028663635254, + "rewards/margins": 8.351750373840332, + "rewards/rejected": -12.995779037475586, + "step": 11356 + }, + { + "epoch": 1.77, + "learning_rate": 5.817650293064993e-06, + "logits/chosen": -1.741336464881897, + "logits/rejected": -3.030655860900879, + "logps/chosen": -89.08834838867188, + "logps/rejected": -414.9880676269531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.222425937652588, + "rewards/margins": 9.982921600341797, + "rewards/rejected": -12.205347061157227, + "step": 11357 + }, + { + "epoch": 1.77, + "learning_rate": 5.816916852533846e-06, + "logits/chosen": -1.5282375812530518, + "logits/rejected": -2.9749913215637207, + "logps/chosen": -222.50059509277344, + "logps/rejected": -531.3040161132812, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5617852210998535, + "rewards/margins": 10.385735511779785, + "rewards/rejected": -14.947521209716797, + "step": 11358 + }, + { + "epoch": 1.77, + "learning_rate": 5.816183412002698e-06, + "logits/chosen": -1.7626943588256836, + "logits/rejected": -3.064272165298462, + "logps/chosen": -55.11011505126953, + "logps/rejected": -375.8459777832031, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.759105920791626, + "rewards/margins": 7.33383846282959, + "rewards/rejected": -10.092944145202637, + "step": 11359 + }, + { + "epoch": 1.77, + "learning_rate": 5.8154499714715495e-06, + "logits/chosen": -3.0152909755706787, + "logits/rejected": -2.9295027256011963, + "logps/chosen": -192.5012664794922, + "logps/rejected": -329.3179626464844, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7650680541992188, + "rewards/margins": 8.845800399780273, + "rewards/rejected": -12.610868453979492, + "step": 11360 + }, + { + "epoch": 1.77, + "learning_rate": 5.814716530940401e-06, + "logits/chosen": -2.7970402240753174, + "logits/rejected": -1.7704172134399414, + "logps/chosen": -386.01995849609375, + "logps/rejected": -463.96533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8956375122070312, + "rewards/margins": 13.530409812927246, + "rewards/rejected": -16.426048278808594, + "step": 11361 + }, + { + "epoch": 1.77, + "learning_rate": 5.813983090409254e-06, + "logits/chosen": -2.999641180038452, + "logits/rejected": -2.8579351902008057, + "logps/chosen": -296.3469543457031, + "logps/rejected": -243.8713836669922, + "loss": 0.3676, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.154491424560547, + "rewards/margins": 3.4144675731658936, + "rewards/rejected": -7.5689592361450195, + "step": 11362 + }, + { + "epoch": 1.77, + "learning_rate": 5.813249649878106e-06, + "logits/chosen": -2.6729156970977783, + "logits/rejected": -2.3964006900787354, + "logps/chosen": -360.80242919921875, + "logps/rejected": -559.015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.453076124191284, + "rewards/margins": 11.627376556396484, + "rewards/rejected": -15.080451965332031, + "step": 11363 + }, + { + "epoch": 1.77, + "learning_rate": 5.812516209346958e-06, + "logits/chosen": -2.8672447204589844, + "logits/rejected": -2.4959232807159424, + "logps/chosen": -191.18861389160156, + "logps/rejected": -220.32235717773438, + "loss": 0.2182, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.412467002868652, + "rewards/margins": 5.528298377990723, + "rewards/rejected": -9.940765380859375, + "step": 11364 + }, + { + "epoch": 1.77, + "learning_rate": 5.81178276881581e-06, + "logits/chosen": -2.07558274269104, + "logits/rejected": -3.0519678592681885, + "logps/chosen": -181.67074584960938, + "logps/rejected": -371.87286376953125, + "loss": 1.2135, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.868527412414551, + "rewards/margins": 7.610750675201416, + "rewards/rejected": -12.479277610778809, + "step": 11365 + }, + { + "epoch": 1.77, + "learning_rate": 5.8110493282846625e-06, + "logits/chosen": -1.7379578351974487, + "logits/rejected": -2.708261251449585, + "logps/chosen": -141.88433837890625, + "logps/rejected": -288.61346435546875, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.318564414978027, + "rewards/margins": 6.791868686676025, + "rewards/rejected": -12.110433578491211, + "step": 11366 + }, + { + "epoch": 1.77, + "learning_rate": 5.810315887753514e-06, + "logits/chosen": -1.9635781049728394, + "logits/rejected": -2.5097718238830566, + "logps/chosen": -197.163818359375, + "logps/rejected": -473.95928955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.558895111083984, + "rewards/margins": 13.05722427368164, + "rewards/rejected": -19.616119384765625, + "step": 11367 + }, + { + "epoch": 1.77, + "learning_rate": 5.809582447222366e-06, + "logits/chosen": -1.4026557207107544, + "logits/rejected": -3.0209758281707764, + "logps/chosen": -69.62749481201172, + "logps/rejected": -591.0463256835938, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.690927505493164, + "rewards/margins": 10.026891708374023, + "rewards/rejected": -14.717819213867188, + "step": 11368 + }, + { + "epoch": 1.77, + "learning_rate": 5.808849006691218e-06, + "logits/chosen": -2.5290279388427734, + "logits/rejected": -2.172168731689453, + "logps/chosen": -494.07330322265625, + "logps/rejected": -509.47991943359375, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.982382297515869, + "rewards/margins": 6.036500453948975, + "rewards/rejected": -14.018882751464844, + "step": 11369 + }, + { + "epoch": 1.77, + "learning_rate": 5.80811556616007e-06, + "logits/chosen": -2.731680393218994, + "logits/rejected": -2.174501657485962, + "logps/chosen": -147.77523803710938, + "logps/rejected": -117.94940948486328, + "loss": 0.9065, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.100832939147949, + "rewards/margins": 1.1125071048736572, + "rewards/rejected": -8.213340759277344, + "step": 11370 + }, + { + "epoch": 1.77, + "learning_rate": 5.807382125628923e-06, + "logits/chosen": -1.8068069219589233, + "logits/rejected": -2.7020018100738525, + "logps/chosen": -116.32151794433594, + "logps/rejected": -210.7982635498047, + "loss": 0.5088, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.340312957763672, + "rewards/margins": 2.3616318702697754, + "rewards/rejected": -7.7019453048706055, + "step": 11371 + }, + { + "epoch": 1.77, + "learning_rate": 5.806648685097775e-06, + "logits/chosen": -1.9285550117492676, + "logits/rejected": -2.539689302444458, + "logps/chosen": -201.05154418945312, + "logps/rejected": -329.50994873046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.581130027770996, + "rewards/margins": 9.417245864868164, + "rewards/rejected": -16.998374938964844, + "step": 11372 + }, + { + "epoch": 1.77, + "learning_rate": 5.8059152445666265e-06, + "logits/chosen": -2.8320884704589844, + "logits/rejected": -2.8630330562591553, + "logps/chosen": -158.29153442382812, + "logps/rejected": -137.09933471679688, + "loss": 0.1625, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.480362892150879, + "rewards/margins": 3.821255922317505, + "rewards/rejected": -7.301618576049805, + "step": 11373 + }, + { + "epoch": 1.77, + "learning_rate": 5.805181804035479e-06, + "logits/chosen": -0.616156280040741, + "logits/rejected": -2.549393653869629, + "logps/chosen": -94.79222106933594, + "logps/rejected": -386.3260192871094, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.065324783325195, + "rewards/margins": 6.801679611206055, + "rewards/rejected": -13.86700439453125, + "step": 11374 + }, + { + "epoch": 1.77, + "learning_rate": 5.804448363504332e-06, + "logits/chosen": -3.010939598083496, + "logits/rejected": -3.027308464050293, + "logps/chosen": -291.54705810546875, + "logps/rejected": -170.34378051757812, + "loss": 0.7293, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.363671779632568, + "rewards/margins": 1.28758704662323, + "rewards/rejected": -6.651258945465088, + "step": 11375 + }, + { + "epoch": 1.77, + "learning_rate": 5.803714922973184e-06, + "logits/chosen": -1.1545995473861694, + "logits/rejected": -2.853731632232666, + "logps/chosen": -57.56626892089844, + "logps/rejected": -508.0457763671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.096275329589844, + "rewards/margins": 10.43979263305664, + "rewards/rejected": -14.536067962646484, + "step": 11376 + }, + { + "epoch": 1.77, + "learning_rate": 5.802981482442036e-06, + "logits/chosen": -2.177032947540283, + "logits/rejected": -2.953432321548462, + "logps/chosen": -92.1408462524414, + "logps/rejected": -332.1793212890625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9607534408569336, + "rewards/margins": 8.639997482299805, + "rewards/rejected": -11.600750923156738, + "step": 11377 + }, + { + "epoch": 1.77, + "learning_rate": 5.802248041910888e-06, + "logits/chosen": -2.312633514404297, + "logits/rejected": -2.9747841358184814, + "logps/chosen": -618.7083740234375, + "logps/rejected": -621.0333251953125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7671661376953125, + "rewards/margins": 7.213267803192139, + "rewards/rejected": -12.98043441772461, + "step": 11378 + }, + { + "epoch": 1.77, + "learning_rate": 5.8015146013797395e-06, + "logits/chosen": -2.497648239135742, + "logits/rejected": -2.650257110595703, + "logps/chosen": -171.28453063964844, + "logps/rejected": -420.9075927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.031472206115723, + "rewards/margins": 10.770647048950195, + "rewards/rejected": -15.802120208740234, + "step": 11379 + }, + { + "epoch": 1.77, + "learning_rate": 5.800781160848592e-06, + "logits/chosen": -1.4907792806625366, + "logits/rejected": -2.791024923324585, + "logps/chosen": -334.21923828125, + "logps/rejected": -746.94091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4848899841308594, + "rewards/margins": 11.164307594299316, + "rewards/rejected": -14.649197578430176, + "step": 11380 + }, + { + "epoch": 1.77, + "learning_rate": 5.800047720317444e-06, + "logits/chosen": -2.633934259414673, + "logits/rejected": -2.88325572013855, + "logps/chosen": -481.18115234375, + "logps/rejected": -432.67144775390625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.136018753051758, + "rewards/margins": 6.8062214851379395, + "rewards/rejected": -11.942239761352539, + "step": 11381 + }, + { + "epoch": 1.77, + "learning_rate": 5.799314279786296e-06, + "logits/chosen": -1.1860356330871582, + "logits/rejected": -2.9906816482543945, + "logps/chosen": -136.4040069580078, + "logps/rejected": -406.7689208984375, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.879075050354004, + "rewards/margins": 7.603095054626465, + "rewards/rejected": -12.482170104980469, + "step": 11382 + }, + { + "epoch": 1.77, + "learning_rate": 5.798580839255148e-06, + "logits/chosen": -2.4127140045166016, + "logits/rejected": -2.7024505138397217, + "logps/chosen": -257.3978271484375, + "logps/rejected": -275.1957092285156, + "loss": 0.8074, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.375830173492432, + "rewards/margins": 3.1854379177093506, + "rewards/rejected": -8.561267852783203, + "step": 11383 + }, + { + "epoch": 1.77, + "learning_rate": 5.7978473987240005e-06, + "logits/chosen": -1.7328388690948486, + "logits/rejected": -2.7680158615112305, + "logps/chosen": -130.55154418945312, + "logps/rejected": -208.578857421875, + "loss": 2.3653, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.902290344238281, + "rewards/margins": -0.6573855876922607, + "rewards/rejected": -10.244904518127441, + "step": 11384 + }, + { + "epoch": 1.77, + "learning_rate": 5.7971139581928524e-06, + "logits/chosen": -2.909639835357666, + "logits/rejected": -3.2055001258850098, + "logps/chosen": -164.23797607421875, + "logps/rejected": -337.8951416015625, + "loss": 0.0455, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.698253154754639, + "rewards/margins": 5.377146244049072, + "rewards/rejected": -10.075399398803711, + "step": 11385 + }, + { + "epoch": 1.77, + "learning_rate": 5.796380517661704e-06, + "logits/chosen": -2.443955183029175, + "logits/rejected": -2.951288938522339, + "logps/chosen": -538.8761596679688, + "logps/rejected": -371.29400634765625, + "loss": 0.5928, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.063676357269287, + "rewards/margins": 3.6136457920074463, + "rewards/rejected": -9.677322387695312, + "step": 11386 + }, + { + "epoch": 1.77, + "learning_rate": 5.795647077130556e-06, + "logits/chosen": -1.6983388662338257, + "logits/rejected": -2.2729525566101074, + "logps/chosen": -433.65191650390625, + "logps/rejected": -607.3765869140625, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.341352939605713, + "rewards/margins": 9.929152488708496, + "rewards/rejected": -15.27050495147705, + "step": 11387 + }, + { + "epoch": 1.77, + "learning_rate": 5.794913636599408e-06, + "logits/chosen": -2.7020339965820312, + "logits/rejected": -2.3549320697784424, + "logps/chosen": -306.8485412597656, + "logps/rejected": -574.3792114257812, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.940243721008301, + "rewards/margins": 9.823627471923828, + "rewards/rejected": -17.763870239257812, + "step": 11388 + }, + { + "epoch": 1.77, + "learning_rate": 5.794180196068261e-06, + "logits/chosen": -3.0334393978118896, + "logits/rejected": -2.7601356506347656, + "logps/chosen": -199.86654663085938, + "logps/rejected": -255.5360870361328, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.024600505828857, + "rewards/margins": 6.619533061981201, + "rewards/rejected": -10.644133567810059, + "step": 11389 + }, + { + "epoch": 1.77, + "learning_rate": 5.793446755537113e-06, + "logits/chosen": -1.0915886163711548, + "logits/rejected": -2.4118947982788086, + "logps/chosen": -248.67054748535156, + "logps/rejected": -734.7000732421875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.321979999542236, + "rewards/margins": 11.966194152832031, + "rewards/rejected": -16.28817367553711, + "step": 11390 + }, + { + "epoch": 1.77, + "learning_rate": 5.792713315005965e-06, + "logits/chosen": -2.9697134494781494, + "logits/rejected": -2.538827657699585, + "logps/chosen": -464.088623046875, + "logps/rejected": -372.9049072265625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.165526390075684, + "rewards/margins": 7.858906269073486, + "rewards/rejected": -12.024433135986328, + "step": 11391 + }, + { + "epoch": 1.77, + "learning_rate": 5.791979874474817e-06, + "logits/chosen": -2.8937084674835205, + "logits/rejected": -2.205688953399658, + "logps/chosen": -660.7680053710938, + "logps/rejected": -492.76654052734375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0140581130981445, + "rewards/margins": 8.914567947387695, + "rewards/rejected": -12.928625106811523, + "step": 11392 + }, + { + "epoch": 1.77, + "learning_rate": 5.79124643394367e-06, + "logits/chosen": -2.8454971313476562, + "logits/rejected": -2.5679738521575928, + "logps/chosen": -1334.496337890625, + "logps/rejected": -953.3280029296875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.717504978179932, + "rewards/margins": 10.470938682556152, + "rewards/rejected": -16.18844223022461, + "step": 11393 + }, + { + "epoch": 1.77, + "learning_rate": 5.790512993412522e-06, + "logits/chosen": -3.07850980758667, + "logits/rejected": -2.3958523273468018, + "logps/chosen": -274.136962890625, + "logps/rejected": -264.49560546875, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.162928819656372, + "rewards/margins": 7.042211532592773, + "rewards/rejected": -10.205141067504883, + "step": 11394 + }, + { + "epoch": 1.77, + "learning_rate": 5.789779552881374e-06, + "logits/chosen": -2.9482128620147705, + "logits/rejected": -2.3484041690826416, + "logps/chosen": -485.7049560546875, + "logps/rejected": -414.0357360839844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.395005941390991, + "rewards/margins": 11.975894927978516, + "rewards/rejected": -15.370901107788086, + "step": 11395 + }, + { + "epoch": 1.77, + "learning_rate": 5.789046112350226e-06, + "logits/chosen": -2.980889081954956, + "logits/rejected": -2.668990135192871, + "logps/chosen": -393.8145751953125, + "logps/rejected": -453.18963623046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.586207866668701, + "rewards/margins": 9.697123527526855, + "rewards/rejected": -14.283331871032715, + "step": 11396 + }, + { + "epoch": 1.77, + "learning_rate": 5.7883126718190775e-06, + "logits/chosen": -2.8754539489746094, + "logits/rejected": -2.9531400203704834, + "logps/chosen": -183.77017211914062, + "logps/rejected": -305.8843078613281, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.787468910217285, + "rewards/margins": 6.409075736999512, + "rewards/rejected": -9.196544647216797, + "step": 11397 + }, + { + "epoch": 1.77, + "learning_rate": 5.78757923128793e-06, + "logits/chosen": -2.660360813140869, + "logits/rejected": -3.0182998180389404, + "logps/chosen": -163.66366577148438, + "logps/rejected": -183.1840362548828, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8496246337890625, + "rewards/margins": 7.417879581451416, + "rewards/rejected": -12.26750373840332, + "step": 11398 + }, + { + "epoch": 1.77, + "learning_rate": 5.786845790756782e-06, + "logits/chosen": -2.7283709049224854, + "logits/rejected": -2.4885730743408203, + "logps/chosen": -204.2294921875, + "logps/rejected": -217.88551330566406, + "loss": 1.0366, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.6837944984436035, + "rewards/margins": 1.630305528640747, + "rewards/rejected": -7.31410026550293, + "step": 11399 + }, + { + "epoch": 1.77, + "learning_rate": 5.786112350225634e-06, + "logits/chosen": -2.9008564949035645, + "logits/rejected": -2.188065528869629, + "logps/chosen": -278.8110046386719, + "logps/rejected": -382.43499755859375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.036235809326172, + "rewards/margins": 9.346460342407227, + "rewards/rejected": -13.382696151733398, + "step": 11400 + }, + { + "epoch": 1.77, + "learning_rate": 5.785378909694486e-06, + "logits/chosen": -2.1171634197235107, + "logits/rejected": -2.597604990005493, + "logps/chosen": -270.42730712890625, + "logps/rejected": -363.7649230957031, + "loss": 0.0708, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.697396278381348, + "rewards/margins": 4.396307468414307, + "rewards/rejected": -11.093704223632812, + "step": 11401 + }, + { + "epoch": 1.77, + "learning_rate": 5.784645469163339e-06, + "logits/chosen": -2.5516552925109863, + "logits/rejected": -2.973191976547241, + "logps/chosen": -62.93254852294922, + "logps/rejected": -258.13812255859375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.388186931610107, + "rewards/margins": 7.02122688293457, + "rewards/rejected": -11.409414291381836, + "step": 11402 + }, + { + "epoch": 1.77, + "learning_rate": 5.7839120286321905e-06, + "logits/chosen": -2.1530556678771973, + "logits/rejected": -3.079139232635498, + "logps/chosen": -244.60964965820312, + "logps/rejected": -488.5455322265625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.130697727203369, + "rewards/margins": 8.46274185180664, + "rewards/rejected": -13.593439102172852, + "step": 11403 + }, + { + "epoch": 1.77, + "learning_rate": 5.783178588101042e-06, + "logits/chosen": -2.588266611099243, + "logits/rejected": -2.636989116668701, + "logps/chosen": -209.1322479248047, + "logps/rejected": -391.43310546875, + "loss": 0.2289, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.685918807983398, + "rewards/margins": 7.205539703369141, + "rewards/rejected": -12.891458511352539, + "step": 11404 + }, + { + "epoch": 1.77, + "learning_rate": 5.782445147569894e-06, + "logits/chosen": -2.992919445037842, + "logits/rejected": -2.209967851638794, + "logps/chosen": -282.954345703125, + "logps/rejected": -260.8668212890625, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6189727783203125, + "rewards/margins": 5.558878421783447, + "rewards/rejected": -9.177850723266602, + "step": 11405 + }, + { + "epoch": 1.77, + "learning_rate": 5.781711707038747e-06, + "logits/chosen": -2.196262836456299, + "logits/rejected": -2.4687535762786865, + "logps/chosen": -152.10528564453125, + "logps/rejected": -198.53018188476562, + "loss": 0.1622, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.556368827819824, + "rewards/margins": 4.067486763000488, + "rewards/rejected": -10.623855590820312, + "step": 11406 + }, + { + "epoch": 1.77, + "learning_rate": 5.780978266507599e-06, + "logits/chosen": -2.4232168197631836, + "logits/rejected": -2.367447853088379, + "logps/chosen": -265.5032958984375, + "logps/rejected": -505.22247314453125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.543815612792969, + "rewards/margins": 11.488885879516602, + "rewards/rejected": -16.03270149230957, + "step": 11407 + }, + { + "epoch": 1.77, + "learning_rate": 5.7802448259764516e-06, + "logits/chosen": -2.6502864360809326, + "logits/rejected": -3.024416208267212, + "logps/chosen": -76.23745727539062, + "logps/rejected": -305.4722900390625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.400533437728882, + "rewards/margins": 8.792058944702148, + "rewards/rejected": -12.19259262084961, + "step": 11408 + }, + { + "epoch": 1.77, + "learning_rate": 5.7795113854453034e-06, + "logits/chosen": -2.320172071456909, + "logits/rejected": -2.8353095054626465, + "logps/chosen": -89.19419860839844, + "logps/rejected": -353.57080078125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.257065773010254, + "rewards/margins": 10.012825965881348, + "rewards/rejected": -15.269891738891602, + "step": 11409 + }, + { + "epoch": 1.77, + "learning_rate": 5.778777944914155e-06, + "logits/chosen": -2.875439405441284, + "logits/rejected": -2.4966249465942383, + "logps/chosen": -604.1271362304688, + "logps/rejected": -519.4757080078125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7414350509643555, + "rewards/margins": 8.801894187927246, + "rewards/rejected": -11.543329238891602, + "step": 11410 + }, + { + "epoch": 1.77, + "learning_rate": 5.778044504383008e-06, + "logits/chosen": -2.75602388381958, + "logits/rejected": -2.894059419631958, + "logps/chosen": -99.20893859863281, + "logps/rejected": -227.55682373046875, + "loss": 0.0924, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.460216045379639, + "rewards/margins": 3.0824317932128906, + "rewards/rejected": -8.542648315429688, + "step": 11411 + }, + { + "epoch": 1.77, + "learning_rate": 5.77731106385186e-06, + "logits/chosen": -3.1380269527435303, + "logits/rejected": -3.151801586151123, + "logps/chosen": -569.209228515625, + "logps/rejected": -524.4295654296875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9859925508499146, + "rewards/margins": 9.017044067382812, + "rewards/rejected": -11.003036499023438, + "step": 11412 + }, + { + "epoch": 1.77, + "learning_rate": 5.776577623320712e-06, + "logits/chosen": -2.094949722290039, + "logits/rejected": -2.876966714859009, + "logps/chosen": -212.0308837890625, + "logps/rejected": -352.36279296875, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.789666175842285, + "rewards/margins": 5.534709930419922, + "rewards/rejected": -11.324376106262207, + "step": 11413 + }, + { + "epoch": 1.78, + "learning_rate": 5.775844182789564e-06, + "logits/chosen": -2.9039931297302246, + "logits/rejected": -2.9695401191711426, + "logps/chosen": -282.5943603515625, + "logps/rejected": -291.4261779785156, + "loss": 1.1275, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.153803825378418, + "rewards/margins": 1.9588406085968018, + "rewards/rejected": -9.11264419555664, + "step": 11414 + }, + { + "epoch": 1.78, + "learning_rate": 5.775110742258416e-06, + "logits/chosen": -2.9568395614624023, + "logits/rejected": -2.4607298374176025, + "logps/chosen": -374.166259765625, + "logps/rejected": -191.29238891601562, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.569827079772949, + "rewards/margins": 3.4189107418060303, + "rewards/rejected": -8.988738059997559, + "step": 11415 + }, + { + "epoch": 1.78, + "learning_rate": 5.774377301727268e-06, + "logits/chosen": -2.430462598800659, + "logits/rejected": -2.523925304412842, + "logps/chosen": -100.30580139160156, + "logps/rejected": -210.7938995361328, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.190428256988525, + "rewards/margins": 6.448427200317383, + "rewards/rejected": -10.638855934143066, + "step": 11416 + }, + { + "epoch": 1.78, + "learning_rate": 5.77364386119612e-06, + "logits/chosen": -2.8528151512145996, + "logits/rejected": -2.9579412937164307, + "logps/chosen": -156.80181884765625, + "logps/rejected": -147.79220581054688, + "loss": 0.2759, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.689727306365967, + "rewards/margins": 1.3478738069534302, + "rewards/rejected": -7.037600994110107, + "step": 11417 + }, + { + "epoch": 1.78, + "learning_rate": 5.772910420664972e-06, + "logits/chosen": -2.9659409523010254, + "logits/rejected": -2.899975299835205, + "logps/chosen": -389.23272705078125, + "logps/rejected": -305.04449462890625, + "loss": 0.0531, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9632906913757324, + "rewards/margins": 2.908416748046875, + "rewards/rejected": -6.871706962585449, + "step": 11418 + }, + { + "epoch": 1.78, + "learning_rate": 5.772176980133824e-06, + "logits/chosen": -3.006582498550415, + "logits/rejected": -2.7073004245758057, + "logps/chosen": -309.3304138183594, + "logps/rejected": -331.61395263671875, + "loss": 1.0382, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.626880645751953, + "rewards/margins": 3.127127170562744, + "rewards/rejected": -10.754007339477539, + "step": 11419 + }, + { + "epoch": 1.78, + "learning_rate": 5.771443539602677e-06, + "logits/chosen": -1.8047090768814087, + "logits/rejected": -2.786187171936035, + "logps/chosen": -182.3805389404297, + "logps/rejected": -423.87994384765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9849514961242676, + "rewards/margins": 8.63408088684082, + "rewards/rejected": -12.61903190612793, + "step": 11420 + }, + { + "epoch": 1.78, + "learning_rate": 5.7707100990715285e-06, + "logits/chosen": -2.0053622722625732, + "logits/rejected": -3.0175185203552246, + "logps/chosen": -192.18643188476562, + "logps/rejected": -382.39056396484375, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4377851486206055, + "rewards/margins": 5.800266265869141, + "rewards/rejected": -10.23805046081543, + "step": 11421 + }, + { + "epoch": 1.78, + "learning_rate": 5.76997665854038e-06, + "logits/chosen": -2.545974016189575, + "logits/rejected": -3.1453206539154053, + "logps/chosen": -121.61396789550781, + "logps/rejected": -466.5291748046875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.854001045227051, + "rewards/margins": 6.83868408203125, + "rewards/rejected": -12.6926851272583, + "step": 11422 + }, + { + "epoch": 1.78, + "learning_rate": 5.769243218009232e-06, + "logits/chosen": -3.0375282764434814, + "logits/rejected": -1.9242854118347168, + "logps/chosen": -481.45343017578125, + "logps/rejected": -235.90821838378906, + "loss": 3.8884, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.258550643920898, + "rewards/margins": 0.07568764686584473, + "rewards/rejected": -10.33423900604248, + "step": 11423 + }, + { + "epoch": 1.78, + "learning_rate": 5.768509777478085e-06, + "logits/chosen": -3.021517276763916, + "logits/rejected": -3.109020233154297, + "logps/chosen": -310.2626037597656, + "logps/rejected": -323.92340087890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5767197608947754, + "rewards/margins": 11.318082809448242, + "rewards/rejected": -13.894803047180176, + "step": 11424 + }, + { + "epoch": 1.78, + "learning_rate": 5.767776336946938e-06, + "logits/chosen": -2.276319980621338, + "logits/rejected": -2.8422138690948486, + "logps/chosen": -490.9197998046875, + "logps/rejected": -537.3926391601562, + "loss": 0.7717, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.693178176879883, + "rewards/margins": 1.784818172454834, + "rewards/rejected": -10.477996826171875, + "step": 11425 + }, + { + "epoch": 1.78, + "learning_rate": 5.76704289641579e-06, + "logits/chosen": -2.9079458713531494, + "logits/rejected": -2.9580366611480713, + "logps/chosen": -190.00074768066406, + "logps/rejected": -251.12799072265625, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.136714935302734, + "rewards/margins": 5.134670257568359, + "rewards/rejected": -12.271385192871094, + "step": 11426 + }, + { + "epoch": 1.78, + "learning_rate": 5.7663094558846415e-06, + "logits/chosen": -2.7212841510772705, + "logits/rejected": -2.9157354831695557, + "logps/chosen": -402.3165283203125, + "logps/rejected": -518.784423828125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.505281925201416, + "rewards/margins": 6.2405595779418945, + "rewards/rejected": -9.745841979980469, + "step": 11427 + }, + { + "epoch": 1.78, + "learning_rate": 5.765576015353493e-06, + "logits/chosen": -2.483232259750366, + "logits/rejected": -2.090047597885132, + "logps/chosen": -206.60617065429688, + "logps/rejected": -270.2034912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.215136528015137, + "rewards/margins": 11.618842124938965, + "rewards/rejected": -15.833978652954102, + "step": 11428 + }, + { + "epoch": 1.78, + "learning_rate": 5.764842574822346e-06, + "logits/chosen": -3.1001734733581543, + "logits/rejected": -2.529733180999756, + "logps/chosen": -343.07232666015625, + "logps/rejected": -151.49244689941406, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9072921872138977, + "rewards/margins": 7.873503684997559, + "rewards/rejected": -8.78079605102539, + "step": 11429 + }, + { + "epoch": 1.78, + "learning_rate": 5.764109134291198e-06, + "logits/chosen": -2.647954225540161, + "logits/rejected": -2.9748177528381348, + "logps/chosen": -336.41473388671875, + "logps/rejected": -402.270263671875, + "loss": 1.8602, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.039680480957031, + "rewards/margins": -0.8982706069946289, + "rewards/rejected": -7.141409873962402, + "step": 11430 + }, + { + "epoch": 1.78, + "learning_rate": 5.76337569376005e-06, + "logits/chosen": -2.760612726211548, + "logits/rejected": -1.921468734741211, + "logps/chosen": -521.7603149414062, + "logps/rejected": -429.34326171875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.810420274734497, + "rewards/margins": 9.680304527282715, + "rewards/rejected": -12.490724563598633, + "step": 11431 + }, + { + "epoch": 1.78, + "learning_rate": 5.762642253228902e-06, + "logits/chosen": -2.1716725826263428, + "logits/rejected": -2.890395164489746, + "logps/chosen": -137.28939819335938, + "logps/rejected": -179.47171020507812, + "loss": 1.0093, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.240269660949707, + "rewards/margins": 1.6032698154449463, + "rewards/rejected": -8.84354019165039, + "step": 11432 + }, + { + "epoch": 1.78, + "learning_rate": 5.7619088126977544e-06, + "logits/chosen": -2.8587427139282227, + "logits/rejected": -2.8712167739868164, + "logps/chosen": -419.5765380859375, + "logps/rejected": -447.0013122558594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.078460931777954, + "rewards/margins": 10.533790588378906, + "rewards/rejected": -13.612251281738281, + "step": 11433 + }, + { + "epoch": 1.78, + "learning_rate": 5.761175372166606e-06, + "logits/chosen": -2.7726070880889893, + "logits/rejected": -3.120525360107422, + "logps/chosen": -176.3052520751953, + "logps/rejected": -532.0279541015625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.733587265014648, + "rewards/margins": 11.97973346710205, + "rewards/rejected": -16.713319778442383, + "step": 11434 + }, + { + "epoch": 1.78, + "learning_rate": 5.760441931635458e-06, + "logits/chosen": -2.290897846221924, + "logits/rejected": -2.9499688148498535, + "logps/chosen": -153.24777221679688, + "logps/rejected": -230.16607666015625, + "loss": 0.041, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.077381610870361, + "rewards/margins": 3.6492185592651367, + "rewards/rejected": -9.726600646972656, + "step": 11435 + }, + { + "epoch": 1.78, + "learning_rate": 5.75970849110431e-06, + "logits/chosen": -2.914583206176758, + "logits/rejected": -2.506932497024536, + "logps/chosen": -199.52322387695312, + "logps/rejected": -232.89169311523438, + "loss": 3.068, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.38703727722168, + "rewards/margins": 1.3373429775238037, + "rewards/rejected": -8.724380493164062, + "step": 11436 + }, + { + "epoch": 1.78, + "learning_rate": 5.758975050573162e-06, + "logits/chosen": -2.860119342803955, + "logits/rejected": -2.0734126567840576, + "logps/chosen": -538.9318237304688, + "logps/rejected": -512.256103515625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.733283042907715, + "rewards/margins": 9.699366569519043, + "rewards/rejected": -15.432649612426758, + "step": 11437 + }, + { + "epoch": 1.78, + "learning_rate": 5.758241610042015e-06, + "logits/chosen": -2.848193407058716, + "logits/rejected": -2.4350216388702393, + "logps/chosen": -448.1387939453125, + "logps/rejected": -427.6192932128906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2044525146484375, + "rewards/margins": 8.891693115234375, + "rewards/rejected": -11.096146583557129, + "step": 11438 + }, + { + "epoch": 1.78, + "learning_rate": 5.7575081695108666e-06, + "logits/chosen": -2.19478178024292, + "logits/rejected": -2.342954158782959, + "logps/chosen": -162.96511840820312, + "logps/rejected": -263.9936218261719, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.345855712890625, + "rewards/margins": 6.129767417907715, + "rewards/rejected": -12.475622177124023, + "step": 11439 + }, + { + "epoch": 1.78, + "learning_rate": 5.7567747289797184e-06, + "logits/chosen": -1.797837495803833, + "logits/rejected": -2.801225423812866, + "logps/chosen": -103.68983459472656, + "logps/rejected": -359.60888671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5773863792419434, + "rewards/margins": 9.37474250793457, + "rewards/rejected": -12.952129364013672, + "step": 11440 + }, + { + "epoch": 1.78, + "learning_rate": 5.756041288448571e-06, + "logits/chosen": -2.4875030517578125, + "logits/rejected": -2.854206085205078, + "logps/chosen": -253.24435424804688, + "logps/rejected": -229.2235870361328, + "loss": 1.7104, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.568478107452393, + "rewards/margins": 4.078351974487305, + "rewards/rejected": -10.646829605102539, + "step": 11441 + }, + { + "epoch": 1.78, + "learning_rate": 5.755307847917424e-06, + "logits/chosen": -1.555517315864563, + "logits/rejected": -2.744921922683716, + "logps/chosen": -87.04327392578125, + "logps/rejected": -480.83551025390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.83416748046875, + "rewards/margins": 11.922306060791016, + "rewards/rejected": -16.756473541259766, + "step": 11442 + }, + { + "epoch": 1.78, + "learning_rate": 5.754574407386276e-06, + "logits/chosen": -2.6428630352020264, + "logits/rejected": -2.897562026977539, + "logps/chosen": -328.88232421875, + "logps/rejected": -483.1745300292969, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.723538875579834, + "rewards/margins": 8.212656021118164, + "rewards/rejected": -10.93619441986084, + "step": 11443 + }, + { + "epoch": 1.78, + "learning_rate": 5.753840966855128e-06, + "logits/chosen": -3.2304704189300537, + "logits/rejected": -2.92819881439209, + "logps/chosen": -570.980712890625, + "logps/rejected": -485.4298095703125, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3810129165649414, + "rewards/margins": 6.741579055786133, + "rewards/rejected": -9.122591972351074, + "step": 11444 + }, + { + "epoch": 1.78, + "learning_rate": 5.7531075263239795e-06, + "logits/chosen": -1.739877462387085, + "logits/rejected": -2.576387882232666, + "logps/chosen": -144.125244140625, + "logps/rejected": -334.9427795410156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.95536470413208, + "rewards/margins": 15.085966110229492, + "rewards/rejected": -18.041330337524414, + "step": 11445 + }, + { + "epoch": 1.78, + "learning_rate": 5.752374085792831e-06, + "logits/chosen": -1.9947737455368042, + "logits/rejected": -2.6450703144073486, + "logps/chosen": -137.052978515625, + "logps/rejected": -231.3883819580078, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8234105110168457, + "rewards/margins": 6.468827247619629, + "rewards/rejected": -10.292238235473633, + "step": 11446 + }, + { + "epoch": 1.78, + "learning_rate": 5.751640645261684e-06, + "logits/chosen": -2.1254873275756836, + "logits/rejected": -3.0205800533294678, + "logps/chosen": -145.48341369628906, + "logps/rejected": -258.9844970703125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.945373058319092, + "rewards/margins": 9.01652717590332, + "rewards/rejected": -11.961899757385254, + "step": 11447 + }, + { + "epoch": 1.78, + "learning_rate": 5.750907204730536e-06, + "logits/chosen": -1.5639082193374634, + "logits/rejected": -2.680431604385376, + "logps/chosen": -115.1286849975586, + "logps/rejected": -285.857666015625, + "loss": 1.4788, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.329512596130371, + "rewards/margins": 3.1306495666503906, + "rewards/rejected": -12.460162162780762, + "step": 11448 + }, + { + "epoch": 1.78, + "learning_rate": 5.750173764199388e-06, + "logits/chosen": -2.9057953357696533, + "logits/rejected": -3.103987455368042, + "logps/chosen": -154.55117797851562, + "logps/rejected": -314.97967529296875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5342113971710205, + "rewards/margins": 7.713018417358398, + "rewards/rejected": -11.247230529785156, + "step": 11449 + }, + { + "epoch": 1.78, + "learning_rate": 5.74944032366824e-06, + "logits/chosen": -2.1295037269592285, + "logits/rejected": -2.601001024246216, + "logps/chosen": -284.53961181640625, + "logps/rejected": -360.50701904296875, + "loss": 0.8857, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.104282379150391, + "rewards/margins": 4.039189338684082, + "rewards/rejected": -11.143471717834473, + "step": 11450 + }, + { + "epoch": 1.78, + "learning_rate": 5.7487068831370925e-06, + "logits/chosen": -2.7786800861358643, + "logits/rejected": -1.8641258478164673, + "logps/chosen": -226.9988555908203, + "logps/rejected": -183.2977752685547, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.825181007385254, + "rewards/margins": 7.834704875946045, + "rewards/rejected": -13.65988540649414, + "step": 11451 + }, + { + "epoch": 1.78, + "learning_rate": 5.747973442605944e-06, + "logits/chosen": -2.572052240371704, + "logits/rejected": -2.9582600593566895, + "logps/chosen": -73.35722351074219, + "logps/rejected": -321.70147705078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6396484375, + "rewards/margins": 10.593250274658203, + "rewards/rejected": -15.232898712158203, + "step": 11452 + }, + { + "epoch": 1.78, + "learning_rate": 5.747240002074796e-06, + "logits/chosen": -1.1559069156646729, + "logits/rejected": -2.911195993423462, + "logps/chosen": -208.7169952392578, + "logps/rejected": -594.2108154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.335816383361816, + "rewards/margins": 11.271366119384766, + "rewards/rejected": -15.607182502746582, + "step": 11453 + }, + { + "epoch": 1.78, + "learning_rate": 5.746506561543648e-06, + "logits/chosen": -2.855656862258911, + "logits/rejected": -3.0674386024475098, + "logps/chosen": -83.97689819335938, + "logps/rejected": -281.98602294921875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.896744728088379, + "rewards/margins": 6.004316329956055, + "rewards/rejected": -9.901060104370117, + "step": 11454 + }, + { + "epoch": 1.78, + "learning_rate": 5.745773121012501e-06, + "logits/chosen": -1.4733994007110596, + "logits/rejected": -2.3823482990264893, + "logps/chosen": -206.0571746826172, + "logps/rejected": -371.62078857421875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.893378257751465, + "rewards/margins": 9.066352844238281, + "rewards/rejected": -13.95973014831543, + "step": 11455 + }, + { + "epoch": 1.78, + "learning_rate": 5.745039680481353e-06, + "logits/chosen": -2.902399778366089, + "logits/rejected": -2.704920530319214, + "logps/chosen": -162.87005615234375, + "logps/rejected": -305.50299072265625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.170941352844238, + "rewards/margins": 6.606825828552246, + "rewards/rejected": -11.777767181396484, + "step": 11456 + }, + { + "epoch": 1.78, + "learning_rate": 5.744306239950205e-06, + "logits/chosen": -1.2792026996612549, + "logits/rejected": -2.5226118564605713, + "logps/chosen": -592.21337890625, + "logps/rejected": -682.2124633789062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8504350185394287, + "rewards/margins": 11.453092575073242, + "rewards/rejected": -15.30352783203125, + "step": 11457 + }, + { + "epoch": 1.78, + "learning_rate": 5.743572799419057e-06, + "logits/chosen": -2.855699300765991, + "logits/rejected": -2.749115467071533, + "logps/chosen": -107.24412536621094, + "logps/rejected": -272.76641845703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3041515350341797, + "rewards/margins": 9.208553314208984, + "rewards/rejected": -12.512704849243164, + "step": 11458 + }, + { + "epoch": 1.78, + "learning_rate": 5.742839358887909e-06, + "logits/chosen": -2.7101268768310547, + "logits/rejected": -2.844251871109009, + "logps/chosen": -351.23284912109375, + "logps/rejected": -586.8502197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.526085376739502, + "rewards/margins": 13.106990814208984, + "rewards/rejected": -17.633075714111328, + "step": 11459 + }, + { + "epoch": 1.78, + "learning_rate": 5.742105918356762e-06, + "logits/chosen": -3.0326695442199707, + "logits/rejected": -2.8294007778167725, + "logps/chosen": -355.0613098144531, + "logps/rejected": -246.1831512451172, + "loss": 0.1232, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2236320972442627, + "rewards/margins": 4.5662431716918945, + "rewards/rejected": -7.789875030517578, + "step": 11460 + }, + { + "epoch": 1.78, + "learning_rate": 5.741372477825614e-06, + "logits/chosen": -2.696214199066162, + "logits/rejected": -3.020913600921631, + "logps/chosen": -279.98419189453125, + "logps/rejected": -258.98822021484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.469204902648926, + "rewards/margins": 8.626033782958984, + "rewards/rejected": -14.09523868560791, + "step": 11461 + }, + { + "epoch": 1.78, + "learning_rate": 5.740639037294466e-06, + "logits/chosen": -3.123603105545044, + "logits/rejected": -2.968510150909424, + "logps/chosen": -569.9188842773438, + "logps/rejected": -463.30352783203125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8530113697052, + "rewards/margins": 6.2916669845581055, + "rewards/rejected": -9.144678115844727, + "step": 11462 + }, + { + "epoch": 1.78, + "learning_rate": 5.7399055967633176e-06, + "logits/chosen": -2.851132869720459, + "logits/rejected": -2.338174343109131, + "logps/chosen": -227.27236938476562, + "logps/rejected": -260.8758544921875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.417227268218994, + "rewards/margins": 6.571794509887695, + "rewards/rejected": -9.989021301269531, + "step": 11463 + }, + { + "epoch": 1.78, + "learning_rate": 5.73917215623217e-06, + "logits/chosen": -2.431447982788086, + "logits/rejected": -2.859935760498047, + "logps/chosen": -647.7764892578125, + "logps/rejected": -668.6851806640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.215812683105469, + "rewards/margins": 13.063655853271484, + "rewards/rejected": -18.279468536376953, + "step": 11464 + }, + { + "epoch": 1.78, + "learning_rate": 5.738438715701022e-06, + "logits/chosen": -2.2324795722961426, + "logits/rejected": -2.9257380962371826, + "logps/chosen": -269.5328674316406, + "logps/rejected": -453.0062255859375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.625733375549316, + "rewards/margins": 8.515721321105957, + "rewards/rejected": -15.141454696655273, + "step": 11465 + }, + { + "epoch": 1.78, + "learning_rate": 5.737705275169874e-06, + "logits/chosen": -2.8389506340026855, + "logits/rejected": -1.8914562463760376, + "logps/chosen": -336.9783630371094, + "logps/rejected": -139.61886596679688, + "loss": 1.8487, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.283166408538818, + "rewards/margins": 1.7587101459503174, + "rewards/rejected": -8.041876792907715, + "step": 11466 + }, + { + "epoch": 1.78, + "learning_rate": 5.736971834638726e-06, + "logits/chosen": -0.8119917511940002, + "logits/rejected": -2.9546866416931152, + "logps/chosen": -70.68761444091797, + "logps/rejected": -382.77655029296875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.30907678604126, + "rewards/margins": 6.568913459777832, + "rewards/rejected": -10.87799072265625, + "step": 11467 + }, + { + "epoch": 1.78, + "learning_rate": 5.736238394107578e-06, + "logits/chosen": -1.7644683122634888, + "logits/rejected": -2.1259262561798096, + "logps/chosen": -236.9923095703125, + "logps/rejected": -427.13079833984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8988728523254395, + "rewards/margins": 9.138094902038574, + "rewards/rejected": -13.036968231201172, + "step": 11468 + }, + { + "epoch": 1.78, + "learning_rate": 5.7355049535764305e-06, + "logits/chosen": -2.883207082748413, + "logits/rejected": -1.790634036064148, + "logps/chosen": -496.649658203125, + "logps/rejected": -300.8524475097656, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0680313110351562, + "rewards/margins": 9.488603591918945, + "rewards/rejected": -11.556634902954102, + "step": 11469 + }, + { + "epoch": 1.78, + "learning_rate": 5.734771513045282e-06, + "logits/chosen": -1.8144017457962036, + "logits/rejected": -3.129154682159424, + "logps/chosen": -459.2239990234375, + "logps/rejected": -630.3327026367188, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0821750164031982, + "rewards/margins": 12.098723411560059, + "rewards/rejected": -15.180898666381836, + "step": 11470 + }, + { + "epoch": 1.78, + "learning_rate": 5.734038072514134e-06, + "logits/chosen": -3.137190341949463, + "logits/rejected": -2.1178693771362305, + "logps/chosen": -372.9224853515625, + "logps/rejected": -361.822509765625, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.995762825012207, + "rewards/margins": 4.325784206390381, + "rewards/rejected": -9.32154655456543, + "step": 11471 + }, + { + "epoch": 1.78, + "learning_rate": 5.733304631982986e-06, + "logits/chosen": -2.9184746742248535, + "logits/rejected": -3.2018558979034424, + "logps/chosen": -119.00743103027344, + "logps/rejected": -274.14776611328125, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.06804084777832, + "rewards/margins": 4.209080219268799, + "rewards/rejected": -9.277120590209961, + "step": 11472 + }, + { + "epoch": 1.78, + "learning_rate": 5.732571191451839e-06, + "logits/chosen": -2.0926849842071533, + "logits/rejected": -3.085876941680908, + "logps/chosen": -248.64541625976562, + "logps/rejected": -658.7688598632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.923788070678711, + "rewards/margins": 10.701967239379883, + "rewards/rejected": -15.625755310058594, + "step": 11473 + }, + { + "epoch": 1.78, + "learning_rate": 5.731837750920691e-06, + "logits/chosen": -2.9709737300872803, + "logits/rejected": -2.7609846591949463, + "logps/chosen": -294.627685546875, + "logps/rejected": -324.64385986328125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.385281562805176, + "rewards/margins": 6.838746547698975, + "rewards/rejected": -11.224027633666992, + "step": 11474 + }, + { + "epoch": 1.78, + "learning_rate": 5.7311043103895435e-06, + "logits/chosen": -1.9507458209991455, + "logits/rejected": -2.850209951400757, + "logps/chosen": -328.442138671875, + "logps/rejected": -562.1404418945312, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.668950080871582, + "rewards/margins": 8.047811508178711, + "rewards/rejected": -13.716761589050293, + "step": 11475 + }, + { + "epoch": 1.78, + "learning_rate": 5.730370869858395e-06, + "logits/chosen": -2.678893566131592, + "logits/rejected": -3.0124895572662354, + "logps/chosen": -67.5230941772461, + "logps/rejected": -212.410400390625, + "loss": 0.1103, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1859917640686035, + "rewards/margins": 4.705048561096191, + "rewards/rejected": -8.891040802001953, + "step": 11476 + }, + { + "epoch": 1.78, + "learning_rate": 5.729637429327247e-06, + "logits/chosen": -1.813096046447754, + "logits/rejected": -2.7175614833831787, + "logps/chosen": -178.9583740234375, + "logps/rejected": -231.54129028320312, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7456631660461426, + "rewards/margins": 7.675083160400391, + "rewards/rejected": -11.420745849609375, + "step": 11477 + }, + { + "epoch": 1.79, + "learning_rate": 5.7289039887961e-06, + "logits/chosen": -1.359653353691101, + "logits/rejected": -2.967637538909912, + "logps/chosen": -110.90657806396484, + "logps/rejected": -741.732666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.253973960876465, + "rewards/margins": 11.878683090209961, + "rewards/rejected": -16.132658004760742, + "step": 11478 + }, + { + "epoch": 1.79, + "learning_rate": 5.728170548264952e-06, + "logits/chosen": -2.9001386165618896, + "logits/rejected": -2.789306640625, + "logps/chosen": -192.11038208007812, + "logps/rejected": -257.2603759765625, + "loss": 0.1361, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.228024005889893, + "rewards/margins": 4.554508209228516, + "rewards/rejected": -9.78253173828125, + "step": 11479 + }, + { + "epoch": 1.79, + "learning_rate": 5.727437107733804e-06, + "logits/chosen": -2.0366060733795166, + "logits/rejected": -2.9087436199188232, + "logps/chosen": -167.532958984375, + "logps/rejected": -318.81463623046875, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.333410263061523, + "rewards/margins": 5.2657623291015625, + "rewards/rejected": -10.599172592163086, + "step": 11480 + }, + { + "epoch": 1.79, + "learning_rate": 5.726703667202656e-06, + "logits/chosen": -1.6812682151794434, + "logits/rejected": -2.819660186767578, + "logps/chosen": -285.341552734375, + "logps/rejected": -746.8954467773438, + "loss": 0.0373, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.962790489196777, + "rewards/margins": 3.4183242321014404, + "rewards/rejected": -10.381114959716797, + "step": 11481 + }, + { + "epoch": 1.79, + "learning_rate": 5.725970226671508e-06, + "logits/chosen": -1.8716691732406616, + "logits/rejected": -2.7680115699768066, + "logps/chosen": -164.34152221679688, + "logps/rejected": -302.38818359375, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.95379638671875, + "rewards/margins": 5.984844207763672, + "rewards/rejected": -10.938640594482422, + "step": 11482 + }, + { + "epoch": 1.79, + "learning_rate": 5.72523678614036e-06, + "logits/chosen": -2.3165230751037598, + "logits/rejected": -3.205756425857544, + "logps/chosen": -238.42613220214844, + "logps/rejected": -534.2161865234375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0599493980407715, + "rewards/margins": 6.165534019470215, + "rewards/rejected": -11.225483894348145, + "step": 11483 + }, + { + "epoch": 1.79, + "learning_rate": 5.724503345609212e-06, + "logits/chosen": -2.511791467666626, + "logits/rejected": -2.756836175918579, + "logps/chosen": -346.15289306640625, + "logps/rejected": -447.7396240234375, + "loss": 0.2453, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.795949935913086, + "rewards/margins": 3.9224600791931152, + "rewards/rejected": -9.718409538269043, + "step": 11484 + }, + { + "epoch": 1.79, + "learning_rate": 5.723769905078064e-06, + "logits/chosen": -2.721156120300293, + "logits/rejected": -2.2806506156921387, + "logps/chosen": -264.7243957519531, + "logps/rejected": -136.79568481445312, + "loss": 0.1429, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4761505126953125, + "rewards/margins": 2.423269033432007, + "rewards/rejected": -7.899419784545898, + "step": 11485 + }, + { + "epoch": 1.79, + "learning_rate": 5.723036464546916e-06, + "logits/chosen": -2.2367122173309326, + "logits/rejected": -2.8980164527893066, + "logps/chosen": -638.316162109375, + "logps/rejected": -529.9249267578125, + "loss": 3.2798, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.221979141235352, + "rewards/margins": 4.094571113586426, + "rewards/rejected": -12.316549301147461, + "step": 11486 + }, + { + "epoch": 1.79, + "learning_rate": 5.7223030240157686e-06, + "logits/chosen": -2.850376605987549, + "logits/rejected": -2.342125654220581, + "logps/chosen": -158.46554565429688, + "logps/rejected": -338.2296447753906, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.211845874786377, + "rewards/margins": 10.417888641357422, + "rewards/rejected": -15.62973403930664, + "step": 11487 + }, + { + "epoch": 1.79, + "learning_rate": 5.7215695834846205e-06, + "logits/chosen": -2.2053964138031006, + "logits/rejected": -2.899873733520508, + "logps/chosen": -341.4005432128906, + "logps/rejected": -391.5413513183594, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.731454372406006, + "rewards/margins": 6.037721157073975, + "rewards/rejected": -10.76917552947998, + "step": 11488 + }, + { + "epoch": 1.79, + "learning_rate": 5.720836142953472e-06, + "logits/chosen": -2.4375662803649902, + "logits/rejected": -2.6699318885803223, + "logps/chosen": -259.11517333984375, + "logps/rejected": -329.6009521484375, + "loss": 0.0752, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.316062927246094, + "rewards/margins": 4.13153600692749, + "rewards/rejected": -9.447599411010742, + "step": 11489 + }, + { + "epoch": 1.79, + "learning_rate": 5.720102702422324e-06, + "logits/chosen": -2.780275583267212, + "logits/rejected": -3.0161030292510986, + "logps/chosen": -508.0914001464844, + "logps/rejected": -605.3623657226562, + "loss": 0.2957, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.705386161804199, + "rewards/margins": 5.759432315826416, + "rewards/rejected": -13.464818954467773, + "step": 11490 + }, + { + "epoch": 1.79, + "learning_rate": 5.719369261891177e-06, + "logits/chosen": -2.8334403038024902, + "logits/rejected": -2.436584711074829, + "logps/chosen": -701.7132568359375, + "logps/rejected": -635.8436889648438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8883674144744873, + "rewards/margins": 10.257148742675781, + "rewards/rejected": -14.145515441894531, + "step": 11491 + }, + { + "epoch": 1.79, + "learning_rate": 5.71863582136003e-06, + "logits/chosen": -2.8661019802093506, + "logits/rejected": -2.9758729934692383, + "logps/chosen": -190.7534942626953, + "logps/rejected": -261.704345703125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.544578552246094, + "rewards/margins": 6.316449165344238, + "rewards/rejected": -12.861027717590332, + "step": 11492 + }, + { + "epoch": 1.79, + "learning_rate": 5.7179023808288815e-06, + "logits/chosen": -2.9322116374969482, + "logits/rejected": -2.4491214752197266, + "logps/chosen": -407.2843017578125, + "logps/rejected": -339.94708251953125, + "loss": 0.0814, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.561064720153809, + "rewards/margins": 4.570558547973633, + "rewards/rejected": -11.131624221801758, + "step": 11493 + }, + { + "epoch": 1.79, + "learning_rate": 5.717168940297733e-06, + "logits/chosen": -3.150409460067749, + "logits/rejected": -2.9081199169158936, + "logps/chosen": -170.00308227539062, + "logps/rejected": -292.11773681640625, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.564903259277344, + "rewards/margins": 5.613781929016113, + "rewards/rejected": -10.178686141967773, + "step": 11494 + }, + { + "epoch": 1.79, + "learning_rate": 5.716435499766585e-06, + "logits/chosen": -2.589144468307495, + "logits/rejected": -2.7179698944091797, + "logps/chosen": -165.1661834716797, + "logps/rejected": -270.0386962890625, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3892266750335693, + "rewards/margins": 6.746416091918945, + "rewards/rejected": -10.135643005371094, + "step": 11495 + }, + { + "epoch": 1.79, + "learning_rate": 5.715702059235438e-06, + "logits/chosen": -1.404044270515442, + "logits/rejected": -2.4790968894958496, + "logps/chosen": -329.10772705078125, + "logps/rejected": -358.9189147949219, + "loss": 0.0224, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.201825141906738, + "rewards/margins": 6.852898597717285, + "rewards/rejected": -12.054723739624023, + "step": 11496 + }, + { + "epoch": 1.79, + "learning_rate": 5.71496861870429e-06, + "logits/chosen": -3.0507428646087646, + "logits/rejected": -1.663060188293457, + "logps/chosen": -438.4880065917969, + "logps/rejected": -265.8399658203125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.091634511947632, + "rewards/margins": 7.890501022338867, + "rewards/rejected": -10.982135772705078, + "step": 11497 + }, + { + "epoch": 1.79, + "learning_rate": 5.714235178173142e-06, + "logits/chosen": -2.320405960083008, + "logits/rejected": -2.943203926086426, + "logps/chosen": -207.29312133789062, + "logps/rejected": -342.9200439453125, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.311013221740723, + "rewards/margins": 4.483372688293457, + "rewards/rejected": -8.79438591003418, + "step": 11498 + }, + { + "epoch": 1.79, + "learning_rate": 5.713501737641994e-06, + "logits/chosen": -2.782451868057251, + "logits/rejected": -2.9309182167053223, + "logps/chosen": -448.3693542480469, + "logps/rejected": -580.3676147460938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.943309783935547, + "rewards/margins": 9.1884765625, + "rewards/rejected": -12.131786346435547, + "step": 11499 + }, + { + "epoch": 1.79, + "learning_rate": 5.712768297110846e-06, + "logits/chosen": -2.9820728302001953, + "logits/rejected": -3.0769946575164795, + "logps/chosen": -77.59852600097656, + "logps/rejected": -235.54798889160156, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.821068048477173, + "rewards/margins": 8.080244064331055, + "rewards/rejected": -10.901311874389648, + "step": 11500 + }, + { + "epoch": 1.79, + "learning_rate": 5.712034856579698e-06, + "logits/chosen": -2.5852274894714355, + "logits/rejected": -3.08440899848938, + "logps/chosen": -434.76043701171875, + "logps/rejected": -392.572265625, + "loss": 0.0337, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.243714809417725, + "rewards/margins": 3.379246711730957, + "rewards/rejected": -9.62296199798584, + "step": 11501 + }, + { + "epoch": 1.79, + "learning_rate": 5.71130141604855e-06, + "logits/chosen": -2.4450409412384033, + "logits/rejected": -2.6031203269958496, + "logps/chosen": -182.46939086914062, + "logps/rejected": -287.8467712402344, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.310214042663574, + "rewards/margins": 7.834007263183594, + "rewards/rejected": -12.144220352172852, + "step": 11502 + }, + { + "epoch": 1.79, + "learning_rate": 5.710567975517402e-06, + "logits/chosen": -2.088470220565796, + "logits/rejected": -2.6839616298675537, + "logps/chosen": -145.37998962402344, + "logps/rejected": -279.77850341796875, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.165533065795898, + "rewards/margins": 4.720610618591309, + "rewards/rejected": -10.886143684387207, + "step": 11503 + }, + { + "epoch": 1.79, + "learning_rate": 5.709834534986255e-06, + "logits/chosen": -2.5582127571105957, + "logits/rejected": -2.887288808822632, + "logps/chosen": -132.15097045898438, + "logps/rejected": -323.0768737792969, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.007105827331543, + "rewards/margins": 6.59542989730835, + "rewards/rejected": -11.60253620147705, + "step": 11504 + }, + { + "epoch": 1.79, + "learning_rate": 5.709101094455107e-06, + "logits/chosen": -2.7378721237182617, + "logits/rejected": -2.8987295627593994, + "logps/chosen": -152.78338623046875, + "logps/rejected": -205.39297485351562, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3085222244262695, + "rewards/margins": 7.175835609436035, + "rewards/rejected": -10.484357833862305, + "step": 11505 + }, + { + "epoch": 1.79, + "learning_rate": 5.7083676539239585e-06, + "logits/chosen": -2.638131618499756, + "logits/rejected": -3.0548412799835205, + "logps/chosen": -54.14851760864258, + "logps/rejected": -206.2626953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4114506244659424, + "rewards/margins": 8.798372268676758, + "rewards/rejected": -12.209823608398438, + "step": 11506 + }, + { + "epoch": 1.79, + "learning_rate": 5.70763421339281e-06, + "logits/chosen": -2.8792552947998047, + "logits/rejected": -1.3638834953308105, + "logps/chosen": -451.38580322265625, + "logps/rejected": -450.4115905761719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.935914516448975, + "rewards/margins": 10.710478782653809, + "rewards/rejected": -15.646392822265625, + "step": 11507 + }, + { + "epoch": 1.79, + "learning_rate": 5.706900772861663e-06, + "logits/chosen": -2.3683862686157227, + "logits/rejected": -2.811277151107788, + "logps/chosen": -303.80810546875, + "logps/rejected": -374.90716552734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.863531589508057, + "rewards/margins": 9.238067626953125, + "rewards/rejected": -15.101598739624023, + "step": 11508 + }, + { + "epoch": 1.79, + "learning_rate": 5.706167332330516e-06, + "logits/chosen": -2.8725979328155518, + "logits/rejected": -3.1862916946411133, + "logps/chosen": -72.51518249511719, + "logps/rejected": -202.26025390625, + "loss": 0.1253, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.176884174346924, + "rewards/margins": 5.591362953186035, + "rewards/rejected": -8.7682466506958, + "step": 11509 + }, + { + "epoch": 1.79, + "learning_rate": 5.705433891799368e-06, + "logits/chosen": -2.615011692047119, + "logits/rejected": -3.011216640472412, + "logps/chosen": -240.8722686767578, + "logps/rejected": -389.06182861328125, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.450872898101807, + "rewards/margins": 11.431654930114746, + "rewards/rejected": -15.882527351379395, + "step": 11510 + }, + { + "epoch": 1.79, + "learning_rate": 5.70470045126822e-06, + "logits/chosen": -2.8304152488708496, + "logits/rejected": -3.0631723403930664, + "logps/chosen": -54.29910659790039, + "logps/rejected": -167.14157104492188, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.308587074279785, + "rewards/margins": 6.267359733581543, + "rewards/rejected": -10.575946807861328, + "step": 11511 + }, + { + "epoch": 1.79, + "learning_rate": 5.7039670107370715e-06, + "logits/chosen": -0.7807068228721619, + "logits/rejected": -2.324928045272827, + "logps/chosen": -138.3533935546875, + "logps/rejected": -302.6624755859375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.046351432800293, + "rewards/margins": 7.480401992797852, + "rewards/rejected": -11.526753425598145, + "step": 11512 + }, + { + "epoch": 1.79, + "learning_rate": 5.703233570205924e-06, + "logits/chosen": -2.339346170425415, + "logits/rejected": -2.736178159713745, + "logps/chosen": -297.7193908691406, + "logps/rejected": -351.18634033203125, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.476751804351807, + "rewards/margins": 5.573907375335693, + "rewards/rejected": -10.0506591796875, + "step": 11513 + }, + { + "epoch": 1.79, + "learning_rate": 5.702500129674776e-06, + "logits/chosen": -2.4491403102874756, + "logits/rejected": -3.070117235183716, + "logps/chosen": -343.25640869140625, + "logps/rejected": -395.366455078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9454090595245361, + "rewards/margins": 9.138696670532227, + "rewards/rejected": -11.0841064453125, + "step": 11514 + }, + { + "epoch": 1.79, + "learning_rate": 5.701766689143628e-06, + "logits/chosen": -2.4665768146514893, + "logits/rejected": -2.9299590587615967, + "logps/chosen": -313.196044921875, + "logps/rejected": -265.30291748046875, + "loss": 0.3506, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.526666641235352, + "rewards/margins": 3.773350715637207, + "rewards/rejected": -9.300017356872559, + "step": 11515 + }, + { + "epoch": 1.79, + "learning_rate": 5.70103324861248e-06, + "logits/chosen": -2.5720555782318115, + "logits/rejected": -2.706833839416504, + "logps/chosen": -146.20709228515625, + "logps/rejected": -241.40771484375, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.158454418182373, + "rewards/margins": 6.005078315734863, + "rewards/rejected": -11.163533210754395, + "step": 11516 + }, + { + "epoch": 1.79, + "learning_rate": 5.700299808081332e-06, + "logits/chosen": -1.8947557210922241, + "logits/rejected": -2.9754810333251953, + "logps/chosen": -389.85186767578125, + "logps/rejected": -447.54083251953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.178812026977539, + "rewards/margins": 9.518619537353516, + "rewards/rejected": -13.697431564331055, + "step": 11517 + }, + { + "epoch": 1.79, + "learning_rate": 5.6995663675501844e-06, + "logits/chosen": -2.2456820011138916, + "logits/rejected": -2.788759708404541, + "logps/chosen": -389.70770263671875, + "logps/rejected": -503.843017578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.599385738372803, + "rewards/margins": 10.13150405883789, + "rewards/rejected": -14.730890274047852, + "step": 11518 + }, + { + "epoch": 1.79, + "learning_rate": 5.698832927019036e-06, + "logits/chosen": -2.6270389556884766, + "logits/rejected": -1.9468737840652466, + "logps/chosen": -276.4482421875, + "logps/rejected": -191.39559936523438, + "loss": 0.8272, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.526063919067383, + "rewards/margins": 2.17673921585083, + "rewards/rejected": -8.702802658081055, + "step": 11519 + }, + { + "epoch": 1.79, + "learning_rate": 5.698099486487888e-06, + "logits/chosen": -2.4854743480682373, + "logits/rejected": -2.3795418739318848, + "logps/chosen": -164.60671997070312, + "logps/rejected": -320.8370666503906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.481320381164551, + "rewards/margins": 8.817628860473633, + "rewards/rejected": -14.2989501953125, + "step": 11520 + }, + { + "epoch": 1.79, + "learning_rate": 5.69736604595674e-06, + "logits/chosen": -2.8767364025115967, + "logits/rejected": -1.7133620977401733, + "logps/chosen": -465.4793701171875, + "logps/rejected": -340.7452087402344, + "loss": 0.0758, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.217817306518555, + "rewards/margins": 2.5473432540893555, + "rewards/rejected": -10.76516056060791, + "step": 11521 + }, + { + "epoch": 1.79, + "learning_rate": 5.696632605425593e-06, + "logits/chosen": -2.482808828353882, + "logits/rejected": -3.1310646533966064, + "logps/chosen": -128.6280975341797, + "logps/rejected": -339.166015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.878436088562012, + "rewards/margins": 8.203256607055664, + "rewards/rejected": -13.081692695617676, + "step": 11522 + }, + { + "epoch": 1.79, + "learning_rate": 5.695899164894445e-06, + "logits/chosen": -2.9577083587646484, + "logits/rejected": -2.4801557064056396, + "logps/chosen": -179.38943481445312, + "logps/rejected": -189.969482421875, + "loss": 1.663, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.836041450500488, + "rewards/margins": -0.39706897735595703, + "rewards/rejected": -8.438972473144531, + "step": 11523 + }, + { + "epoch": 1.79, + "learning_rate": 5.6951657243632965e-06, + "logits/chosen": -2.2596404552459717, + "logits/rejected": -2.480630397796631, + "logps/chosen": -115.75161743164062, + "logps/rejected": -132.04452514648438, + "loss": 1.4412, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.692493438720703, + "rewards/margins": 0.885718822479248, + "rewards/rejected": -8.578211784362793, + "step": 11524 + }, + { + "epoch": 1.79, + "learning_rate": 5.694432283832149e-06, + "logits/chosen": -2.7569403648376465, + "logits/rejected": -2.7354841232299805, + "logps/chosen": -513.332763671875, + "logps/rejected": -480.8155517578125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1861772537231445, + "rewards/margins": 6.34503173828125, + "rewards/rejected": -11.531209945678711, + "step": 11525 + }, + { + "epoch": 1.79, + "learning_rate": 5.693698843301001e-06, + "logits/chosen": -2.35125994682312, + "logits/rejected": -2.926286458969116, + "logps/chosen": -194.1444854736328, + "logps/rejected": -382.9073486328125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.942585468292236, + "rewards/margins": 6.77774715423584, + "rewards/rejected": -12.720333099365234, + "step": 11526 + }, + { + "epoch": 1.79, + "learning_rate": 5.692965402769854e-06, + "logits/chosen": -2.224470615386963, + "logits/rejected": -2.848942756652832, + "logps/chosen": -74.05419158935547, + "logps/rejected": -412.38458251953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.661712884902954, + "rewards/margins": 10.471939086914062, + "rewards/rejected": -14.133651733398438, + "step": 11527 + }, + { + "epoch": 1.79, + "learning_rate": 5.692231962238706e-06, + "logits/chosen": -1.58815598487854, + "logits/rejected": -2.6921677589416504, + "logps/chosen": -198.69097900390625, + "logps/rejected": -453.49560546875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.547451496124268, + "rewards/margins": 6.1660847663879395, + "rewards/rejected": -12.713536262512207, + "step": 11528 + }, + { + "epoch": 1.79, + "learning_rate": 5.691498521707558e-06, + "logits/chosen": -2.7953498363494873, + "logits/rejected": -1.7735446691513062, + "logps/chosen": -220.5780029296875, + "logps/rejected": -261.6112060546875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.960254669189453, + "rewards/margins": 6.88956880569458, + "rewards/rejected": -10.849822998046875, + "step": 11529 + }, + { + "epoch": 1.79, + "learning_rate": 5.6907650811764095e-06, + "logits/chosen": -3.0475573539733887, + "logits/rejected": -2.640501022338867, + "logps/chosen": -183.80015563964844, + "logps/rejected": -198.78607177734375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.163510322570801, + "rewards/margins": 6.647902488708496, + "rewards/rejected": -12.811412811279297, + "step": 11530 + }, + { + "epoch": 1.79, + "learning_rate": 5.690031640645262e-06, + "logits/chosen": -3.0862185955047607, + "logits/rejected": -2.6196718215942383, + "logps/chosen": -343.6214904785156, + "logps/rejected": -113.4363784790039, + "loss": 3.1624, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.242780685424805, + "rewards/margins": -3.1093316078186035, + "rewards/rejected": -5.133448600769043, + "step": 11531 + }, + { + "epoch": 1.79, + "learning_rate": 5.689298200114114e-06, + "logits/chosen": -2.005695104598999, + "logits/rejected": -2.889998197555542, + "logps/chosen": -177.93690490722656, + "logps/rejected": -360.12652587890625, + "loss": 1.5141, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.7986602783203125, + "rewards/margins": 6.574701309204102, + "rewards/rejected": -14.373361587524414, + "step": 11532 + }, + { + "epoch": 1.79, + "learning_rate": 5.688564759582966e-06, + "logits/chosen": -2.7860865592956543, + "logits/rejected": -1.8534573316574097, + "logps/chosen": -287.7751770019531, + "logps/rejected": -279.6528625488281, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.823980331420898, + "rewards/margins": 6.640275001525879, + "rewards/rejected": -11.464254379272461, + "step": 11533 + }, + { + "epoch": 1.79, + "learning_rate": 5.687831319051818e-06, + "logits/chosen": -2.8993964195251465, + "logits/rejected": -2.9247026443481445, + "logps/chosen": -136.73716735839844, + "logps/rejected": -291.9013977050781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.386771202087402, + "rewards/margins": 9.215787887573242, + "rewards/rejected": -15.602558135986328, + "step": 11534 + }, + { + "epoch": 1.79, + "learning_rate": 5.68709787852067e-06, + "logits/chosen": -3.0908854007720947, + "logits/rejected": -2.6399128437042236, + "logps/chosen": -261.6531982421875, + "logps/rejected": -141.0731658935547, + "loss": 0.3757, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.891058921813965, + "rewards/margins": 0.9298843145370483, + "rewards/rejected": -6.8209428787231445, + "step": 11535 + }, + { + "epoch": 1.79, + "learning_rate": 5.6863644379895225e-06, + "logits/chosen": -2.585663080215454, + "logits/rejected": -1.6451160907745361, + "logps/chosen": -245.7633056640625, + "logps/rejected": -311.7951354980469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0441672801971436, + "rewards/margins": 10.680530548095703, + "rewards/rejected": -13.724699020385742, + "step": 11536 + }, + { + "epoch": 1.79, + "learning_rate": 5.685630997458374e-06, + "logits/chosen": -2.9115731716156006, + "logits/rejected": -2.993772506713867, + "logps/chosen": -63.13179016113281, + "logps/rejected": -151.21142578125, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6760640144348145, + "rewards/margins": 5.572104454040527, + "rewards/rejected": -7.248168468475342, + "step": 11537 + }, + { + "epoch": 1.79, + "learning_rate": 5.684897556927226e-06, + "logits/chosen": -2.4463706016540527, + "logits/rejected": -2.6922264099121094, + "logps/chosen": -158.64859008789062, + "logps/rejected": -357.9415283203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.841814041137695, + "rewards/margins": 9.248268127441406, + "rewards/rejected": -16.0900821685791, + "step": 11538 + }, + { + "epoch": 1.79, + "learning_rate": 5.684164116396078e-06, + "logits/chosen": -2.667778968811035, + "logits/rejected": -2.381376266479492, + "logps/chosen": -264.157470703125, + "logps/rejected": -193.495849609375, + "loss": 1.2725, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.366708755493164, + "rewards/margins": 1.287015438079834, + "rewards/rejected": -7.65372371673584, + "step": 11539 + }, + { + "epoch": 1.79, + "learning_rate": 5.683430675864931e-06, + "logits/chosen": -2.9899117946624756, + "logits/rejected": -3.0402328968048096, + "logps/chosen": -121.96492767333984, + "logps/rejected": -197.87774658203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6843622922897339, + "rewards/margins": 9.227829933166504, + "rewards/rejected": -10.912191390991211, + "step": 11540 + }, + { + "epoch": 1.79, + "learning_rate": 5.682697235333783e-06, + "logits/chosen": -1.832275629043579, + "logits/rejected": -2.9569156169891357, + "logps/chosen": -261.99761962890625, + "logps/rejected": -589.73779296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.756514072418213, + "rewards/margins": 8.440290451049805, + "rewards/rejected": -12.196805000305176, + "step": 11541 + }, + { + "epoch": 1.8, + "learning_rate": 5.681963794802635e-06, + "logits/chosen": -3.208535671234131, + "logits/rejected": -3.028191089630127, + "logps/chosen": -245.5843505859375, + "logps/rejected": -204.3983612060547, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.627841949462891, + "rewards/margins": 6.348071575164795, + "rewards/rejected": -10.975914001464844, + "step": 11542 + }, + { + "epoch": 1.8, + "learning_rate": 5.681230354271487e-06, + "logits/chosen": -2.8638017177581787, + "logits/rejected": -3.017322540283203, + "logps/chosen": -139.80178833007812, + "logps/rejected": -238.58627319335938, + "loss": 0.1265, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.913741111755371, + "rewards/margins": 4.131777763366699, + "rewards/rejected": -10.04551887512207, + "step": 11543 + }, + { + "epoch": 1.8, + "learning_rate": 5.680496913740339e-06, + "logits/chosen": -3.039097785949707, + "logits/rejected": -3.0692923069000244, + "logps/chosen": -428.3188781738281, + "logps/rejected": -385.3594055175781, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.468399524688721, + "rewards/margins": 7.43132209777832, + "rewards/rejected": -12.899721145629883, + "step": 11544 + }, + { + "epoch": 1.8, + "learning_rate": 5.679763473209192e-06, + "logits/chosen": -2.971435070037842, + "logits/rejected": -2.318592071533203, + "logps/chosen": -171.27761840820312, + "logps/rejected": -154.62588500976562, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7813944816589355, + "rewards/margins": 5.781834602355957, + "rewards/rejected": -8.563228607177734, + "step": 11545 + }, + { + "epoch": 1.8, + "learning_rate": 5.679030032678044e-06, + "logits/chosen": -1.2836161851882935, + "logits/rejected": -2.818099021911621, + "logps/chosen": -138.18270874023438, + "logps/rejected": -456.3597717285156, + "loss": 0.0578, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1163249015808105, + "rewards/margins": 5.590178966522217, + "rewards/rejected": -9.706503868103027, + "step": 11546 + }, + { + "epoch": 1.8, + "learning_rate": 5.678296592146896e-06, + "logits/chosen": -2.5226223468780518, + "logits/rejected": -2.57918381690979, + "logps/chosen": -105.04676818847656, + "logps/rejected": -185.49703979492188, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.310179710388184, + "rewards/margins": 6.523535251617432, + "rewards/rejected": -10.833715438842773, + "step": 11547 + }, + { + "epoch": 1.8, + "learning_rate": 5.6775631516157476e-06, + "logits/chosen": -2.9213602542877197, + "logits/rejected": -2.9987130165100098, + "logps/chosen": -386.35394287109375, + "logps/rejected": -486.7951965332031, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.584462642669678, + "rewards/margins": 8.607266426086426, + "rewards/rejected": -13.191728591918945, + "step": 11548 + }, + { + "epoch": 1.8, + "learning_rate": 5.6768297110846e-06, + "logits/chosen": -1.7681801319122314, + "logits/rejected": -2.684657335281372, + "logps/chosen": -122.90725708007812, + "logps/rejected": -396.279296875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.998692035675049, + "rewards/margins": 7.191408157348633, + "rewards/rejected": -15.190099716186523, + "step": 11549 + }, + { + "epoch": 1.8, + "learning_rate": 5.676096270553452e-06, + "logits/chosen": -1.794471263885498, + "logits/rejected": -2.753692388534546, + "logps/chosen": -109.08302307128906, + "logps/rejected": -429.56854248046875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.167656898498535, + "rewards/margins": 7.627128601074219, + "rewards/rejected": -12.794785499572754, + "step": 11550 + }, + { + "epoch": 1.8, + "learning_rate": 5.675362830022304e-06, + "logits/chosen": -2.0610926151275635, + "logits/rejected": -2.0370404720306396, + "logps/chosen": -288.2220764160156, + "logps/rejected": -135.18528747558594, + "loss": 0.0445, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.511479139328003, + "rewards/margins": 5.902518272399902, + "rewards/rejected": -9.413997650146484, + "step": 11551 + }, + { + "epoch": 1.8, + "learning_rate": 5.674629389491156e-06, + "logits/chosen": -2.9112064838409424, + "logits/rejected": -2.8296170234680176, + "logps/chosen": -254.1651153564453, + "logps/rejected": -251.78314208984375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.36878776550293, + "rewards/margins": 7.06781005859375, + "rewards/rejected": -12.43659782409668, + "step": 11552 + }, + { + "epoch": 1.8, + "learning_rate": 5.673895948960009e-06, + "logits/chosen": -1.7958130836486816, + "logits/rejected": -2.878509998321533, + "logps/chosen": -164.13172912597656, + "logps/rejected": -474.305908203125, + "loss": 0.2643, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.649608135223389, + "rewards/margins": 8.50163745880127, + "rewards/rejected": -14.1512451171875, + "step": 11553 + }, + { + "epoch": 1.8, + "learning_rate": 5.6731625084288605e-06, + "logits/chosen": -2.03084397315979, + "logits/rejected": -2.94077467918396, + "logps/chosen": -67.92451477050781, + "logps/rejected": -466.27166748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.200138092041016, + "rewards/margins": 11.057281494140625, + "rewards/rejected": -16.25741958618164, + "step": 11554 + }, + { + "epoch": 1.8, + "learning_rate": 5.672429067897712e-06, + "logits/chosen": -2.916414737701416, + "logits/rejected": -2.940333127975464, + "logps/chosen": -141.5412139892578, + "logps/rejected": -273.83770751953125, + "loss": 0.0315, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.106255054473877, + "rewards/margins": 7.98599910736084, + "rewards/rejected": -11.092254638671875, + "step": 11555 + }, + { + "epoch": 1.8, + "learning_rate": 5.671695627366564e-06, + "logits/chosen": -2.134244203567505, + "logits/rejected": -2.8476810455322266, + "logps/chosen": -266.4162902832031, + "logps/rejected": -373.2918701171875, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.627992153167725, + "rewards/margins": 5.956367015838623, + "rewards/rejected": -11.584359169006348, + "step": 11556 + }, + { + "epoch": 1.8, + "learning_rate": 5.670962186835416e-06, + "logits/chosen": -1.4287934303283691, + "logits/rejected": -2.7069311141967773, + "logps/chosen": -97.3138427734375, + "logps/rejected": -468.0406799316406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.080319404602051, + "rewards/margins": 13.836780548095703, + "rewards/rejected": -20.917098999023438, + "step": 11557 + }, + { + "epoch": 1.8, + "learning_rate": 5.670228746304269e-06, + "logits/chosen": -2.368665933609009, + "logits/rejected": -2.9868521690368652, + "logps/chosen": -187.09320068359375, + "logps/rejected": -265.53118896484375, + "loss": 1.1842, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.7652268409729, + "rewards/margins": 3.7208874225616455, + "rewards/rejected": -9.486114501953125, + "step": 11558 + }, + { + "epoch": 1.8, + "learning_rate": 5.669495305773121e-06, + "logits/chosen": -1.804986834526062, + "logits/rejected": -2.8755319118499756, + "logps/chosen": -109.60977935791016, + "logps/rejected": -265.031005859375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.405940055847168, + "rewards/margins": 7.666104316711426, + "rewards/rejected": -12.072044372558594, + "step": 11559 + }, + { + "epoch": 1.8, + "learning_rate": 5.6687618652419735e-06, + "logits/chosen": -2.042459726333618, + "logits/rejected": -2.8289403915405273, + "logps/chosen": -197.50010681152344, + "logps/rejected": -567.9420166015625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.409505367279053, + "rewards/margins": 7.686075210571289, + "rewards/rejected": -12.0955810546875, + "step": 11560 + }, + { + "epoch": 1.8, + "learning_rate": 5.668028424710825e-06, + "logits/chosen": -2.591447353363037, + "logits/rejected": -2.8537113666534424, + "logps/chosen": -518.1616821289062, + "logps/rejected": -570.954833984375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0484819412231445, + "rewards/margins": 6.827671051025391, + "rewards/rejected": -13.876152992248535, + "step": 11561 + }, + { + "epoch": 1.8, + "learning_rate": 5.667294984179678e-06, + "logits/chosen": -2.429927349090576, + "logits/rejected": -2.893423557281494, + "logps/chosen": -133.40408325195312, + "logps/rejected": -200.2071533203125, + "loss": 0.6836, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.504175186157227, + "rewards/margins": 2.616541862487793, + "rewards/rejected": -7.1207170486450195, + "step": 11562 + }, + { + "epoch": 1.8, + "learning_rate": 5.66656154364853e-06, + "logits/chosen": -3.0211877822875977, + "logits/rejected": -3.125399351119995, + "logps/chosen": -119.8272476196289, + "logps/rejected": -163.71905517578125, + "loss": 0.1827, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.424475193023682, + "rewards/margins": 3.372223377227783, + "rewards/rejected": -8.796698570251465, + "step": 11563 + }, + { + "epoch": 1.8, + "learning_rate": 5.665828103117382e-06, + "logits/chosen": -2.6389822959899902, + "logits/rejected": -3.0069096088409424, + "logps/chosen": -58.824214935302734, + "logps/rejected": -186.27835083007812, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6168437004089355, + "rewards/margins": 6.609857082366943, + "rewards/rejected": -10.226700782775879, + "step": 11564 + }, + { + "epoch": 1.8, + "learning_rate": 5.665094662586234e-06, + "logits/chosen": -2.123258590698242, + "logits/rejected": -2.7090630531311035, + "logps/chosen": -189.34747314453125, + "logps/rejected": -295.6569519042969, + "loss": 0.4007, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.9066081047058105, + "rewards/margins": 4.409940242767334, + "rewards/rejected": -11.316548347473145, + "step": 11565 + }, + { + "epoch": 1.8, + "learning_rate": 5.664361222055086e-06, + "logits/chosen": -2.4322330951690674, + "logits/rejected": -2.363006353378296, + "logps/chosen": -173.80909729003906, + "logps/rejected": -258.82269287109375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.036163330078125, + "rewards/margins": 6.561454772949219, + "rewards/rejected": -11.597618103027344, + "step": 11566 + }, + { + "epoch": 1.8, + "learning_rate": 5.663627781523938e-06, + "logits/chosen": -2.911525249481201, + "logits/rejected": -2.9859015941619873, + "logps/chosen": -78.77481842041016, + "logps/rejected": -178.24435424804688, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4645981788635254, + "rewards/margins": 9.650871276855469, + "rewards/rejected": -13.115468978881836, + "step": 11567 + }, + { + "epoch": 1.8, + "learning_rate": 5.66289434099279e-06, + "logits/chosen": -2.9296340942382812, + "logits/rejected": -3.062363624572754, + "logps/chosen": -279.356689453125, + "logps/rejected": -319.94329833984375, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.350992202758789, + "rewards/margins": 6.71236515045166, + "rewards/rejected": -12.063356399536133, + "step": 11568 + }, + { + "epoch": 1.8, + "learning_rate": 5.662160900461642e-06, + "logits/chosen": -1.3946045637130737, + "logits/rejected": -2.3181910514831543, + "logps/chosen": -248.5750274658203, + "logps/rejected": -340.5480651855469, + "loss": 0.901, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.785813331604004, + "rewards/margins": 3.9241673946380615, + "rewards/rejected": -11.709980964660645, + "step": 11569 + }, + { + "epoch": 1.8, + "learning_rate": 5.661427459930494e-06, + "logits/chosen": -2.075235605239868, + "logits/rejected": -2.5828540325164795, + "logps/chosen": -176.94424438476562, + "logps/rejected": -290.4674072265625, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.513237476348877, + "rewards/margins": 5.477116107940674, + "rewards/rejected": -10.99035358428955, + "step": 11570 + }, + { + "epoch": 1.8, + "learning_rate": 5.660694019399347e-06, + "logits/chosen": -2.581631898880005, + "logits/rejected": -3.0448074340820312, + "logps/chosen": -68.75625610351562, + "logps/rejected": -295.7534484863281, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.593859672546387, + "rewards/margins": 7.175313949584961, + "rewards/rejected": -11.769173622131348, + "step": 11571 + }, + { + "epoch": 1.8, + "learning_rate": 5.6599605788681986e-06, + "logits/chosen": -3.007941484451294, + "logits/rejected": -3.001354455947876, + "logps/chosen": -228.7769775390625, + "logps/rejected": -184.53025817871094, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.840819358825684, + "rewards/margins": 6.112659454345703, + "rewards/rejected": -11.953478813171387, + "step": 11572 + }, + { + "epoch": 1.8, + "learning_rate": 5.6592271383370504e-06, + "logits/chosen": -2.6897995471954346, + "logits/rejected": -2.979031562805176, + "logps/chosen": -124.0095443725586, + "logps/rejected": -393.65155029296875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.752131938934326, + "rewards/margins": 11.354925155639648, + "rewards/rejected": -15.107057571411133, + "step": 11573 + }, + { + "epoch": 1.8, + "learning_rate": 5.658493697805902e-06, + "logits/chosen": -1.875236988067627, + "logits/rejected": -2.9917361736297607, + "logps/chosen": -220.51173400878906, + "logps/rejected": -332.6270446777344, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.409512519836426, + "rewards/margins": 8.013191223144531, + "rewards/rejected": -12.42270278930664, + "step": 11574 + }, + { + "epoch": 1.8, + "learning_rate": 5.657760257274754e-06, + "logits/chosen": -2.391489267349243, + "logits/rejected": -3.0532162189483643, + "logps/chosen": -78.82503509521484, + "logps/rejected": -615.3751831054688, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.677159786224365, + "rewards/margins": 7.745201110839844, + "rewards/rejected": -14.422361373901367, + "step": 11575 + }, + { + "epoch": 1.8, + "learning_rate": 5.657026816743607e-06, + "logits/chosen": -1.729694128036499, + "logits/rejected": -2.7026240825653076, + "logps/chosen": -100.9625244140625, + "logps/rejected": -389.0718994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.992420196533203, + "rewards/margins": 11.991765975952148, + "rewards/rejected": -16.98418617248535, + "step": 11576 + }, + { + "epoch": 1.8, + "learning_rate": 5.65629337621246e-06, + "logits/chosen": -2.2993271350860596, + "logits/rejected": -2.8141820430755615, + "logps/chosen": -209.55706787109375, + "logps/rejected": -339.9334716796875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.153820753097534, + "rewards/margins": 6.537686347961426, + "rewards/rejected": -8.691507339477539, + "step": 11577 + }, + { + "epoch": 1.8, + "learning_rate": 5.6555599356813115e-06, + "logits/chosen": -2.2719268798828125, + "logits/rejected": -1.9074015617370605, + "logps/chosen": -321.53118896484375, + "logps/rejected": -343.42938232421875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.670445442199707, + "rewards/margins": 6.111483573913574, + "rewards/rejected": -13.781929016113281, + "step": 11578 + }, + { + "epoch": 1.8, + "learning_rate": 5.654826495150163e-06, + "logits/chosen": -1.9221302270889282, + "logits/rejected": -2.4649722576141357, + "logps/chosen": -332.2381896972656, + "logps/rejected": -338.5516357421875, + "loss": 0.0273, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.568747520446777, + "rewards/margins": 4.913930892944336, + "rewards/rejected": -11.482678413391113, + "step": 11579 + }, + { + "epoch": 1.8, + "learning_rate": 5.654093054619016e-06, + "logits/chosen": -2.901534080505371, + "logits/rejected": -2.837639093399048, + "logps/chosen": -184.33465576171875, + "logps/rejected": -195.48947143554688, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.183545112609863, + "rewards/margins": 3.9385459423065186, + "rewards/rejected": -9.122091293334961, + "step": 11580 + }, + { + "epoch": 1.8, + "learning_rate": 5.653359614087868e-06, + "logits/chosen": -2.480886936187744, + "logits/rejected": -2.9015860557556152, + "logps/chosen": -210.27670288085938, + "logps/rejected": -392.9532165527344, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.164430141448975, + "rewards/margins": 7.869244575500488, + "rewards/rejected": -12.033674240112305, + "step": 11581 + }, + { + "epoch": 1.8, + "learning_rate": 5.65262617355672e-06, + "logits/chosen": -1.2411653995513916, + "logits/rejected": -2.4478769302368164, + "logps/chosen": -216.3776397705078, + "logps/rejected": -591.8864135742188, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.540729999542236, + "rewards/margins": 8.581954956054688, + "rewards/rejected": -14.122684478759766, + "step": 11582 + }, + { + "epoch": 1.8, + "learning_rate": 5.651892733025572e-06, + "logits/chosen": -2.8185055255889893, + "logits/rejected": -2.149014711380005, + "logps/chosen": -149.08592224121094, + "logps/rejected": -155.9120330810547, + "loss": 0.0528, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9927167892456055, + "rewards/margins": 3.8033201694488525, + "rewards/rejected": -9.796037673950195, + "step": 11583 + }, + { + "epoch": 1.8, + "learning_rate": 5.651159292494424e-06, + "logits/chosen": -2.2375755310058594, + "logits/rejected": -2.039167881011963, + "logps/chosen": -167.76315307617188, + "logps/rejected": -231.87916564941406, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.759888172149658, + "rewards/margins": 8.090655326843262, + "rewards/rejected": -12.850543975830078, + "step": 11584 + }, + { + "epoch": 1.8, + "learning_rate": 5.650425851963276e-06, + "logits/chosen": -1.816208004951477, + "logits/rejected": -3.169119358062744, + "logps/chosen": -169.3616943359375, + "logps/rejected": -290.2060546875, + "loss": 1.8164, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.708657264709473, + "rewards/margins": 0.6541619300842285, + "rewards/rejected": -6.362819194793701, + "step": 11585 + }, + { + "epoch": 1.8, + "learning_rate": 5.649692411432128e-06, + "logits/chosen": -1.632707118988037, + "logits/rejected": -2.609809160232544, + "logps/chosen": -111.29156494140625, + "logps/rejected": -363.43182373046875, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.954878330230713, + "rewards/margins": 5.687450408935547, + "rewards/rejected": -9.642328262329102, + "step": 11586 + }, + { + "epoch": 1.8, + "learning_rate": 5.64895897090098e-06, + "logits/chosen": -2.0869510173797607, + "logits/rejected": -2.779421091079712, + "logps/chosen": -205.645263671875, + "logps/rejected": -506.2640380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.439897537231445, + "rewards/margins": 14.486446380615234, + "rewards/rejected": -18.92634391784668, + "step": 11587 + }, + { + "epoch": 1.8, + "learning_rate": 5.648225530369832e-06, + "logits/chosen": -2.4905753135681152, + "logits/rejected": -2.9828858375549316, + "logps/chosen": -680.120361328125, + "logps/rejected": -736.5489501953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9907989501953125, + "rewards/margins": 8.724672317504883, + "rewards/rejected": -14.715471267700195, + "step": 11588 + }, + { + "epoch": 1.8, + "learning_rate": 5.647492089838685e-06, + "logits/chosen": -2.958885908126831, + "logits/rejected": -3.13142728805542, + "logps/chosen": -255.8704071044922, + "logps/rejected": -549.3136596679688, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3390984535217285, + "rewards/margins": 9.157958030700684, + "rewards/rejected": -13.49705696105957, + "step": 11589 + }, + { + "epoch": 1.8, + "learning_rate": 5.646758649307537e-06, + "logits/chosen": -2.932499647140503, + "logits/rejected": -1.9505178928375244, + "logps/chosen": -292.1388854980469, + "logps/rejected": -191.92332458496094, + "loss": 0.8955, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.447127342224121, + "rewards/margins": -0.25196099281311035, + "rewards/rejected": -7.19516658782959, + "step": 11590 + }, + { + "epoch": 1.8, + "learning_rate": 5.6460252087763885e-06, + "logits/chosen": -0.8159576654434204, + "logits/rejected": -2.503232717514038, + "logps/chosen": -253.4335479736328, + "logps/rejected": -615.4826049804688, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6558427810668945, + "rewards/margins": 10.262130737304688, + "rewards/rejected": -15.917973518371582, + "step": 11591 + }, + { + "epoch": 1.8, + "learning_rate": 5.64529176824524e-06, + "logits/chosen": -1.878923773765564, + "logits/rejected": -3.0110042095184326, + "logps/chosen": -121.8718032836914, + "logps/rejected": -275.49853515625, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.43727970123291, + "rewards/margins": 4.621716499328613, + "rewards/rejected": -10.058996200561523, + "step": 11592 + }, + { + "epoch": 1.8, + "learning_rate": 5.644558327714093e-06, + "logits/chosen": -1.1693735122680664, + "logits/rejected": -2.8089587688446045, + "logps/chosen": -96.19437408447266, + "logps/rejected": -362.69403076171875, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.294328212738037, + "rewards/margins": 7.023841857910156, + "rewards/rejected": -12.318170547485352, + "step": 11593 + }, + { + "epoch": 1.8, + "learning_rate": 5.643824887182946e-06, + "logits/chosen": -1.828541874885559, + "logits/rejected": -2.6180875301361084, + "logps/chosen": -451.4068603515625, + "logps/rejected": -785.2515869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3906402587890625, + "rewards/margins": 17.78200340270996, + "rewards/rejected": -24.172645568847656, + "step": 11594 + }, + { + "epoch": 1.8, + "learning_rate": 5.643091446651798e-06, + "logits/chosen": -2.8446555137634277, + "logits/rejected": -1.347783088684082, + "logps/chosen": -564.251220703125, + "logps/rejected": -265.1449279785156, + "loss": 0.1223, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.186180114746094, + "rewards/margins": 3.3344883918762207, + "rewards/rejected": -11.520668029785156, + "step": 11595 + }, + { + "epoch": 1.8, + "learning_rate": 5.6423580061206496e-06, + "logits/chosen": -1.8783544301986694, + "logits/rejected": -2.113260269165039, + "logps/chosen": -244.15882873535156, + "logps/rejected": -473.648193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.804635047912598, + "rewards/margins": 12.892577171325684, + "rewards/rejected": -17.69721221923828, + "step": 11596 + }, + { + "epoch": 1.8, + "learning_rate": 5.6416245655895014e-06, + "logits/chosen": -2.737287998199463, + "logits/rejected": -1.7666901350021362, + "logps/chosen": -508.5343017578125, + "logps/rejected": -526.162841796875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.098345756530762, + "rewards/margins": 7.788455963134766, + "rewards/rejected": -13.886801719665527, + "step": 11597 + }, + { + "epoch": 1.8, + "learning_rate": 5.640891125058354e-06, + "logits/chosen": -2.3802921772003174, + "logits/rejected": -2.7357990741729736, + "logps/chosen": -200.85159301757812, + "logps/rejected": -257.86767578125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.30903434753418, + "rewards/margins": 7.494481086730957, + "rewards/rejected": -12.803515434265137, + "step": 11598 + }, + { + "epoch": 1.8, + "learning_rate": 5.640157684527206e-06, + "logits/chosen": -1.4818187952041626, + "logits/rejected": -2.6344754695892334, + "logps/chosen": -137.2263641357422, + "logps/rejected": -370.1511535644531, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.204916000366211, + "rewards/margins": 6.7638750076293945, + "rewards/rejected": -12.968791007995605, + "step": 11599 + }, + { + "epoch": 1.8, + "learning_rate": 5.639424243996058e-06, + "logits/chosen": -2.4625911712646484, + "logits/rejected": -2.6928577423095703, + "logps/chosen": -257.6324768066406, + "logps/rejected": -268.1251220703125, + "loss": 0.9864, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.6299026012420654, + "rewards/margins": 4.698319435119629, + "rewards/rejected": -7.328222274780273, + "step": 11600 + }, + { + "epoch": 1.8, + "learning_rate": 5.63869080346491e-06, + "logits/chosen": -3.0077733993530273, + "logits/rejected": -2.9523816108703613, + "logps/chosen": -145.23675537109375, + "logps/rejected": -186.8603515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.539586544036865, + "rewards/margins": 8.861259460449219, + "rewards/rejected": -13.400845527648926, + "step": 11601 + }, + { + "epoch": 1.8, + "learning_rate": 5.6379573629337625e-06, + "logits/chosen": -2.804857015609741, + "logits/rejected": -2.9542236328125, + "logps/chosen": -668.165771484375, + "logps/rejected": -528.1619873046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.500307083129883, + "rewards/margins": 9.71823501586914, + "rewards/rejected": -14.218541145324707, + "step": 11602 + }, + { + "epoch": 1.8, + "learning_rate": 5.637223922402614e-06, + "logits/chosen": -2.8964412212371826, + "logits/rejected": -1.7851723432540894, + "logps/chosen": -130.61846923828125, + "logps/rejected": -242.4703369140625, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.018678665161133, + "rewards/margins": 5.563098907470703, + "rewards/rejected": -10.581777572631836, + "step": 11603 + }, + { + "epoch": 1.8, + "learning_rate": 5.636490481871466e-06, + "logits/chosen": -1.5311410427093506, + "logits/rejected": -2.689659595489502, + "logps/chosen": -231.1759033203125, + "logps/rejected": -536.753173828125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9071125984191895, + "rewards/margins": 6.518897533416748, + "rewards/rejected": -12.426010131835938, + "step": 11604 + }, + { + "epoch": 1.8, + "learning_rate": 5.635757041340318e-06, + "logits/chosen": -2.6685240268707275, + "logits/rejected": -1.350900411605835, + "logps/chosen": -468.5860595703125, + "logps/rejected": -407.5349426269531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3105099201202393, + "rewards/margins": 9.681253433227539, + "rewards/rejected": -12.9917631149292, + "step": 11605 + }, + { + "epoch": 1.8, + "learning_rate": 5.63502360080917e-06, + "logits/chosen": -2.8235397338867188, + "logits/rejected": -3.0702226161956787, + "logps/chosen": -648.9298095703125, + "logps/rejected": -464.7907409667969, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7000274658203125, + "rewards/margins": 7.451934337615967, + "rewards/rejected": -10.151962280273438, + "step": 11606 + }, + { + "epoch": 1.81, + "learning_rate": 5.634290160278023e-06, + "logits/chosen": -2.389451742172241, + "logits/rejected": -2.8174588680267334, + "logps/chosen": -190.95632934570312, + "logps/rejected": -366.42791748046875, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7772018909454346, + "rewards/margins": 8.973531723022461, + "rewards/rejected": -12.750733375549316, + "step": 11607 + }, + { + "epoch": 1.81, + "learning_rate": 5.633556719746875e-06, + "logits/chosen": -2.524460792541504, + "logits/rejected": -2.7329041957855225, + "logps/chosen": -110.15692138671875, + "logps/rejected": -384.9762268066406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.79723596572876, + "rewards/margins": 8.414466857910156, + "rewards/rejected": -13.211702346801758, + "step": 11608 + }, + { + "epoch": 1.81, + "learning_rate": 5.6328232792157265e-06, + "logits/chosen": -2.9856226444244385, + "logits/rejected": -2.947577714920044, + "logps/chosen": -278.56195068359375, + "logps/rejected": -289.7535705566406, + "loss": 0.0287, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.96109676361084, + "rewards/margins": 3.683215618133545, + "rewards/rejected": -9.644311904907227, + "step": 11609 + }, + { + "epoch": 1.81, + "learning_rate": 5.632089838684579e-06, + "logits/chosen": -1.199584722518921, + "logits/rejected": -2.850865602493286, + "logps/chosen": -129.81527709960938, + "logps/rejected": -322.9768981933594, + "loss": 0.201, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.54138708114624, + "rewards/margins": 6.886489391326904, + "rewards/rejected": -11.427876472473145, + "step": 11610 + }, + { + "epoch": 1.81, + "learning_rate": 5.631356398153432e-06, + "logits/chosen": -2.940911293029785, + "logits/rejected": -3.005324602127075, + "logps/chosen": -314.3362731933594, + "logps/rejected": -183.99526977539062, + "loss": 0.1259, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.50147819519043, + "rewards/margins": 2.7003536224365234, + "rewards/rejected": -8.201831817626953, + "step": 11611 + }, + { + "epoch": 1.81, + "learning_rate": 5.630622957622284e-06, + "logits/chosen": -2.9555604457855225, + "logits/rejected": -3.0275368690490723, + "logps/chosen": -88.24069213867188, + "logps/rejected": -252.11770629882812, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.900582313537598, + "rewards/margins": 6.061670303344727, + "rewards/rejected": -10.96225357055664, + "step": 11612 + }, + { + "epoch": 1.81, + "learning_rate": 5.629889517091136e-06, + "logits/chosen": -2.283992052078247, + "logits/rejected": -2.744379997253418, + "logps/chosen": -213.21051025390625, + "logps/rejected": -422.08160400390625, + "loss": 0.1057, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.656500339508057, + "rewards/margins": 2.4660491943359375, + "rewards/rejected": -8.122549057006836, + "step": 11613 + }, + { + "epoch": 1.81, + "learning_rate": 5.629156076559988e-06, + "logits/chosen": -2.2270689010620117, + "logits/rejected": -2.7747678756713867, + "logps/chosen": -161.20574951171875, + "logps/rejected": -256.15289306640625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.864142894744873, + "rewards/margins": 6.192257881164551, + "rewards/rejected": -12.056400299072266, + "step": 11614 + }, + { + "epoch": 1.81, + "learning_rate": 5.6284226360288395e-06, + "logits/chosen": -1.9584660530090332, + "logits/rejected": -3.0530827045440674, + "logps/chosen": -87.79640197753906, + "logps/rejected": -412.6541748046875, + "loss": 0.0685, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0176215171813965, + "rewards/margins": 6.368429183959961, + "rewards/rejected": -11.386051177978516, + "step": 11615 + }, + { + "epoch": 1.81, + "learning_rate": 5.627689195497692e-06, + "logits/chosen": -2.6580440998077393, + "logits/rejected": -3.1041386127471924, + "logps/chosen": -219.44078063964844, + "logps/rejected": -486.3746643066406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5800833702087402, + "rewards/margins": 11.306520462036133, + "rewards/rejected": -14.886604309082031, + "step": 11616 + }, + { + "epoch": 1.81, + "learning_rate": 5.626955754966544e-06, + "logits/chosen": -2.5367379188537598, + "logits/rejected": -3.0483341217041016, + "logps/chosen": -300.1937255859375, + "logps/rejected": -514.8585205078125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.184329986572266, + "rewards/margins": 11.596362113952637, + "rewards/rejected": -19.780691146850586, + "step": 11617 + }, + { + "epoch": 1.81, + "learning_rate": 5.626222314435396e-06, + "logits/chosen": -1.6941752433776855, + "logits/rejected": -3.033504009246826, + "logps/chosen": -144.7351531982422, + "logps/rejected": -319.74273681640625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0916666984558105, + "rewards/margins": 8.638286590576172, + "rewards/rejected": -11.72995376586914, + "step": 11618 + }, + { + "epoch": 1.81, + "learning_rate": 5.625488873904248e-06, + "logits/chosen": -2.4264304637908936, + "logits/rejected": -2.9517862796783447, + "logps/chosen": -138.5775146484375, + "logps/rejected": -386.68505859375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.22898006439209, + "rewards/margins": 7.537084579467773, + "rewards/rejected": -12.766064643859863, + "step": 11619 + }, + { + "epoch": 1.81, + "learning_rate": 5.6247554333731006e-06, + "logits/chosen": -2.5630569458007812, + "logits/rejected": -1.2040598392486572, + "logps/chosen": -196.31686401367188, + "logps/rejected": -270.62115478515625, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.329103469848633, + "rewards/margins": 5.96073055267334, + "rewards/rejected": -12.289833068847656, + "step": 11620 + }, + { + "epoch": 1.81, + "learning_rate": 5.6240219928419525e-06, + "logits/chosen": -2.8523991107940674, + "logits/rejected": -2.9301390647888184, + "logps/chosen": -444.54180908203125, + "logps/rejected": -628.9208374023438, + "loss": 0.2766, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.238248348236084, + "rewards/margins": 2.9988675117492676, + "rewards/rejected": -10.237115859985352, + "step": 11621 + }, + { + "epoch": 1.81, + "learning_rate": 5.623288552310804e-06, + "logits/chosen": -2.411043643951416, + "logits/rejected": -2.5866384506225586, + "logps/chosen": -116.51393127441406, + "logps/rejected": -195.42312622070312, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.438104152679443, + "rewards/margins": 5.418829441070557, + "rewards/rejected": -9.85693359375, + "step": 11622 + }, + { + "epoch": 1.81, + "learning_rate": 5.622555111779656e-06, + "logits/chosen": -2.07226300239563, + "logits/rejected": -2.634425640106201, + "logps/chosen": -218.97216796875, + "logps/rejected": -591.030029296875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.830376625061035, + "rewards/margins": 7.8724470138549805, + "rewards/rejected": -13.702823638916016, + "step": 11623 + }, + { + "epoch": 1.81, + "learning_rate": 5.621821671248508e-06, + "logits/chosen": -2.0308737754821777, + "logits/rejected": -3.073427677154541, + "logps/chosen": -179.24725341796875, + "logps/rejected": -476.3313293457031, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.198734283447266, + "rewards/margins": 8.232735633850098, + "rewards/rejected": -12.431469917297363, + "step": 11624 + }, + { + "epoch": 1.81, + "learning_rate": 5.621088230717361e-06, + "logits/chosen": -1.4074525833129883, + "logits/rejected": -2.267028331756592, + "logps/chosen": -247.13851928710938, + "logps/rejected": -511.6983337402344, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.62626314163208, + "rewards/margins": 7.85996150970459, + "rewards/rejected": -14.486225128173828, + "step": 11625 + }, + { + "epoch": 1.81, + "learning_rate": 5.620354790186213e-06, + "logits/chosen": -2.912874221801758, + "logits/rejected": -1.8154752254486084, + "logps/chosen": -333.45654296875, + "logps/rejected": -150.630859375, + "loss": 0.0421, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2660012245178223, + "rewards/margins": 6.521477699279785, + "rewards/rejected": -8.787479400634766, + "step": 11626 + }, + { + "epoch": 1.81, + "learning_rate": 5.619621349655065e-06, + "logits/chosen": -2.284446954727173, + "logits/rejected": -3.108398675918579, + "logps/chosen": -204.76022338867188, + "logps/rejected": -301.5701599121094, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.075455665588379, + "rewards/margins": 7.667339324951172, + "rewards/rejected": -12.742795944213867, + "step": 11627 + }, + { + "epoch": 1.81, + "learning_rate": 5.618887909123917e-06, + "logits/chosen": -1.9847612380981445, + "logits/rejected": -2.730786085128784, + "logps/chosen": -157.85113525390625, + "logps/rejected": -409.80047607421875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.966516494750977, + "rewards/margins": 7.051922798156738, + "rewards/rejected": -12.018439292907715, + "step": 11628 + }, + { + "epoch": 1.81, + "learning_rate": 5.61815446859277e-06, + "logits/chosen": -2.905937433242798, + "logits/rejected": -2.5138354301452637, + "logps/chosen": -302.00531005859375, + "logps/rejected": -385.8193359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.011205196380615, + "rewards/margins": 10.86643123626709, + "rewards/rejected": -15.877635955810547, + "step": 11629 + }, + { + "epoch": 1.81, + "learning_rate": 5.617421028061622e-06, + "logits/chosen": -2.737839937210083, + "logits/rejected": -2.693391799926758, + "logps/chosen": -328.6179504394531, + "logps/rejected": -618.1572875976562, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.506917953491211, + "rewards/margins": 10.485769271850586, + "rewards/rejected": -17.992687225341797, + "step": 11630 + }, + { + "epoch": 1.81, + "learning_rate": 5.616687587530474e-06, + "logits/chosen": -1.183477759361267, + "logits/rejected": -2.54282808303833, + "logps/chosen": -157.77716064453125, + "logps/rejected": -502.70257568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5003786087036133, + "rewards/margins": 12.89992904663086, + "rewards/rejected": -16.40030860900879, + "step": 11631 + }, + { + "epoch": 1.81, + "learning_rate": 5.615954146999326e-06, + "logits/chosen": -1.4180022478103638, + "logits/rejected": -2.4865705966949463, + "logps/chosen": -242.2156982421875, + "logps/rejected": -596.5338134765625, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.44719934463501, + "rewards/margins": 9.176471710205078, + "rewards/rejected": -13.62367057800293, + "step": 11632 + }, + { + "epoch": 1.81, + "learning_rate": 5.6152207064681775e-06, + "logits/chosen": -2.8207244873046875, + "logits/rejected": -2.721431255340576, + "logps/chosen": -73.19884490966797, + "logps/rejected": -382.80780029296875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.177512168884277, + "rewards/margins": 9.52171516418457, + "rewards/rejected": -13.699227333068848, + "step": 11633 + }, + { + "epoch": 1.81, + "learning_rate": 5.61448726593703e-06, + "logits/chosen": -2.8138587474823, + "logits/rejected": -1.8316940069198608, + "logps/chosen": -263.08258056640625, + "logps/rejected": -422.0721435546875, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.883791923522949, + "rewards/margins": 6.959305763244629, + "rewards/rejected": -14.843097686767578, + "step": 11634 + }, + { + "epoch": 1.81, + "learning_rate": 5.613753825405882e-06, + "logits/chosen": -1.5289865732192993, + "logits/rejected": -3.1069629192352295, + "logps/chosen": -379.6151123046875, + "logps/rejected": -553.8228759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.214526653289795, + "rewards/margins": 10.483680725097656, + "rewards/rejected": -13.69820785522461, + "step": 11635 + }, + { + "epoch": 1.81, + "learning_rate": 5.613020384874734e-06, + "logits/chosen": -2.1565780639648438, + "logits/rejected": -2.906604290008545, + "logps/chosen": -373.64276123046875, + "logps/rejected": -573.9102783203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3835020065307617, + "rewards/margins": 11.627755165100098, + "rewards/rejected": -15.01125717163086, + "step": 11636 + }, + { + "epoch": 1.81, + "learning_rate": 5.612286944343586e-06, + "logits/chosen": -2.8312652111053467, + "logits/rejected": -1.8473870754241943, + "logps/chosen": -766.15283203125, + "logps/rejected": -476.4765930175781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.227569818496704, + "rewards/margins": 10.173255920410156, + "rewards/rejected": -12.400825500488281, + "step": 11637 + }, + { + "epoch": 1.81, + "learning_rate": 5.611553503812439e-06, + "logits/chosen": -2.9356484413146973, + "logits/rejected": -2.119312286376953, + "logps/chosen": -352.65838623046875, + "logps/rejected": -374.0684814453125, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.04155158996582, + "rewards/margins": 3.9697344303131104, + "rewards/rejected": -13.011285781860352, + "step": 11638 + }, + { + "epoch": 1.81, + "learning_rate": 5.6108200632812905e-06, + "logits/chosen": -1.1321043968200684, + "logits/rejected": -2.952010154724121, + "logps/chosen": -203.197509765625, + "logps/rejected": -393.41339111328125, + "loss": 0.068, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.002713203430176, + "rewards/margins": 5.0349650382995605, + "rewards/rejected": -12.037677764892578, + "step": 11639 + }, + { + "epoch": 1.81, + "learning_rate": 5.610086622750142e-06, + "logits/chosen": -2.963916063308716, + "logits/rejected": -2.3213868141174316, + "logps/chosen": -233.37222290039062, + "logps/rejected": -252.623291015625, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.571339130401611, + "rewards/margins": 6.7615437507629395, + "rewards/rejected": -12.33288288116455, + "step": 11640 + }, + { + "epoch": 1.81, + "learning_rate": 5.609353182218994e-06, + "logits/chosen": -2.2799713611602783, + "logits/rejected": -2.670074224472046, + "logps/chosen": -121.57542419433594, + "logps/rejected": -302.0794677734375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.915315628051758, + "rewards/margins": 7.208656311035156, + "rewards/rejected": -14.123971939086914, + "step": 11641 + }, + { + "epoch": 1.81, + "learning_rate": 5.608619741687847e-06, + "logits/chosen": -2.7335190773010254, + "logits/rejected": -2.882913589477539, + "logps/chosen": -235.4344940185547, + "logps/rejected": -299.553955078125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.853118896484375, + "rewards/margins": 6.485840797424316, + "rewards/rejected": -12.338959693908691, + "step": 11642 + }, + { + "epoch": 1.81, + "learning_rate": 5.607886301156699e-06, + "logits/chosen": -3.1009960174560547, + "logits/rejected": -2.9123079776763916, + "logps/chosen": -104.87860107421875, + "logps/rejected": -167.8681640625, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.062565326690674, + "rewards/margins": 5.273808002471924, + "rewards/rejected": -10.336373329162598, + "step": 11643 + }, + { + "epoch": 1.81, + "learning_rate": 5.607152860625552e-06, + "logits/chosen": -2.6401207447052, + "logits/rejected": -2.7288155555725098, + "logps/chosen": -316.6332702636719, + "logps/rejected": -372.74481201171875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.119253635406494, + "rewards/margins": 7.169828414916992, + "rewards/rejected": -13.289081573486328, + "step": 11644 + }, + { + "epoch": 1.81, + "learning_rate": 5.6064194200944035e-06, + "logits/chosen": -2.6434953212738037, + "logits/rejected": -0.9102199077606201, + "logps/chosen": -351.982177734375, + "logps/rejected": -95.18949890136719, + "loss": 0.9549, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.613048076629639, + "rewards/margins": -0.3557744026184082, + "rewards/rejected": -7.2572736740112305, + "step": 11645 + }, + { + "epoch": 1.81, + "learning_rate": 5.605685979563255e-06, + "logits/chosen": -2.9562156200408936, + "logits/rejected": -3.1614885330200195, + "logps/chosen": -129.90216064453125, + "logps/rejected": -306.91839599609375, + "loss": 0.0445, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.570889472961426, + "rewards/margins": 4.049160003662109, + "rewards/rejected": -8.620048522949219, + "step": 11646 + }, + { + "epoch": 1.81, + "learning_rate": 5.604952539032108e-06, + "logits/chosen": -1.9280555248260498, + "logits/rejected": -2.6459169387817383, + "logps/chosen": -125.56965637207031, + "logps/rejected": -341.45379638671875, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1245574951171875, + "rewards/margins": 5.826960563659668, + "rewards/rejected": -11.951519012451172, + "step": 11647 + }, + { + "epoch": 1.81, + "learning_rate": 5.60421909850096e-06, + "logits/chosen": -2.257065773010254, + "logits/rejected": -2.8491814136505127, + "logps/chosen": -246.3405303955078, + "logps/rejected": -302.02020263671875, + "loss": 0.1307, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.36590051651001, + "rewards/margins": 3.7286276817321777, + "rewards/rejected": -9.094528198242188, + "step": 11648 + }, + { + "epoch": 1.81, + "learning_rate": 5.603485657969812e-06, + "logits/chosen": -2.201387643814087, + "logits/rejected": -3.0905697345733643, + "logps/chosen": -183.01849365234375, + "logps/rejected": -520.07275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2017102241516113, + "rewards/margins": 13.237354278564453, + "rewards/rejected": -15.439064025878906, + "step": 11649 + }, + { + "epoch": 1.81, + "learning_rate": 5.602752217438664e-06, + "logits/chosen": -1.069136142730713, + "logits/rejected": -2.531607151031494, + "logps/chosen": -98.12458038330078, + "logps/rejected": -335.8477478027344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3018336296081543, + "rewards/margins": 10.442720413208008, + "rewards/rejected": -13.74455451965332, + "step": 11650 + }, + { + "epoch": 1.81, + "learning_rate": 5.6020187769075164e-06, + "logits/chosen": -2.242184638977051, + "logits/rejected": -2.7403817176818848, + "logps/chosen": -83.41642761230469, + "logps/rejected": -259.79144287109375, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.335967540740967, + "rewards/margins": 7.149034023284912, + "rewards/rejected": -11.485001564025879, + "step": 11651 + }, + { + "epoch": 1.81, + "learning_rate": 5.601285336376368e-06, + "logits/chosen": -2.7776269912719727, + "logits/rejected": -2.0194618701934814, + "logps/chosen": -195.696533203125, + "logps/rejected": -197.38150024414062, + "loss": 0.5554, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.092563152313232, + "rewards/margins": 3.4587647914886475, + "rewards/rejected": -8.551328659057617, + "step": 11652 + }, + { + "epoch": 1.81, + "learning_rate": 5.60055189584522e-06, + "logits/chosen": -1.6184860467910767, + "logits/rejected": -2.360562562942505, + "logps/chosen": -138.4416961669922, + "logps/rejected": -469.82928466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1578164100646973, + "rewards/margins": 12.951648712158203, + "rewards/rejected": -16.109464645385742, + "step": 11653 + }, + { + "epoch": 1.81, + "learning_rate": 5.599818455314072e-06, + "logits/chosen": -1.4539777040481567, + "logits/rejected": -2.6933960914611816, + "logps/chosen": -135.38119506835938, + "logps/rejected": -511.8253173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.558622360229492, + "rewards/margins": 13.54261302947998, + "rewards/rejected": -18.101234436035156, + "step": 11654 + }, + { + "epoch": 1.81, + "learning_rate": 5.599085014782924e-06, + "logits/chosen": -2.274554491043091, + "logits/rejected": -2.9401352405548096, + "logps/chosen": -299.8583984375, + "logps/rejected": -695.12744140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4673967361450195, + "rewards/margins": 8.417156219482422, + "rewards/rejected": -13.884552001953125, + "step": 11655 + }, + { + "epoch": 1.81, + "learning_rate": 5.598351574251777e-06, + "logits/chosen": -1.1860030889511108, + "logits/rejected": -2.3218119144439697, + "logps/chosen": -223.32164001464844, + "logps/rejected": -594.4632568359375, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.380952835083008, + "rewards/margins": 12.455833435058594, + "rewards/rejected": -17.8367862701416, + "step": 11656 + }, + { + "epoch": 1.81, + "learning_rate": 5.5976181337206285e-06, + "logits/chosen": -3.086456537246704, + "logits/rejected": -2.776522159576416, + "logps/chosen": -879.1241455078125, + "logps/rejected": -598.9898681640625, + "loss": 2.1014, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.307438850402832, + "rewards/margins": 1.4318554401397705, + "rewards/rejected": -7.739294528961182, + "step": 11657 + }, + { + "epoch": 1.81, + "learning_rate": 5.59688469318948e-06, + "logits/chosen": -2.9915506839752197, + "logits/rejected": -3.1173930168151855, + "logps/chosen": -114.07038879394531, + "logps/rejected": -264.36138916015625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.61341667175293, + "rewards/margins": 5.995506286621094, + "rewards/rejected": -13.608922958374023, + "step": 11658 + }, + { + "epoch": 1.81, + "learning_rate": 5.596151252658332e-06, + "logits/chosen": -2.836421251296997, + "logits/rejected": -2.976572275161743, + "logps/chosen": -217.23016357421875, + "logps/rejected": -213.97103881835938, + "loss": 1.6987, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.402111053466797, + "rewards/margins": 2.940911293029785, + "rewards/rejected": -12.343021392822266, + "step": 11659 + }, + { + "epoch": 1.81, + "learning_rate": 5.595417812127185e-06, + "logits/chosen": -2.528674602508545, + "logits/rejected": -3.1569886207580566, + "logps/chosen": -153.5944061279297, + "logps/rejected": -332.071044921875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.173241138458252, + "rewards/margins": 9.210745811462402, + "rewards/rejected": -15.383986473083496, + "step": 11660 + }, + { + "epoch": 1.81, + "learning_rate": 5.594684371596038e-06, + "logits/chosen": -2.5648932456970215, + "logits/rejected": -2.866903781890869, + "logps/chosen": -107.35055541992188, + "logps/rejected": -255.72463989257812, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.218542098999023, + "rewards/margins": 7.215319633483887, + "rewards/rejected": -11.43386173248291, + "step": 11661 + }, + { + "epoch": 1.81, + "learning_rate": 5.59395093106489e-06, + "logits/chosen": -2.2316977977752686, + "logits/rejected": -2.744645357131958, + "logps/chosen": -184.41050720214844, + "logps/rejected": -394.4517822265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.333254814147949, + "rewards/margins": 10.532350540161133, + "rewards/rejected": -14.865604400634766, + "step": 11662 + }, + { + "epoch": 1.81, + "learning_rate": 5.5932174905337415e-06, + "logits/chosen": -1.527534008026123, + "logits/rejected": -2.763240098953247, + "logps/chosen": -111.76810455322266, + "logps/rejected": -257.5354309082031, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.835225820541382, + "rewards/margins": 6.472323417663574, + "rewards/rejected": -10.307548522949219, + "step": 11663 + }, + { + "epoch": 1.81, + "learning_rate": 5.592484050002593e-06, + "logits/chosen": -1.5895148515701294, + "logits/rejected": -2.621723175048828, + "logps/chosen": -201.49465942382812, + "logps/rejected": -295.5517578125, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.395414352416992, + "rewards/margins": 5.101064682006836, + "rewards/rejected": -9.496479034423828, + "step": 11664 + }, + { + "epoch": 1.81, + "learning_rate": 5.591750609471446e-06, + "logits/chosen": -2.9972317218780518, + "logits/rejected": -2.9146482944488525, + "logps/chosen": -146.48452758789062, + "logps/rejected": -244.18138122558594, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.16364860534668, + "rewards/margins": 6.0993242263793945, + "rewards/rejected": -10.262972831726074, + "step": 11665 + }, + { + "epoch": 1.81, + "learning_rate": 5.591017168940298e-06, + "logits/chosen": -2.853120803833008, + "logits/rejected": -3.027879476547241, + "logps/chosen": -146.67041015625, + "logps/rejected": -290.30816650390625, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.058330535888672, + "rewards/margins": 8.891397476196289, + "rewards/rejected": -13.949728012084961, + "step": 11666 + }, + { + "epoch": 1.81, + "learning_rate": 5.59028372840915e-06, + "logits/chosen": -2.3686931133270264, + "logits/rejected": -2.8652613162994385, + "logps/chosen": -234.89312744140625, + "logps/rejected": -245.4609832763672, + "loss": 0.8367, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.287277698516846, + "rewards/margins": 3.013853073120117, + "rewards/rejected": -10.301130294799805, + "step": 11667 + }, + { + "epoch": 1.81, + "learning_rate": 5.589550287878002e-06, + "logits/chosen": -1.5805554389953613, + "logits/rejected": -2.537463903427124, + "logps/chosen": -157.90869140625, + "logps/rejected": -331.243408203125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.826259613037109, + "rewards/margins": 7.169626712799072, + "rewards/rejected": -12.995885848999023, + "step": 11668 + }, + { + "epoch": 1.81, + "learning_rate": 5.5888168473468545e-06, + "logits/chosen": -1.8005399703979492, + "logits/rejected": -2.3873093128204346, + "logps/chosen": -147.47531127929688, + "logps/rejected": -429.5009765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.82662296295166, + "rewards/margins": 12.157066345214844, + "rewards/rejected": -16.983688354492188, + "step": 11669 + }, + { + "epoch": 1.81, + "learning_rate": 5.588083406815706e-06, + "logits/chosen": -1.0570436716079712, + "logits/rejected": -2.6881790161132812, + "logps/chosen": -159.90054321289062, + "logps/rejected": -635.2199096679688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.677900314331055, + "rewards/margins": 10.668512344360352, + "rewards/rejected": -15.346412658691406, + "step": 11670 + }, + { + "epoch": 1.82, + "learning_rate": 5.587349966284558e-06, + "logits/chosen": -1.3986371755599976, + "logits/rejected": -2.6111252307891846, + "logps/chosen": -205.19522094726562, + "logps/rejected": -570.0764770507812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.592891693115234, + "rewards/margins": 9.431598663330078, + "rewards/rejected": -16.024490356445312, + "step": 11671 + }, + { + "epoch": 1.82, + "learning_rate": 5.58661652575341e-06, + "logits/chosen": -2.9231061935424805, + "logits/rejected": -3.104613780975342, + "logps/chosen": -115.28561401367188, + "logps/rejected": -291.10321044921875, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.165234565734863, + "rewards/margins": 6.920755863189697, + "rewards/rejected": -13.085990905761719, + "step": 11672 + }, + { + "epoch": 1.82, + "learning_rate": 5.585883085222262e-06, + "logits/chosen": -2.544417381286621, + "logits/rejected": -2.930070400238037, + "logps/chosen": -441.1326904296875, + "logps/rejected": -476.6912536621094, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.439562797546387, + "rewards/margins": 10.21569538116455, + "rewards/rejected": -14.655258178710938, + "step": 11673 + }, + { + "epoch": 1.82, + "learning_rate": 5.585149644691115e-06, + "logits/chosen": -1.4191415309906006, + "logits/rejected": -2.68910813331604, + "logps/chosen": -262.59881591796875, + "logps/rejected": -432.65203857421875, + "loss": 1.1345, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.79796314239502, + "rewards/margins": 3.778097629547119, + "rewards/rejected": -12.576061248779297, + "step": 11674 + }, + { + "epoch": 1.82, + "learning_rate": 5.584416204159967e-06, + "logits/chosen": -2.8565127849578857, + "logits/rejected": -2.7445006370544434, + "logps/chosen": -226.37721252441406, + "logps/rejected": -371.8709716796875, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.487464904785156, + "rewards/margins": 6.154916763305664, + "rewards/rejected": -12.64238166809082, + "step": 11675 + }, + { + "epoch": 1.82, + "learning_rate": 5.5836827636288185e-06, + "logits/chosen": -1.0409129858016968, + "logits/rejected": -2.7577977180480957, + "logps/chosen": -149.44198608398438, + "logps/rejected": -468.14825439453125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8079118728637695, + "rewards/margins": 8.377883911132812, + "rewards/rejected": -15.185796737670898, + "step": 11676 + }, + { + "epoch": 1.82, + "learning_rate": 5.582949323097671e-06, + "logits/chosen": -2.75954008102417, + "logits/rejected": -2.8597805500030518, + "logps/chosen": -213.12747192382812, + "logps/rejected": -419.26287841796875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9899415969848633, + "rewards/margins": 9.122451782226562, + "rewards/rejected": -13.112393379211426, + "step": 11677 + }, + { + "epoch": 1.82, + "learning_rate": 5.582215882566524e-06, + "logits/chosen": -2.624248743057251, + "logits/rejected": -2.912388801574707, + "logps/chosen": -256.93048095703125, + "logps/rejected": -584.76708984375, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.54738187789917, + "rewards/margins": 7.285778045654297, + "rewards/rejected": -13.833160400390625, + "step": 11678 + }, + { + "epoch": 1.82, + "learning_rate": 5.581482442035376e-06, + "logits/chosen": -2.928928852081299, + "logits/rejected": -3.1309115886688232, + "logps/chosen": -305.6878662109375, + "logps/rejected": -282.3290100097656, + "loss": 0.5601, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.400456428527832, + "rewards/margins": 2.9063127040863037, + "rewards/rejected": -7.306769371032715, + "step": 11679 + }, + { + "epoch": 1.82, + "learning_rate": 5.580749001504228e-06, + "logits/chosen": -1.3569529056549072, + "logits/rejected": -2.950573682785034, + "logps/chosen": -144.61874389648438, + "logps/rejected": -429.151123046875, + "loss": 0.3359, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.373924255371094, + "rewards/margins": 3.845258951187134, + "rewards/rejected": -12.219182968139648, + "step": 11680 + }, + { + "epoch": 1.82, + "learning_rate": 5.5800155609730796e-06, + "logits/chosen": -2.167863368988037, + "logits/rejected": -2.857475757598877, + "logps/chosen": -208.8539276123047, + "logps/rejected": -554.565185546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.263220310211182, + "rewards/margins": 11.241641998291016, + "rewards/rejected": -16.50486183166504, + "step": 11681 + }, + { + "epoch": 1.82, + "learning_rate": 5.5792821204419314e-06, + "logits/chosen": -2.7597813606262207, + "logits/rejected": -2.420912742614746, + "logps/chosen": -133.55548095703125, + "logps/rejected": -272.60711669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.538665294647217, + "rewards/margins": 10.189395904541016, + "rewards/rejected": -14.728060722351074, + "step": 11682 + }, + { + "epoch": 1.82, + "learning_rate": 5.578548679910784e-06, + "logits/chosen": -1.8792150020599365, + "logits/rejected": -2.901034116744995, + "logps/chosen": -94.32365417480469, + "logps/rejected": -274.57904052734375, + "loss": 0.0384, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.064233779907227, + "rewards/margins": 4.376644611358643, + "rewards/rejected": -9.440877914428711, + "step": 11683 + }, + { + "epoch": 1.82, + "learning_rate": 5.577815239379636e-06, + "logits/chosen": -2.10011887550354, + "logits/rejected": -2.565896511077881, + "logps/chosen": -193.27734375, + "logps/rejected": -291.03326416015625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.234343528747559, + "rewards/margins": 6.85184383392334, + "rewards/rejected": -11.086187362670898, + "step": 11684 + }, + { + "epoch": 1.82, + "learning_rate": 5.577081798848488e-06, + "logits/chosen": -1.7481064796447754, + "logits/rejected": -2.815370559692383, + "logps/chosen": -291.935546875, + "logps/rejected": -364.3854675292969, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6289594173431396, + "rewards/margins": 9.075572967529297, + "rewards/rejected": -12.704532623291016, + "step": 11685 + }, + { + "epoch": 1.82, + "learning_rate": 5.57634835831734e-06, + "logits/chosen": -2.703652858734131, + "logits/rejected": -2.9354207515716553, + "logps/chosen": -197.0321044921875, + "logps/rejected": -313.55181884765625, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.329174041748047, + "rewards/margins": 7.395201683044434, + "rewards/rejected": -12.72437572479248, + "step": 11686 + }, + { + "epoch": 1.82, + "learning_rate": 5.5756149177861925e-06, + "logits/chosen": -2.6337788105010986, + "logits/rejected": -1.090211033821106, + "logps/chosen": -231.79953002929688, + "logps/rejected": -182.281494140625, + "loss": 0.1461, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.410930633544922, + "rewards/margins": 2.938103675842285, + "rewards/rejected": -9.349034309387207, + "step": 11687 + }, + { + "epoch": 1.82, + "learning_rate": 5.574881477255044e-06, + "logits/chosen": -1.2852907180786133, + "logits/rejected": -2.735834836959839, + "logps/chosen": -144.4906768798828, + "logps/rejected": -289.3571472167969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.287291526794434, + "rewards/margins": 9.78567123413086, + "rewards/rejected": -15.07296371459961, + "step": 11688 + }, + { + "epoch": 1.82, + "learning_rate": 5.574148036723896e-06, + "logits/chosen": -1.2974752187728882, + "logits/rejected": -2.1623713970184326, + "logps/chosen": -161.51123046875, + "logps/rejected": -437.76531982421875, + "loss": 0.1511, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.220943450927734, + "rewards/margins": 4.37569522857666, + "rewards/rejected": -11.596638679504395, + "step": 11689 + }, + { + "epoch": 1.82, + "learning_rate": 5.573414596192748e-06, + "logits/chosen": -2.1311469078063965, + "logits/rejected": -2.6173412799835205, + "logps/chosen": -309.17608642578125, + "logps/rejected": -420.7701416015625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.733445644378662, + "rewards/margins": 7.641579627990723, + "rewards/rejected": -13.375024795532227, + "step": 11690 + }, + { + "epoch": 1.82, + "learning_rate": 5.572681155661601e-06, + "logits/chosen": -2.015803575515747, + "logits/rejected": -2.2782347202301025, + "logps/chosen": -109.08180236816406, + "logps/rejected": -155.73590087890625, + "loss": 0.5365, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.127080917358398, + "rewards/margins": 1.8875160217285156, + "rewards/rejected": -10.014596939086914, + "step": 11691 + }, + { + "epoch": 1.82, + "learning_rate": 5.571947715130453e-06, + "logits/chosen": -2.761497974395752, + "logits/rejected": -2.3520214557647705, + "logps/chosen": -446.57904052734375, + "logps/rejected": -385.82672119140625, + "loss": 0.3022, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3562397956848145, + "rewards/margins": 3.9638514518737793, + "rewards/rejected": -10.320091247558594, + "step": 11692 + }, + { + "epoch": 1.82, + "learning_rate": 5.571214274599305e-06, + "logits/chosen": -1.8717026710510254, + "logits/rejected": -2.643821954727173, + "logps/chosen": -201.42642211914062, + "logps/rejected": -367.634033203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.696163177490234, + "rewards/margins": 8.93648910522461, + "rewards/rejected": -14.632652282714844, + "step": 11693 + }, + { + "epoch": 1.82, + "learning_rate": 5.570480834068157e-06, + "logits/chosen": -2.022355794906616, + "logits/rejected": -2.8337316513061523, + "logps/chosen": -156.83941650390625, + "logps/rejected": -529.9271240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.48378849029541, + "rewards/margins": 10.411459922790527, + "rewards/rejected": -14.895248413085938, + "step": 11694 + }, + { + "epoch": 1.82, + "learning_rate": 5.569747393537009e-06, + "logits/chosen": -3.0512683391571045, + "logits/rejected": -2.4340622425079346, + "logps/chosen": -655.3778686523438, + "logps/rejected": -724.5189208984375, + "loss": 1.1809, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.870832443237305, + "rewards/margins": 1.623852014541626, + "rewards/rejected": -8.494684219360352, + "step": 11695 + }, + { + "epoch": 1.82, + "learning_rate": 5.569013953005862e-06, + "logits/chosen": -2.508199453353882, + "logits/rejected": -2.6078572273254395, + "logps/chosen": -282.66217041015625, + "logps/rejected": -411.8116149902344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.90156364440918, + "rewards/margins": 8.917251586914062, + "rewards/rejected": -13.818815231323242, + "step": 11696 + }, + { + "epoch": 1.82, + "learning_rate": 5.568280512474714e-06, + "logits/chosen": -1.8667919635772705, + "logits/rejected": -2.6603565216064453, + "logps/chosen": -166.31515502929688, + "logps/rejected": -561.0401611328125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3484034538269043, + "rewards/margins": 10.210845947265625, + "rewards/rejected": -13.559249877929688, + "step": 11697 + }, + { + "epoch": 1.82, + "learning_rate": 5.567547071943566e-06, + "logits/chosen": -2.4666993618011475, + "logits/rejected": -3.1554789543151855, + "logps/chosen": -183.27967834472656, + "logps/rejected": -258.8273010253906, + "loss": 1.0042, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.7502970695495605, + "rewards/margins": 4.786020278930664, + "rewards/rejected": -10.536317825317383, + "step": 11698 + }, + { + "epoch": 1.82, + "learning_rate": 5.566813631412418e-06, + "logits/chosen": -2.9061319828033447, + "logits/rejected": -1.824920892715454, + "logps/chosen": -193.7344970703125, + "logps/rejected": -312.20379638671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.151944160461426, + "rewards/margins": 9.283722877502441, + "rewards/rejected": -15.435667037963867, + "step": 11699 + }, + { + "epoch": 1.82, + "learning_rate": 5.56608019088127e-06, + "logits/chosen": -2.8681716918945312, + "logits/rejected": -2.6888034343719482, + "logps/chosen": -321.4989013671875, + "logps/rejected": -309.2042236328125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5392303466796875, + "rewards/margins": 7.919576644897461, + "rewards/rejected": -14.458806991577148, + "step": 11700 + }, + { + "epoch": 1.82, + "learning_rate": 5.565346750350122e-06, + "logits/chosen": -2.912564754486084, + "logits/rejected": -2.348106861114502, + "logps/chosen": -345.5370788574219, + "logps/rejected": -282.3974914550781, + "loss": 0.2848, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5481386184692383, + "rewards/margins": 2.3581626415252686, + "rewards/rejected": -5.906301021575928, + "step": 11701 + }, + { + "epoch": 1.82, + "learning_rate": 5.564613309818974e-06, + "logits/chosen": -2.9639697074890137, + "logits/rejected": -2.316969633102417, + "logps/chosen": -234.02679443359375, + "logps/rejected": -213.3622283935547, + "loss": 0.0319, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.837902069091797, + "rewards/margins": 4.527572154998779, + "rewards/rejected": -10.365474700927734, + "step": 11702 + }, + { + "epoch": 1.82, + "learning_rate": 5.563879869287826e-06, + "logits/chosen": -2.035130262374878, + "logits/rejected": -3.0552430152893066, + "logps/chosen": -339.9310302734375, + "logps/rejected": -543.27880859375, + "loss": 0.0313, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.139843940734863, + "rewards/margins": 3.8377861976623535, + "rewards/rejected": -9.977630615234375, + "step": 11703 + }, + { + "epoch": 1.82, + "learning_rate": 5.563146428756678e-06, + "logits/chosen": -2.996518135070801, + "logits/rejected": -2.728761672973633, + "logps/chosen": -475.350341796875, + "logps/rejected": -325.87725830078125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.140965461730957, + "rewards/margins": 8.041007995605469, + "rewards/rejected": -11.181973457336426, + "step": 11704 + }, + { + "epoch": 1.82, + "learning_rate": 5.5624129882255306e-06, + "logits/chosen": -2.82645583152771, + "logits/rejected": -3.0944113731384277, + "logps/chosen": -269.67047119140625, + "logps/rejected": -419.65386962890625, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.049676895141602, + "rewards/margins": 6.597989082336426, + "rewards/rejected": -11.647665023803711, + "step": 11705 + }, + { + "epoch": 1.82, + "learning_rate": 5.5616795476943824e-06, + "logits/chosen": -2.440242052078247, + "logits/rejected": -2.781177282333374, + "logps/chosen": -708.995849609375, + "logps/rejected": -793.600830078125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5368452072143555, + "rewards/margins": 8.04741096496582, + "rewards/rejected": -12.584256172180176, + "step": 11706 + }, + { + "epoch": 1.82, + "learning_rate": 5.560946107163234e-06, + "logits/chosen": -1.8008993864059448, + "logits/rejected": -2.7690155506134033, + "logps/chosen": -129.4859619140625, + "logps/rejected": -196.96397399902344, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.613696575164795, + "rewards/margins": 5.896915912628174, + "rewards/rejected": -10.510612487792969, + "step": 11707 + }, + { + "epoch": 1.82, + "learning_rate": 5.560212666632086e-06, + "logits/chosen": -2.9748036861419678, + "logits/rejected": -2.0491607189178467, + "logps/chosen": -338.5069580078125, + "logps/rejected": -229.46636962890625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8623766899108887, + "rewards/margins": 7.751721382141113, + "rewards/rejected": -11.61409854888916, + "step": 11708 + }, + { + "epoch": 1.82, + "learning_rate": 5.559479226100939e-06, + "logits/chosen": -3.0153326988220215, + "logits/rejected": -2.1673760414123535, + "logps/chosen": -474.28668212890625, + "logps/rejected": -545.881103515625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.341865062713623, + "rewards/margins": 7.254002571105957, + "rewards/rejected": -11.595867156982422, + "step": 11709 + }, + { + "epoch": 1.82, + "learning_rate": 5.558745785569791e-06, + "logits/chosen": -3.077590227127075, + "logits/rejected": -2.658202648162842, + "logps/chosen": -183.48431396484375, + "logps/rejected": -336.69097900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8234877586364746, + "rewards/margins": 10.877153396606445, + "rewards/rejected": -14.700641632080078, + "step": 11710 + }, + { + "epoch": 1.82, + "learning_rate": 5.5580123450386435e-06, + "logits/chosen": -1.467475414276123, + "logits/rejected": -2.963064193725586, + "logps/chosen": -159.1962432861328, + "logps/rejected": -639.8605346679688, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.479855060577393, + "rewards/margins": 7.717323303222656, + "rewards/rejected": -14.19717788696289, + "step": 11711 + }, + { + "epoch": 1.82, + "learning_rate": 5.557278904507495e-06, + "logits/chosen": -2.9628419876098633, + "logits/rejected": -2.80084490776062, + "logps/chosen": -268.5165710449219, + "logps/rejected": -312.0068664550781, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.451401710510254, + "rewards/margins": 5.666942596435547, + "rewards/rejected": -9.1183443069458, + "step": 11712 + }, + { + "epoch": 1.82, + "learning_rate": 5.556545463976347e-06, + "logits/chosen": -1.6477686166763306, + "logits/rejected": -2.769813299179077, + "logps/chosen": -376.075439453125, + "logps/rejected": -714.463134765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.819331645965576, + "rewards/margins": 11.432975769042969, + "rewards/rejected": -15.252307891845703, + "step": 11713 + }, + { + "epoch": 1.82, + "learning_rate": 5.5558120234452e-06, + "logits/chosen": -3.122201681137085, + "logits/rejected": -2.955174684524536, + "logps/chosen": -72.90487670898438, + "logps/rejected": -164.67135620117188, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.240478515625, + "rewards/margins": 8.45564079284668, + "rewards/rejected": -11.69611930847168, + "step": 11714 + }, + { + "epoch": 1.82, + "learning_rate": 5.555078582914052e-06, + "logits/chosen": -1.8422062397003174, + "logits/rejected": -2.9157960414886475, + "logps/chosen": -600.6087646484375, + "logps/rejected": -638.7674560546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.059889316558838, + "rewards/margins": 9.094818115234375, + "rewards/rejected": -14.154707908630371, + "step": 11715 + }, + { + "epoch": 1.82, + "learning_rate": 5.554345142382904e-06, + "logits/chosen": -2.7199008464813232, + "logits/rejected": -3.093245267868042, + "logps/chosen": -255.755615234375, + "logps/rejected": -346.9353942871094, + "loss": 0.1065, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.681005477905273, + "rewards/margins": 3.7787907123565674, + "rewards/rejected": -10.459795951843262, + "step": 11716 + }, + { + "epoch": 1.82, + "learning_rate": 5.553611701851756e-06, + "logits/chosen": -2.7925844192504883, + "logits/rejected": -2.538609504699707, + "logps/chosen": -522.0206909179688, + "logps/rejected": -317.69696044921875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.237532615661621, + "rewards/margins": 6.696091651916504, + "rewards/rejected": -11.933624267578125, + "step": 11717 + }, + { + "epoch": 1.82, + "learning_rate": 5.552878261320608e-06, + "logits/chosen": -3.012446165084839, + "logits/rejected": -2.3671436309814453, + "logps/chosen": -569.4845581054688, + "logps/rejected": -699.8389892578125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6803085803985596, + "rewards/margins": 7.099839687347412, + "rewards/rejected": -9.78014850616455, + "step": 11718 + }, + { + "epoch": 1.82, + "learning_rate": 5.55214482078946e-06, + "logits/chosen": -1.282570481300354, + "logits/rejected": -2.3185198307037354, + "logps/chosen": -223.80531311035156, + "logps/rejected": -493.9730224609375, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.601367950439453, + "rewards/margins": 9.4273681640625, + "rewards/rejected": -16.028736114501953, + "step": 11719 + }, + { + "epoch": 1.82, + "learning_rate": 5.551411380258312e-06, + "logits/chosen": -2.131385564804077, + "logits/rejected": -2.4068410396575928, + "logps/chosen": -242.47134399414062, + "logps/rejected": -302.7034912109375, + "loss": 0.1679, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.015576362609863, + "rewards/margins": 6.790781497955322, + "rewards/rejected": -10.806358337402344, + "step": 11720 + }, + { + "epoch": 1.82, + "learning_rate": 5.550677939727164e-06, + "logits/chosen": -2.4965908527374268, + "logits/rejected": -2.9627835750579834, + "logps/chosen": -186.67315673828125, + "logps/rejected": -320.4017333984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.403494358062744, + "rewards/margins": 9.622796058654785, + "rewards/rejected": -13.026290893554688, + "step": 11721 + }, + { + "epoch": 1.82, + "learning_rate": 5.549944499196016e-06, + "logits/chosen": -1.7343032360076904, + "logits/rejected": -3.066469669342041, + "logps/chosen": -192.961669921875, + "logps/rejected": -544.4619140625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.798025608062744, + "rewards/margins": 10.223833084106445, + "rewards/rejected": -14.021858215332031, + "step": 11722 + }, + { + "epoch": 1.82, + "learning_rate": 5.549211058664869e-06, + "logits/chosen": -2.919557571411133, + "logits/rejected": -2.095266342163086, + "logps/chosen": -915.155029296875, + "logps/rejected": -580.5576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4554443359375, + "rewards/margins": 11.089767456054688, + "rewards/rejected": -14.545211791992188, + "step": 11723 + }, + { + "epoch": 1.82, + "learning_rate": 5.5484776181337205e-06, + "logits/chosen": -3.126835823059082, + "logits/rejected": -2.1658413410186768, + "logps/chosen": -233.40476989746094, + "logps/rejected": -219.0810546875, + "loss": 2.761, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.525588035583496, + "rewards/margins": -0.983600378036499, + "rewards/rejected": -6.541987419128418, + "step": 11724 + }, + { + "epoch": 1.82, + "learning_rate": 5.547744177602572e-06, + "logits/chosen": -3.0740256309509277, + "logits/rejected": -3.087836980819702, + "logps/chosen": -285.40264892578125, + "logps/rejected": -361.34429931640625, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.975501298904419, + "rewards/margins": 7.186431884765625, + "rewards/rejected": -10.161932945251465, + "step": 11725 + }, + { + "epoch": 1.82, + "learning_rate": 5.547010737071424e-06, + "logits/chosen": -2.800835132598877, + "logits/rejected": -2.890885829925537, + "logps/chosen": -593.4373779296875, + "logps/rejected": -638.1065673828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.035785675048828, + "rewards/margins": 8.102529525756836, + "rewards/rejected": -13.138315200805664, + "step": 11726 + }, + { + "epoch": 1.82, + "learning_rate": 5.546277296540277e-06, + "logits/chosen": -2.8643860816955566, + "logits/rejected": -2.7371957302093506, + "logps/chosen": -416.3354797363281, + "logps/rejected": -347.02813720703125, + "loss": 0.407, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.62009596824646, + "rewards/margins": 5.234078407287598, + "rewards/rejected": -7.85417366027832, + "step": 11727 + }, + { + "epoch": 1.82, + "learning_rate": 5.54554385600913e-06, + "logits/chosen": -3.005833625793457, + "logits/rejected": -2.9880974292755127, + "logps/chosen": -155.4415740966797, + "logps/rejected": -399.5291748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.779280662536621, + "rewards/margins": 11.880864143371582, + "rewards/rejected": -15.660144805908203, + "step": 11728 + }, + { + "epoch": 1.82, + "learning_rate": 5.5448104154779816e-06, + "logits/chosen": -2.888258934020996, + "logits/rejected": -3.115049362182617, + "logps/chosen": -155.24203491210938, + "logps/rejected": -253.30111694335938, + "loss": 0.0589, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.555323600769043, + "rewards/margins": 3.0868775844573975, + "rewards/rejected": -8.64220142364502, + "step": 11729 + }, + { + "epoch": 1.82, + "learning_rate": 5.5440769749468334e-06, + "logits/chosen": -2.7121002674102783, + "logits/rejected": -2.990061044692993, + "logps/chosen": -125.44974517822266, + "logps/rejected": -262.897705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6287922859191895, + "rewards/margins": 9.425037384033203, + "rewards/rejected": -12.05383014678955, + "step": 11730 + }, + { + "epoch": 1.82, + "learning_rate": 5.543343534415685e-06, + "logits/chosen": -0.6300018429756165, + "logits/rejected": -2.5832152366638184, + "logps/chosen": -117.87493896484375, + "logps/rejected": -275.6402893066406, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.077019691467285, + "rewards/margins": 3.885855197906494, + "rewards/rejected": -9.962875366210938, + "step": 11731 + }, + { + "epoch": 1.82, + "learning_rate": 5.542610093884538e-06, + "logits/chosen": -3.0593361854553223, + "logits/rejected": -2.2037432193756104, + "logps/chosen": -177.96096801757812, + "logps/rejected": -213.77317810058594, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.773906230926514, + "rewards/margins": 8.340347290039062, + "rewards/rejected": -13.114253997802734, + "step": 11732 + }, + { + "epoch": 1.82, + "learning_rate": 5.54187665335339e-06, + "logits/chosen": -2.8668980598449707, + "logits/rejected": -1.48265540599823, + "logps/chosen": -456.1121826171875, + "logps/rejected": -286.6543273925781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.455610752105713, + "rewards/margins": 8.747228622436523, + "rewards/rejected": -14.202838897705078, + "step": 11733 + }, + { + "epoch": 1.82, + "learning_rate": 5.541143212822242e-06, + "logits/chosen": -3.1267693042755127, + "logits/rejected": -2.9854884147644043, + "logps/chosen": -145.6939697265625, + "logps/rejected": -236.26611328125, + "loss": 0.103, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6230387687683105, + "rewards/margins": 4.097954750061035, + "rewards/rejected": -8.720993995666504, + "step": 11734 + }, + { + "epoch": 1.83, + "learning_rate": 5.540409772291094e-06, + "logits/chosen": -2.56268048286438, + "logits/rejected": -3.019488573074341, + "logps/chosen": -349.2286071777344, + "logps/rejected": -417.7371826171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.728865146636963, + "rewards/margins": 10.190879821777344, + "rewards/rejected": -14.919745445251465, + "step": 11735 + }, + { + "epoch": 1.83, + "learning_rate": 5.539676331759946e-06, + "logits/chosen": -3.0740785598754883, + "logits/rejected": -2.2751758098602295, + "logps/chosen": -317.2921142578125, + "logps/rejected": -241.13320922851562, + "loss": 0.3468, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.620779991149902, + "rewards/margins": 0.9659824371337891, + "rewards/rejected": -6.586762428283691, + "step": 11736 + }, + { + "epoch": 1.83, + "learning_rate": 5.538942891228798e-06, + "logits/chosen": -3.0058159828186035, + "logits/rejected": -3.064004421234131, + "logps/chosen": -202.69493103027344, + "logps/rejected": -336.2746887207031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.723374366760254, + "rewards/margins": 8.419534683227539, + "rewards/rejected": -11.14291000366211, + "step": 11737 + }, + { + "epoch": 1.83, + "learning_rate": 5.53820945069765e-06, + "logits/chosen": -2.058026075363159, + "logits/rejected": -2.959552526473999, + "logps/chosen": -184.1241455078125, + "logps/rejected": -484.33660888671875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.401266098022461, + "rewards/margins": 7.213441848754883, + "rewards/rejected": -10.614707946777344, + "step": 11738 + }, + { + "epoch": 1.83, + "learning_rate": 5.537476010166502e-06, + "logits/chosen": -1.8615422248840332, + "logits/rejected": -2.969101905822754, + "logps/chosen": -187.4171600341797, + "logps/rejected": -418.2893981933594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5694775581359863, + "rewards/margins": 10.023509979248047, + "rewards/rejected": -13.592987060546875, + "step": 11739 + }, + { + "epoch": 1.83, + "learning_rate": 5.536742569635355e-06, + "logits/chosen": -2.6271984577178955, + "logits/rejected": -2.615513324737549, + "logps/chosen": -338.1745910644531, + "logps/rejected": -407.060546875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.757401943206787, + "rewards/margins": 6.834160327911377, + "rewards/rejected": -9.591562271118164, + "step": 11740 + }, + { + "epoch": 1.83, + "learning_rate": 5.536009129104207e-06, + "logits/chosen": -2.3977298736572266, + "logits/rejected": -2.8171608448028564, + "logps/chosen": -207.44760131835938, + "logps/rejected": -354.40203857421875, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.543034553527832, + "rewards/margins": 5.288424015045166, + "rewards/rejected": -11.831459045410156, + "step": 11741 + }, + { + "epoch": 1.83, + "learning_rate": 5.5352756885730585e-06, + "logits/chosen": -1.547046184539795, + "logits/rejected": -3.04280161857605, + "logps/chosen": -184.0889892578125, + "logps/rejected": -387.8984680175781, + "loss": 0.8715, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.704320907592773, + "rewards/margins": 2.1577377319335938, + "rewards/rejected": -10.862058639526367, + "step": 11742 + }, + { + "epoch": 1.83, + "learning_rate": 5.53454224804191e-06, + "logits/chosen": -2.9709978103637695, + "logits/rejected": -3.0280768871307373, + "logps/chosen": -101.33808898925781, + "logps/rejected": -150.32125854492188, + "loss": 0.3318, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.19239330291748, + "rewards/margins": 2.483405590057373, + "rewards/rejected": -10.675798416137695, + "step": 11743 + }, + { + "epoch": 1.83, + "learning_rate": 5.533808807510763e-06, + "logits/chosen": -2.8045060634613037, + "logits/rejected": -2.6993494033813477, + "logps/chosen": -130.86134338378906, + "logps/rejected": -394.8694152832031, + "loss": 0.7281, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.807989120483398, + "rewards/margins": 7.21962833404541, + "rewards/rejected": -14.027617454528809, + "step": 11744 + }, + { + "epoch": 1.83, + "learning_rate": 5.533075366979616e-06, + "logits/chosen": -1.892639398574829, + "logits/rejected": -2.774655342102051, + "logps/chosen": -349.4320983886719, + "logps/rejected": -553.7122802734375, + "loss": 0.4036, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.202035903930664, + "rewards/margins": 6.703714370727539, + "rewards/rejected": -11.905749320983887, + "step": 11745 + }, + { + "epoch": 1.83, + "learning_rate": 5.532341926448468e-06, + "logits/chosen": -1.83882474899292, + "logits/rejected": -2.727997064590454, + "logps/chosen": -134.27667236328125, + "logps/rejected": -278.09814453125, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.737574577331543, + "rewards/margins": 6.593214988708496, + "rewards/rejected": -12.330789566040039, + "step": 11746 + }, + { + "epoch": 1.83, + "learning_rate": 5.53160848591732e-06, + "logits/chosen": -2.884976863861084, + "logits/rejected": -1.9405497312545776, + "logps/chosen": -566.5133056640625, + "logps/rejected": -398.4595947265625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.323511600494385, + "rewards/margins": 6.853971481323242, + "rewards/rejected": -11.177482604980469, + "step": 11747 + }, + { + "epoch": 1.83, + "learning_rate": 5.5308750453861715e-06, + "logits/chosen": -2.5801897048950195, + "logits/rejected": -2.5666048526763916, + "logps/chosen": -244.92630004882812, + "logps/rejected": -257.8798828125, + "loss": 3.5235, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.639286041259766, + "rewards/margins": -2.7420201301574707, + "rewards/rejected": -6.897265434265137, + "step": 11748 + }, + { + "epoch": 1.83, + "learning_rate": 5.530141604855024e-06, + "logits/chosen": -3.0801010131835938, + "logits/rejected": -2.197425365447998, + "logps/chosen": -330.954833984375, + "logps/rejected": -183.96595764160156, + "loss": 0.7466, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.417205810546875, + "rewards/margins": 5.604300498962402, + "rewards/rejected": -10.021506309509277, + "step": 11749 + }, + { + "epoch": 1.83, + "learning_rate": 5.529408164323876e-06, + "logits/chosen": -2.549090623855591, + "logits/rejected": -2.700918674468994, + "logps/chosen": -217.31692504882812, + "logps/rejected": -515.587646484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8979692459106445, + "rewards/margins": 10.324800491333008, + "rewards/rejected": -16.222768783569336, + "step": 11750 + }, + { + "epoch": 1.83, + "learning_rate": 5.528674723792728e-06, + "logits/chosen": -2.6508824825286865, + "logits/rejected": -3.1755263805389404, + "logps/chosen": -82.82756805419922, + "logps/rejected": -202.4748992919922, + "loss": 0.2116, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.067082405090332, + "rewards/margins": 3.7289628982543945, + "rewards/rejected": -9.796045303344727, + "step": 11751 + }, + { + "epoch": 1.83, + "learning_rate": 5.52794128326158e-06, + "logits/chosen": -2.4911699295043945, + "logits/rejected": -2.56990122795105, + "logps/chosen": -156.67474365234375, + "logps/rejected": -310.98724365234375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.417330265045166, + "rewards/margins": 6.841200351715088, + "rewards/rejected": -14.258530616760254, + "step": 11752 + }, + { + "epoch": 1.83, + "learning_rate": 5.527207842730432e-06, + "logits/chosen": -2.5174741744995117, + "logits/rejected": -2.9364306926727295, + "logps/chosen": -255.54385375976562, + "logps/rejected": -383.5960388183594, + "loss": 0.7049, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.808988571166992, + "rewards/margins": 2.9117374420166016, + "rewards/rejected": -12.720726013183594, + "step": 11753 + }, + { + "epoch": 1.83, + "learning_rate": 5.5264744021992845e-06, + "logits/chosen": -2.328218460083008, + "logits/rejected": -2.6760778427124023, + "logps/chosen": -78.38717651367188, + "logps/rejected": -167.0975799560547, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.693123817443848, + "rewards/margins": 4.62513542175293, + "rewards/rejected": -10.318259239196777, + "step": 11754 + }, + { + "epoch": 1.83, + "learning_rate": 5.525740961668136e-06, + "logits/chosen": -2.246708631515503, + "logits/rejected": -3.0780439376831055, + "logps/chosen": -89.08414459228516, + "logps/rejected": -466.755126953125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.619325637817383, + "rewards/margins": 9.51010513305664, + "rewards/rejected": -12.129430770874023, + "step": 11755 + }, + { + "epoch": 1.83, + "learning_rate": 5.525007521136988e-06, + "logits/chosen": -3.0336079597473145, + "logits/rejected": -3.051713466644287, + "logps/chosen": -87.03685760498047, + "logps/rejected": -209.69277954101562, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.099246025085449, + "rewards/margins": 6.569772720336914, + "rewards/rejected": -10.669018745422363, + "step": 11756 + }, + { + "epoch": 1.83, + "learning_rate": 5.52427408060584e-06, + "logits/chosen": -3.0854172706604004, + "logits/rejected": -3.217557191848755, + "logps/chosen": -381.7328186035156, + "logps/rejected": -361.5088195800781, + "loss": 0.1031, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.006258010864258, + "rewards/margins": 3.357865333557129, + "rewards/rejected": -9.364123344421387, + "step": 11757 + }, + { + "epoch": 1.83, + "learning_rate": 5.523540640074693e-06, + "logits/chosen": -3.0024254322052, + "logits/rejected": -2.938969373703003, + "logps/chosen": -798.1846923828125, + "logps/rejected": -731.1884765625, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.680917739868164, + "rewards/margins": 5.296534538269043, + "rewards/rejected": -9.977452278137207, + "step": 11758 + }, + { + "epoch": 1.83, + "learning_rate": 5.522807199543545e-06, + "logits/chosen": -2.651336193084717, + "logits/rejected": -2.9435696601867676, + "logps/chosen": -246.184326171875, + "logps/rejected": -475.5664367675781, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.817111968994141, + "rewards/margins": 6.771518707275391, + "rewards/rejected": -12.588630676269531, + "step": 11759 + }, + { + "epoch": 1.83, + "learning_rate": 5.5220737590123966e-06, + "logits/chosen": -3.0890913009643555, + "logits/rejected": -2.707883596420288, + "logps/chosen": -141.9966278076172, + "logps/rejected": -266.50787353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.114884853363037, + "rewards/margins": 12.133310317993164, + "rewards/rejected": -15.248194694519043, + "step": 11760 + }, + { + "epoch": 1.83, + "learning_rate": 5.521340318481249e-06, + "logits/chosen": -2.847719192504883, + "logits/rejected": -2.1490373611450195, + "logps/chosen": -582.8103637695312, + "logps/rejected": -359.62493896484375, + "loss": 0.4079, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.604959964752197, + "rewards/margins": 1.1270902156829834, + "rewards/rejected": -7.732049942016602, + "step": 11761 + }, + { + "epoch": 1.83, + "learning_rate": 5.520606877950101e-06, + "logits/chosen": -2.9087820053100586, + "logits/rejected": -2.9770426750183105, + "logps/chosen": -234.3634490966797, + "logps/rejected": -368.95428466796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2388904094696045, + "rewards/margins": 8.664361953735352, + "rewards/rejected": -11.903251647949219, + "step": 11762 + }, + { + "epoch": 1.83, + "learning_rate": 5.519873437418954e-06, + "logits/chosen": -2.668546676635742, + "logits/rejected": -3.0071780681610107, + "logps/chosen": -213.97523498535156, + "logps/rejected": -453.402099609375, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.07562255859375, + "rewards/margins": 5.637399673461914, + "rewards/rejected": -9.713022232055664, + "step": 11763 + }, + { + "epoch": 1.83, + "learning_rate": 5.519139996887806e-06, + "logits/chosen": -2.946643114089966, + "logits/rejected": -2.8652114868164062, + "logps/chosen": -488.817626953125, + "logps/rejected": -493.09698486328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3674240112304688, + "rewards/margins": 8.712809562683105, + "rewards/rejected": -11.080233573913574, + "step": 11764 + }, + { + "epoch": 1.83, + "learning_rate": 5.518406556356658e-06, + "logits/chosen": -2.288984537124634, + "logits/rejected": -2.95975661277771, + "logps/chosen": -107.03857421875, + "logps/rejected": -334.7088928222656, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.528389930725098, + "rewards/margins": 7.8298468589782715, + "rewards/rejected": -12.358236312866211, + "step": 11765 + }, + { + "epoch": 1.83, + "learning_rate": 5.5176731158255095e-06, + "logits/chosen": -2.250838041305542, + "logits/rejected": -2.8944427967071533, + "logps/chosen": -163.6522674560547, + "logps/rejected": -186.40798950195312, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.975488185882568, + "rewards/margins": 7.046361923217773, + "rewards/rejected": -12.0218505859375, + "step": 11766 + }, + { + "epoch": 1.83, + "learning_rate": 5.516939675294362e-06, + "logits/chosen": -2.6629230976104736, + "logits/rejected": -3.0875113010406494, + "logps/chosen": -669.7048950195312, + "logps/rejected": -647.3529663085938, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3632307052612305, + "rewards/margins": 7.61884880065918, + "rewards/rejected": -11.982080459594727, + "step": 11767 + }, + { + "epoch": 1.83, + "learning_rate": 5.516206234763214e-06, + "logits/chosen": -3.018009901046753, + "logits/rejected": -2.8589236736297607, + "logps/chosen": -252.26483154296875, + "logps/rejected": -250.59170532226562, + "loss": 0.9187, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.78525447845459, + "rewards/margins": 0.8369698524475098, + "rewards/rejected": -7.622224807739258, + "step": 11768 + }, + { + "epoch": 1.83, + "learning_rate": 5.515472794232066e-06, + "logits/chosen": -2.5054876804351807, + "logits/rejected": -2.7992122173309326, + "logps/chosen": -168.20553588867188, + "logps/rejected": -332.40093994140625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.545087814331055, + "rewards/margins": 8.149572372436523, + "rewards/rejected": -13.694660186767578, + "step": 11769 + }, + { + "epoch": 1.83, + "learning_rate": 5.514739353700918e-06, + "logits/chosen": -1.871691346168518, + "logits/rejected": -2.8360366821289062, + "logps/chosen": -363.11199951171875, + "logps/rejected": -521.1863403320312, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.944065570831299, + "rewards/margins": 8.274269104003906, + "rewards/rejected": -14.218334197998047, + "step": 11770 + }, + { + "epoch": 1.83, + "learning_rate": 5.51400591316977e-06, + "logits/chosen": -2.1090126037597656, + "logits/rejected": -2.8286166191101074, + "logps/chosen": -225.93389892578125, + "logps/rejected": -416.38507080078125, + "loss": 0.0273, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.238194942474365, + "rewards/margins": 6.478489875793457, + "rewards/rejected": -11.716684341430664, + "step": 11771 + }, + { + "epoch": 1.83, + "learning_rate": 5.5132724726386225e-06, + "logits/chosen": -1.1653120517730713, + "logits/rejected": -2.476182699203491, + "logps/chosen": -86.99288940429688, + "logps/rejected": -387.4815673828125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.569599151611328, + "rewards/margins": 7.275030136108398, + "rewards/rejected": -11.844629287719727, + "step": 11772 + }, + { + "epoch": 1.83, + "learning_rate": 5.512539032107474e-06, + "logits/chosen": -1.1000112295150757, + "logits/rejected": -2.833784341812134, + "logps/chosen": -109.93463897705078, + "logps/rejected": -205.33872985839844, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.911831259727478, + "rewards/margins": 7.352791786193848, + "rewards/rejected": -9.264623641967773, + "step": 11773 + }, + { + "epoch": 1.83, + "learning_rate": 5.511805591576326e-06, + "logits/chosen": -2.8190269470214844, + "logits/rejected": -3.0037612915039062, + "logps/chosen": -52.640785217285156, + "logps/rejected": -200.02784729003906, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.202430725097656, + "rewards/margins": 5.599783897399902, + "rewards/rejected": -9.802214622497559, + "step": 11774 + }, + { + "epoch": 1.83, + "learning_rate": 5.511072151045178e-06, + "logits/chosen": -2.673654079437256, + "logits/rejected": -1.778466820716858, + "logps/chosen": -384.7987365722656, + "logps/rejected": -392.132080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.099631309509277, + "rewards/margins": 11.102025985717773, + "rewards/rejected": -16.201658248901367, + "step": 11775 + }, + { + "epoch": 1.83, + "learning_rate": 5.510338710514031e-06, + "logits/chosen": -2.821287155151367, + "logits/rejected": -2.7356557846069336, + "logps/chosen": -423.98858642578125, + "logps/rejected": -437.64837646484375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0566253662109375, + "rewards/margins": 6.932504653930664, + "rewards/rejected": -9.989130020141602, + "step": 11776 + }, + { + "epoch": 1.83, + "learning_rate": 5.509605269982883e-06, + "logits/chosen": -2.961376428604126, + "logits/rejected": -3.0113065242767334, + "logps/chosen": -128.4250946044922, + "logps/rejected": -285.7579040527344, + "loss": 0.2416, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.152299880981445, + "rewards/margins": 3.2815234661102295, + "rewards/rejected": -10.433823585510254, + "step": 11777 + }, + { + "epoch": 1.83, + "learning_rate": 5.5088718294517355e-06, + "logits/chosen": -2.813164234161377, + "logits/rejected": -2.935375213623047, + "logps/chosen": -184.44117736816406, + "logps/rejected": -348.3682861328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2964043617248535, + "rewards/margins": 8.552032470703125, + "rewards/rejected": -11.84843635559082, + "step": 11778 + }, + { + "epoch": 1.83, + "learning_rate": 5.508138388920587e-06, + "logits/chosen": -0.6241654753684998, + "logits/rejected": -2.6382360458374023, + "logps/chosen": -124.94279479980469, + "logps/rejected": -510.7088928222656, + "loss": 0.2432, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.422013282775879, + "rewards/margins": 5.559283256530762, + "rewards/rejected": -13.98129653930664, + "step": 11779 + }, + { + "epoch": 1.83, + "learning_rate": 5.50740494838944e-06, + "logits/chosen": -1.2788420915603638, + "logits/rejected": -2.5478224754333496, + "logps/chosen": -133.9981689453125, + "logps/rejected": -421.86614990234375, + "loss": 0.2561, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.622690200805664, + "rewards/margins": 5.352711200714111, + "rewards/rejected": -13.975400924682617, + "step": 11780 + }, + { + "epoch": 1.83, + "learning_rate": 5.506671507858292e-06, + "logits/chosen": -3.0122177600860596, + "logits/rejected": -2.7516753673553467, + "logps/chosen": -361.6941833496094, + "logps/rejected": -476.4921569824219, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.65723991394043, + "rewards/margins": 8.235788345336914, + "rewards/rejected": -13.893028259277344, + "step": 11781 + }, + { + "epoch": 1.83, + "learning_rate": 5.505938067327144e-06, + "logits/chosen": -2.01979398727417, + "logits/rejected": -2.683650255203247, + "logps/chosen": -170.79434204101562, + "logps/rejected": -264.8612060546875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.442676544189453, + "rewards/margins": 6.667715072631836, + "rewards/rejected": -15.110391616821289, + "step": 11782 + }, + { + "epoch": 1.83, + "learning_rate": 5.505204626795996e-06, + "logits/chosen": -1.6506344079971313, + "logits/rejected": -2.548673629760742, + "logps/chosen": -114.759765625, + "logps/rejected": -253.50389099121094, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.644742965698242, + "rewards/margins": 5.545164108276367, + "rewards/rejected": -12.18990707397461, + "step": 11783 + }, + { + "epoch": 1.83, + "learning_rate": 5.504471186264848e-06, + "logits/chosen": -2.292372941970825, + "logits/rejected": -2.892324447631836, + "logps/chosen": -297.334228515625, + "logps/rejected": -361.0756530761719, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.835079193115234, + "rewards/margins": 6.864167213439941, + "rewards/rejected": -12.699247360229492, + "step": 11784 + }, + { + "epoch": 1.83, + "learning_rate": 5.5037377457337e-06, + "logits/chosen": -1.7573939561843872, + "logits/rejected": -2.760719060897827, + "logps/chosen": -208.4008331298828, + "logps/rejected": -454.78375244140625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.885687828063965, + "rewards/margins": 7.563093662261963, + "rewards/rejected": -14.448781967163086, + "step": 11785 + }, + { + "epoch": 1.83, + "learning_rate": 5.503004305202552e-06, + "logits/chosen": -2.9047834873199463, + "logits/rejected": -2.917001247406006, + "logps/chosen": -117.65654754638672, + "logps/rejected": -257.7212219238281, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.049540996551514, + "rewards/margins": 7.553171634674072, + "rewards/rejected": -11.602712631225586, + "step": 11786 + }, + { + "epoch": 1.83, + "learning_rate": 5.502270864671404e-06, + "logits/chosen": -3.016535520553589, + "logits/rejected": -2.802591562271118, + "logps/chosen": -339.57867431640625, + "logps/rejected": -296.6114501953125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.327307224273682, + "rewards/margins": 10.433109283447266, + "rewards/rejected": -15.760416984558105, + "step": 11787 + }, + { + "epoch": 1.83, + "learning_rate": 5.501537424140256e-06, + "logits/chosen": -2.792787551879883, + "logits/rejected": -2.9344096183776855, + "logps/chosen": -378.3018493652344, + "logps/rejected": -495.47186279296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.305469989776611, + "rewards/margins": 8.304062843322754, + "rewards/rejected": -12.609533309936523, + "step": 11788 + }, + { + "epoch": 1.83, + "learning_rate": 5.500803983609109e-06, + "logits/chosen": -2.177502155303955, + "logits/rejected": -3.030923366546631, + "logps/chosen": -51.72932434082031, + "logps/rejected": -473.04730224609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9279794692993164, + "rewards/margins": 12.74500846862793, + "rewards/rejected": -16.672988891601562, + "step": 11789 + }, + { + "epoch": 1.83, + "learning_rate": 5.5000705430779605e-06, + "logits/chosen": -1.5521526336669922, + "logits/rejected": -2.6001811027526855, + "logps/chosen": -134.5137481689453, + "logps/rejected": -322.86920166015625, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.569740295410156, + "rewards/margins": 5.4737725257873535, + "rewards/rejected": -11.043512344360352, + "step": 11790 + }, + { + "epoch": 1.83, + "learning_rate": 5.499337102546812e-06, + "logits/chosen": -1.526237964630127, + "logits/rejected": -2.7903623580932617, + "logps/chosen": -216.26690673828125, + "logps/rejected": -677.4401245117188, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.308987617492676, + "rewards/margins": 8.05501937866211, + "rewards/rejected": -15.364006042480469, + "step": 11791 + }, + { + "epoch": 1.83, + "learning_rate": 5.498603662015664e-06, + "logits/chosen": -3.035184144973755, + "logits/rejected": -2.2639036178588867, + "logps/chosen": -387.93133544921875, + "logps/rejected": -144.13336181640625, + "loss": 0.0312, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3921003341674805, + "rewards/margins": 5.3587446212768555, + "rewards/rejected": -10.750844955444336, + "step": 11792 + }, + { + "epoch": 1.83, + "learning_rate": 5.497870221484516e-06, + "logits/chosen": -2.0094475746154785, + "logits/rejected": -2.8470237255096436, + "logps/chosen": -155.69741821289062, + "logps/rejected": -482.6146240234375, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.68328332901001, + "rewards/margins": 6.865601539611816, + "rewards/rejected": -14.548885345458984, + "step": 11793 + }, + { + "epoch": 1.83, + "learning_rate": 5.497136780953369e-06, + "logits/chosen": -2.688401937484741, + "logits/rejected": -2.790480136871338, + "logps/chosen": -131.54287719726562, + "logps/rejected": -371.0695495605469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.891697883605957, + "rewards/margins": 11.706642150878906, + "rewards/rejected": -15.59834098815918, + "step": 11794 + }, + { + "epoch": 1.83, + "learning_rate": 5.496403340422222e-06, + "logits/chosen": -2.1244518756866455, + "logits/rejected": -2.582634925842285, + "logps/chosen": -198.2105255126953, + "logps/rejected": -340.75164794921875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.77995491027832, + "rewards/margins": 7.981602668762207, + "rewards/rejected": -12.761556625366211, + "step": 11795 + }, + { + "epoch": 1.83, + "learning_rate": 5.4956698998910735e-06, + "logits/chosen": -2.3295910358428955, + "logits/rejected": -2.8044703006744385, + "logps/chosen": -280.6966552734375, + "logps/rejected": -445.84808349609375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.91558837890625, + "rewards/margins": 8.409916877746582, + "rewards/rejected": -14.325505256652832, + "step": 11796 + }, + { + "epoch": 1.83, + "learning_rate": 5.494936459359925e-06, + "logits/chosen": -2.934025526046753, + "logits/rejected": -2.6477761268615723, + "logps/chosen": -655.6342163085938, + "logps/rejected": -623.5060424804688, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.424079895019531, + "rewards/margins": 7.711899280548096, + "rewards/rejected": -14.135978698730469, + "step": 11797 + }, + { + "epoch": 1.83, + "learning_rate": 5.494203018828778e-06, + "logits/chosen": -2.4752113819122314, + "logits/rejected": -2.8109664916992188, + "logps/chosen": -209.09725952148438, + "logps/rejected": -358.07183837890625, + "loss": 0.1, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.13955545425415, + "rewards/margins": 3.2075982093811035, + "rewards/rejected": -10.347153663635254, + "step": 11798 + }, + { + "epoch": 1.83, + "learning_rate": 5.49346957829763e-06, + "logits/chosen": -2.8244454860687256, + "logits/rejected": -2.9091739654541016, + "logps/chosen": -140.91375732421875, + "logps/rejected": -344.9688720703125, + "loss": 0.034, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3979783058166504, + "rewards/margins": 7.960125923156738, + "rewards/rejected": -11.35810375213623, + "step": 11799 + }, + { + "epoch": 1.84, + "learning_rate": 5.492736137766482e-06, + "logits/chosen": -1.673994779586792, + "logits/rejected": -2.926679849624634, + "logps/chosen": -107.09521484375, + "logps/rejected": -346.0205078125, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.480721473693848, + "rewards/margins": 8.118414878845215, + "rewards/rejected": -14.599136352539062, + "step": 11800 + }, + { + "epoch": 1.84, + "learning_rate": 5.492002697235334e-06, + "logits/chosen": -2.9736170768737793, + "logits/rejected": -1.949534296989441, + "logps/chosen": -745.9415283203125, + "logps/rejected": -765.0115356445312, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.302162170410156, + "rewards/margins": 6.0586090087890625, + "rewards/rejected": -11.360771179199219, + "step": 11801 + }, + { + "epoch": 1.84, + "learning_rate": 5.491269256704186e-06, + "logits/chosen": -2.892186164855957, + "logits/rejected": -2.7985947132110596, + "logps/chosen": -324.74603271484375, + "logps/rejected": -536.4979858398438, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.288866996765137, + "rewards/margins": 7.808302402496338, + "rewards/rejected": -13.097169876098633, + "step": 11802 + }, + { + "epoch": 1.84, + "learning_rate": 5.490535816173038e-06, + "logits/chosen": -1.416494607925415, + "logits/rejected": -2.649921417236328, + "logps/chosen": -227.3907470703125, + "logps/rejected": -523.94140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.365159034729004, + "rewards/margins": 14.329395294189453, + "rewards/rejected": -18.69455337524414, + "step": 11803 + }, + { + "epoch": 1.84, + "learning_rate": 5.48980237564189e-06, + "logits/chosen": -2.9856064319610596, + "logits/rejected": -2.994757890701294, + "logps/chosen": -122.70370483398438, + "logps/rejected": -332.28839111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.523920059204102, + "rewards/margins": 11.747455596923828, + "rewards/rejected": -16.27137565612793, + "step": 11804 + }, + { + "epoch": 1.84, + "learning_rate": 5.489068935110742e-06, + "logits/chosen": -2.7894790172576904, + "logits/rejected": -2.808119297027588, + "logps/chosen": -53.14622116088867, + "logps/rejected": -144.9235382080078, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.802093029022217, + "rewards/margins": 6.170341491699219, + "rewards/rejected": -9.972434997558594, + "step": 11805 + }, + { + "epoch": 1.84, + "learning_rate": 5.488335494579594e-06, + "logits/chosen": -3.0813052654266357, + "logits/rejected": -1.9114582538604736, + "logps/chosen": -251.08192443847656, + "logps/rejected": -349.8529357910156, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.532280921936035, + "rewards/margins": 6.926042556762695, + "rewards/rejected": -11.458324432373047, + "step": 11806 + }, + { + "epoch": 1.84, + "learning_rate": 5.487602054048447e-06, + "logits/chosen": -2.9717297554016113, + "logits/rejected": -2.9663612842559814, + "logps/chosen": -577.2617797851562, + "logps/rejected": -579.0811157226562, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.289115905761719, + "rewards/margins": 8.829561233520508, + "rewards/rejected": -14.118677139282227, + "step": 11807 + }, + { + "epoch": 1.84, + "learning_rate": 5.486868613517299e-06, + "logits/chosen": -2.4801461696624756, + "logits/rejected": -2.8277881145477295, + "logps/chosen": -90.27464294433594, + "logps/rejected": -215.4616241455078, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0435991287231445, + "rewards/margins": 5.902195930480957, + "rewards/rejected": -9.945795059204102, + "step": 11808 + }, + { + "epoch": 1.84, + "learning_rate": 5.4861351729861505e-06, + "logits/chosen": -1.8357009887695312, + "logits/rejected": -2.929333448410034, + "logps/chosen": -104.92294311523438, + "logps/rejected": -444.6812438964844, + "loss": 0.7231, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.901913642883301, + "rewards/margins": 6.540938854217529, + "rewards/rejected": -12.442852020263672, + "step": 11809 + }, + { + "epoch": 1.84, + "learning_rate": 5.485401732455002e-06, + "logits/chosen": -2.839907646179199, + "logits/rejected": -1.6621456146240234, + "logps/chosen": -608.8984375, + "logps/rejected": -474.93914794921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.342237949371338, + "rewards/margins": 11.405531883239746, + "rewards/rejected": -13.747770309448242, + "step": 11810 + }, + { + "epoch": 1.84, + "learning_rate": 5.484668291923855e-06, + "logits/chosen": -1.9578174352645874, + "logits/rejected": -2.9618358612060547, + "logps/chosen": -142.78932189941406, + "logps/rejected": -328.0902404785156, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.618248462677002, + "rewards/margins": 5.365490436553955, + "rewards/rejected": -10.983738899230957, + "step": 11811 + }, + { + "epoch": 1.84, + "learning_rate": 5.483934851392708e-06, + "logits/chosen": -2.803331136703491, + "logits/rejected": -1.1884875297546387, + "logps/chosen": -329.7038269042969, + "logps/rejected": -201.06968688964844, + "loss": 0.9778, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.850503921508789, + "rewards/margins": 1.7822566032409668, + "rewards/rejected": -8.632761001586914, + "step": 11812 + }, + { + "epoch": 1.84, + "learning_rate": 5.48320141086156e-06, + "logits/chosen": -1.7038615942001343, + "logits/rejected": -1.79177987575531, + "logps/chosen": -1080.51513671875, + "logps/rejected": -529.8851928710938, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.403837203979492, + "rewards/margins": 9.425820350646973, + "rewards/rejected": -18.82965850830078, + "step": 11813 + }, + { + "epoch": 1.84, + "learning_rate": 5.4824679703304116e-06, + "logits/chosen": -3.086500406265259, + "logits/rejected": -3.132127046585083, + "logps/chosen": -149.70066833496094, + "logps/rejected": -324.7606201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4097957611083984, + "rewards/margins": 11.220907211303711, + "rewards/rejected": -14.63070297241211, + "step": 11814 + }, + { + "epoch": 1.84, + "learning_rate": 5.4817345297992634e-06, + "logits/chosen": -3.0415165424346924, + "logits/rejected": -3.0270042419433594, + "logps/chosen": -512.0980224609375, + "logps/rejected": -310.6693115234375, + "loss": 1.3515, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.8778581619262695, + "rewards/margins": 1.8599340915679932, + "rewards/rejected": -7.737792015075684, + "step": 11815 + }, + { + "epoch": 1.84, + "learning_rate": 5.481001089268116e-06, + "logits/chosen": -2.2657415866851807, + "logits/rejected": -2.9602835178375244, + "logps/chosen": -75.15726470947266, + "logps/rejected": -367.5513916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.767047643661499, + "rewards/margins": 11.451728820800781, + "rewards/rejected": -14.218775749206543, + "step": 11816 + }, + { + "epoch": 1.84, + "learning_rate": 5.480267648736968e-06, + "logits/chosen": -2.5661463737487793, + "logits/rejected": -2.5566720962524414, + "logps/chosen": -92.19794464111328, + "logps/rejected": -531.0103149414062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7318620681762695, + "rewards/margins": 10.415958404541016, + "rewards/rejected": -15.147819519042969, + "step": 11817 + }, + { + "epoch": 1.84, + "learning_rate": 5.47953420820582e-06, + "logits/chosen": -2.9057116508483887, + "logits/rejected": -2.693248987197876, + "logps/chosen": -221.26222229003906, + "logps/rejected": -257.81072998046875, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.50538444519043, + "rewards/margins": 6.989198684692383, + "rewards/rejected": -13.494583129882812, + "step": 11818 + }, + { + "epoch": 1.84, + "learning_rate": 5.478800767674672e-06, + "logits/chosen": -1.5102319717407227, + "logits/rejected": -2.90714693069458, + "logps/chosen": -456.8293762207031, + "logps/rejected": -645.3883056640625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.359953880310059, + "rewards/margins": 9.522974967956543, + "rewards/rejected": -13.882928848266602, + "step": 11819 + }, + { + "epoch": 1.84, + "learning_rate": 5.478067327143524e-06, + "logits/chosen": -1.7641141414642334, + "logits/rejected": -2.547022819519043, + "logps/chosen": -162.06314086914062, + "logps/rejected": -395.24462890625, + "loss": 0.4818, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.802791595458984, + "rewards/margins": 5.141138076782227, + "rewards/rejected": -10.943929672241211, + "step": 11820 + }, + { + "epoch": 1.84, + "learning_rate": 5.477333886612376e-06, + "logits/chosen": -1.7501957416534424, + "logits/rejected": -2.7170636653900146, + "logps/chosen": -346.5759582519531, + "logps/rejected": -680.2211303710938, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.269546508789062, + "rewards/margins": 8.272027969360352, + "rewards/rejected": -16.541574478149414, + "step": 11821 + }, + { + "epoch": 1.84, + "learning_rate": 5.476600446081228e-06, + "logits/chosen": -2.8908231258392334, + "logits/rejected": -1.729995846748352, + "logps/chosen": -284.11669921875, + "logps/rejected": -273.8719787597656, + "loss": 0.1536, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.811859130859375, + "rewards/margins": 6.460212707519531, + "rewards/rejected": -14.272071838378906, + "step": 11822 + }, + { + "epoch": 1.84, + "learning_rate": 5.47586700555008e-06, + "logits/chosen": -2.780229330062866, + "logits/rejected": -3.0372493267059326, + "logps/chosen": -152.11233520507812, + "logps/rejected": -268.1098937988281, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.517829418182373, + "rewards/margins": 6.1613850593566895, + "rewards/rejected": -10.679214477539062, + "step": 11823 + }, + { + "epoch": 1.84, + "learning_rate": 5.475133565018932e-06, + "logits/chosen": -2.9774818420410156, + "logits/rejected": -2.532747268676758, + "logps/chosen": -226.8861541748047, + "logps/rejected": -277.437744140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8009426593780518, + "rewards/margins": 9.702303886413574, + "rewards/rejected": -12.503246307373047, + "step": 11824 + }, + { + "epoch": 1.84, + "learning_rate": 5.474400124487785e-06, + "logits/chosen": -2.6490695476531982, + "logits/rejected": -2.9965648651123047, + "logps/chosen": -83.93862915039062, + "logps/rejected": -291.17608642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.130646228790283, + "rewards/margins": 11.77944278717041, + "rewards/rejected": -15.910089492797852, + "step": 11825 + }, + { + "epoch": 1.84, + "learning_rate": 5.473666683956637e-06, + "logits/chosen": -2.6567611694335938, + "logits/rejected": -2.986161947250366, + "logps/chosen": -262.50335693359375, + "logps/rejected": -408.330078125, + "loss": 0.238, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.218662261962891, + "rewards/margins": 3.6859638690948486, + "rewards/rejected": -8.90462589263916, + "step": 11826 + }, + { + "epoch": 1.84, + "learning_rate": 5.4729332434254885e-06, + "logits/chosen": -2.07598614692688, + "logits/rejected": -2.7409424781799316, + "logps/chosen": -162.87203979492188, + "logps/rejected": -417.44952392578125, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.360411643981934, + "rewards/margins": 8.948576927185059, + "rewards/rejected": -16.308988571166992, + "step": 11827 + }, + { + "epoch": 1.84, + "learning_rate": 5.472199802894341e-06, + "logits/chosen": -3.174215078353882, + "logits/rejected": -3.2359371185302734, + "logps/chosen": -170.7180938720703, + "logps/rejected": -203.06857299804688, + "loss": 1.9223, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.856110572814941, + "rewards/margins": 2.9199280738830566, + "rewards/rejected": -8.776039123535156, + "step": 11828 + }, + { + "epoch": 1.84, + "learning_rate": 5.471466362363194e-06, + "logits/chosen": -2.515069007873535, + "logits/rejected": -2.9892165660858154, + "logps/chosen": -263.3199157714844, + "logps/rejected": -387.6718444824219, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.888683795928955, + "rewards/margins": 8.512118339538574, + "rewards/rejected": -11.400802612304688, + "step": 11829 + }, + { + "epoch": 1.84, + "learning_rate": 5.470732921832046e-06, + "logits/chosen": -2.035841941833496, + "logits/rejected": -3.042828321456909, + "logps/chosen": -131.67103576660156, + "logps/rejected": -166.85128784179688, + "loss": 0.8459, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.753459930419922, + "rewards/margins": 2.785705089569092, + "rewards/rejected": -8.539165496826172, + "step": 11830 + }, + { + "epoch": 1.84, + "learning_rate": 5.469999481300898e-06, + "logits/chosen": -2.330634832382202, + "logits/rejected": -1.4667818546295166, + "logps/chosen": -251.59323120117188, + "logps/rejected": -327.52630615234375, + "loss": 0.3908, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.362368583679199, + "rewards/margins": 6.69073486328125, + "rewards/rejected": -13.053104400634766, + "step": 11831 + }, + { + "epoch": 1.84, + "learning_rate": 5.46926604076975e-06, + "logits/chosen": -2.046802520751953, + "logits/rejected": -2.8604297637939453, + "logps/chosen": -201.98681640625, + "logps/rejected": -305.63037109375, + "loss": 1.5674, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.970961570739746, + "rewards/margins": 1.6465511322021484, + "rewards/rejected": -9.617512702941895, + "step": 11832 + }, + { + "epoch": 1.84, + "learning_rate": 5.4685326002386015e-06, + "logits/chosen": -2.5644314289093018, + "logits/rejected": -3.133471965789795, + "logps/chosen": -78.7249755859375, + "logps/rejected": -219.03097534179688, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.473284721374512, + "rewards/margins": 6.486786842346191, + "rewards/rejected": -12.960071563720703, + "step": 11833 + }, + { + "epoch": 1.84, + "learning_rate": 5.467799159707454e-06, + "logits/chosen": -1.1839200258255005, + "logits/rejected": -3.000450372695923, + "logps/chosen": -216.8634796142578, + "logps/rejected": -339.57958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9397857189178467, + "rewards/margins": 10.044464111328125, + "rewards/rejected": -13.984251022338867, + "step": 11834 + }, + { + "epoch": 1.84, + "learning_rate": 5.467065719176306e-06, + "logits/chosen": -2.3335466384887695, + "logits/rejected": -3.021745204925537, + "logps/chosen": -621.7562866210938, + "logps/rejected": -627.3648681640625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.754680156707764, + "rewards/margins": 7.0545196533203125, + "rewards/rejected": -12.809200286865234, + "step": 11835 + }, + { + "epoch": 1.84, + "learning_rate": 5.466332278645158e-06, + "logits/chosen": -2.7076356410980225, + "logits/rejected": -2.8232686519622803, + "logps/chosen": -158.00259399414062, + "logps/rejected": -306.94921875, + "loss": 0.0635, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.683825492858887, + "rewards/margins": 5.784351348876953, + "rewards/rejected": -13.46817684173584, + "step": 11836 + }, + { + "epoch": 1.84, + "learning_rate": 5.46559883811401e-06, + "logits/chosen": -1.9714730978012085, + "logits/rejected": -2.8736138343811035, + "logps/chosen": -132.20208740234375, + "logps/rejected": -326.822265625, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.601243495941162, + "rewards/margins": 4.434179306030273, + "rewards/rejected": -10.035423278808594, + "step": 11837 + }, + { + "epoch": 1.84, + "learning_rate": 5.4648653975828626e-06, + "logits/chosen": -2.982698678970337, + "logits/rejected": -1.9013723134994507, + "logps/chosen": -573.767822265625, + "logps/rejected": -413.8551025390625, + "loss": 1.0549, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.962957859039307, + "rewards/margins": 1.67262601852417, + "rewards/rejected": -9.635583877563477, + "step": 11838 + }, + { + "epoch": 1.84, + "learning_rate": 5.4641319570517144e-06, + "logits/chosen": -1.5871047973632812, + "logits/rejected": -2.0058956146240234, + "logps/chosen": -132.87548828125, + "logps/rejected": -368.170654296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4110822677612305, + "rewards/margins": 9.375825881958008, + "rewards/rejected": -13.786908149719238, + "step": 11839 + }, + { + "epoch": 1.84, + "learning_rate": 5.463398516520566e-06, + "logits/chosen": -2.8386480808258057, + "logits/rejected": -2.978628158569336, + "logps/chosen": -101.04609680175781, + "logps/rejected": -349.920654296875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.373470783233643, + "rewards/margins": 11.959539413452148, + "rewards/rejected": -16.333009719848633, + "step": 11840 + }, + { + "epoch": 1.84, + "learning_rate": 5.462665075989418e-06, + "logits/chosen": -2.8194406032562256, + "logits/rejected": -2.9268341064453125, + "logps/chosen": -197.76834106445312, + "logps/rejected": -116.0946044921875, + "loss": 0.1393, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.31682825088501, + "rewards/margins": 3.947134494781494, + "rewards/rejected": -8.263962745666504, + "step": 11841 + }, + { + "epoch": 1.84, + "learning_rate": 5.46193163545827e-06, + "logits/chosen": -1.213215947151184, + "logits/rejected": -3.01692271232605, + "logps/chosen": -329.7717590332031, + "logps/rejected": -429.4350280761719, + "loss": 0.2366, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.716856956481934, + "rewards/margins": 4.245379447937012, + "rewards/rejected": -10.962236404418945, + "step": 11842 + }, + { + "epoch": 1.84, + "learning_rate": 5.461198194927123e-06, + "logits/chosen": -2.495445728302002, + "logits/rejected": -3.17172908782959, + "logps/chosen": -128.2049102783203, + "logps/rejected": -637.9536743164062, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.603116989135742, + "rewards/margins": 6.988713264465332, + "rewards/rejected": -13.591830253601074, + "step": 11843 + }, + { + "epoch": 1.84, + "learning_rate": 5.460464754395975e-06, + "logits/chosen": -1.9749507904052734, + "logits/rejected": -2.799417495727539, + "logps/chosen": -259.51654052734375, + "logps/rejected": -509.76318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.138498783111572, + "rewards/margins": 10.567859649658203, + "rewards/rejected": -14.706358909606934, + "step": 11844 + }, + { + "epoch": 1.84, + "learning_rate": 5.459731313864827e-06, + "logits/chosen": -2.600724458694458, + "logits/rejected": -2.967756509780884, + "logps/chosen": -174.93167114257812, + "logps/rejected": -195.03985595703125, + "loss": 1.4237, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.723412036895752, + "rewards/margins": 2.8438315391540527, + "rewards/rejected": -8.567243576049805, + "step": 11845 + }, + { + "epoch": 1.84, + "learning_rate": 5.458997873333679e-06, + "logits/chosen": -1.7836189270019531, + "logits/rejected": -2.8080475330352783, + "logps/chosen": -345.7922668457031, + "logps/rejected": -466.7857666015625, + "loss": 1.5684, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.522194862365723, + "rewards/margins": 5.327558994293213, + "rewards/rejected": -12.849754333496094, + "step": 11846 + }, + { + "epoch": 1.84, + "learning_rate": 5.458264432802532e-06, + "logits/chosen": -2.5809719562530518, + "logits/rejected": -3.0702006816864014, + "logps/chosen": -83.01153564453125, + "logps/rejected": -376.86419677734375, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.664603233337402, + "rewards/margins": 5.677026748657227, + "rewards/rejected": -11.341629028320312, + "step": 11847 + }, + { + "epoch": 1.84, + "learning_rate": 5.457530992271384e-06, + "logits/chosen": -2.246614933013916, + "logits/rejected": -2.820611000061035, + "logps/chosen": -105.98994445800781, + "logps/rejected": -386.28460693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.005819797515869, + "rewards/margins": 10.856106758117676, + "rewards/rejected": -13.861927032470703, + "step": 11848 + }, + { + "epoch": 1.84, + "learning_rate": 5.456797551740236e-06, + "logits/chosen": -2.6445276737213135, + "logits/rejected": -3.088207244873047, + "logps/chosen": -130.8540802001953, + "logps/rejected": -387.5955810546875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.920560836791992, + "rewards/margins": 6.808811187744141, + "rewards/rejected": -11.729372024536133, + "step": 11849 + }, + { + "epoch": 1.84, + "learning_rate": 5.456064111209088e-06, + "logits/chosen": -2.9960405826568604, + "logits/rejected": -2.9227869510650635, + "logps/chosen": -185.317138671875, + "logps/rejected": -221.79656982421875, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.799588680267334, + "rewards/margins": 6.239044666290283, + "rewards/rejected": -11.038633346557617, + "step": 11850 + }, + { + "epoch": 1.84, + "learning_rate": 5.4553306706779395e-06, + "logits/chosen": -2.7495834827423096, + "logits/rejected": -1.4022021293640137, + "logps/chosen": -248.4033966064453, + "logps/rejected": -163.2124786376953, + "loss": 1.9418, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.254103660583496, + "rewards/margins": 0.9082651138305664, + "rewards/rejected": -10.162368774414062, + "step": 11851 + }, + { + "epoch": 1.84, + "learning_rate": 5.454597230146792e-06, + "logits/chosen": -1.495996356010437, + "logits/rejected": -2.794924259185791, + "logps/chosen": -189.59640502929688, + "logps/rejected": -570.4572143554688, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.917483329772949, + "rewards/margins": 7.689475059509277, + "rewards/rejected": -14.606958389282227, + "step": 11852 + }, + { + "epoch": 1.84, + "learning_rate": 5.453863789615644e-06, + "logits/chosen": -2.9612960815429688, + "logits/rejected": -3.078490972518921, + "logps/chosen": -234.76925659179688, + "logps/rejected": -425.90850830078125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.148204326629639, + "rewards/margins": 6.559538841247559, + "rewards/rejected": -10.707742691040039, + "step": 11853 + }, + { + "epoch": 1.84, + "learning_rate": 5.453130349084496e-06, + "logits/chosen": -2.8469326496124268, + "logits/rejected": -3.079511880874634, + "logps/chosen": -81.8753890991211, + "logps/rejected": -366.7900390625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7673726081848145, + "rewards/margins": 9.4346923828125, + "rewards/rejected": -13.202064514160156, + "step": 11854 + }, + { + "epoch": 1.84, + "learning_rate": 5.452396908553348e-06, + "logits/chosen": -1.8539454936981201, + "logits/rejected": -2.6909713745117188, + "logps/chosen": -264.03289794921875, + "logps/rejected": -482.18426513671875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.175353527069092, + "rewards/margins": 7.777558326721191, + "rewards/rejected": -14.952911376953125, + "step": 11855 + }, + { + "epoch": 1.84, + "learning_rate": 5.451663468022201e-06, + "logits/chosen": -2.033064603805542, + "logits/rejected": -3.0044732093811035, + "logps/chosen": -252.9137725830078, + "logps/rejected": -304.3210754394531, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.590597629547119, + "rewards/margins": 7.452093601226807, + "rewards/rejected": -10.042691230773926, + "step": 11856 + }, + { + "epoch": 1.84, + "learning_rate": 5.4509300274910525e-06, + "logits/chosen": -3.042323589324951, + "logits/rejected": -2.3921430110931396, + "logps/chosen": -324.06695556640625, + "logps/rejected": -345.9135437011719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.29607355594635, + "rewards/margins": 10.042015075683594, + "rewards/rejected": -11.338088035583496, + "step": 11857 + }, + { + "epoch": 1.84, + "learning_rate": 5.450196586959904e-06, + "logits/chosen": -2.821538209915161, + "logits/rejected": -3.0504822731018066, + "logps/chosen": -367.1625671386719, + "logps/rejected": -248.11215209960938, + "loss": 1.2073, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.077437400817871, + "rewards/margins": -0.7577035427093506, + "rewards/rejected": -7.319733619689941, + "step": 11858 + }, + { + "epoch": 1.84, + "learning_rate": 5.449463146428756e-06, + "logits/chosen": -2.7586963176727295, + "logits/rejected": -2.823193073272705, + "logps/chosen": -181.7780303955078, + "logps/rejected": -141.73971557617188, + "loss": 0.3123, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.930980682373047, + "rewards/margins": 2.555764675140381, + "rewards/rejected": -8.486745834350586, + "step": 11859 + }, + { + "epoch": 1.84, + "learning_rate": 5.448729705897608e-06, + "logits/chosen": -2.835538387298584, + "logits/rejected": -3.002182722091675, + "logps/chosen": -135.03604125976562, + "logps/rejected": -154.4766845703125, + "loss": 0.042, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5888564586639404, + "rewards/margins": 6.9407758712768555, + "rewards/rejected": -9.529632568359375, + "step": 11860 + }, + { + "epoch": 1.84, + "learning_rate": 5.447996265366461e-06, + "logits/chosen": -2.338308334350586, + "logits/rejected": -3.0694408416748047, + "logps/chosen": -119.17068481445312, + "logps/rejected": -248.64695739746094, + "loss": 0.3048, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.585482597351074, + "rewards/margins": 4.349490642547607, + "rewards/rejected": -9.934972763061523, + "step": 11861 + }, + { + "epoch": 1.84, + "learning_rate": 5.4472628248353136e-06, + "logits/chosen": -2.9705660343170166, + "logits/rejected": -2.8953702449798584, + "logps/chosen": -155.20950317382812, + "logps/rejected": -272.2341003417969, + "loss": 1.5533, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.411904335021973, + "rewards/margins": 2.1544575691223145, + "rewards/rejected": -9.566362380981445, + "step": 11862 + }, + { + "epoch": 1.84, + "learning_rate": 5.4465293843041654e-06, + "logits/chosen": -2.085775852203369, + "logits/rejected": -2.9179272651672363, + "logps/chosen": -278.6778259277344, + "logps/rejected": -210.55670166015625, + "loss": 0.3644, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2741570472717285, + "rewards/margins": 0.8874819278717041, + "rewards/rejected": -8.161639213562012, + "step": 11863 + }, + { + "epoch": 1.85, + "learning_rate": 5.445795943773017e-06, + "logits/chosen": -2.364157199859619, + "logits/rejected": -2.328087091445923, + "logps/chosen": -147.56887817382812, + "logps/rejected": -270.92535400390625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6323418617248535, + "rewards/margins": 6.8891401290893555, + "rewards/rejected": -10.521482467651367, + "step": 11864 + }, + { + "epoch": 1.85, + "learning_rate": 5.44506250324187e-06, + "logits/chosen": -1.572417140007019, + "logits/rejected": -2.761136293411255, + "logps/chosen": -158.98382568359375, + "logps/rejected": -307.4901123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6333839893341064, + "rewards/margins": 10.443770408630371, + "rewards/rejected": -14.077154159545898, + "step": 11865 + }, + { + "epoch": 1.85, + "learning_rate": 5.444329062710722e-06, + "logits/chosen": -3.020803689956665, + "logits/rejected": -2.8762781620025635, + "logps/chosen": -112.38013458251953, + "logps/rejected": -167.82008361816406, + "loss": 0.0723, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.470304012298584, + "rewards/margins": 4.313895225524902, + "rewards/rejected": -9.784198760986328, + "step": 11866 + }, + { + "epoch": 1.85, + "learning_rate": 5.443595622179574e-06, + "logits/chosen": -1.9356669187545776, + "logits/rejected": -2.870546817779541, + "logps/chosen": -207.01089477539062, + "logps/rejected": -228.31576538085938, + "loss": 0.4806, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.973452568054199, + "rewards/margins": 2.158571243286133, + "rewards/rejected": -8.132023811340332, + "step": 11867 + }, + { + "epoch": 1.85, + "learning_rate": 5.442862181648426e-06, + "logits/chosen": -3.042182683944702, + "logits/rejected": -2.1771669387817383, + "logps/chosen": -315.7171936035156, + "logps/rejected": -460.21990966796875, + "loss": 0.034, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.105706214904785, + "rewards/margins": 8.39980697631836, + "rewards/rejected": -15.505514144897461, + "step": 11868 + }, + { + "epoch": 1.85, + "learning_rate": 5.4421287411172776e-06, + "logits/chosen": -1.4426076412200928, + "logits/rejected": -2.392130136489868, + "logps/chosen": -306.99969482421875, + "logps/rejected": -578.205322265625, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.916658878326416, + "rewards/margins": 6.764754295349121, + "rewards/rejected": -11.681413650512695, + "step": 11869 + }, + { + "epoch": 1.85, + "learning_rate": 5.44139530058613e-06, + "logits/chosen": -2.671401262283325, + "logits/rejected": -2.765777111053467, + "logps/chosen": -436.5723571777344, + "logps/rejected": -510.5545959472656, + "loss": 0.4164, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.585386276245117, + "rewards/margins": 5.848598480224609, + "rewards/rejected": -12.433984756469727, + "step": 11870 + }, + { + "epoch": 1.85, + "learning_rate": 5.440661860054982e-06, + "logits/chosen": -2.6461031436920166, + "logits/rejected": -3.1919472217559814, + "logps/chosen": -70.87825012207031, + "logps/rejected": -217.85272216796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7568483352661133, + "rewards/margins": 8.178016662597656, + "rewards/rejected": -9.934864044189453, + "step": 11871 + }, + { + "epoch": 1.85, + "learning_rate": 5.439928419523834e-06, + "logits/chosen": -3.0877671241760254, + "logits/rejected": -2.703991413116455, + "logps/chosen": -299.40020751953125, + "logps/rejected": -207.18931579589844, + "loss": 2.7648, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3493919372558594, + "rewards/margins": 0.7237362861633301, + "rewards/rejected": -4.073128700256348, + "step": 11872 + }, + { + "epoch": 1.85, + "learning_rate": 5.439194978992686e-06, + "logits/chosen": -2.682225227355957, + "logits/rejected": -2.0501625537872314, + "logps/chosen": -326.0448303222656, + "logps/rejected": -304.813232421875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.553240776062012, + "rewards/margins": 6.509284973144531, + "rewards/rejected": -12.06252670288086, + "step": 11873 + }, + { + "epoch": 1.85, + "learning_rate": 5.438461538461539e-06, + "logits/chosen": -1.5766706466674805, + "logits/rejected": -2.199274778366089, + "logps/chosen": -103.20146942138672, + "logps/rejected": -341.58697509765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.253986835479736, + "rewards/margins": 9.049640655517578, + "rewards/rejected": -14.303627014160156, + "step": 11874 + }, + { + "epoch": 1.85, + "learning_rate": 5.4377280979303905e-06, + "logits/chosen": -2.1901328563690186, + "logits/rejected": -2.923450231552124, + "logps/chosen": -194.95281982421875, + "logps/rejected": -382.8025817871094, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.246273040771484, + "rewards/margins": 8.327554702758789, + "rewards/rejected": -13.573827743530273, + "step": 11875 + }, + { + "epoch": 1.85, + "learning_rate": 5.436994657399242e-06, + "logits/chosen": -2.8407962322235107, + "logits/rejected": -1.350462794303894, + "logps/chosen": -980.1171875, + "logps/rejected": -405.25457763671875, + "loss": 0.9947, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.745244979858398, + "rewards/margins": 3.128565788269043, + "rewards/rejected": -12.873811721801758, + "step": 11876 + }, + { + "epoch": 1.85, + "learning_rate": 5.436261216868094e-06, + "logits/chosen": -1.8481392860412598, + "logits/rejected": -2.908322811126709, + "logps/chosen": -297.7685852050781, + "logps/rejected": -499.684814453125, + "loss": 0.1482, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7470574378967285, + "rewards/margins": 2.2071573734283447, + "rewards/rejected": -7.954215049743652, + "step": 11877 + }, + { + "epoch": 1.85, + "learning_rate": 5.435527776336947e-06, + "logits/chosen": -2.8209097385406494, + "logits/rejected": -1.9557806253433228, + "logps/chosen": -101.88262939453125, + "logps/rejected": -307.856689453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.929884910583496, + "rewards/margins": 11.67854118347168, + "rewards/rejected": -15.60842514038086, + "step": 11878 + }, + { + "epoch": 1.85, + "learning_rate": 5.4347943358058e-06, + "logits/chosen": -2.2085773944854736, + "logits/rejected": -2.9196157455444336, + "logps/chosen": -52.9648323059082, + "logps/rejected": -312.10540771484375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.253668308258057, + "rewards/margins": 6.902280807495117, + "rewards/rejected": -11.155948638916016, + "step": 11879 + }, + { + "epoch": 1.85, + "learning_rate": 5.434060895274652e-06, + "logits/chosen": -2.3265349864959717, + "logits/rejected": -2.9134552478790283, + "logps/chosen": -206.135498046875, + "logps/rejected": -264.1153869628906, + "loss": 0.1895, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.375650882720947, + "rewards/margins": 1.8074661493301392, + "rewards/rejected": -7.183116912841797, + "step": 11880 + }, + { + "epoch": 1.85, + "learning_rate": 5.4333274547435035e-06, + "logits/chosen": -2.998659133911133, + "logits/rejected": -2.3091442584991455, + "logps/chosen": -485.42144775390625, + "logps/rejected": -337.31842041015625, + "loss": 0.3377, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.865725517272949, + "rewards/margins": 3.3371875286102295, + "rewards/rejected": -9.202913284301758, + "step": 11881 + }, + { + "epoch": 1.85, + "learning_rate": 5.432594014212355e-06, + "logits/chosen": -2.8923799991607666, + "logits/rejected": -2.0694000720977783, + "logps/chosen": -445.27325439453125, + "logps/rejected": -415.7828674316406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.122472286224365, + "rewards/margins": 8.276008605957031, + "rewards/rejected": -13.398480415344238, + "step": 11882 + }, + { + "epoch": 1.85, + "learning_rate": 5.431860573681208e-06, + "logits/chosen": -2.9477381706237793, + "logits/rejected": -1.3997738361358643, + "logps/chosen": -434.7671203613281, + "logps/rejected": -395.4219055175781, + "loss": 1.4414, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.767430305480957, + "rewards/margins": 4.259642601013184, + "rewards/rejected": -9.02707290649414, + "step": 11883 + }, + { + "epoch": 1.85, + "learning_rate": 5.43112713315006e-06, + "logits/chosen": -2.8558449745178223, + "logits/rejected": -2.8903048038482666, + "logps/chosen": -211.7902069091797, + "logps/rejected": -263.28997802734375, + "loss": 0.082, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.064945220947266, + "rewards/margins": 4.302724838256836, + "rewards/rejected": -10.367670059204102, + "step": 11884 + }, + { + "epoch": 1.85, + "learning_rate": 5.430393692618912e-06, + "logits/chosen": -3.2001149654388428, + "logits/rejected": -3.070175886154175, + "logps/chosen": -525.087646484375, + "logps/rejected": -468.7267761230469, + "loss": 0.065, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.726191520690918, + "rewards/margins": 6.4171857833862305, + "rewards/rejected": -11.143377304077148, + "step": 11885 + }, + { + "epoch": 1.85, + "learning_rate": 5.429660252087764e-06, + "logits/chosen": -2.608933687210083, + "logits/rejected": -1.9703497886657715, + "logps/chosen": -170.5775146484375, + "logps/rejected": -169.72482299804688, + "loss": 0.8706, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.631636619567871, + "rewards/margins": 0.3103208541870117, + "rewards/rejected": -8.941957473754883, + "step": 11886 + }, + { + "epoch": 1.85, + "learning_rate": 5.4289268115566165e-06, + "logits/chosen": -2.6908583641052246, + "logits/rejected": -0.9308328628540039, + "logps/chosen": -269.90911865234375, + "logps/rejected": -87.48111724853516, + "loss": 1.0414, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.414964199066162, + "rewards/margins": 1.2990539073944092, + "rewards/rejected": -7.714017868041992, + "step": 11887 + }, + { + "epoch": 1.85, + "learning_rate": 5.428193371025468e-06, + "logits/chosen": -2.783856153488159, + "logits/rejected": -2.467315435409546, + "logps/chosen": -345.4530029296875, + "logps/rejected": -235.46353149414062, + "loss": 0.6557, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.246413230895996, + "rewards/margins": 1.8439552783966064, + "rewards/rejected": -9.090368270874023, + "step": 11888 + }, + { + "epoch": 1.85, + "learning_rate": 5.42745993049432e-06, + "logits/chosen": -1.7874242067337036, + "logits/rejected": -2.8491697311401367, + "logps/chosen": -185.6411895751953, + "logps/rejected": -432.3864440917969, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.166049957275391, + "rewards/margins": 7.824059009552002, + "rewards/rejected": -11.99010944366455, + "step": 11889 + }, + { + "epoch": 1.85, + "learning_rate": 5.426726489963172e-06, + "logits/chosen": -2.168294668197632, + "logits/rejected": -2.2434194087982178, + "logps/chosen": -332.9241027832031, + "logps/rejected": -357.4805603027344, + "loss": 0.1004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.519026756286621, + "rewards/margins": 2.9596669673919678, + "rewards/rejected": -11.478693008422852, + "step": 11890 + }, + { + "epoch": 1.85, + "learning_rate": 5.425993049432024e-06, + "logits/chosen": -2.147005319595337, + "logits/rejected": -2.705761194229126, + "logps/chosen": -296.399169921875, + "logps/rejected": -395.2281494140625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.337946891784668, + "rewards/margins": 6.489880561828613, + "rewards/rejected": -10.827827453613281, + "step": 11891 + }, + { + "epoch": 1.85, + "learning_rate": 5.425259608900877e-06, + "logits/chosen": -2.6092917919158936, + "logits/rejected": -2.595146894454956, + "logps/chosen": -110.05401611328125, + "logps/rejected": -268.97076416015625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.571322441101074, + "rewards/margins": 7.636843681335449, + "rewards/rejected": -11.208166122436523, + "step": 11892 + }, + { + "epoch": 1.85, + "learning_rate": 5.4245261683697286e-06, + "logits/chosen": -2.8127622604370117, + "logits/rejected": -2.279370069503784, + "logps/chosen": -238.6842498779297, + "logps/rejected": -280.4229736328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.625023603439331, + "rewards/margins": 8.076622009277344, + "rewards/rejected": -11.701645851135254, + "step": 11893 + }, + { + "epoch": 1.85, + "learning_rate": 5.4237927278385804e-06, + "logits/chosen": -2.8443374633789062, + "logits/rejected": -2.8629074096679688, + "logps/chosen": -740.3818969726562, + "logps/rejected": -737.7169189453125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.503973484039307, + "rewards/margins": 7.392342567443848, + "rewards/rejected": -11.896316528320312, + "step": 11894 + }, + { + "epoch": 1.85, + "learning_rate": 5.423059287307433e-06, + "logits/chosen": -2.4751060009002686, + "logits/rejected": -3.0490198135375977, + "logps/chosen": -115.83888244628906, + "logps/rejected": -305.32989501953125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.220419406890869, + "rewards/margins": 8.186725616455078, + "rewards/rejected": -12.407145500183105, + "step": 11895 + }, + { + "epoch": 1.85, + "learning_rate": 5.422325846776286e-06, + "logits/chosen": -2.982668876647949, + "logits/rejected": -1.7207847833633423, + "logps/chosen": -164.4078826904297, + "logps/rejected": -260.8078918457031, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.835671424865723, + "rewards/margins": 5.197813987731934, + "rewards/rejected": -10.033485412597656, + "step": 11896 + }, + { + "epoch": 1.85, + "learning_rate": 5.421592406245138e-06, + "logits/chosen": -2.9840550422668457, + "logits/rejected": -1.8421618938446045, + "logps/chosen": -321.296630859375, + "logps/rejected": -198.7316436767578, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.109175205230713, + "rewards/margins": 8.608491897583008, + "rewards/rejected": -11.717666625976562, + "step": 11897 + }, + { + "epoch": 1.85, + "learning_rate": 5.42085896571399e-06, + "logits/chosen": -1.454060673713684, + "logits/rejected": -2.6281979084014893, + "logps/chosen": -142.89561462402344, + "logps/rejected": -476.806640625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.509582996368408, + "rewards/margins": 9.340645790100098, + "rewards/rejected": -15.850229263305664, + "step": 11898 + }, + { + "epoch": 1.85, + "learning_rate": 5.4201255251828415e-06, + "logits/chosen": -2.089902400970459, + "logits/rejected": -2.693537950515747, + "logps/chosen": -114.81857299804688, + "logps/rejected": -295.4418640136719, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.808683395385742, + "rewards/margins": 7.9435319900512695, + "rewards/rejected": -12.752216339111328, + "step": 11899 + }, + { + "epoch": 1.85, + "learning_rate": 5.419392084651693e-06, + "logits/chosen": -2.836296796798706, + "logits/rejected": -2.936325788497925, + "logps/chosen": -86.94200897216797, + "logps/rejected": -234.50697326660156, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5613017082214355, + "rewards/margins": 6.396544933319092, + "rewards/rejected": -10.957846641540527, + "step": 11900 + }, + { + "epoch": 1.85, + "learning_rate": 5.418658644120546e-06, + "logits/chosen": -2.96610164642334, + "logits/rejected": -1.9084147214889526, + "logps/chosen": -621.4871215820312, + "logps/rejected": -357.2775573730469, + "loss": 0.867, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.139767646789551, + "rewards/margins": 1.5020442008972168, + "rewards/rejected": -7.641811847686768, + "step": 11901 + }, + { + "epoch": 1.85, + "learning_rate": 5.417925203589398e-06, + "logits/chosen": -2.1394031047821045, + "logits/rejected": -2.9752426147460938, + "logps/chosen": -167.6380615234375, + "logps/rejected": -374.4285583496094, + "loss": 0.9015, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.796030521392822, + "rewards/margins": 1.007230520248413, + "rewards/rejected": -6.803260803222656, + "step": 11902 + }, + { + "epoch": 1.85, + "learning_rate": 5.41719176305825e-06, + "logits/chosen": -3.1368632316589355, + "logits/rejected": -2.6881275177001953, + "logps/chosen": -130.1738739013672, + "logps/rejected": -215.83639526367188, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6525959968566895, + "rewards/margins": 6.740101337432861, + "rewards/rejected": -9.39269733428955, + "step": 11903 + }, + { + "epoch": 1.85, + "learning_rate": 5.416458322527102e-06, + "logits/chosen": -2.949620246887207, + "logits/rejected": -3.1701250076293945, + "logps/chosen": -71.4458999633789, + "logps/rejected": -186.82415771484375, + "loss": 0.0489, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.621955394744873, + "rewards/margins": 5.139534950256348, + "rewards/rejected": -9.761489868164062, + "step": 11904 + }, + { + "epoch": 1.85, + "learning_rate": 5.4157248819959545e-06, + "logits/chosen": -3.1158664226531982, + "logits/rejected": -2.6577534675598145, + "logps/chosen": -140.22076416015625, + "logps/rejected": -279.9896545410156, + "loss": 1.6462, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.734982967376709, + "rewards/margins": 7.385554313659668, + "rewards/rejected": -13.120537757873535, + "step": 11905 + }, + { + "epoch": 1.85, + "learning_rate": 5.414991441464806e-06, + "logits/chosen": -2.153186559677124, + "logits/rejected": -2.677309274673462, + "logps/chosen": -265.5339050292969, + "logps/rejected": -348.51641845703125, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.414854049682617, + "rewards/margins": 8.847328186035156, + "rewards/rejected": -16.262182235717773, + "step": 11906 + }, + { + "epoch": 1.85, + "learning_rate": 5.414258000933658e-06, + "logits/chosen": -2.3054661750793457, + "logits/rejected": -3.0154356956481934, + "logps/chosen": -126.55377960205078, + "logps/rejected": -342.8733825683594, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1450347900390625, + "rewards/margins": 5.272737503051758, + "rewards/rejected": -9.41777229309082, + "step": 11907 + }, + { + "epoch": 1.85, + "learning_rate": 5.41352456040251e-06, + "logits/chosen": -1.630037546157837, + "logits/rejected": -2.690096855163574, + "logps/chosen": -121.57839965820312, + "logps/rejected": -254.10638427734375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7797136306762695, + "rewards/margins": 6.797751426696777, + "rewards/rejected": -11.577465057373047, + "step": 11908 + }, + { + "epoch": 1.85, + "learning_rate": 5.412791119871362e-06, + "logits/chosen": -1.5834609270095825, + "logits/rejected": -2.6347970962524414, + "logps/chosen": -131.79566955566406, + "logps/rejected": -403.019775390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7100019454956055, + "rewards/margins": 10.122828483581543, + "rewards/rejected": -15.832830429077148, + "step": 11909 + }, + { + "epoch": 1.85, + "learning_rate": 5.412057679340215e-06, + "logits/chosen": -3.017145872116089, + "logits/rejected": -2.983020782470703, + "logps/chosen": -411.48028564453125, + "logps/rejected": -488.103515625, + "loss": 0.1222, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.467724800109863, + "rewards/margins": 3.452991008758545, + "rewards/rejected": -8.92071533203125, + "step": 11910 + }, + { + "epoch": 1.85, + "learning_rate": 5.411324238809067e-06, + "logits/chosen": -2.6173393726348877, + "logits/rejected": -3.0134105682373047, + "logps/chosen": -160.80572509765625, + "logps/rejected": -292.11273193359375, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.491189002990723, + "rewards/margins": 7.553647994995117, + "rewards/rejected": -13.044836044311523, + "step": 11911 + }, + { + "epoch": 1.85, + "learning_rate": 5.410590798277919e-06, + "logits/chosen": -2.8565235137939453, + "logits/rejected": -2.319587230682373, + "logps/chosen": -256.67156982421875, + "logps/rejected": -385.51812744140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6816396713256836, + "rewards/margins": 8.283012390136719, + "rewards/rejected": -11.964651107788086, + "step": 11912 + }, + { + "epoch": 1.85, + "learning_rate": 5.409857357746771e-06, + "logits/chosen": -1.6190412044525146, + "logits/rejected": -2.764533758163452, + "logps/chosen": -189.6441650390625, + "logps/rejected": -620.054931640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1763761043548584, + "rewards/margins": 11.517839431762695, + "rewards/rejected": -14.694215774536133, + "step": 11913 + }, + { + "epoch": 1.85, + "learning_rate": 5.409123917215624e-06, + "logits/chosen": -2.2398838996887207, + "logits/rejected": -3.0398967266082764, + "logps/chosen": -201.33169555664062, + "logps/rejected": -387.953369140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.87774395942688, + "rewards/margins": 8.902308464050293, + "rewards/rejected": -11.780052185058594, + "step": 11914 + }, + { + "epoch": 1.85, + "learning_rate": 5.408390476684476e-06, + "logits/chosen": -1.1373512744903564, + "logits/rejected": -2.568622589111328, + "logps/chosen": -109.95453643798828, + "logps/rejected": -380.66094970703125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.468748092651367, + "rewards/margins": 10.238463401794434, + "rewards/rejected": -15.7072114944458, + "step": 11915 + }, + { + "epoch": 1.85, + "learning_rate": 5.407657036153328e-06, + "logits/chosen": -1.501112461090088, + "logits/rejected": -2.729126453399658, + "logps/chosen": -171.52996826171875, + "logps/rejected": -353.70635986328125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.762554168701172, + "rewards/margins": 7.791799545288086, + "rewards/rejected": -14.554353713989258, + "step": 11916 + }, + { + "epoch": 1.85, + "learning_rate": 5.40692359562218e-06, + "logits/chosen": -2.7965471744537354, + "logits/rejected": -3.110592842102051, + "logps/chosen": -283.6425476074219, + "logps/rejected": -445.0242614746094, + "loss": 0.0796, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.141695022583008, + "rewards/margins": 4.620377540588379, + "rewards/rejected": -10.76207160949707, + "step": 11917 + }, + { + "epoch": 1.85, + "learning_rate": 5.4061901550910315e-06, + "logits/chosen": -1.7561544179916382, + "logits/rejected": -2.838472843170166, + "logps/chosen": -325.9634094238281, + "logps/rejected": -613.1516723632812, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8099870681762695, + "rewards/margins": 5.5622453689575195, + "rewards/rejected": -12.372232437133789, + "step": 11918 + }, + { + "epoch": 1.85, + "learning_rate": 5.405456714559884e-06, + "logits/chosen": -2.889509916305542, + "logits/rejected": -1.7607239484786987, + "logps/chosen": -233.73301696777344, + "logps/rejected": -260.9409484863281, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5450923442840576, + "rewards/margins": 6.0672807693481445, + "rewards/rejected": -9.612373352050781, + "step": 11919 + }, + { + "epoch": 1.85, + "learning_rate": 5.404723274028736e-06, + "logits/chosen": -3.0346202850341797, + "logits/rejected": -2.5983378887176514, + "logps/chosen": -337.20892333984375, + "logps/rejected": -505.17486572265625, + "loss": 0.0577, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1443099975585938, + "rewards/margins": 4.850203037261963, + "rewards/rejected": -7.994513034820557, + "step": 11920 + }, + { + "epoch": 1.85, + "learning_rate": 5.403989833497588e-06, + "logits/chosen": -2.7989587783813477, + "logits/rejected": -2.3805747032165527, + "logps/chosen": -193.95909118652344, + "logps/rejected": -274.3137512207031, + "loss": 0.1393, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.78199577331543, + "rewards/margins": 4.286281108856201, + "rewards/rejected": -10.068277359008789, + "step": 11921 + }, + { + "epoch": 1.85, + "learning_rate": 5.40325639296644e-06, + "logits/chosen": -2.906348705291748, + "logits/rejected": -2.0068776607513428, + "logps/chosen": -603.50341796875, + "logps/rejected": -316.147705078125, + "loss": 0.0244, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.259585857391357, + "rewards/margins": 7.165072441101074, + "rewards/rejected": -12.424657821655273, + "step": 11922 + }, + { + "epoch": 1.85, + "learning_rate": 5.4025229524352925e-06, + "logits/chosen": -2.68747615814209, + "logits/rejected": -1.7807660102844238, + "logps/chosen": -540.220458984375, + "logps/rejected": -440.7884826660156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.298684120178223, + "rewards/margins": 8.257801055908203, + "rewards/rejected": -13.556485176086426, + "step": 11923 + }, + { + "epoch": 1.85, + "learning_rate": 5.401789511904144e-06, + "logits/chosen": -2.822472333908081, + "logits/rejected": -2.0135934352874756, + "logps/chosen": -380.15869140625, + "logps/rejected": -276.77508544921875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7141809463500977, + "rewards/margins": 9.243446350097656, + "rewards/rejected": -12.957627296447754, + "step": 11924 + }, + { + "epoch": 1.85, + "learning_rate": 5.401056071372996e-06, + "logits/chosen": -2.4921767711639404, + "logits/rejected": -3.0079216957092285, + "logps/chosen": -221.23703002929688, + "logps/rejected": -364.7830810546875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6278023719787598, + "rewards/margins": 7.730723857879639, + "rewards/rejected": -11.358526229858398, + "step": 11925 + }, + { + "epoch": 1.85, + "learning_rate": 5.400322630841848e-06, + "logits/chosen": -2.486328601837158, + "logits/rejected": -1.719854712486267, + "logps/chosen": -212.22976684570312, + "logps/rejected": -441.7524719238281, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.147433280944824, + "rewards/margins": 8.161664962768555, + "rewards/rejected": -15.309099197387695, + "step": 11926 + }, + { + "epoch": 1.85, + "learning_rate": 5.399589190310701e-06, + "logits/chosen": -2.1725666522979736, + "logits/rejected": -2.750786542892456, + "logps/chosen": -203.34725952148438, + "logps/rejected": -372.75634765625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.063771724700928, + "rewards/margins": 9.175918579101562, + "rewards/rejected": -14.239690780639648, + "step": 11927 + }, + { + "epoch": 1.86, + "learning_rate": 5.398855749779553e-06, + "logits/chosen": -2.955676555633545, + "logits/rejected": -2.721471071243286, + "logps/chosen": -244.907470703125, + "logps/rejected": -225.05712890625, + "loss": 0.0438, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.483638763427734, + "rewards/margins": 3.118502140045166, + "rewards/rejected": -13.602140426635742, + "step": 11928 + }, + { + "epoch": 1.86, + "learning_rate": 5.398122309248405e-06, + "logits/chosen": -2.964064598083496, + "logits/rejected": -1.5340299606323242, + "logps/chosen": -603.0770263671875, + "logps/rejected": -570.8624267578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.092154502868652, + "rewards/margins": 9.63032054901123, + "rewards/rejected": -13.722475051879883, + "step": 11929 + }, + { + "epoch": 1.86, + "learning_rate": 5.397388868717257e-06, + "logits/chosen": -2.810830593109131, + "logits/rejected": -2.1492397785186768, + "logps/chosen": -173.87136840820312, + "logps/rejected": -284.2362060546875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.477557182312012, + "rewards/margins": 7.349626064300537, + "rewards/rejected": -12.82718276977539, + "step": 11930 + }, + { + "epoch": 1.86, + "learning_rate": 5.396655428186109e-06, + "logits/chosen": -2.922619104385376, + "logits/rejected": -2.788221836090088, + "logps/chosen": -450.28216552734375, + "logps/rejected": -355.6088562011719, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.260918617248535, + "rewards/margins": 5.424269676208496, + "rewards/rejected": -12.685188293457031, + "step": 11931 + }, + { + "epoch": 1.86, + "learning_rate": 5.395921987654962e-06, + "logits/chosen": -2.4133176803588867, + "logits/rejected": -3.0714223384857178, + "logps/chosen": -114.91477966308594, + "logps/rejected": -334.0116882324219, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.743541717529297, + "rewards/margins": 6.955343246459961, + "rewards/rejected": -13.698884963989258, + "step": 11932 + }, + { + "epoch": 1.86, + "learning_rate": 5.395188547123814e-06, + "logits/chosen": -2.925504684448242, + "logits/rejected": -2.4194324016571045, + "logps/chosen": -293.5721130371094, + "logps/rejected": -270.31988525390625, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.599736213684082, + "rewards/margins": 4.963873863220215, + "rewards/rejected": -9.563610076904297, + "step": 11933 + }, + { + "epoch": 1.86, + "learning_rate": 5.394455106592666e-06, + "logits/chosen": -2.9279890060424805, + "logits/rejected": -2.329714775085449, + "logps/chosen": -109.50244140625, + "logps/rejected": -310.40057373046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7820844650268555, + "rewards/margins": 9.601029396057129, + "rewards/rejected": -13.383113861083984, + "step": 11934 + }, + { + "epoch": 1.86, + "learning_rate": 5.393721666061518e-06, + "logits/chosen": -2.4995219707489014, + "logits/rejected": -2.8378028869628906, + "logps/chosen": -192.71786499023438, + "logps/rejected": -328.5791320800781, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.626217842102051, + "rewards/margins": 7.201376438140869, + "rewards/rejected": -11.827594757080078, + "step": 11935 + }, + { + "epoch": 1.86, + "learning_rate": 5.39298822553037e-06, + "logits/chosen": -1.549832820892334, + "logits/rejected": -2.8292157649993896, + "logps/chosen": -107.6033706665039, + "logps/rejected": -397.1983337402344, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.225565910339355, + "rewards/margins": 9.492010116577148, + "rewards/rejected": -17.71757698059082, + "step": 11936 + }, + { + "epoch": 1.86, + "learning_rate": 5.392254784999222e-06, + "logits/chosen": -2.876642942428589, + "logits/rejected": -2.76255464553833, + "logps/chosen": -325.2162170410156, + "logps/rejected": -318.622314453125, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.853391170501709, + "rewards/margins": 4.903919219970703, + "rewards/rejected": -9.75731086730957, + "step": 11937 + }, + { + "epoch": 1.86, + "learning_rate": 5.391521344468074e-06, + "logits/chosen": -3.13852596282959, + "logits/rejected": -2.8716461658477783, + "logps/chosen": -263.50042724609375, + "logps/rejected": -532.4974365234375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.588001251220703, + "rewards/margins": 9.137218475341797, + "rewards/rejected": -12.7252197265625, + "step": 11938 + }, + { + "epoch": 1.86, + "learning_rate": 5.390787903936926e-06, + "logits/chosen": -3.0641584396362305, + "logits/rejected": -3.07853364944458, + "logps/chosen": -53.16261291503906, + "logps/rejected": -172.11160278320312, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.902705192565918, + "rewards/margins": 7.648860931396484, + "rewards/rejected": -11.551567077636719, + "step": 11939 + }, + { + "epoch": 1.86, + "learning_rate": 5.390054463405778e-06, + "logits/chosen": -3.2581980228424072, + "logits/rejected": -3.2022950649261475, + "logps/chosen": -251.98899841308594, + "logps/rejected": -603.30859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3066582679748535, + "rewards/margins": 9.247403144836426, + "rewards/rejected": -13.554061889648438, + "step": 11940 + }, + { + "epoch": 1.86, + "learning_rate": 5.389321022874631e-06, + "logits/chosen": -2.9865894317626953, + "logits/rejected": -2.5536956787109375, + "logps/chosen": -456.5147705078125, + "logps/rejected": -434.4749755859375, + "loss": 0.5712, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.3527657985687256, + "rewards/margins": 7.441110134124756, + "rewards/rejected": -10.793875694274902, + "step": 11941 + }, + { + "epoch": 1.86, + "learning_rate": 5.3885875823434825e-06, + "logits/chosen": -1.6227785348892212, + "logits/rejected": -3.060523748397827, + "logps/chosen": -425.05450439453125, + "logps/rejected": -521.5264892578125, + "loss": 0.1092, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7036004066467285, + "rewards/margins": 8.42834186553955, + "rewards/rejected": -16.131942749023438, + "step": 11942 + }, + { + "epoch": 1.86, + "learning_rate": 5.387854141812334e-06, + "logits/chosen": -2.967172861099243, + "logits/rejected": -2.5177366733551025, + "logps/chosen": -572.5086669921875, + "logps/rejected": -331.4734802246094, + "loss": 0.2368, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.196841239929199, + "rewards/margins": 3.3737525939941406, + "rewards/rejected": -9.57059383392334, + "step": 11943 + }, + { + "epoch": 1.86, + "learning_rate": 5.387120701281186e-06, + "logits/chosen": -2.953498125076294, + "logits/rejected": -3.029235601425171, + "logps/chosen": -341.1056213378906, + "logps/rejected": -371.03033447265625, + "loss": 1.399, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.522985458374023, + "rewards/margins": 3.8968148231506348, + "rewards/rejected": -10.4197998046875, + "step": 11944 + }, + { + "epoch": 1.86, + "learning_rate": 5.386387260750039e-06, + "logits/chosen": -2.973506450653076, + "logits/rejected": -2.9734890460968018, + "logps/chosen": -282.60107421875, + "logps/rejected": -258.26312255859375, + "loss": 5.6645, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.236628532409668, + "rewards/margins": -1.4281344413757324, + "rewards/rejected": -7.808494567871094, + "step": 11945 + }, + { + "epoch": 1.86, + "learning_rate": 5.385653820218891e-06, + "logits/chosen": -2.0981574058532715, + "logits/rejected": -2.618929386138916, + "logps/chosen": -390.39410400390625, + "logps/rejected": -453.1689453125, + "loss": 0.2627, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.939192295074463, + "rewards/margins": 5.417259216308594, + "rewards/rejected": -10.356451034545898, + "step": 11946 + }, + { + "epoch": 1.86, + "learning_rate": 5.3849203796877435e-06, + "logits/chosen": -2.775597095489502, + "logits/rejected": -2.286166191101074, + "logps/chosen": -302.80242919921875, + "logps/rejected": -192.70587158203125, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.505668640136719, + "rewards/margins": 4.833824157714844, + "rewards/rejected": -13.339492797851562, + "step": 11947 + }, + { + "epoch": 1.86, + "learning_rate": 5.3841869391565954e-06, + "logits/chosen": -1.3019063472747803, + "logits/rejected": -2.560845136642456, + "logps/chosen": -72.91889953613281, + "logps/rejected": -324.81231689453125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2463603019714355, + "rewards/margins": 7.137513160705566, + "rewards/rejected": -12.383872985839844, + "step": 11948 + }, + { + "epoch": 1.86, + "learning_rate": 5.383453498625447e-06, + "logits/chosen": -2.201094388961792, + "logits/rejected": -2.9329700469970703, + "logps/chosen": -409.3460693359375, + "logps/rejected": -431.0436706542969, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.808454990386963, + "rewards/margins": 7.509634494781494, + "rewards/rejected": -13.318089485168457, + "step": 11949 + }, + { + "epoch": 1.86, + "learning_rate": 5.3827200580943e-06, + "logits/chosen": -2.7120649814605713, + "logits/rejected": -2.8557333946228027, + "logps/chosen": -497.88189697265625, + "logps/rejected": -643.4139404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.362632751464844, + "rewards/margins": 16.04360580444336, + "rewards/rejected": -21.406238555908203, + "step": 11950 + }, + { + "epoch": 1.86, + "learning_rate": 5.381986617563152e-06, + "logits/chosen": -2.8083434104919434, + "logits/rejected": -2.982316493988037, + "logps/chosen": -685.4837646484375, + "logps/rejected": -928.9220581054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6736817359924316, + "rewards/margins": 11.52056884765625, + "rewards/rejected": -15.194250106811523, + "step": 11951 + }, + { + "epoch": 1.86, + "learning_rate": 5.381253177032004e-06, + "logits/chosen": -1.3756150007247925, + "logits/rejected": -2.687671184539795, + "logps/chosen": -136.6076202392578, + "logps/rejected": -472.33319091796875, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.769534587860107, + "rewards/margins": 6.00791072845459, + "rewards/rejected": -11.777444839477539, + "step": 11952 + }, + { + "epoch": 1.86, + "learning_rate": 5.380519736500856e-06, + "logits/chosen": -2.6139612197875977, + "logits/rejected": -2.6450226306915283, + "logps/chosen": -188.22972106933594, + "logps/rejected": -348.4827880859375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.289254188537598, + "rewards/margins": 8.247852325439453, + "rewards/rejected": -14.53710651397705, + "step": 11953 + }, + { + "epoch": 1.86, + "learning_rate": 5.379786295969708e-06, + "logits/chosen": -2.3175714015960693, + "logits/rejected": -2.700725555419922, + "logps/chosen": -172.70281982421875, + "logps/rejected": -413.61151123046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.57373571395874, + "rewards/margins": 10.585406303405762, + "rewards/rejected": -15.159141540527344, + "step": 11954 + }, + { + "epoch": 1.86, + "learning_rate": 5.37905285543856e-06, + "logits/chosen": -1.8692330121994019, + "logits/rejected": -2.1950271129608154, + "logps/chosen": -245.1533660888672, + "logps/rejected": -389.29876708984375, + "loss": 0.1659, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.116495132446289, + "rewards/margins": 4.565762996673584, + "rewards/rejected": -10.682257652282715, + "step": 11955 + }, + { + "epoch": 1.86, + "learning_rate": 5.378319414907412e-06, + "logits/chosen": -2.2846767902374268, + "logits/rejected": -2.996616840362549, + "logps/chosen": -156.9232940673828, + "logps/rejected": -338.37139892578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7396163940429688, + "rewards/margins": 8.19091796875, + "rewards/rejected": -11.930534362792969, + "step": 11956 + }, + { + "epoch": 1.86, + "learning_rate": 5.377585974376264e-06, + "logits/chosen": -2.3474764823913574, + "logits/rejected": -2.2489030361175537, + "logps/chosen": -469.81048583984375, + "logps/rejected": -651.0413818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3275909423828125, + "rewards/margins": 11.759941101074219, + "rewards/rejected": -16.08753204345703, + "step": 11957 + }, + { + "epoch": 1.86, + "learning_rate": 5.376852533845116e-06, + "logits/chosen": -2.0197365283966064, + "logits/rejected": -2.846351146697998, + "logps/chosen": -232.05166625976562, + "logps/rejected": -375.7040100097656, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2849531173706055, + "rewards/margins": 5.341192722320557, + "rewards/rejected": -11.62614631652832, + "step": 11958 + }, + { + "epoch": 1.86, + "learning_rate": 5.376119093313969e-06, + "logits/chosen": -3.0266103744506836, + "logits/rejected": -1.94448721408844, + "logps/chosen": -376.7756652832031, + "logps/rejected": -454.8052062988281, + "loss": 0.3151, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.786161422729492, + "rewards/margins": 3.3796133995056152, + "rewards/rejected": -11.165775299072266, + "step": 11959 + }, + { + "epoch": 1.86, + "learning_rate": 5.3753856527828205e-06, + "logits/chosen": -3.020826816558838, + "logits/rejected": -3.144057035446167, + "logps/chosen": -305.61322021484375, + "logps/rejected": -529.8314208984375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.972367763519287, + "rewards/margins": 5.724966526031494, + "rewards/rejected": -10.697334289550781, + "step": 11960 + }, + { + "epoch": 1.86, + "learning_rate": 5.374652212251672e-06, + "logits/chosen": -2.7484724521636963, + "logits/rejected": -2.3754072189331055, + "logps/chosen": -271.648681640625, + "logps/rejected": -332.67047119140625, + "loss": 1.0071, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.935884475708008, + "rewards/margins": 3.7602732181549072, + "rewards/rejected": -8.696157455444336, + "step": 11961 + }, + { + "epoch": 1.86, + "learning_rate": 5.373918771720524e-06, + "logits/chosen": -2.749659299850464, + "logits/rejected": -2.029244899749756, + "logps/chosen": -307.014404296875, + "logps/rejected": -295.45269775390625, + "loss": 0.0697, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.273139476776123, + "rewards/margins": 2.6946191787719727, + "rewards/rejected": -6.9677581787109375, + "step": 11962 + }, + { + "epoch": 1.86, + "learning_rate": 5.373185331189377e-06, + "logits/chosen": -2.9113664627075195, + "logits/rejected": -2.6938564777374268, + "logps/chosen": -179.0233154296875, + "logps/rejected": -249.03964233398438, + "loss": 0.6641, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.865788459777832, + "rewards/margins": 3.7631514072418213, + "rewards/rejected": -8.628939628601074, + "step": 11963 + }, + { + "epoch": 1.86, + "learning_rate": 5.37245189065823e-06, + "logits/chosen": -3.160099506378174, + "logits/rejected": -3.2018187046051025, + "logps/chosen": -159.1900634765625, + "logps/rejected": -277.9385986328125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.650488376617432, + "rewards/margins": 6.666517734527588, + "rewards/rejected": -12.31700611114502, + "step": 11964 + }, + { + "epoch": 1.86, + "learning_rate": 5.371718450127082e-06, + "logits/chosen": -2.3534884452819824, + "logits/rejected": -1.861013412475586, + "logps/chosen": -157.13394165039062, + "logps/rejected": -242.08724975585938, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.319605827331543, + "rewards/margins": 9.395617485046387, + "rewards/rejected": -14.71522331237793, + "step": 11965 + }, + { + "epoch": 1.86, + "learning_rate": 5.3709850095959335e-06, + "logits/chosen": -2.4114601612091064, + "logits/rejected": -3.0974888801574707, + "logps/chosen": -72.0753173828125, + "logps/rejected": -274.83148193359375, + "loss": 0.3443, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.298404693603516, + "rewards/margins": 5.017239570617676, + "rewards/rejected": -10.315644264221191, + "step": 11966 + }, + { + "epoch": 1.86, + "learning_rate": 5.370251569064786e-06, + "logits/chosen": -2.6274802684783936, + "logits/rejected": -2.972026824951172, + "logps/chosen": -188.13323974609375, + "logps/rejected": -299.7477722167969, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.164863109588623, + "rewards/margins": 5.260888576507568, + "rewards/rejected": -11.425751686096191, + "step": 11967 + }, + { + "epoch": 1.86, + "learning_rate": 5.369518128533638e-06, + "logits/chosen": -2.795293092727661, + "logits/rejected": -2.3816521167755127, + "logps/chosen": -201.03936767578125, + "logps/rejected": -266.6549072265625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.813714027404785, + "rewards/margins": 6.412090301513672, + "rewards/rejected": -11.225804328918457, + "step": 11968 + }, + { + "epoch": 1.86, + "learning_rate": 5.36878468800249e-06, + "logits/chosen": -1.3843824863433838, + "logits/rejected": -2.9160523414611816, + "logps/chosen": -122.0546875, + "logps/rejected": -574.7523193359375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.777332782745361, + "rewards/margins": 9.922892570495605, + "rewards/rejected": -14.700225830078125, + "step": 11969 + }, + { + "epoch": 1.86, + "learning_rate": 5.368051247471342e-06, + "logits/chosen": -2.877046823501587, + "logits/rejected": -2.7104814052581787, + "logps/chosen": -468.3894958496094, + "logps/rejected": -545.471923828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9837894439697266, + "rewards/margins": 8.871541976928711, + "rewards/rejected": -11.855331420898438, + "step": 11970 + }, + { + "epoch": 1.86, + "learning_rate": 5.367317806940194e-06, + "logits/chosen": -2.2256462574005127, + "logits/rejected": -2.8713290691375732, + "logps/chosen": -145.3560791015625, + "logps/rejected": -271.8741149902344, + "loss": 0.1572, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0214433670043945, + "rewards/margins": 3.0598394870758057, + "rewards/rejected": -9.081283569335938, + "step": 11971 + }, + { + "epoch": 1.86, + "learning_rate": 5.3665843664090464e-06, + "logits/chosen": -1.4725030660629272, + "logits/rejected": -2.7747910022735596, + "logps/chosen": -95.97906494140625, + "logps/rejected": -344.94537353515625, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.395002841949463, + "rewards/margins": 7.016227722167969, + "rewards/rejected": -11.411230087280273, + "step": 11972 + }, + { + "epoch": 1.86, + "learning_rate": 5.365850925877898e-06, + "logits/chosen": -1.8890857696533203, + "logits/rejected": -2.844439744949341, + "logps/chosen": -165.44198608398438, + "logps/rejected": -428.2867126464844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.511865139007568, + "rewards/margins": 9.264108657836914, + "rewards/rejected": -13.77597427368164, + "step": 11973 + }, + { + "epoch": 1.86, + "learning_rate": 5.36511748534675e-06, + "logits/chosen": -2.1534430980682373, + "logits/rejected": -2.8334052562713623, + "logps/chosen": -110.31890869140625, + "logps/rejected": -258.3746032714844, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.832259178161621, + "rewards/margins": 7.4851531982421875, + "rewards/rejected": -12.317412376403809, + "step": 11974 + }, + { + "epoch": 1.86, + "learning_rate": 5.364384044815602e-06, + "logits/chosen": -1.1606725454330444, + "logits/rejected": -2.6961309909820557, + "logps/chosen": -140.47682189941406, + "logps/rejected": -451.8616943359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.590839385986328, + "rewards/margins": 10.303176879882812, + "rewards/rejected": -15.89401626586914, + "step": 11975 + }, + { + "epoch": 1.86, + "learning_rate": 5.363650604284455e-06, + "logits/chosen": -2.2042181491851807, + "logits/rejected": -3.049931049346924, + "logps/chosen": -630.57861328125, + "logps/rejected": -486.3558349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8707613945007324, + "rewards/margins": 11.21900749206543, + "rewards/rejected": -14.08976936340332, + "step": 11976 + }, + { + "epoch": 1.86, + "learning_rate": 5.362917163753307e-06, + "logits/chosen": -2.7387380599975586, + "logits/rejected": -0.5352402925491333, + "logps/chosen": -335.42529296875, + "logps/rejected": -103.60447692871094, + "loss": 0.8039, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.1772613525390625, + "rewards/margins": 3.688213348388672, + "rewards/rejected": -7.865474700927734, + "step": 11977 + }, + { + "epoch": 1.86, + "learning_rate": 5.3621837232221586e-06, + "logits/chosen": -3.069946050643921, + "logits/rejected": -3.1057887077331543, + "logps/chosen": -205.64633178710938, + "logps/rejected": -232.73678588867188, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7215499877929688, + "rewards/margins": 6.019873142242432, + "rewards/rejected": -9.741423606872559, + "step": 11978 + }, + { + "epoch": 1.86, + "learning_rate": 5.3614502826910104e-06, + "logits/chosen": -2.803448438644409, + "logits/rejected": -2.9198460578918457, + "logps/chosen": -353.1581726074219, + "logps/rejected": -399.39202880859375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.684849739074707, + "rewards/margins": 9.5647611618042, + "rewards/rejected": -12.249610900878906, + "step": 11979 + }, + { + "epoch": 1.86, + "learning_rate": 5.360716842159863e-06, + "logits/chosen": -0.9148776531219482, + "logits/rejected": -2.026312828063965, + "logps/chosen": -226.11868286132812, + "logps/rejected": -493.3205871582031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.496915578842163, + "rewards/margins": 13.07153606414795, + "rewards/rejected": -15.568450927734375, + "step": 11980 + }, + { + "epoch": 1.86, + "learning_rate": 5.359983401628716e-06, + "logits/chosen": -1.1512751579284668, + "logits/rejected": -2.3672382831573486, + "logps/chosen": -106.86774444580078, + "logps/rejected": -434.9373779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.622344017028809, + "rewards/margins": 10.829063415527344, + "rewards/rejected": -16.45140838623047, + "step": 11981 + }, + { + "epoch": 1.86, + "learning_rate": 5.359249961097568e-06, + "logits/chosen": -1.8577299118041992, + "logits/rejected": -2.9955527782440186, + "logps/chosen": -133.96270751953125, + "logps/rejected": -281.855712890625, + "loss": 0.3093, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.880875587463379, + "rewards/margins": 5.381425857543945, + "rewards/rejected": -9.26230239868164, + "step": 11982 + }, + { + "epoch": 1.86, + "learning_rate": 5.35851652056642e-06, + "logits/chosen": -1.2734401226043701, + "logits/rejected": -2.9151952266693115, + "logps/chosen": -134.61569213867188, + "logps/rejected": -341.6576232910156, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.378238201141357, + "rewards/margins": 8.823345184326172, + "rewards/rejected": -13.201583862304688, + "step": 11983 + }, + { + "epoch": 1.86, + "learning_rate": 5.3577830800352715e-06, + "logits/chosen": -2.13529109954834, + "logits/rejected": -3.0431606769561768, + "logps/chosen": -133.952392578125, + "logps/rejected": -415.5882263183594, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.244232654571533, + "rewards/margins": 6.690215110778809, + "rewards/rejected": -11.9344482421875, + "step": 11984 + }, + { + "epoch": 1.86, + "learning_rate": 5.357049639504124e-06, + "logits/chosen": -2.595794439315796, + "logits/rejected": -2.970041036605835, + "logps/chosen": -529.587158203125, + "logps/rejected": -910.2973022460938, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.831633567810059, + "rewards/margins": 5.977142810821533, + "rewards/rejected": -12.80877685546875, + "step": 11985 + }, + { + "epoch": 1.86, + "learning_rate": 5.356316198972976e-06, + "logits/chosen": -1.172255039215088, + "logits/rejected": -2.7769880294799805, + "logps/chosen": -119.26210021972656, + "logps/rejected": -331.59307861328125, + "loss": 3.1275, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.778149604797363, + "rewards/margins": 1.54776930809021, + "rewards/rejected": -10.325918197631836, + "step": 11986 + }, + { + "epoch": 1.86, + "learning_rate": 5.355582758441828e-06, + "logits/chosen": -2.7960126399993896, + "logits/rejected": -2.7612693309783936, + "logps/chosen": -336.0146484375, + "logps/rejected": -366.3076477050781, + "loss": 3.5222, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.148311614990234, + "rewards/margins": 0.6878142356872559, + "rewards/rejected": -10.836125373840332, + "step": 11987 + }, + { + "epoch": 1.86, + "learning_rate": 5.35484931791068e-06, + "logits/chosen": -2.0599780082702637, + "logits/rejected": -3.0472702980041504, + "logps/chosen": -94.44436645507812, + "logps/rejected": -253.46661376953125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.052182197570801, + "rewards/margins": 5.850950717926025, + "rewards/rejected": -8.903133392333984, + "step": 11988 + }, + { + "epoch": 1.86, + "learning_rate": 5.354115877379532e-06, + "logits/chosen": -2.497920274734497, + "logits/rejected": -2.525059461593628, + "logps/chosen": -288.15966796875, + "logps/rejected": -263.5349426269531, + "loss": 1.2318, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.4048662185668945, + "rewards/margins": 3.9151017665863037, + "rewards/rejected": -9.319968223571777, + "step": 11989 + }, + { + "epoch": 1.86, + "learning_rate": 5.3533824368483845e-06, + "logits/chosen": -2.412834882736206, + "logits/rejected": -2.9157841205596924, + "logps/chosen": -188.46035766601562, + "logps/rejected": -218.16159057617188, + "loss": 0.0616, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6568946838378906, + "rewards/margins": 6.576769828796387, + "rewards/rejected": -9.233664512634277, + "step": 11990 + }, + { + "epoch": 1.86, + "learning_rate": 5.352648996317236e-06, + "logits/chosen": -2.4787814617156982, + "logits/rejected": -2.863227367401123, + "logps/chosen": -649.1539306640625, + "logps/rejected": -574.5079345703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.475750923156738, + "rewards/margins": 8.046782493591309, + "rewards/rejected": -12.522533416748047, + "step": 11991 + }, + { + "epoch": 1.87, + "learning_rate": 5.351915555786088e-06, + "logits/chosen": -2.763523578643799, + "logits/rejected": -3.0667402744293213, + "logps/chosen": -395.6574401855469, + "logps/rejected": -405.08587646484375, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.026155471801758, + "rewards/margins": 5.666874408721924, + "rewards/rejected": -12.693029403686523, + "step": 11992 + }, + { + "epoch": 1.87, + "learning_rate": 5.35118211525494e-06, + "logits/chosen": -2.8310587406158447, + "logits/rejected": -3.106478691101074, + "logps/chosen": -97.15409851074219, + "logps/rejected": -272.96722412109375, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.48237943649292, + "rewards/margins": 6.897453784942627, + "rewards/rejected": -11.379833221435547, + "step": 11993 + }, + { + "epoch": 1.87, + "learning_rate": 5.350448674723793e-06, + "logits/chosen": -1.3460543155670166, + "logits/rejected": -2.976855516433716, + "logps/chosen": -86.82394409179688, + "logps/rejected": -394.8539123535156, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.746448040008545, + "rewards/margins": 9.301923751831055, + "rewards/rejected": -13.048372268676758, + "step": 11994 + }, + { + "epoch": 1.87, + "learning_rate": 5.349715234192645e-06, + "logits/chosen": -2.9989755153656006, + "logits/rejected": -2.992316484451294, + "logps/chosen": -449.2818603515625, + "logps/rejected": -354.8034973144531, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9102089405059814, + "rewards/margins": 8.92789077758789, + "rewards/rejected": -9.83810043334961, + "step": 11995 + }, + { + "epoch": 1.87, + "learning_rate": 5.348981793661497e-06, + "logits/chosen": -2.3922317028045654, + "logits/rejected": -2.5356404781341553, + "logps/chosen": -140.74600219726562, + "logps/rejected": -225.08407592773438, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.907845973968506, + "rewards/margins": 6.229969024658203, + "rewards/rejected": -12.137815475463867, + "step": 11996 + }, + { + "epoch": 1.87, + "learning_rate": 5.348248353130349e-06, + "logits/chosen": -2.779062509536743, + "logits/rejected": -3.1358108520507812, + "logps/chosen": -153.2171630859375, + "logps/rejected": -288.997314453125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.204291582107544, + "rewards/margins": 7.047552108764648, + "rewards/rejected": -10.25184440612793, + "step": 11997 + }, + { + "epoch": 1.87, + "learning_rate": 5.347514912599201e-06, + "logits/chosen": -1.7409297227859497, + "logits/rejected": -2.6578142642974854, + "logps/chosen": -120.86872100830078, + "logps/rejected": -322.455322265625, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5097055435180664, + "rewards/margins": 6.256604194641113, + "rewards/rejected": -9.76630973815918, + "step": 11998 + }, + { + "epoch": 1.87, + "learning_rate": 5.346781472068054e-06, + "logits/chosen": -1.982762336730957, + "logits/rejected": -2.188711643218994, + "logps/chosen": -816.013671875, + "logps/rejected": -732.5147705078125, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.649785041809082, + "rewards/margins": 4.588569641113281, + "rewards/rejected": -12.238354682922363, + "step": 11999 + }, + { + "epoch": 1.87, + "learning_rate": 5.346048031536906e-06, + "logits/chosen": -2.7240474224090576, + "logits/rejected": -2.9691922664642334, + "logps/chosen": -103.047119140625, + "logps/rejected": -320.7811279296875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.546197414398193, + "rewards/margins": 7.831620216369629, + "rewards/rejected": -12.377817153930664, + "step": 12000 + }, + { + "epoch": 1.87, + "learning_rate": 5.345314591005758e-06, + "logits/chosen": -2.8608357906341553, + "logits/rejected": -2.5522778034210205, + "logps/chosen": -433.7230224609375, + "logps/rejected": -486.1380310058594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.806661128997803, + "rewards/margins": 10.438263893127441, + "rewards/rejected": -15.244924545288086, + "step": 12001 + }, + { + "epoch": 1.87, + "learning_rate": 5.3445811504746096e-06, + "logits/chosen": -1.251545786857605, + "logits/rejected": -2.8572967052459717, + "logps/chosen": -98.39373779296875, + "logps/rejected": -294.03887939453125, + "loss": 0.0357, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.251948356628418, + "rewards/margins": 4.403860092163086, + "rewards/rejected": -12.655807495117188, + "step": 12002 + }, + { + "epoch": 1.87, + "learning_rate": 5.343847709943462e-06, + "logits/chosen": -2.8258426189422607, + "logits/rejected": -1.859236717224121, + "logps/chosen": -436.2217102050781, + "logps/rejected": -413.0138854980469, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.686859130859375, + "rewards/margins": 9.248024940490723, + "rewards/rejected": -11.934884071350098, + "step": 12003 + }, + { + "epoch": 1.87, + "learning_rate": 5.343114269412314e-06, + "logits/chosen": -2.4085936546325684, + "logits/rejected": -2.704692840576172, + "logps/chosen": -410.7725830078125, + "logps/rejected": -702.003173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.181980848312378, + "rewards/margins": 13.25269603729248, + "rewards/rejected": -16.434677124023438, + "step": 12004 + }, + { + "epoch": 1.87, + "learning_rate": 5.342380828881166e-06, + "logits/chosen": -2.7931067943573, + "logits/rejected": -3.0634825229644775, + "logps/chosen": -157.7355194091797, + "logps/rejected": -219.45484924316406, + "loss": 0.1289, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2377119064331055, + "rewards/margins": 2.6593587398529053, + "rewards/rejected": -7.897070407867432, + "step": 12005 + }, + { + "epoch": 1.87, + "learning_rate": 5.341647388350018e-06, + "logits/chosen": -1.59795343875885, + "logits/rejected": -2.899134874343872, + "logps/chosen": -165.85873413085938, + "logps/rejected": -682.084228515625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.579504013061523, + "rewards/margins": 9.881864547729492, + "rewards/rejected": -16.461368560791016, + "step": 12006 + }, + { + "epoch": 1.87, + "learning_rate": 5.34091394781887e-06, + "logits/chosen": -2.4899377822875977, + "logits/rejected": -2.930560350418091, + "logps/chosen": -149.21006774902344, + "logps/rejected": -490.453857421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.286287784576416, + "rewards/margins": 9.065999984741211, + "rewards/rejected": -13.352287292480469, + "step": 12007 + }, + { + "epoch": 1.87, + "learning_rate": 5.3401805072877225e-06, + "logits/chosen": -1.9882736206054688, + "logits/rejected": -2.904867172241211, + "logps/chosen": -121.00001525878906, + "logps/rejected": -358.7356262207031, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.868610143661499, + "rewards/margins": 7.31452751159668, + "rewards/rejected": -11.183137893676758, + "step": 12008 + }, + { + "epoch": 1.87, + "learning_rate": 5.339447066756574e-06, + "logits/chosen": -2.964118242263794, + "logits/rejected": -1.6361632347106934, + "logps/chosen": -262.8297424316406, + "logps/rejected": -107.61515045166016, + "loss": 0.3298, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.746091365814209, + "rewards/margins": 2.6133575439453125, + "rewards/rejected": -8.35944938659668, + "step": 12009 + }, + { + "epoch": 1.87, + "learning_rate": 5.338713626225426e-06, + "logits/chosen": -1.9940422773361206, + "logits/rejected": -2.818610906600952, + "logps/chosen": -314.192626953125, + "logps/rejected": -430.7713623046875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.184502601623535, + "rewards/margins": 5.469931602478027, + "rewards/rejected": -9.654434204101562, + "step": 12010 + }, + { + "epoch": 1.87, + "learning_rate": 5.337980185694278e-06, + "logits/chosen": -2.7051916122436523, + "logits/rejected": -2.4113049507141113, + "logps/chosen": -409.48590087890625, + "logps/rejected": -504.76849365234375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.812936305999756, + "rewards/margins": 6.766406536102295, + "rewards/rejected": -11.57934284210205, + "step": 12011 + }, + { + "epoch": 1.87, + "learning_rate": 5.337246745163131e-06, + "logits/chosen": -2.941549301147461, + "logits/rejected": -3.1815648078918457, + "logps/chosen": -131.69512939453125, + "logps/rejected": -140.48109436035156, + "loss": 0.8336, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.615499019622803, + "rewards/margins": 2.612841844558716, + "rewards/rejected": -7.228341102600098, + "step": 12012 + }, + { + "epoch": 1.87, + "learning_rate": 5.336513304631983e-06, + "logits/chosen": -1.990924596786499, + "logits/rejected": -2.753434658050537, + "logps/chosen": -724.5130615234375, + "logps/rejected": -510.6658630371094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8056821823120117, + "rewards/margins": 14.240805625915527, + "rewards/rejected": -18.04648780822754, + "step": 12013 + }, + { + "epoch": 1.87, + "learning_rate": 5.3357798641008355e-06, + "logits/chosen": -1.2807542085647583, + "logits/rejected": -2.8362441062927246, + "logps/chosen": -118.17987823486328, + "logps/rejected": -321.1310729980469, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.032022714614868, + "rewards/margins": 6.018502712249756, + "rewards/rejected": -9.050525665283203, + "step": 12014 + }, + { + "epoch": 1.87, + "learning_rate": 5.335046423569687e-06, + "logits/chosen": -2.9487576484680176, + "logits/rejected": -2.530306577682495, + "logps/chosen": -256.4866638183594, + "logps/rejected": -175.91822814941406, + "loss": 1.9405, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.669191837310791, + "rewards/margins": 0.9685206413269043, + "rewards/rejected": -8.637712478637695, + "step": 12015 + }, + { + "epoch": 1.87, + "learning_rate": 5.33431298303854e-06, + "logits/chosen": -1.541236162185669, + "logits/rejected": -2.6412646770477295, + "logps/chosen": -118.90803527832031, + "logps/rejected": -373.65545654296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1356868743896484, + "rewards/margins": 8.68497371673584, + "rewards/rejected": -11.820660591125488, + "step": 12016 + }, + { + "epoch": 1.87, + "learning_rate": 5.333579542507392e-06, + "logits/chosen": -1.5393469333648682, + "logits/rejected": -2.2289681434631348, + "logps/chosen": -501.575439453125, + "logps/rejected": -693.0184936523438, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6055169105529785, + "rewards/margins": 8.259092330932617, + "rewards/rejected": -13.864608764648438, + "step": 12017 + }, + { + "epoch": 1.87, + "learning_rate": 5.332846101976244e-06, + "logits/chosen": -2.654510498046875, + "logits/rejected": -2.8154406547546387, + "logps/chosen": -145.28065490722656, + "logps/rejected": -288.47210693359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8139514923095703, + "rewards/margins": 8.754688262939453, + "rewards/rejected": -11.568639755249023, + "step": 12018 + }, + { + "epoch": 1.87, + "learning_rate": 5.332112661445096e-06, + "logits/chosen": -2.5239365100860596, + "logits/rejected": -2.82181978225708, + "logps/chosen": -84.15206909179688, + "logps/rejected": -256.8531799316406, + "loss": 0.3728, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.774811267852783, + "rewards/margins": 2.38694429397583, + "rewards/rejected": -8.161755561828613, + "step": 12019 + }, + { + "epoch": 1.87, + "learning_rate": 5.331379220913948e-06, + "logits/chosen": -1.2177587747573853, + "logits/rejected": -2.428673505783081, + "logps/chosen": -196.8021240234375, + "logps/rejected": -448.010009765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.542815208435059, + "rewards/margins": 9.503215789794922, + "rewards/rejected": -15.046031951904297, + "step": 12020 + }, + { + "epoch": 1.87, + "learning_rate": 5.3306457803828e-06, + "logits/chosen": -2.7829418182373047, + "logits/rejected": -3.2320966720581055, + "logps/chosen": -46.82512283325195, + "logps/rejected": -258.70379638671875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1414220333099365, + "rewards/margins": 6.605557918548584, + "rewards/rejected": -9.746979713439941, + "step": 12021 + }, + { + "epoch": 1.87, + "learning_rate": 5.329912339851652e-06, + "logits/chosen": -2.515202760696411, + "logits/rejected": -2.892775297164917, + "logps/chosen": -145.7350311279297, + "logps/rejected": -366.2384033203125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.81829309463501, + "rewards/margins": 6.372900009155273, + "rewards/rejected": -11.191192626953125, + "step": 12022 + }, + { + "epoch": 1.87, + "learning_rate": 5.329178899320504e-06, + "logits/chosen": -2.8054046630859375, + "logits/rejected": -2.7893989086151123, + "logps/chosen": -204.94091796875, + "logps/rejected": -462.64453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.742619514465332, + "rewards/margins": 8.279109954833984, + "rewards/rejected": -13.021730422973633, + "step": 12023 + }, + { + "epoch": 1.87, + "learning_rate": 5.328445458789356e-06, + "logits/chosen": -2.374380588531494, + "logits/rejected": -2.764763355255127, + "logps/chosen": -178.0428924560547, + "logps/rejected": -336.3310546875, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.008707046508789, + "rewards/margins": 5.0630083084106445, + "rewards/rejected": -12.071715354919434, + "step": 12024 + }, + { + "epoch": 1.87, + "learning_rate": 5.327712018258209e-06, + "logits/chosen": -2.5141334533691406, + "logits/rejected": -2.78596568107605, + "logps/chosen": -366.29449462890625, + "logps/rejected": -547.827880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.60051155090332, + "rewards/margins": 11.853681564331055, + "rewards/rejected": -16.454193115234375, + "step": 12025 + }, + { + "epoch": 1.87, + "learning_rate": 5.3269785777270606e-06, + "logits/chosen": -2.0777587890625, + "logits/rejected": -2.720094680786133, + "logps/chosen": -221.23147583007812, + "logps/rejected": -293.43328857421875, + "loss": 0.789, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.931388854980469, + "rewards/margins": 1.3697843551635742, + "rewards/rejected": -9.301173210144043, + "step": 12026 + }, + { + "epoch": 1.87, + "learning_rate": 5.3262451371959124e-06, + "logits/chosen": -2.045470714569092, + "logits/rejected": -2.4008681774139404, + "logps/chosen": -175.88278198242188, + "logps/rejected": -375.4438171386719, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.296228408813477, + "rewards/margins": 10.596145629882812, + "rewards/rejected": -14.892374992370605, + "step": 12027 + }, + { + "epoch": 1.87, + "learning_rate": 5.325511696664764e-06, + "logits/chosen": -2.9714019298553467, + "logits/rejected": -2.707453966140747, + "logps/chosen": -132.63682556152344, + "logps/rejected": -277.3731689453125, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.538304328918457, + "rewards/margins": 5.549798488616943, + "rewards/rejected": -10.088102340698242, + "step": 12028 + }, + { + "epoch": 1.87, + "learning_rate": 5.324778256133616e-06, + "logits/chosen": -2.7604830265045166, + "logits/rejected": -2.2857868671417236, + "logps/chosen": -267.8294982910156, + "logps/rejected": -456.0446472167969, + "loss": 1.2068, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.972774505615234, + "rewards/margins": 4.075455665588379, + "rewards/rejected": -11.048230171203613, + "step": 12029 + }, + { + "epoch": 1.87, + "learning_rate": 5.324044815602469e-06, + "logits/chosen": -3.0192599296569824, + "logits/rejected": -2.2065327167510986, + "logps/chosen": -1051.473876953125, + "logps/rejected": -669.4378051757812, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.147516250610352, + "rewards/margins": 5.646540641784668, + "rewards/rejected": -10.79405689239502, + "step": 12030 + }, + { + "epoch": 1.87, + "learning_rate": 5.323311375071322e-06, + "logits/chosen": -1.8272666931152344, + "logits/rejected": -2.9945578575134277, + "logps/chosen": -442.85504150390625, + "logps/rejected": -721.4905395507812, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.538556098937988, + "rewards/margins": 6.367401123046875, + "rewards/rejected": -12.905957221984863, + "step": 12031 + }, + { + "epoch": 1.87, + "learning_rate": 5.3225779345401735e-06, + "logits/chosen": -1.9919610023498535, + "logits/rejected": -2.929520845413208, + "logps/chosen": -107.00856018066406, + "logps/rejected": -300.376708984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.531558036804199, + "rewards/margins": 9.031515121459961, + "rewards/rejected": -12.563074111938477, + "step": 12032 + }, + { + "epoch": 1.87, + "learning_rate": 5.321844494009025e-06, + "logits/chosen": -1.5973103046417236, + "logits/rejected": -2.9848148822784424, + "logps/chosen": -86.02737426757812, + "logps/rejected": -625.509765625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.039438724517822, + "rewards/margins": 9.049413681030273, + "rewards/rejected": -13.088851928710938, + "step": 12033 + }, + { + "epoch": 1.87, + "learning_rate": 5.321111053477878e-06, + "logits/chosen": -2.9694271087646484, + "logits/rejected": -2.883542776107788, + "logps/chosen": -630.2362670898438, + "logps/rejected": -698.5179443359375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.774862766265869, + "rewards/margins": 6.667726039886475, + "rewards/rejected": -11.442588806152344, + "step": 12034 + }, + { + "epoch": 1.87, + "learning_rate": 5.32037761294673e-06, + "logits/chosen": -2.5188465118408203, + "logits/rejected": -3.021528959274292, + "logps/chosen": -77.27588653564453, + "logps/rejected": -197.25389099121094, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.349197864532471, + "rewards/margins": 5.704313278198242, + "rewards/rejected": -10.053510665893555, + "step": 12035 + }, + { + "epoch": 1.87, + "learning_rate": 5.319644172415582e-06, + "logits/chosen": -2.5270802974700928, + "logits/rejected": -2.2100255489349365, + "logps/chosen": -254.13253784179688, + "logps/rejected": -295.73541259765625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1771180629730225, + "rewards/margins": 7.478107452392578, + "rewards/rejected": -10.65522575378418, + "step": 12036 + }, + { + "epoch": 1.87, + "learning_rate": 5.318910731884434e-06, + "logits/chosen": -2.264673948287964, + "logits/rejected": -2.4362595081329346, + "logps/chosen": -254.72720336914062, + "logps/rejected": -318.1177062988281, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.538365364074707, + "rewards/margins": 5.68862771987915, + "rewards/rejected": -10.2269926071167, + "step": 12037 + }, + { + "epoch": 1.87, + "learning_rate": 5.318177291353286e-06, + "logits/chosen": -3.042929172515869, + "logits/rejected": -3.0810317993164062, + "logps/chosen": -105.92620849609375, + "logps/rejected": -332.1043701171875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.064277648925781, + "rewards/margins": 7.940398693084717, + "rewards/rejected": -12.004676818847656, + "step": 12038 + }, + { + "epoch": 1.87, + "learning_rate": 5.317443850822138e-06, + "logits/chosen": -2.643826484680176, + "logits/rejected": -3.05157208442688, + "logps/chosen": -210.91246032714844, + "logps/rejected": -277.18060302734375, + "loss": 0.0837, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.841058254241943, + "rewards/margins": 6.600783348083496, + "rewards/rejected": -11.441841125488281, + "step": 12039 + }, + { + "epoch": 1.87, + "learning_rate": 5.31671041029099e-06, + "logits/chosen": -2.5037455558776855, + "logits/rejected": -2.9197371006011963, + "logps/chosen": -62.947731018066406, + "logps/rejected": -287.20025634765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7069895267486572, + "rewards/margins": 8.308928489685059, + "rewards/rejected": -11.015917778015137, + "step": 12040 + }, + { + "epoch": 1.87, + "learning_rate": 5.315976969759842e-06, + "logits/chosen": -1.8790533542633057, + "logits/rejected": -2.9987196922302246, + "logps/chosen": -171.36932373046875, + "logps/rejected": -468.4748229980469, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8816468715667725, + "rewards/margins": 5.4844560623168945, + "rewards/rejected": -9.36610221862793, + "step": 12041 + }, + { + "epoch": 1.87, + "learning_rate": 5.315243529228694e-06, + "logits/chosen": -2.871166706085205, + "logits/rejected": -2.9869792461395264, + "logps/chosen": -119.43272399902344, + "logps/rejected": -213.8052978515625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9132280349731445, + "rewards/margins": 7.494631767272949, + "rewards/rejected": -11.407859802246094, + "step": 12042 + }, + { + "epoch": 1.87, + "learning_rate": 5.314510088697547e-06, + "logits/chosen": -2.3344480991363525, + "logits/rejected": -3.0007030963897705, + "logps/chosen": -191.8259735107422, + "logps/rejected": -357.9627685546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.137126445770264, + "rewards/margins": 10.217896461486816, + "rewards/rejected": -14.355022430419922, + "step": 12043 + }, + { + "epoch": 1.87, + "learning_rate": 5.313776648166399e-06, + "logits/chosen": -2.3138415813446045, + "logits/rejected": -2.8598082065582275, + "logps/chosen": -56.1569938659668, + "logps/rejected": -229.66749572753906, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7263901233673096, + "rewards/margins": 7.634689807891846, + "rewards/rejected": -11.361080169677734, + "step": 12044 + }, + { + "epoch": 1.87, + "learning_rate": 5.3130432076352505e-06, + "logits/chosen": -2.7634875774383545, + "logits/rejected": -3.1047418117523193, + "logps/chosen": -76.61795043945312, + "logps/rejected": -257.05584716796875, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9120893478393555, + "rewards/margins": 5.882490158081055, + "rewards/rejected": -8.79457950592041, + "step": 12045 + }, + { + "epoch": 1.87, + "learning_rate": 5.312309767104102e-06, + "logits/chosen": -1.1177403926849365, + "logits/rejected": -2.7687320709228516, + "logps/chosen": -146.2713165283203, + "logps/rejected": -302.21087646484375, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.543379306793213, + "rewards/margins": 4.165097236633301, + "rewards/rejected": -9.708476066589355, + "step": 12046 + }, + { + "epoch": 1.87, + "learning_rate": 5.311576326572955e-06, + "logits/chosen": -2.1559841632843018, + "logits/rejected": -2.947052001953125, + "logps/chosen": -120.83636474609375, + "logps/rejected": -379.326904296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9382901191711426, + "rewards/margins": 9.32345199584961, + "rewards/rejected": -11.261741638183594, + "step": 12047 + }, + { + "epoch": 1.87, + "learning_rate": 5.310842886041808e-06, + "logits/chosen": -2.4015512466430664, + "logits/rejected": -2.9996330738067627, + "logps/chosen": -152.377197265625, + "logps/rejected": -230.57400512695312, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.956400394439697, + "rewards/margins": 8.201409339904785, + "rewards/rejected": -13.15781021118164, + "step": 12048 + }, + { + "epoch": 1.87, + "learning_rate": 5.31010944551066e-06, + "logits/chosen": -1.6091351509094238, + "logits/rejected": -2.7086660861968994, + "logps/chosen": -153.08477783203125, + "logps/rejected": -467.73895263671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.883490562438965, + "rewards/margins": 9.735868453979492, + "rewards/rejected": -14.61935806274414, + "step": 12049 + }, + { + "epoch": 1.87, + "learning_rate": 5.309376004979512e-06, + "logits/chosen": -2.9319326877593994, + "logits/rejected": -1.9340934753417969, + "logps/chosen": -560.9613037109375, + "logps/rejected": -332.85687255859375, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3350555896759033, + "rewards/margins": 6.349575996398926, + "rewards/rejected": -9.68463134765625, + "step": 12050 + }, + { + "epoch": 1.87, + "learning_rate": 5.3086425644483635e-06, + "logits/chosen": -3.059861421585083, + "logits/rejected": -2.784059524536133, + "logps/chosen": -250.38644409179688, + "logps/rejected": -405.6255187988281, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.516843318939209, + "rewards/margins": 7.774828910827637, + "rewards/rejected": -13.291671752929688, + "step": 12051 + }, + { + "epoch": 1.87, + "learning_rate": 5.307909123917216e-06, + "logits/chosen": -2.8446719646453857, + "logits/rejected": -2.0590097904205322, + "logps/chosen": -348.0140075683594, + "logps/rejected": -319.69256591796875, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.463616371154785, + "rewards/margins": 4.950074195861816, + "rewards/rejected": -9.413690567016602, + "step": 12052 + }, + { + "epoch": 1.87, + "learning_rate": 5.307175683386068e-06, + "logits/chosen": -1.207728624343872, + "logits/rejected": -2.500774383544922, + "logps/chosen": -156.7220916748047, + "logps/rejected": -415.7740783691406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.677586317062378, + "rewards/margins": 9.916034698486328, + "rewards/rejected": -13.593622207641602, + "step": 12053 + }, + { + "epoch": 1.87, + "learning_rate": 5.30644224285492e-06, + "logits/chosen": -2.856499671936035, + "logits/rejected": -1.5520237684249878, + "logps/chosen": -240.883544921875, + "logps/rejected": -285.5442199707031, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.89107608795166, + "rewards/margins": 5.5035176277160645, + "rewards/rejected": -9.394594192504883, + "step": 12054 + }, + { + "epoch": 1.87, + "learning_rate": 5.305708802323772e-06, + "logits/chosen": -2.446381092071533, + "logits/rejected": -2.991381883621216, + "logps/chosen": -395.6553649902344, + "logps/rejected": -310.52557373046875, + "loss": 0.124, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.49094295501709, + "rewards/margins": 3.0202200412750244, + "rewards/rejected": -10.511163711547852, + "step": 12055 + }, + { + "epoch": 1.87, + "learning_rate": 5.304975361792624e-06, + "logits/chosen": -1.767986536026001, + "logits/rejected": -2.907374620437622, + "logps/chosen": -170.14581298828125, + "logps/rejected": -436.8388366699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.141829967498779, + "rewards/margins": 11.145353317260742, + "rewards/rejected": -15.287182807922363, + "step": 12056 + }, + { + "epoch": 1.88, + "learning_rate": 5.304241921261476e-06, + "logits/chosen": -2.0266690254211426, + "logits/rejected": -2.7814061641693115, + "logps/chosen": -118.2886734008789, + "logps/rejected": -282.9706115722656, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.899730205535889, + "rewards/margins": 5.335349082946777, + "rewards/rejected": -10.235078811645508, + "step": 12057 + }, + { + "epoch": 1.88, + "learning_rate": 5.303508480730328e-06, + "logits/chosen": -2.4403765201568604, + "logits/rejected": -2.991790771484375, + "logps/chosen": -259.3598327636719, + "logps/rejected": -244.48658752441406, + "loss": 0.7332, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.9370579719543457, + "rewards/margins": 4.3224077224731445, + "rewards/rejected": -8.259465217590332, + "step": 12058 + }, + { + "epoch": 1.88, + "learning_rate": 5.30277504019918e-06, + "logits/chosen": -1.9483411312103271, + "logits/rejected": -2.8986997604370117, + "logps/chosen": -149.72706604003906, + "logps/rejected": -463.85467529296875, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.65749979019165, + "rewards/margins": 7.0793352127075195, + "rewards/rejected": -12.736835479736328, + "step": 12059 + }, + { + "epoch": 1.88, + "learning_rate": 5.302041599668032e-06, + "logits/chosen": -1.755434513092041, + "logits/rejected": -3.018937587738037, + "logps/chosen": -218.94888305664062, + "logps/rejected": -448.0816955566406, + "loss": 0.1592, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.213163375854492, + "rewards/margins": 5.4914116859436035, + "rewards/rejected": -11.704574584960938, + "step": 12060 + }, + { + "epoch": 1.88, + "learning_rate": 5.301308159136885e-06, + "logits/chosen": -2.90907621383667, + "logits/rejected": -1.4403477907180786, + "logps/chosen": -992.0568237304688, + "logps/rejected": -538.5318603515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4525864124298096, + "rewards/margins": 10.150616645812988, + "rewards/rejected": -13.603202819824219, + "step": 12061 + }, + { + "epoch": 1.88, + "learning_rate": 5.300574718605737e-06, + "logits/chosen": -2.5014493465423584, + "logits/rejected": -2.6617114543914795, + "logps/chosen": -168.36962890625, + "logps/rejected": -126.57356262207031, + "loss": 1.2432, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.868509292602539, + "rewards/margins": 0.7337923049926758, + "rewards/rejected": -6.602301597595215, + "step": 12062 + }, + { + "epoch": 1.88, + "learning_rate": 5.2998412780745885e-06, + "logits/chosen": -1.5635007619857788, + "logits/rejected": -2.920283317565918, + "logps/chosen": -171.7225341796875, + "logps/rejected": -370.918212890625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.93751335144043, + "rewards/margins": 6.236666679382324, + "rewards/rejected": -12.174179077148438, + "step": 12063 + }, + { + "epoch": 1.88, + "learning_rate": 5.299107837543441e-06, + "logits/chosen": -3.001523733139038, + "logits/rejected": -2.5736606121063232, + "logps/chosen": -579.58544921875, + "logps/rejected": -441.49273681640625, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.467024803161621, + "rewards/margins": 6.4422688484191895, + "rewards/rejected": -11.909294128417969, + "step": 12064 + }, + { + "epoch": 1.88, + "learning_rate": 5.298374397012294e-06, + "logits/chosen": -2.482966899871826, + "logits/rejected": -2.8209056854248047, + "logps/chosen": -289.0418701171875, + "logps/rejected": -433.32977294921875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.537052154541016, + "rewards/margins": 7.844892501831055, + "rewards/rejected": -12.38194465637207, + "step": 12065 + }, + { + "epoch": 1.88, + "learning_rate": 5.297640956481146e-06, + "logits/chosen": -1.8132861852645874, + "logits/rejected": -2.9452521800994873, + "logps/chosen": -156.61114501953125, + "logps/rejected": -333.02667236328125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.146203517913818, + "rewards/margins": 7.453658103942871, + "rewards/rejected": -13.599861145019531, + "step": 12066 + }, + { + "epoch": 1.88, + "learning_rate": 5.296907515949998e-06, + "logits/chosen": -2.8514516353607178, + "logits/rejected": -3.0465681552886963, + "logps/chosen": -125.95679473876953, + "logps/rejected": -205.28355407714844, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.373716354370117, + "rewards/margins": 5.970244884490967, + "rewards/rejected": -12.343961715698242, + "step": 12067 + }, + { + "epoch": 1.88, + "learning_rate": 5.29617407541885e-06, + "logits/chosen": -2.0427663326263428, + "logits/rejected": -2.838808536529541, + "logps/chosen": -150.3561248779297, + "logps/rejected": -274.4060974121094, + "loss": 0.4213, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.093690395355225, + "rewards/margins": 2.399348258972168, + "rewards/rejected": -7.493038654327393, + "step": 12068 + }, + { + "epoch": 1.88, + "learning_rate": 5.2954406348877015e-06, + "logits/chosen": -2.400336265563965, + "logits/rejected": -2.9282541275024414, + "logps/chosen": -154.38934326171875, + "logps/rejected": -278.6739807128906, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.384549140930176, + "rewards/margins": 7.169665336608887, + "rewards/rejected": -14.554214477539062, + "step": 12069 + }, + { + "epoch": 1.88, + "learning_rate": 5.294707194356554e-06, + "logits/chosen": -1.0089505910873413, + "logits/rejected": -2.4694221019744873, + "logps/chosen": -138.05909729003906, + "logps/rejected": -466.625732421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.739347219467163, + "rewards/margins": 11.729310035705566, + "rewards/rejected": -15.468656539916992, + "step": 12070 + }, + { + "epoch": 1.88, + "learning_rate": 5.293973753825406e-06, + "logits/chosen": -2.413362979888916, + "logits/rejected": -2.5329370498657227, + "logps/chosen": -331.0600280761719, + "logps/rejected": -368.869140625, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.386183738708496, + "rewards/margins": 7.3060994148254395, + "rewards/rejected": -13.692283630371094, + "step": 12071 + }, + { + "epoch": 1.88, + "learning_rate": 5.293240313294258e-06, + "logits/chosen": -2.572730541229248, + "logits/rejected": -3.043982744216919, + "logps/chosen": -105.58529663085938, + "logps/rejected": -273.78765869140625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.546680927276611, + "rewards/margins": 7.736600875854492, + "rewards/rejected": -13.283281326293945, + "step": 12072 + }, + { + "epoch": 1.88, + "learning_rate": 5.29250687276311e-06, + "logits/chosen": -2.7490315437316895, + "logits/rejected": -2.6925575733184814, + "logps/chosen": -124.5665283203125, + "logps/rejected": -315.2062683105469, + "loss": 0.5547, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.107640266418457, + "rewards/margins": 5.505887985229492, + "rewards/rejected": -9.61352825164795, + "step": 12073 + }, + { + "epoch": 1.88, + "learning_rate": 5.291773432231963e-06, + "logits/chosen": -2.3586039543151855, + "logits/rejected": -2.8294215202331543, + "logps/chosen": -80.09742736816406, + "logps/rejected": -272.62603759765625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2250771522521973, + "rewards/margins": 5.764932632446289, + "rewards/rejected": -8.990009307861328, + "step": 12074 + }, + { + "epoch": 1.88, + "learning_rate": 5.2910399917008145e-06, + "logits/chosen": -1.3348664045333862, + "logits/rejected": -3.018505811691284, + "logps/chosen": -136.43930053710938, + "logps/rejected": -425.89447021484375, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.764077186584473, + "rewards/margins": 6.293257713317871, + "rewards/rejected": -11.057334899902344, + "step": 12075 + }, + { + "epoch": 1.88, + "learning_rate": 5.290306551169666e-06, + "logits/chosen": -2.8703243732452393, + "logits/rejected": -2.8285441398620605, + "logps/chosen": -130.84291076660156, + "logps/rejected": -256.76324462890625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6759724617004395, + "rewards/margins": 6.184206485748291, + "rewards/rejected": -8.86017894744873, + "step": 12076 + }, + { + "epoch": 1.88, + "learning_rate": 5.289573110638518e-06, + "logits/chosen": -2.3253746032714844, + "logits/rejected": -3.1417863368988037, + "logps/chosen": -105.58575439453125, + "logps/rejected": -362.8944396972656, + "loss": 0.0528, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3334760665893555, + "rewards/margins": 5.000144958496094, + "rewards/rejected": -11.33362102508545, + "step": 12077 + }, + { + "epoch": 1.88, + "learning_rate": 5.28883967010737e-06, + "logits/chosen": -1.1551152467727661, + "logits/rejected": -2.550196409225464, + "logps/chosen": -88.96834564208984, + "logps/rejected": -257.7664489746094, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.189859390258789, + "rewards/margins": 7.840715408325195, + "rewards/rejected": -13.030574798583984, + "step": 12078 + }, + { + "epoch": 1.88, + "learning_rate": 5.288106229576223e-06, + "logits/chosen": -1.494720458984375, + "logits/rejected": -2.464266061782837, + "logps/chosen": -101.6472396850586, + "logps/rejected": -343.62164306640625, + "loss": 0.0504, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.614036560058594, + "rewards/margins": 6.776397228240967, + "rewards/rejected": -14.390434265136719, + "step": 12079 + }, + { + "epoch": 1.88, + "learning_rate": 5.287372789045075e-06, + "logits/chosen": -2.4787161350250244, + "logits/rejected": -2.712296962738037, + "logps/chosen": -177.2545166015625, + "logps/rejected": -184.94210815429688, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2869977951049805, + "rewards/margins": 6.820369720458984, + "rewards/rejected": -12.107366561889648, + "step": 12080 + }, + { + "epoch": 1.88, + "learning_rate": 5.2866393485139274e-06, + "logits/chosen": -2.485273838043213, + "logits/rejected": -2.9688878059387207, + "logps/chosen": -135.23028564453125, + "logps/rejected": -350.93096923828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.362802505493164, + "rewards/margins": 10.966461181640625, + "rewards/rejected": -13.329263687133789, + "step": 12081 + }, + { + "epoch": 1.88, + "learning_rate": 5.285905907982779e-06, + "logits/chosen": -2.858508586883545, + "logits/rejected": -3.0045852661132812, + "logps/chosen": -330.20880126953125, + "logps/rejected": -265.0452880859375, + "loss": 0.2564, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.849447727203369, + "rewards/margins": 5.185765266418457, + "rewards/rejected": -10.035212516784668, + "step": 12082 + }, + { + "epoch": 1.88, + "learning_rate": 5.285172467451632e-06, + "logits/chosen": -1.4712382555007935, + "logits/rejected": -2.041780948638916, + "logps/chosen": -102.49272155761719, + "logps/rejected": -344.11260986328125, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.997376441955566, + "rewards/margins": 8.4073486328125, + "rewards/rejected": -14.404725074768066, + "step": 12083 + }, + { + "epoch": 1.88, + "learning_rate": 5.284439026920484e-06, + "logits/chosen": -2.3508384227752686, + "logits/rejected": -3.012322187423706, + "logps/chosen": -583.1492919921875, + "logps/rejected": -537.2688598632812, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1485977172851562, + "rewards/margins": 4.23468017578125, + "rewards/rejected": -7.383277893066406, + "step": 12084 + }, + { + "epoch": 1.88, + "learning_rate": 5.283705586389336e-06, + "logits/chosen": -2.402334451675415, + "logits/rejected": -2.9692788124084473, + "logps/chosen": -257.969970703125, + "logps/rejected": -456.474609375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.570398330688477, + "rewards/margins": 10.528467178344727, + "rewards/rejected": -16.098865509033203, + "step": 12085 + }, + { + "epoch": 1.88, + "learning_rate": 5.282972145858188e-06, + "logits/chosen": -1.674889087677002, + "logits/rejected": -2.0586605072021484, + "logps/chosen": -204.61314392089844, + "logps/rejected": -343.9673767089844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.831578493118286, + "rewards/margins": 9.598791122436523, + "rewards/rejected": -13.430370330810547, + "step": 12086 + }, + { + "epoch": 1.88, + "learning_rate": 5.2822387053270395e-06, + "logits/chosen": -2.5113208293914795, + "logits/rejected": -2.9119250774383545, + "logps/chosen": -160.00335693359375, + "logps/rejected": -256.0407409667969, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.656922340393066, + "rewards/margins": 8.266576766967773, + "rewards/rejected": -12.923500061035156, + "step": 12087 + }, + { + "epoch": 1.88, + "learning_rate": 5.281505264795892e-06, + "logits/chosen": -2.030607223510742, + "logits/rejected": -2.1659131050109863, + "logps/chosen": -217.62522888183594, + "logps/rejected": -388.3498229980469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.101536750793457, + "rewards/margins": 10.47984504699707, + "rewards/rejected": -16.581382751464844, + "step": 12088 + }, + { + "epoch": 1.88, + "learning_rate": 5.280771824264744e-06, + "logits/chosen": -2.287074089050293, + "logits/rejected": -2.981990337371826, + "logps/chosen": -223.3723602294922, + "logps/rejected": -270.95062255859375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7619218826293945, + "rewards/margins": 6.680663108825684, + "rewards/rejected": -12.442584991455078, + "step": 12089 + }, + { + "epoch": 1.88, + "learning_rate": 5.280038383733596e-06, + "logits/chosen": -1.7959898710250854, + "logits/rejected": -2.7602646350860596, + "logps/chosen": -324.4146728515625, + "logps/rejected": -480.07684326171875, + "loss": 0.7016, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.815848350524902, + "rewards/margins": 9.327848434448242, + "rewards/rejected": -16.143695831298828, + "step": 12090 + }, + { + "epoch": 1.88, + "learning_rate": 5.279304943202448e-06, + "logits/chosen": -2.3904876708984375, + "logits/rejected": -2.7977499961853027, + "logps/chosen": -344.5698547363281, + "logps/rejected": -442.6854248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3889007568359375, + "rewards/margins": 9.791046142578125, + "rewards/rejected": -14.179946899414062, + "step": 12091 + }, + { + "epoch": 1.88, + "learning_rate": 5.278571502671301e-06, + "logits/chosen": -2.5119142532348633, + "logits/rejected": -2.8524272441864014, + "logps/chosen": -291.1952819824219, + "logps/rejected": -390.8548278808594, + "loss": 0.057, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6095008850097656, + "rewards/margins": 7.35977840423584, + "rewards/rejected": -10.969279289245605, + "step": 12092 + }, + { + "epoch": 1.88, + "learning_rate": 5.2778380621401525e-06, + "logits/chosen": -2.3367362022399902, + "logits/rejected": -3.0564818382263184, + "logps/chosen": -81.97972106933594, + "logps/rejected": -187.9178924560547, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.570994853973389, + "rewards/margins": 5.909331798553467, + "rewards/rejected": -10.480326652526855, + "step": 12093 + }, + { + "epoch": 1.88, + "learning_rate": 5.277104621609004e-06, + "logits/chosen": -1.3873867988586426, + "logits/rejected": -2.7586076259613037, + "logps/chosen": -200.74754333496094, + "logps/rejected": -404.0528564453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5634491443634033, + "rewards/margins": 8.447214126586914, + "rewards/rejected": -11.010663032531738, + "step": 12094 + }, + { + "epoch": 1.88, + "learning_rate": 5.276371181077856e-06, + "logits/chosen": -2.303605794906616, + "logits/rejected": -2.884456157684326, + "logps/chosen": -423.0096740722656, + "logps/rejected": -520.6966552734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1192994117736816, + "rewards/margins": 10.42982292175293, + "rewards/rejected": -13.549123764038086, + "step": 12095 + }, + { + "epoch": 1.88, + "learning_rate": 5.275637740546708e-06, + "logits/chosen": -1.535848617553711, + "logits/rejected": -2.703758716583252, + "logps/chosen": -152.25437927246094, + "logps/rejected": -361.4864501953125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.192154407501221, + "rewards/margins": 10.902536392211914, + "rewards/rejected": -15.094690322875977, + "step": 12096 + }, + { + "epoch": 1.88, + "learning_rate": 5.274904300015561e-06, + "logits/chosen": -1.6981558799743652, + "logits/rejected": -2.8577442169189453, + "logps/chosen": -208.810546875, + "logps/rejected": -561.4602661132812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.221011638641357, + "rewards/margins": 10.26611328125, + "rewards/rejected": -15.487125396728516, + "step": 12097 + }, + { + "epoch": 1.88, + "learning_rate": 5.274170859484414e-06, + "logits/chosen": -2.800424098968506, + "logits/rejected": -1.9075440168380737, + "logps/chosen": -646.4371337890625, + "logps/rejected": -453.8692626953125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.261562347412109, + "rewards/margins": 6.571430206298828, + "rewards/rejected": -13.832992553710938, + "step": 12098 + }, + { + "epoch": 1.88, + "learning_rate": 5.2734374189532655e-06, + "logits/chosen": -1.105757236480713, + "logits/rejected": -2.617079019546509, + "logps/chosen": -90.69410705566406, + "logps/rejected": -443.17437744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.256181240081787, + "rewards/margins": 12.587048530578613, + "rewards/rejected": -18.843231201171875, + "step": 12099 + }, + { + "epoch": 1.88, + "learning_rate": 5.272703978422117e-06, + "logits/chosen": -2.922842025756836, + "logits/rejected": -2.9597878456115723, + "logps/chosen": -183.20755004882812, + "logps/rejected": -252.33428955078125, + "loss": 0.4509, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.767575263977051, + "rewards/margins": 2.793215036392212, + "rewards/rejected": -7.560790061950684, + "step": 12100 + }, + { + "epoch": 1.88, + "learning_rate": 5.27197053789097e-06, + "logits/chosen": -1.1866575479507446, + "logits/rejected": -2.5657052993774414, + "logps/chosen": -161.93392944335938, + "logps/rejected": -566.1466064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.826666831970215, + "rewards/margins": 12.996726989746094, + "rewards/rejected": -15.823394775390625, + "step": 12101 + }, + { + "epoch": 1.88, + "learning_rate": 5.271237097359822e-06, + "logits/chosen": -2.684969902038574, + "logits/rejected": -3.008350133895874, + "logps/chosen": -106.78636169433594, + "logps/rejected": -160.08815002441406, + "loss": 0.3984, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.580572605133057, + "rewards/margins": 3.4350860118865967, + "rewards/rejected": -9.015658378601074, + "step": 12102 + }, + { + "epoch": 1.88, + "learning_rate": 5.270503656828674e-06, + "logits/chosen": -0.43932563066482544, + "logits/rejected": -2.959104061126709, + "logps/chosen": -137.59954833984375, + "logps/rejected": -606.0416259765625, + "loss": 0.3712, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.897249221801758, + "rewards/margins": 4.0764312744140625, + "rewards/rejected": -12.97368049621582, + "step": 12103 + }, + { + "epoch": 1.88, + "learning_rate": 5.269770216297526e-06, + "logits/chosen": -1.2995011806488037, + "logits/rejected": -2.6747934818267822, + "logps/chosen": -428.95831298828125, + "logps/rejected": -453.95123291015625, + "loss": 0.4807, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.31956672668457, + "rewards/margins": 2.429612159729004, + "rewards/rejected": -8.749178886413574, + "step": 12104 + }, + { + "epoch": 1.88, + "learning_rate": 5.269036775766378e-06, + "logits/chosen": -1.9253536462783813, + "logits/rejected": -2.9937074184417725, + "logps/chosen": -359.9700012207031, + "logps/rejected": -642.8024291992188, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.781167984008789, + "rewards/margins": 8.777552604675293, + "rewards/rejected": -13.558719635009766, + "step": 12105 + }, + { + "epoch": 1.88, + "learning_rate": 5.26830333523523e-06, + "logits/chosen": -2.6857714653015137, + "logits/rejected": -1.6193677186965942, + "logps/chosen": -478.3311767578125, + "logps/rejected": -388.2983703613281, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.823030471801758, + "rewards/margins": 6.233617782592773, + "rewards/rejected": -13.056648254394531, + "step": 12106 + }, + { + "epoch": 1.88, + "learning_rate": 5.267569894704082e-06, + "logits/chosen": -2.470301628112793, + "logits/rejected": -2.2343368530273438, + "logps/chosen": -520.5277099609375, + "logps/rejected": -459.0302734375, + "loss": 0.052, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.792205810546875, + "rewards/margins": 6.890466690063477, + "rewards/rejected": -12.682672500610352, + "step": 12107 + }, + { + "epoch": 1.88, + "learning_rate": 5.266836454172934e-06, + "logits/chosen": -3.024216413497925, + "logits/rejected": -3.0887818336486816, + "logps/chosen": -113.1415023803711, + "logps/rejected": -189.55889892578125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2306392192840576, + "rewards/margins": 6.4046831130981445, + "rewards/rejected": -8.635322570800781, + "step": 12108 + }, + { + "epoch": 1.88, + "learning_rate": 5.266103013641786e-06, + "logits/chosen": -1.7915605306625366, + "logits/rejected": -2.6450114250183105, + "logps/chosen": -134.11740112304688, + "logps/rejected": -278.6809997558594, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.373241901397705, + "rewards/margins": 6.595852851867676, + "rewards/rejected": -11.969094276428223, + "step": 12109 + }, + { + "epoch": 1.88, + "learning_rate": 5.265369573110639e-06, + "logits/chosen": -2.7188820838928223, + "logits/rejected": -3.034771680831909, + "logps/chosen": -85.63954162597656, + "logps/rejected": -249.23324584960938, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7720699310302734, + "rewards/margins": 8.484317779541016, + "rewards/rejected": -11.256387710571289, + "step": 12110 + }, + { + "epoch": 1.88, + "learning_rate": 5.2646361325794906e-06, + "logits/chosen": -2.7553412914276123, + "logits/rejected": -3.0316693782806396, + "logps/chosen": -345.58245849609375, + "logps/rejected": -524.2745971679688, + "loss": 1.7215, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.309833526611328, + "rewards/margins": 0.30107641220092773, + "rewards/rejected": -9.610909461975098, + "step": 12111 + }, + { + "epoch": 1.88, + "learning_rate": 5.2639026920483424e-06, + "logits/chosen": -2.650818347930908, + "logits/rejected": -1.89729905128479, + "logps/chosen": -296.34814453125, + "logps/rejected": -329.699951171875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.393899440765381, + "rewards/margins": 7.668789863586426, + "rewards/rejected": -13.062688827514648, + "step": 12112 + }, + { + "epoch": 1.88, + "learning_rate": 5.263169251517194e-06, + "logits/chosen": -2.6711249351501465, + "logits/rejected": -2.8710498809814453, + "logps/chosen": -344.79736328125, + "logps/rejected": -863.4248046875, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.194782733917236, + "rewards/margins": 5.214311599731445, + "rewards/rejected": -12.409093856811523, + "step": 12113 + }, + { + "epoch": 1.88, + "learning_rate": 5.262435810986047e-06, + "logits/chosen": -2.217005491256714, + "logits/rejected": -2.6971542835235596, + "logps/chosen": -189.3860626220703, + "logps/rejected": -228.4775390625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.376730918884277, + "rewards/margins": 6.0302324295043945, + "rewards/rejected": -11.406963348388672, + "step": 12114 + }, + { + "epoch": 1.88, + "learning_rate": 5.2617023704549e-06, + "logits/chosen": -2.374667167663574, + "logits/rejected": -2.244816780090332, + "logps/chosen": -232.512939453125, + "logps/rejected": -421.0475158691406, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.748156547546387, + "rewards/margins": 10.5084228515625, + "rewards/rejected": -15.256579399108887, + "step": 12115 + }, + { + "epoch": 1.88, + "learning_rate": 5.260968929923752e-06, + "logits/chosen": -1.6193418502807617, + "logits/rejected": -2.5170273780822754, + "logps/chosen": -144.8525390625, + "logps/rejected": -411.0906066894531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.660835266113281, + "rewards/margins": 12.08736801147461, + "rewards/rejected": -17.74820327758789, + "step": 12116 + }, + { + "epoch": 1.88, + "learning_rate": 5.2602354893926035e-06, + "logits/chosen": -2.8420326709747314, + "logits/rejected": -2.8731436729431152, + "logps/chosen": -85.91952514648438, + "logps/rejected": -262.5419616699219, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9813718795776367, + "rewards/margins": 7.412662506103516, + "rewards/rejected": -10.394034385681152, + "step": 12117 + }, + { + "epoch": 1.88, + "learning_rate": 5.259502048861455e-06, + "logits/chosen": -2.2754063606262207, + "logits/rejected": -2.516383647918701, + "logps/chosen": -186.8159637451172, + "logps/rejected": -402.8523254394531, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.535494804382324, + "rewards/margins": 8.357946395874023, + "rewards/rejected": -15.893442153930664, + "step": 12118 + }, + { + "epoch": 1.88, + "learning_rate": 5.258768608330308e-06, + "logits/chosen": -2.8184332847595215, + "logits/rejected": -2.929682731628418, + "logps/chosen": -272.895263671875, + "logps/rejected": -362.1295166015625, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.578216791152954, + "rewards/margins": 6.757818222045898, + "rewards/rejected": -9.336034774780273, + "step": 12119 + }, + { + "epoch": 1.88, + "learning_rate": 5.25803516779916e-06, + "logits/chosen": -2.3038618564605713, + "logits/rejected": -2.8473639488220215, + "logps/chosen": -205.9753875732422, + "logps/rejected": -335.05267333984375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.078119277954102, + "rewards/margins": 6.656462669372559, + "rewards/rejected": -12.734580993652344, + "step": 12120 + }, + { + "epoch": 1.89, + "learning_rate": 5.257301727268012e-06, + "logits/chosen": -3.095317840576172, + "logits/rejected": -2.7960400581359863, + "logps/chosen": -144.0960235595703, + "logps/rejected": -106.24725341796875, + "loss": 2.4346, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.662069320678711, + "rewards/margins": 0.316969633102417, + "rewards/rejected": -6.979039192199707, + "step": 12121 + }, + { + "epoch": 1.89, + "learning_rate": 5.256568286736864e-06, + "logits/chosen": -2.618096113204956, + "logits/rejected": -2.8622848987579346, + "logps/chosen": -68.45232391357422, + "logps/rejected": -199.55404663085938, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.500401258468628, + "rewards/margins": 8.649575233459473, + "rewards/rejected": -11.14997673034668, + "step": 12122 + }, + { + "epoch": 1.89, + "learning_rate": 5.2558348462057165e-06, + "logits/chosen": -2.157052993774414, + "logits/rejected": -2.884227991104126, + "logps/chosen": -496.2929992675781, + "logps/rejected": -653.1770629882812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.801662445068359, + "rewards/margins": 12.216239929199219, + "rewards/rejected": -19.017902374267578, + "step": 12123 + }, + { + "epoch": 1.89, + "learning_rate": 5.255101405674568e-06, + "logits/chosen": -2.842874050140381, + "logits/rejected": -2.2266523838043213, + "logps/chosen": -254.24977111816406, + "logps/rejected": -349.9032897949219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.855196952819824, + "rewards/margins": 10.520532608032227, + "rewards/rejected": -15.375730514526367, + "step": 12124 + }, + { + "epoch": 1.89, + "learning_rate": 5.25436796514342e-06, + "logits/chosen": -2.8825201988220215, + "logits/rejected": -3.0418150424957275, + "logps/chosen": -121.5291519165039, + "logps/rejected": -161.88754272460938, + "loss": 0.3411, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.581371307373047, + "rewards/margins": 2.9908652305603027, + "rewards/rejected": -9.572237014770508, + "step": 12125 + }, + { + "epoch": 1.89, + "learning_rate": 5.253634524612272e-06, + "logits/chosen": -2.8503592014312744, + "logits/rejected": -3.0311625003814697, + "logps/chosen": -363.21142578125, + "logps/rejected": -501.0030212402344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.90324592590332, + "rewards/margins": 8.775532722473145, + "rewards/rejected": -13.678777694702148, + "step": 12126 + }, + { + "epoch": 1.89, + "learning_rate": 5.252901084081124e-06, + "logits/chosen": -2.3597819805145264, + "logits/rejected": -2.827333927154541, + "logps/chosen": -227.85508728027344, + "logps/rejected": -369.58074951171875, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.662934303283691, + "rewards/margins": 6.402454853057861, + "rewards/rejected": -11.065389633178711, + "step": 12127 + }, + { + "epoch": 1.89, + "learning_rate": 5.252167643549977e-06, + "logits/chosen": -2.9364967346191406, + "logits/rejected": -2.3476216793060303, + "logps/chosen": -156.28651428222656, + "logps/rejected": -240.8629913330078, + "loss": 0.7596, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.179677963256836, + "rewards/margins": 5.234489917755127, + "rewards/rejected": -10.414167404174805, + "step": 12128 + }, + { + "epoch": 1.89, + "learning_rate": 5.251434203018829e-06, + "logits/chosen": -2.2453832626342773, + "logits/rejected": -2.9472923278808594, + "logps/chosen": -92.85347747802734, + "logps/rejected": -190.50927734375, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.276061058044434, + "rewards/margins": 5.8652262687683105, + "rewards/rejected": -11.141286849975586, + "step": 12129 + }, + { + "epoch": 1.89, + "learning_rate": 5.2507007624876805e-06, + "logits/chosen": -1.8733352422714233, + "logits/rejected": -2.6207234859466553, + "logps/chosen": -98.4715576171875, + "logps/rejected": -453.9571533203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4566850662231445, + "rewards/margins": 10.880903244018555, + "rewards/rejected": -17.337589263916016, + "step": 12130 + }, + { + "epoch": 1.89, + "learning_rate": 5.249967321956533e-06, + "logits/chosen": -1.6722030639648438, + "logits/rejected": -2.3652191162109375, + "logps/chosen": -259.3042907714844, + "logps/rejected": -278.1488342285156, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.068042755126953, + "rewards/margins": 5.612384796142578, + "rewards/rejected": -13.680427551269531, + "step": 12131 + }, + { + "epoch": 1.89, + "learning_rate": 5.249233881425386e-06, + "logits/chosen": -2.7156660556793213, + "logits/rejected": -2.2542598247528076, + "logps/chosen": -351.3894958496094, + "logps/rejected": -456.1198425292969, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.021495819091797, + "rewards/margins": 8.26180362701416, + "rewards/rejected": -15.283299446105957, + "step": 12132 + }, + { + "epoch": 1.89, + "learning_rate": 5.248500440894238e-06, + "logits/chosen": -1.7424936294555664, + "logits/rejected": -2.4936087131500244, + "logps/chosen": -186.48416137695312, + "logps/rejected": -375.459716796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.496947765350342, + "rewards/margins": 10.289714813232422, + "rewards/rejected": -15.786663055419922, + "step": 12133 + }, + { + "epoch": 1.89, + "learning_rate": 5.24776700036309e-06, + "logits/chosen": -2.9516208171844482, + "logits/rejected": -2.851123571395874, + "logps/chosen": -440.8996276855469, + "logps/rejected": -452.75201416015625, + "loss": 0.2971, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.383277893066406, + "rewards/margins": 5.488705635070801, + "rewards/rejected": -12.871983528137207, + "step": 12134 + }, + { + "epoch": 1.89, + "learning_rate": 5.2470335598319416e-06, + "logits/chosen": -2.309986114501953, + "logits/rejected": -2.7059690952301025, + "logps/chosen": -167.85748291015625, + "logps/rejected": -169.35833740234375, + "loss": 0.2242, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.842089653015137, + "rewards/margins": 4.160769462585449, + "rewards/rejected": -12.002859115600586, + "step": 12135 + }, + { + "epoch": 1.89, + "learning_rate": 5.2463001193007934e-06, + "logits/chosen": -2.5726430416107178, + "logits/rejected": -2.176842451095581, + "logps/chosen": -277.13336181640625, + "logps/rejected": -259.28912353515625, + "loss": 0.372, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.342519760131836, + "rewards/margins": 4.822775840759277, + "rewards/rejected": -13.165295600891113, + "step": 12136 + }, + { + "epoch": 1.89, + "learning_rate": 5.245566678769646e-06, + "logits/chosen": -2.5508460998535156, + "logits/rejected": -1.8235774040222168, + "logps/chosen": -246.08453369140625, + "logps/rejected": -220.12542724609375, + "loss": 0.9699, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.999292850494385, + "rewards/margins": 1.3403899669647217, + "rewards/rejected": -7.339682579040527, + "step": 12137 + }, + { + "epoch": 1.89, + "learning_rate": 5.244833238238498e-06, + "logits/chosen": -2.6555068492889404, + "logits/rejected": -2.8609232902526855, + "logps/chosen": -492.0148010253906, + "logps/rejected": -523.8451538085938, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.16894245147705, + "rewards/margins": 4.463701248168945, + "rewards/rejected": -12.632643699645996, + "step": 12138 + }, + { + "epoch": 1.89, + "learning_rate": 5.24409979770735e-06, + "logits/chosen": -1.2709159851074219, + "logits/rejected": -2.7349495887756348, + "logps/chosen": -172.86444091796875, + "logps/rejected": -368.8743591308594, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.090975284576416, + "rewards/margins": 7.033193588256836, + "rewards/rejected": -12.12416934967041, + "step": 12139 + }, + { + "epoch": 1.89, + "learning_rate": 5.243366357176202e-06, + "logits/chosen": -1.573798656463623, + "logits/rejected": -2.5093653202056885, + "logps/chosen": -101.56077575683594, + "logps/rejected": -283.5675354003906, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.381231307983398, + "rewards/margins": 9.334541320800781, + "rewards/rejected": -15.71577262878418, + "step": 12140 + }, + { + "epoch": 1.89, + "learning_rate": 5.2426329166450545e-06, + "logits/chosen": -2.788717746734619, + "logits/rejected": -2.4033875465393066, + "logps/chosen": -366.97540283203125, + "logps/rejected": -596.3471069335938, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.592180252075195, + "rewards/margins": 12.139718055725098, + "rewards/rejected": -20.731897354125977, + "step": 12141 + }, + { + "epoch": 1.89, + "learning_rate": 5.241899476113906e-06, + "logits/chosen": -2.818061113357544, + "logits/rejected": -2.205775260925293, + "logps/chosen": -143.14871215820312, + "logps/rejected": -287.8394775390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.093564033508301, + "rewards/margins": 8.957908630371094, + "rewards/rejected": -14.051472663879395, + "step": 12142 + }, + { + "epoch": 1.89, + "learning_rate": 5.241166035582758e-06, + "logits/chosen": -2.4214937686920166, + "logits/rejected": -1.750351071357727, + "logps/chosen": -237.9481201171875, + "logps/rejected": -234.42538452148438, + "loss": 0.0884, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.788790225982666, + "rewards/margins": 6.839507102966309, + "rewards/rejected": -11.628297805786133, + "step": 12143 + }, + { + "epoch": 1.89, + "learning_rate": 5.24043259505161e-06, + "logits/chosen": -2.514439105987549, + "logits/rejected": -3.0892257690429688, + "logps/chosen": -526.4984741210938, + "logps/rejected": -601.0332641601562, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5803327560424805, + "rewards/margins": 8.964323043823242, + "rewards/rejected": -16.544654846191406, + "step": 12144 + }, + { + "epoch": 1.89, + "learning_rate": 5.239699154520462e-06, + "logits/chosen": -2.873549699783325, + "logits/rejected": -2.805222988128662, + "logps/chosen": -100.00852966308594, + "logps/rejected": -147.04791259765625, + "loss": 1.7852, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.059955596923828, + "rewards/margins": 2.0602149963378906, + "rewards/rejected": -10.120170593261719, + "step": 12145 + }, + { + "epoch": 1.89, + "learning_rate": 5.238965713989315e-06, + "logits/chosen": -2.648037910461426, + "logits/rejected": -3.0608415603637695, + "logps/chosen": -700.3956298828125, + "logps/rejected": -605.52099609375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.971230506896973, + "rewards/margins": 6.449099063873291, + "rewards/rejected": -13.420330047607422, + "step": 12146 + }, + { + "epoch": 1.89, + "learning_rate": 5.238232273458167e-06, + "logits/chosen": -2.934452533721924, + "logits/rejected": -2.8795254230499268, + "logps/chosen": -139.54522705078125, + "logps/rejected": -345.839111328125, + "loss": 0.5093, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.919167518615723, + "rewards/margins": 3.941802740097046, + "rewards/rejected": -9.860970497131348, + "step": 12147 + }, + { + "epoch": 1.89, + "learning_rate": 5.237498832927019e-06, + "logits/chosen": -2.2166080474853516, + "logits/rejected": -2.9835433959960938, + "logps/chosen": -97.21900939941406, + "logps/rejected": -217.67648315429688, + "loss": 1.1705, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.32594108581543, + "rewards/margins": -0.6985814571380615, + "rewards/rejected": -5.627359867095947, + "step": 12148 + }, + { + "epoch": 1.89, + "learning_rate": 5.236765392395871e-06, + "logits/chosen": -2.0216312408447266, + "logits/rejected": -2.642449378967285, + "logps/chosen": -222.70199584960938, + "logps/rejected": -559.5089111328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9177751541137695, + "rewards/margins": 9.520147323608398, + "rewards/rejected": -16.437923431396484, + "step": 12149 + }, + { + "epoch": 1.89, + "learning_rate": 5.236031951864724e-06, + "logits/chosen": -2.6222095489501953, + "logits/rejected": -3.0804924964904785, + "logps/chosen": -194.96890258789062, + "logps/rejected": -254.31484985351562, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.240973949432373, + "rewards/margins": 8.599967002868652, + "rewards/rejected": -12.840940475463867, + "step": 12150 + }, + { + "epoch": 1.89, + "learning_rate": 5.235298511333576e-06, + "logits/chosen": -2.0867371559143066, + "logits/rejected": -2.6379432678222656, + "logps/chosen": -172.82809448242188, + "logps/rejected": -455.5568542480469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7987191677093506, + "rewards/margins": 13.15185546875, + "rewards/rejected": -15.95057487487793, + "step": 12151 + }, + { + "epoch": 1.89, + "learning_rate": 5.234565070802428e-06, + "logits/chosen": -1.1246790885925293, + "logits/rejected": -2.767367362976074, + "logps/chosen": -110.09111022949219, + "logps/rejected": -350.1361999511719, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.316812515258789, + "rewards/margins": 6.826525688171387, + "rewards/rejected": -12.14333724975586, + "step": 12152 + }, + { + "epoch": 1.89, + "learning_rate": 5.23383163027128e-06, + "logits/chosen": -2.170502185821533, + "logits/rejected": -2.8500993251800537, + "logps/chosen": -176.87066650390625, + "logps/rejected": -521.7420043945312, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.109294891357422, + "rewards/margins": 8.395650863647461, + "rewards/rejected": -13.504945755004883, + "step": 12153 + }, + { + "epoch": 1.89, + "learning_rate": 5.233098189740132e-06, + "logits/chosen": -2.8134093284606934, + "logits/rejected": -2.80655574798584, + "logps/chosen": -477.8226318359375, + "logps/rejected": -583.7699584960938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.970404148101807, + "rewards/margins": 9.704339981079102, + "rewards/rejected": -15.67474365234375, + "step": 12154 + }, + { + "epoch": 1.89, + "learning_rate": 5.232364749208984e-06, + "logits/chosen": -2.6908445358276367, + "logits/rejected": -2.873244047164917, + "logps/chosen": -113.124755859375, + "logps/rejected": -370.34283447265625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.348133087158203, + "rewards/margins": 8.554101943969727, + "rewards/rejected": -13.90223503112793, + "step": 12155 + }, + { + "epoch": 1.89, + "learning_rate": 5.231631308677836e-06, + "logits/chosen": -2.026379346847534, + "logits/rejected": -2.819395065307617, + "logps/chosen": -458.32861328125, + "logps/rejected": -960.8165283203125, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.540472507476807, + "rewards/margins": 8.616374015808105, + "rewards/rejected": -15.15684700012207, + "step": 12156 + }, + { + "epoch": 1.89, + "learning_rate": 5.230897868146688e-06, + "logits/chosen": -2.258742094039917, + "logits/rejected": -2.719923973083496, + "logps/chosen": -192.00677490234375, + "logps/rejected": -293.1326904296875, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.953411102294922, + "rewards/margins": 5.598500728607178, + "rewards/rejected": -10.551912307739258, + "step": 12157 + }, + { + "epoch": 1.89, + "learning_rate": 5.23016442761554e-06, + "logits/chosen": -2.7547245025634766, + "logits/rejected": -1.9294768571853638, + "logps/chosen": -360.604248046875, + "logps/rejected": -263.2920837402344, + "loss": 0.0304, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0942230224609375, + "rewards/margins": 3.4849467277526855, + "rewards/rejected": -8.579170227050781, + "step": 12158 + }, + { + "epoch": 1.89, + "learning_rate": 5.2294309870843926e-06, + "logits/chosen": -2.941807270050049, + "logits/rejected": -2.1821844577789307, + "logps/chosen": -283.9241027832031, + "logps/rejected": -150.0143585205078, + "loss": 1.9825, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.375568866729736, + "rewards/margins": 3.7391843795776367, + "rewards/rejected": -10.114753723144531, + "step": 12159 + }, + { + "epoch": 1.89, + "learning_rate": 5.2286975465532444e-06, + "logits/chosen": -1.6390607357025146, + "logits/rejected": -2.9153568744659424, + "logps/chosen": -140.89907836914062, + "logps/rejected": -568.2125244140625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.206796646118164, + "rewards/margins": 10.039100646972656, + "rewards/rejected": -15.24589729309082, + "step": 12160 + }, + { + "epoch": 1.89, + "learning_rate": 5.227964106022096e-06, + "logits/chosen": -2.91762375831604, + "logits/rejected": -2.9852488040924072, + "logps/chosen": -189.1488037109375, + "logps/rejected": -173.5841064453125, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.790249824523926, + "rewards/margins": 5.476950168609619, + "rewards/rejected": -10.267200469970703, + "step": 12161 + }, + { + "epoch": 1.89, + "learning_rate": 5.227230665490948e-06, + "logits/chosen": -2.768202066421509, + "logits/rejected": -2.8190066814422607, + "logps/chosen": -143.0144805908203, + "logps/rejected": -406.71417236328125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.607663631439209, + "rewards/margins": 9.61609172821045, + "rewards/rejected": -15.2237548828125, + "step": 12162 + }, + { + "epoch": 1.89, + "learning_rate": 5.226497224959801e-06, + "logits/chosen": -1.1849020719528198, + "logits/rejected": -1.9404997825622559, + "logps/chosen": -238.30308532714844, + "logps/rejected": -424.7081604003906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.300292015075684, + "rewards/margins": 11.84589672088623, + "rewards/rejected": -18.146188735961914, + "step": 12163 + }, + { + "epoch": 1.89, + "learning_rate": 5.225763784428653e-06, + "logits/chosen": -2.6498613357543945, + "logits/rejected": -2.72412371635437, + "logps/chosen": -154.40570068359375, + "logps/rejected": -255.5662384033203, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8316497802734375, + "rewards/margins": 6.012270927429199, + "rewards/rejected": -10.843920707702637, + "step": 12164 + }, + { + "epoch": 1.89, + "learning_rate": 5.2250303438975055e-06, + "logits/chosen": -3.050861358642578, + "logits/rejected": -2.275790214538574, + "logps/chosen": -213.05039978027344, + "logps/rejected": -227.81956481933594, + "loss": 3.2148, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.703239440917969, + "rewards/margins": 2.7608141899108887, + "rewards/rejected": -10.4640531539917, + "step": 12165 + }, + { + "epoch": 1.89, + "learning_rate": 5.224296903366357e-06, + "logits/chosen": -2.7151031494140625, + "logits/rejected": -3.035219192504883, + "logps/chosen": -668.9228515625, + "logps/rejected": -746.594482421875, + "loss": 0.0379, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6697096824646, + "rewards/margins": 13.037618637084961, + "rewards/rejected": -17.70732879638672, + "step": 12166 + }, + { + "epoch": 1.89, + "learning_rate": 5.223563462835209e-06, + "logits/chosen": -1.8175780773162842, + "logits/rejected": -2.735273838043213, + "logps/chosen": -236.67550659179688, + "logps/rejected": -405.73974609375, + "loss": 1.4114, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.264381408691406, + "rewards/margins": 4.240614414215088, + "rewards/rejected": -12.504995346069336, + "step": 12167 + }, + { + "epoch": 1.89, + "learning_rate": 5.222830022304062e-06, + "logits/chosen": -1.7140132188796997, + "logits/rejected": -2.7613346576690674, + "logps/chosen": -102.06039428710938, + "logps/rejected": -381.65472412109375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6959228515625, + "rewards/margins": 6.927182674407959, + "rewards/rejected": -12.623106002807617, + "step": 12168 + }, + { + "epoch": 1.89, + "learning_rate": 5.222096581772914e-06, + "logits/chosen": -1.247599720954895, + "logits/rejected": -2.3531312942504883, + "logps/chosen": -164.66110229492188, + "logps/rejected": -419.7791748046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.899742126464844, + "rewards/margins": 9.216687202453613, + "rewards/rejected": -18.116430282592773, + "step": 12169 + }, + { + "epoch": 1.89, + "learning_rate": 5.221363141241766e-06, + "logits/chosen": -2.250227212905884, + "logits/rejected": -2.64371919631958, + "logps/chosen": -268.4402770996094, + "logps/rejected": -483.50567626953125, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.56097936630249, + "rewards/margins": 7.066761016845703, + "rewards/rejected": -13.627740859985352, + "step": 12170 + }, + { + "epoch": 1.89, + "learning_rate": 5.220629700710618e-06, + "logits/chosen": -2.8682162761688232, + "logits/rejected": -2.9107213020324707, + "logps/chosen": -306.5279541015625, + "logps/rejected": -458.33050537109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.45413064956665, + "rewards/margins": 9.426273345947266, + "rewards/rejected": -15.880403518676758, + "step": 12171 + }, + { + "epoch": 1.89, + "learning_rate": 5.21989626017947e-06, + "logits/chosen": -2.525547981262207, + "logits/rejected": -3.036609649658203, + "logps/chosen": -379.821044921875, + "logps/rejected": -455.9982604980469, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.413113594055176, + "rewards/margins": 3.9600982666015625, + "rewards/rejected": -8.373211860656738, + "step": 12172 + }, + { + "epoch": 1.89, + "learning_rate": 5.219162819648322e-06, + "logits/chosen": -2.714092254638672, + "logits/rejected": -2.1004960536956787, + "logps/chosen": -304.39825439453125, + "logps/rejected": -391.3011474609375, + "loss": 0.158, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.768508434295654, + "rewards/margins": 3.864607334136963, + "rewards/rejected": -10.633115768432617, + "step": 12173 + }, + { + "epoch": 1.89, + "learning_rate": 5.218429379117174e-06, + "logits/chosen": -2.880545139312744, + "logits/rejected": -1.3595449924468994, + "logps/chosen": -745.9647216796875, + "logps/rejected": -399.23870849609375, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.434337615966797, + "rewards/margins": 7.718666076660156, + "rewards/rejected": -14.153003692626953, + "step": 12174 + }, + { + "epoch": 1.89, + "learning_rate": 5.217695938586026e-06, + "logits/chosen": -2.6159560680389404, + "logits/rejected": -1.9791110754013062, + "logps/chosen": -219.98873901367188, + "logps/rejected": -152.09927368164062, + "loss": 0.0619, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.308578014373779, + "rewards/margins": 3.480356216430664, + "rewards/rejected": -9.788933753967285, + "step": 12175 + }, + { + "epoch": 1.89, + "learning_rate": 5.216962498054878e-06, + "logits/chosen": -2.6230108737945557, + "logits/rejected": -2.8119843006134033, + "logps/chosen": -188.4472198486328, + "logps/rejected": -456.9834899902344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2538323402404785, + "rewards/margins": 10.591829299926758, + "rewards/rejected": -13.845662117004395, + "step": 12176 + }, + { + "epoch": 1.89, + "learning_rate": 5.216229057523731e-06, + "logits/chosen": -2.559708833694458, + "logits/rejected": -2.9486167430877686, + "logps/chosen": -137.7151641845703, + "logps/rejected": -331.5059814453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.373922109603882, + "rewards/margins": 8.948278427124023, + "rewards/rejected": -12.322200775146484, + "step": 12177 + }, + { + "epoch": 1.89, + "learning_rate": 5.2154956169925825e-06, + "logits/chosen": -3.0055911540985107, + "logits/rejected": -2.6637394428253174, + "logps/chosen": -250.52391052246094, + "logps/rejected": -263.64422607421875, + "loss": 1.8008, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.156730651855469, + "rewards/margins": 1.7112274169921875, + "rewards/rejected": -11.867958068847656, + "step": 12178 + }, + { + "epoch": 1.89, + "learning_rate": 5.214762176461434e-06, + "logits/chosen": -0.8112053275108337, + "logits/rejected": -2.4894778728485107, + "logps/chosen": -134.12551879882812, + "logps/rejected": -481.2190856933594, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.488528728485107, + "rewards/margins": 8.65937614440918, + "rewards/rejected": -14.147905349731445, + "step": 12179 + }, + { + "epoch": 1.89, + "learning_rate": 5.214028735930286e-06, + "logits/chosen": -3.032292127609253, + "logits/rejected": -3.040785312652588, + "logps/chosen": -387.80194091796875, + "logps/rejected": -380.4649353027344, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.380881309509277, + "rewards/margins": 6.113842487335205, + "rewards/rejected": -12.49472427368164, + "step": 12180 + }, + { + "epoch": 1.89, + "learning_rate": 5.213295295399139e-06, + "logits/chosen": -1.4618535041809082, + "logits/rejected": -2.8711202144622803, + "logps/chosen": -131.23855590820312, + "logps/rejected": -357.59136962890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.611538410186768, + "rewards/margins": 9.557151794433594, + "rewards/rejected": -14.16869068145752, + "step": 12181 + }, + { + "epoch": 1.89, + "learning_rate": 5.212561854867992e-06, + "logits/chosen": -2.179506540298462, + "logits/rejected": -2.6468260288238525, + "logps/chosen": -356.15234375, + "logps/rejected": -463.1658935546875, + "loss": 0.0235, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.359830856323242, + "rewards/margins": 5.857721328735352, + "rewards/rejected": -14.217552185058594, + "step": 12182 + }, + { + "epoch": 1.89, + "learning_rate": 5.211828414336844e-06, + "logits/chosen": -2.8368325233459473, + "logits/rejected": -2.30253529548645, + "logps/chosen": -152.79234313964844, + "logps/rejected": -203.02191162109375, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.230154037475586, + "rewards/margins": 5.220734596252441, + "rewards/rejected": -12.450888633728027, + "step": 12183 + }, + { + "epoch": 1.89, + "learning_rate": 5.2110949738056955e-06, + "logits/chosen": -2.642703056335449, + "logits/rejected": -2.9994184970855713, + "logps/chosen": -63.951473236083984, + "logps/rejected": -215.4825439453125, + "loss": 0.0182, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.472053527832031, + "rewards/margins": 5.781164169311523, + "rewards/rejected": -10.253217697143555, + "step": 12184 + }, + { + "epoch": 1.9, + "learning_rate": 5.210361533274547e-06, + "logits/chosen": -2.2470617294311523, + "logits/rejected": -2.9019041061401367, + "logps/chosen": -125.40064239501953, + "logps/rejected": -353.6809997558594, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2134809494018555, + "rewards/margins": 6.420343399047852, + "rewards/rejected": -12.633825302124023, + "step": 12185 + }, + { + "epoch": 1.9, + "learning_rate": 5.2096280927434e-06, + "logits/chosen": -2.7644426822662354, + "logits/rejected": -2.338249683380127, + "logps/chosen": -220.53515625, + "logps/rejected": -268.2450256347656, + "loss": 0.051, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7200887203216553, + "rewards/margins": 6.179438591003418, + "rewards/rejected": -9.899527549743652, + "step": 12186 + }, + { + "epoch": 1.9, + "learning_rate": 5.208894652212252e-06, + "logits/chosen": -2.9285292625427246, + "logits/rejected": -2.964723587036133, + "logps/chosen": -168.901611328125, + "logps/rejected": -244.4747772216797, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.881631851196289, + "rewards/margins": 7.248508453369141, + "rewards/rejected": -11.13014030456543, + "step": 12187 + }, + { + "epoch": 1.9, + "learning_rate": 5.208161211681104e-06, + "logits/chosen": -2.3833277225494385, + "logits/rejected": -2.920182704925537, + "logps/chosen": -168.6044921875, + "logps/rejected": -305.41619873046875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5657191276550293, + "rewards/margins": 7.303866863250732, + "rewards/rejected": -10.869585990905762, + "step": 12188 + }, + { + "epoch": 1.9, + "learning_rate": 5.207427771149956e-06, + "logits/chosen": -1.5378468036651611, + "logits/rejected": -2.5241048336029053, + "logps/chosen": -136.7711639404297, + "logps/rejected": -399.1538391113281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.68439245223999, + "rewards/margins": 11.337505340576172, + "rewards/rejected": -19.02189826965332, + "step": 12189 + }, + { + "epoch": 1.9, + "learning_rate": 5.206694330618808e-06, + "logits/chosen": -2.0004451274871826, + "logits/rejected": -2.6999971866607666, + "logps/chosen": -112.29554748535156, + "logps/rejected": -409.76165771484375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.56768798828125, + "rewards/margins": 8.020158767700195, + "rewards/rejected": -13.587846755981445, + "step": 12190 + }, + { + "epoch": 1.9, + "learning_rate": 5.20596089008766e-06, + "logits/chosen": -1.7938408851623535, + "logits/rejected": -2.7884514331817627, + "logps/chosen": -150.08721923828125, + "logps/rejected": -367.7760925292969, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.520513534545898, + "rewards/margins": 6.497452259063721, + "rewards/rejected": -12.017965316772461, + "step": 12191 + }, + { + "epoch": 1.9, + "learning_rate": 5.205227449556512e-06, + "logits/chosen": -2.232348680496216, + "logits/rejected": -2.4070003032684326, + "logps/chosen": -1112.7891845703125, + "logps/rejected": -816.3529052734375, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.226698875427246, + "rewards/margins": 9.23909854888916, + "rewards/rejected": -16.465797424316406, + "step": 12192 + }, + { + "epoch": 1.9, + "learning_rate": 5.204494009025364e-06, + "logits/chosen": -1.9030929803848267, + "logits/rejected": -2.484262466430664, + "logps/chosen": -231.87619018554688, + "logps/rejected": -430.3194580078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.023075580596924, + "rewards/margins": 12.214219093322754, + "rewards/rejected": -17.237295150756836, + "step": 12193 + }, + { + "epoch": 1.9, + "learning_rate": 5.203760568494216e-06, + "logits/chosen": -2.790696144104004, + "logits/rejected": -2.956015110015869, + "logps/chosen": -88.0067138671875, + "logps/rejected": -286.86993408203125, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.393774032592773, + "rewards/margins": 6.808346748352051, + "rewards/rejected": -14.202120780944824, + "step": 12194 + }, + { + "epoch": 1.9, + "learning_rate": 5.203027127963069e-06, + "logits/chosen": -2.863645553588867, + "logits/rejected": -2.914961099624634, + "logps/chosen": -246.89019775390625, + "logps/rejected": -239.38394165039062, + "loss": 1.5339, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.590664386749268, + "rewards/margins": 3.1254734992980957, + "rewards/rejected": -9.716137886047363, + "step": 12195 + }, + { + "epoch": 1.9, + "learning_rate": 5.2022936874319205e-06, + "logits/chosen": -3.0357649326324463, + "logits/rejected": -2.3484036922454834, + "logps/chosen": -409.82659912109375, + "logps/rejected": -502.96173095703125, + "loss": 0.1522, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.706560134887695, + "rewards/margins": 3.580366373062134, + "rewards/rejected": -11.28692626953125, + "step": 12196 + }, + { + "epoch": 1.9, + "learning_rate": 5.201560246900772e-06, + "logits/chosen": -1.3304520845413208, + "logits/rejected": -2.628136396408081, + "logps/chosen": -176.23873901367188, + "logps/rejected": -318.4208068847656, + "loss": 1.4522, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.108403205871582, + "rewards/margins": 1.9023816585540771, + "rewards/rejected": -10.010785102844238, + "step": 12197 + }, + { + "epoch": 1.9, + "learning_rate": 5.200826806369625e-06, + "logits/chosen": -1.3479622602462769, + "logits/rejected": -2.8213729858398438, + "logps/chosen": -187.37017822265625, + "logps/rejected": -461.8385009765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6618123054504395, + "rewards/margins": 8.738868713378906, + "rewards/rejected": -15.400680541992188, + "step": 12198 + }, + { + "epoch": 1.9, + "learning_rate": 5.200093365838478e-06, + "logits/chosen": -2.1694259643554688, + "logits/rejected": -2.711745262145996, + "logps/chosen": -120.13813781738281, + "logps/rejected": -314.6982727050781, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.405869483947754, + "rewards/margins": 8.297128677368164, + "rewards/rejected": -13.702998161315918, + "step": 12199 + }, + { + "epoch": 1.9, + "learning_rate": 5.19935992530733e-06, + "logits/chosen": -2.9119603633880615, + "logits/rejected": -1.9029330015182495, + "logps/chosen": -249.6048126220703, + "logps/rejected": -180.51541137695312, + "loss": 0.4651, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.579484939575195, + "rewards/margins": 1.0322082042694092, + "rewards/rejected": -8.611693382263184, + "step": 12200 + }, + { + "epoch": 1.9, + "learning_rate": 5.198626484776182e-06, + "logits/chosen": -2.827583074569702, + "logits/rejected": -2.8835055828094482, + "logps/chosen": -138.19216918945312, + "logps/rejected": -288.1102294921875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.879520893096924, + "rewards/margins": 7.437732219696045, + "rewards/rejected": -11.317253112792969, + "step": 12201 + }, + { + "epoch": 1.9, + "learning_rate": 5.1978930442450335e-06, + "logits/chosen": -2.3264524936676025, + "logits/rejected": -2.8173351287841797, + "logps/chosen": -246.5220489501953, + "logps/rejected": -376.89166259765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0054924488067627, + "rewards/margins": 10.24551010131836, + "rewards/rejected": -13.251001358032227, + "step": 12202 + }, + { + "epoch": 1.9, + "learning_rate": 5.197159603713886e-06, + "logits/chosen": -2.402512311935425, + "logits/rejected": -2.2225992679595947, + "logps/chosen": -686.0802001953125, + "logps/rejected": -747.909912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.967767238616943, + "rewards/margins": 10.607653617858887, + "rewards/rejected": -15.575420379638672, + "step": 12203 + }, + { + "epoch": 1.9, + "learning_rate": 5.196426163182738e-06, + "logits/chosen": -3.1574606895446777, + "logits/rejected": -2.9853622913360596, + "logps/chosen": -82.38665771484375, + "logps/rejected": -105.1363525390625, + "loss": 0.3513, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.26361083984375, + "rewards/margins": 3.6591858863830566, + "rewards/rejected": -8.922797203063965, + "step": 12204 + }, + { + "epoch": 1.9, + "learning_rate": 5.19569272265159e-06, + "logits/chosen": -2.919010639190674, + "logits/rejected": -1.3890093564987183, + "logps/chosen": -196.47157287597656, + "logps/rejected": -275.8267822265625, + "loss": 0.2138, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.715275764465332, + "rewards/margins": 3.9395434856414795, + "rewards/rejected": -8.65481948852539, + "step": 12205 + }, + { + "epoch": 1.9, + "learning_rate": 5.194959282120442e-06, + "logits/chosen": -2.7537827491760254, + "logits/rejected": -3.110544443130493, + "logps/chosen": -159.3940887451172, + "logps/rejected": -198.40802001953125, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.866167068481445, + "rewards/margins": 4.823770523071289, + "rewards/rejected": -12.689937591552734, + "step": 12206 + }, + { + "epoch": 1.9, + "learning_rate": 5.194225841589294e-06, + "logits/chosen": -2.4276046752929688, + "logits/rejected": -2.854405641555786, + "logps/chosen": -247.860595703125, + "logps/rejected": -368.68487548828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9951071739196777, + "rewards/margins": 9.410024642944336, + "rewards/rejected": -13.405132293701172, + "step": 12207 + }, + { + "epoch": 1.9, + "learning_rate": 5.1934924010581465e-06, + "logits/chosen": -2.1588990688323975, + "logits/rejected": -2.758978843688965, + "logps/chosen": -260.177490234375, + "logps/rejected": -281.90240478515625, + "loss": 2.3313, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.116003036499023, + "rewards/margins": 1.5351178646087646, + "rewards/rejected": -9.651121139526367, + "step": 12208 + }, + { + "epoch": 1.9, + "learning_rate": 5.192758960526998e-06, + "logits/chosen": -2.8649165630340576, + "logits/rejected": -2.266433000564575, + "logps/chosen": -121.8913803100586, + "logps/rejected": -144.12362670898438, + "loss": 1.6287, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.198736190795898, + "rewards/margins": 2.5525496006011963, + "rewards/rejected": -8.751285552978516, + "step": 12209 + }, + { + "epoch": 1.9, + "learning_rate": 5.19202551999585e-06, + "logits/chosen": -2.793722629547119, + "logits/rejected": -2.768615961074829, + "logps/chosen": -130.05783081054688, + "logps/rejected": -218.0999755859375, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.768428325653076, + "rewards/margins": 7.496555328369141, + "rewards/rejected": -13.264984130859375, + "step": 12210 + }, + { + "epoch": 1.9, + "learning_rate": 5.191292079464702e-06, + "logits/chosen": -2.651298761367798, + "logits/rejected": -2.972184419631958, + "logps/chosen": -166.1978759765625, + "logps/rejected": -278.6416320800781, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.899163246154785, + "rewards/margins": 7.333376884460449, + "rewards/rejected": -11.232540130615234, + "step": 12211 + }, + { + "epoch": 1.9, + "learning_rate": 5.190558638933555e-06, + "logits/chosen": -2.8804023265838623, + "logits/rejected": -3.2239115238189697, + "logps/chosen": -93.25155639648438, + "logps/rejected": -301.2741394042969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.875680446624756, + "rewards/margins": 9.057506561279297, + "rewards/rejected": -13.933187484741211, + "step": 12212 + }, + { + "epoch": 1.9, + "learning_rate": 5.189825198402407e-06, + "logits/chosen": -2.353893280029297, + "logits/rejected": -2.792888641357422, + "logps/chosen": -206.7943878173828, + "logps/rejected": -252.5018768310547, + "loss": 1.044, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.453733444213867, + "rewards/margins": 3.65565824508667, + "rewards/rejected": -10.109391212463379, + "step": 12213 + }, + { + "epoch": 1.9, + "learning_rate": 5.189091757871259e-06, + "logits/chosen": -2.747525930404663, + "logits/rejected": -1.9993358850479126, + "logps/chosen": -441.42425537109375, + "logps/rejected": -660.0610961914062, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.282263278961182, + "rewards/margins": 8.132991790771484, + "rewards/rejected": -14.415255546569824, + "step": 12214 + }, + { + "epoch": 1.9, + "learning_rate": 5.188358317340111e-06, + "logits/chosen": -2.5839998722076416, + "logits/rejected": -2.31787371635437, + "logps/chosen": -168.94308471679688, + "logps/rejected": -248.01931762695312, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.07377290725708, + "rewards/margins": 6.58713436126709, + "rewards/rejected": -11.660907745361328, + "step": 12215 + }, + { + "epoch": 1.9, + "learning_rate": 5.187624876808963e-06, + "logits/chosen": -2.7397005558013916, + "logits/rejected": -3.1278557777404785, + "logps/chosen": -62.67019271850586, + "logps/rejected": -192.0412139892578, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.66781759262085, + "rewards/margins": 5.465079307556152, + "rewards/rejected": -10.132896423339844, + "step": 12216 + }, + { + "epoch": 1.9, + "learning_rate": 5.186891436277816e-06, + "logits/chosen": -2.457956314086914, + "logits/rejected": -2.8633694648742676, + "logps/chosen": -130.77989196777344, + "logps/rejected": -425.7621765136719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2566099166870117, + "rewards/margins": 11.473319053649902, + "rewards/rejected": -14.729928970336914, + "step": 12217 + }, + { + "epoch": 1.9, + "learning_rate": 5.186157995746668e-06, + "logits/chosen": -2.940477132797241, + "logits/rejected": -2.0816547870635986, + "logps/chosen": -280.42486572265625, + "logps/rejected": -367.3399963378906, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.31253719329834, + "rewards/margins": 10.534829139709473, + "rewards/rejected": -15.847366333007812, + "step": 12218 + }, + { + "epoch": 1.9, + "learning_rate": 5.18542455521552e-06, + "logits/chosen": -2.898845911026001, + "logits/rejected": -2.9792211055755615, + "logps/chosen": -135.2941436767578, + "logps/rejected": -376.21246337890625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.154685974121094, + "rewards/margins": 7.216024398803711, + "rewards/rejected": -15.370710372924805, + "step": 12219 + }, + { + "epoch": 1.9, + "learning_rate": 5.1846911146843715e-06, + "logits/chosen": -1.8548160791397095, + "logits/rejected": -2.591149091720581, + "logps/chosen": -160.20791625976562, + "logps/rejected": -270.8939514160156, + "loss": 0.2394, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.284514427185059, + "rewards/margins": 4.0550618171691895, + "rewards/rejected": -11.339576721191406, + "step": 12220 + }, + { + "epoch": 1.9, + "learning_rate": 5.183957674153224e-06, + "logits/chosen": -2.579119920730591, + "logits/rejected": -2.650782585144043, + "logps/chosen": -157.63671875, + "logps/rejected": -311.17474365234375, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.708950042724609, + "rewards/margins": 6.887977600097656, + "rewards/rejected": -14.596927642822266, + "step": 12221 + }, + { + "epoch": 1.9, + "learning_rate": 5.183224233622076e-06, + "logits/chosen": -3.1584792137145996, + "logits/rejected": -2.1945114135742188, + "logps/chosen": -136.44381713867188, + "logps/rejected": -94.17889404296875, + "loss": 2.711, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.802659034729004, + "rewards/margins": -0.580528974533081, + "rewards/rejected": -6.222129821777344, + "step": 12222 + }, + { + "epoch": 1.9, + "learning_rate": 5.182490793090928e-06, + "logits/chosen": -2.7626631259918213, + "logits/rejected": -2.052271604537964, + "logps/chosen": -210.36065673828125, + "logps/rejected": -240.95791625976562, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.341813087463379, + "rewards/margins": 7.671093940734863, + "rewards/rejected": -11.012907028198242, + "step": 12223 + }, + { + "epoch": 1.9, + "learning_rate": 5.18175735255978e-06, + "logits/chosen": -1.754786729812622, + "logits/rejected": -2.9933083057403564, + "logps/chosen": -274.32080078125, + "logps/rejected": -495.8585205078125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3264384269714355, + "rewards/margins": 7.444260120391846, + "rewards/rejected": -13.770698547363281, + "step": 12224 + }, + { + "epoch": 1.9, + "learning_rate": 5.181023912028632e-06, + "logits/chosen": -1.9746249914169312, + "logits/rejected": -2.862847328186035, + "logps/chosen": -118.31382751464844, + "logps/rejected": -403.34063720703125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.061147689819336, + "rewards/margins": 9.642196655273438, + "rewards/rejected": -14.703344345092773, + "step": 12225 + }, + { + "epoch": 1.9, + "learning_rate": 5.1802904714974845e-06, + "logits/chosen": -2.6134486198425293, + "logits/rejected": -2.799480438232422, + "logps/chosen": -383.2750244140625, + "logps/rejected": -372.81695556640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.368929386138916, + "rewards/margins": 10.199124336242676, + "rewards/rejected": -13.56805419921875, + "step": 12226 + }, + { + "epoch": 1.9, + "learning_rate": 5.179557030966336e-06, + "logits/chosen": -2.6690754890441895, + "logits/rejected": -1.3199458122253418, + "logps/chosen": -205.8521728515625, + "logps/rejected": -194.9075927734375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.001333236694336, + "rewards/margins": 7.2541680335998535, + "rewards/rejected": -11.255500793457031, + "step": 12227 + }, + { + "epoch": 1.9, + "learning_rate": 5.178823590435188e-06, + "logits/chosen": -2.7818734645843506, + "logits/rejected": -2.0492093563079834, + "logps/chosen": -348.8147277832031, + "logps/rejected": -339.74835205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3711462020874023, + "rewards/margins": 10.821937561035156, + "rewards/rejected": -14.193084716796875, + "step": 12228 + }, + { + "epoch": 1.9, + "learning_rate": 5.17809014990404e-06, + "logits/chosen": -2.9519424438476562, + "logits/rejected": -2.8299028873443604, + "logps/chosen": -210.15142822265625, + "logps/rejected": -274.0841064453125, + "loss": 0.4469, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.463506698608398, + "rewards/margins": 2.240851640701294, + "rewards/rejected": -10.704358100891113, + "step": 12229 + }, + { + "epoch": 1.9, + "learning_rate": 5.177356709372893e-06, + "logits/chosen": -2.2488372325897217, + "logits/rejected": -3.0379300117492676, + "logps/chosen": -113.99928283691406, + "logps/rejected": -503.9807434082031, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.270859718322754, + "rewards/margins": 7.833826065063477, + "rewards/rejected": -12.104686737060547, + "step": 12230 + }, + { + "epoch": 1.9, + "learning_rate": 5.176623268841745e-06, + "logits/chosen": -2.900327444076538, + "logits/rejected": -2.3521716594696045, + "logps/chosen": -265.1940612792969, + "logps/rejected": -243.87925720214844, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.072396993637085, + "rewards/margins": 4.882167339324951, + "rewards/rejected": -7.954564094543457, + "step": 12231 + }, + { + "epoch": 1.9, + "learning_rate": 5.1758898283105975e-06, + "logits/chosen": -2.588630437850952, + "logits/rejected": -2.165057897567749, + "logps/chosen": -277.0440673828125, + "logps/rejected": -285.09906005859375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.45313024520874, + "rewards/margins": 7.339598655700684, + "rewards/rejected": -11.792729377746582, + "step": 12232 + }, + { + "epoch": 1.9, + "learning_rate": 5.175156387779449e-06, + "logits/chosen": -2.4756596088409424, + "logits/rejected": -2.889845132827759, + "logps/chosen": -169.24061584472656, + "logps/rejected": -312.9273681640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.127067565917969, + "rewards/margins": 9.582897186279297, + "rewards/rejected": -14.709964752197266, + "step": 12233 + }, + { + "epoch": 1.9, + "learning_rate": 5.174422947248301e-06, + "logits/chosen": -2.79929780960083, + "logits/rejected": -2.978663444519043, + "logps/chosen": -208.9678955078125, + "logps/rejected": -377.21844482421875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.181926727294922, + "rewards/margins": 8.73781967163086, + "rewards/rejected": -13.919746398925781, + "step": 12234 + }, + { + "epoch": 1.9, + "learning_rate": 5.173689506717154e-06, + "logits/chosen": -2.804708957672119, + "logits/rejected": -2.190906047821045, + "logps/chosen": -482.93011474609375, + "logps/rejected": -559.1924438476562, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.466760635375977, + "rewards/margins": 8.519508361816406, + "rewards/rejected": -15.986268997192383, + "step": 12235 + }, + { + "epoch": 1.9, + "learning_rate": 5.172956066186006e-06, + "logits/chosen": -2.9369754791259766, + "logits/rejected": -2.983011245727539, + "logps/chosen": -214.61256408691406, + "logps/rejected": -153.283203125, + "loss": 0.8539, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.48879337310791, + "rewards/margins": 3.917450428009033, + "rewards/rejected": -10.406244277954102, + "step": 12236 + }, + { + "epoch": 1.9, + "learning_rate": 5.172222625654858e-06, + "logits/chosen": -1.2891294956207275, + "logits/rejected": -2.8198349475860596, + "logps/chosen": -278.032470703125, + "logps/rejected": -648.8668212890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.030597686767578, + "rewards/margins": 10.17428207397461, + "rewards/rejected": -16.204879760742188, + "step": 12237 + }, + { + "epoch": 1.9, + "learning_rate": 5.17148918512371e-06, + "logits/chosen": -2.7594454288482666, + "logits/rejected": -1.4969228506088257, + "logps/chosen": -273.2531433105469, + "logps/rejected": -302.4576721191406, + "loss": 0.0563, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8283586502075195, + "rewards/margins": 5.866313934326172, + "rewards/rejected": -11.694672584533691, + "step": 12238 + }, + { + "epoch": 1.9, + "learning_rate": 5.170755744592562e-06, + "logits/chosen": -1.6525685787200928, + "logits/rejected": -2.791002035140991, + "logps/chosen": -129.423583984375, + "logps/rejected": -401.18017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.357760906219482, + "rewards/margins": 11.438239097595215, + "rewards/rejected": -15.795999526977539, + "step": 12239 + }, + { + "epoch": 1.9, + "learning_rate": 5.170022304061414e-06, + "logits/chosen": -1.833786964416504, + "logits/rejected": -2.9028642177581787, + "logps/chosen": -98.0494155883789, + "logps/rejected": -386.8779602050781, + "loss": 0.1081, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.054718494415283, + "rewards/margins": 7.991557598114014, + "rewards/rejected": -15.046276092529297, + "step": 12240 + }, + { + "epoch": 1.9, + "learning_rate": 5.169288863530266e-06, + "logits/chosen": -1.2170484066009521, + "logits/rejected": -2.8509435653686523, + "logps/chosen": -183.5421142578125, + "logps/rejected": -527.4954833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1839280128479, + "rewards/margins": 11.730470657348633, + "rewards/rejected": -16.914398193359375, + "step": 12241 + }, + { + "epoch": 1.9, + "learning_rate": 5.168555422999118e-06, + "logits/chosen": -2.586639642715454, + "logits/rejected": -2.9652621746063232, + "logps/chosen": -80.21878051757812, + "logps/rejected": -447.9298095703125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.55501651763916, + "rewards/margins": 6.212094783782959, + "rewards/rejected": -11.767110824584961, + "step": 12242 + }, + { + "epoch": 1.9, + "learning_rate": 5.16782198246797e-06, + "logits/chosen": -1.89706552028656, + "logits/rejected": -2.7789525985717773, + "logps/chosen": -183.53070068359375, + "logps/rejected": -403.575927734375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.134212493896484, + "rewards/margins": 7.340573310852051, + "rewards/rejected": -12.474785804748535, + "step": 12243 + }, + { + "epoch": 1.9, + "learning_rate": 5.1670885419368226e-06, + "logits/chosen": -2.776552200317383, + "logits/rejected": -2.9763615131378174, + "logps/chosen": -130.56031799316406, + "logps/rejected": -236.79721069335938, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4036431312561035, + "rewards/margins": 8.964685440063477, + "rewards/rejected": -11.368329048156738, + "step": 12244 + }, + { + "epoch": 1.9, + "learning_rate": 5.1663551014056744e-06, + "logits/chosen": -2.822693109512329, + "logits/rejected": -2.933906078338623, + "logps/chosen": -130.89810180664062, + "logps/rejected": -475.2723693847656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2334659099578857, + "rewards/margins": 14.278255462646484, + "rewards/rejected": -17.511720657348633, + "step": 12245 + }, + { + "epoch": 1.9, + "learning_rate": 5.165621660874526e-06, + "logits/chosen": -2.879910707473755, + "logits/rejected": -3.098335027694702, + "logps/chosen": -203.19798278808594, + "logps/rejected": -285.1824035644531, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.09092903137207, + "rewards/margins": 4.520418167114258, + "rewards/rejected": -11.611347198486328, + "step": 12246 + }, + { + "epoch": 1.9, + "learning_rate": 5.164888220343378e-06, + "logits/chosen": -1.6909868717193604, + "logits/rejected": -2.860496997833252, + "logps/chosen": -192.21177673339844, + "logps/rejected": -392.87054443359375, + "loss": 0.0465, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.524284362792969, + "rewards/margins": 5.421990871429443, + "rewards/rejected": -11.94627571105957, + "step": 12247 + }, + { + "epoch": 1.9, + "learning_rate": 5.164154779812231e-06, + "logits/chosen": -2.2012178897857666, + "logits/rejected": -2.934993028640747, + "logps/chosen": -333.95391845703125, + "logps/rejected": -297.88323974609375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.35369873046875, + "rewards/margins": 8.771224975585938, + "rewards/rejected": -12.124923706054688, + "step": 12248 + }, + { + "epoch": 1.9, + "learning_rate": 5.163421339281084e-06, + "logits/chosen": -2.6874332427978516, + "logits/rejected": -2.956557512283325, + "logps/chosen": -547.3710327148438, + "logps/rejected": -588.3823852539062, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.315402030944824, + "rewards/margins": 5.829388618469238, + "rewards/rejected": -11.144790649414062, + "step": 12249 + }, + { + "epoch": 1.91, + "learning_rate": 5.1626878987499355e-06, + "logits/chosen": -2.844970941543579, + "logits/rejected": -2.466444253921509, + "logps/chosen": -170.39132690429688, + "logps/rejected": -224.2197723388672, + "loss": 0.3415, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.613072395324707, + "rewards/margins": 3.0583083629608154, + "rewards/rejected": -9.671380996704102, + "step": 12250 + }, + { + "epoch": 1.91, + "learning_rate": 5.161954458218787e-06, + "logits/chosen": -2.5569400787353516, + "logits/rejected": -2.724057674407959, + "logps/chosen": -77.76690673828125, + "logps/rejected": -264.2072448730469, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.686000823974609, + "rewards/margins": 6.613048553466797, + "rewards/rejected": -12.299049377441406, + "step": 12251 + }, + { + "epoch": 1.91, + "learning_rate": 5.16122101768764e-06, + "logits/chosen": -1.783462643623352, + "logits/rejected": -2.8551316261291504, + "logps/chosen": -271.3016357421875, + "logps/rejected": -570.04150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.788065433502197, + "rewards/margins": 11.677021980285645, + "rewards/rejected": -17.465087890625, + "step": 12252 + }, + { + "epoch": 1.91, + "learning_rate": 5.160487577156492e-06, + "logits/chosen": -2.0622050762176514, + "logits/rejected": -2.4769608974456787, + "logps/chosen": -233.73080444335938, + "logps/rejected": -357.44598388671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.202977180480957, + "rewards/margins": 8.344022750854492, + "rewards/rejected": -13.54699993133545, + "step": 12253 + }, + { + "epoch": 1.91, + "learning_rate": 5.159754136625344e-06, + "logits/chosen": -1.9231910705566406, + "logits/rejected": -2.926687717437744, + "logps/chosen": -133.20172119140625, + "logps/rejected": -339.14404296875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1830339431762695, + "rewards/margins": 6.476921081542969, + "rewards/rejected": -11.659955024719238, + "step": 12254 + }, + { + "epoch": 1.91, + "learning_rate": 5.159020696094196e-06, + "logits/chosen": -2.230509042739868, + "logits/rejected": -3.0824553966522217, + "logps/chosen": -297.198974609375, + "logps/rejected": -461.7980041503906, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.475664138793945, + "rewards/margins": 8.797452926635742, + "rewards/rejected": -14.273117065429688, + "step": 12255 + }, + { + "epoch": 1.91, + "learning_rate": 5.158287255563048e-06, + "logits/chosen": -1.5292167663574219, + "logits/rejected": -2.4988927841186523, + "logps/chosen": -148.546875, + "logps/rejected": -287.21539306640625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.608319282531738, + "rewards/margins": 7.00187873840332, + "rewards/rejected": -15.610197067260742, + "step": 12256 + }, + { + "epoch": 1.91, + "learning_rate": 5.1575538150319e-06, + "logits/chosen": -2.1325623989105225, + "logits/rejected": -2.7942874431610107, + "logps/chosen": -144.00323486328125, + "logps/rejected": -376.68170166015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1199517250061035, + "rewards/margins": 9.891169548034668, + "rewards/rejected": -14.01112174987793, + "step": 12257 + }, + { + "epoch": 1.91, + "learning_rate": 5.156820374500752e-06, + "logits/chosen": -2.462904691696167, + "logits/rejected": -2.9376120567321777, + "logps/chosen": -561.5492553710938, + "logps/rejected": -600.9103393554688, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.125967025756836, + "rewards/margins": 5.631015777587891, + "rewards/rejected": -9.756982803344727, + "step": 12258 + }, + { + "epoch": 1.91, + "learning_rate": 5.156086933969604e-06, + "logits/chosen": -0.9789478778839111, + "logits/rejected": -3.0295023918151855, + "logps/chosen": -72.21747589111328, + "logps/rejected": -650.5071411132812, + "loss": 0.1145, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.06800651550293, + "rewards/margins": 13.21236801147461, + "rewards/rejected": -19.28037452697754, + "step": 12259 + }, + { + "epoch": 1.91, + "learning_rate": 5.155353493438456e-06, + "logits/chosen": -2.9989266395568848, + "logits/rejected": -2.892458438873291, + "logps/chosen": -974.9519653320312, + "logps/rejected": -602.9805908203125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.889920234680176, + "rewards/margins": 6.080060005187988, + "rewards/rejected": -12.969980239868164, + "step": 12260 + }, + { + "epoch": 1.91, + "learning_rate": 5.154620052907309e-06, + "logits/chosen": -2.6717159748077393, + "logits/rejected": -2.98018217086792, + "logps/chosen": -133.255126953125, + "logps/rejected": -172.22061157226562, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.655841827392578, + "rewards/margins": 7.024150848388672, + "rewards/rejected": -12.67999267578125, + "step": 12261 + }, + { + "epoch": 1.91, + "learning_rate": 5.153886612376161e-06, + "logits/chosen": -2.0959978103637695, + "logits/rejected": -2.522287130355835, + "logps/chosen": -340.7479553222656, + "logps/rejected": -598.7438354492188, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.355765342712402, + "rewards/margins": 6.365151405334473, + "rewards/rejected": -11.720916748046875, + "step": 12262 + }, + { + "epoch": 1.91, + "learning_rate": 5.1531531718450125e-06, + "logits/chosen": -2.978994131088257, + "logits/rejected": -2.389456033706665, + "logps/chosen": -139.95924377441406, + "logps/rejected": -203.926025390625, + "loss": 0.5943, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.495857238769531, + "rewards/margins": 3.7920360565185547, + "rewards/rejected": -12.287893295288086, + "step": 12263 + }, + { + "epoch": 1.91, + "learning_rate": 5.152419731313864e-06, + "logits/chosen": -2.8711977005004883, + "logits/rejected": -1.7126946449279785, + "logps/chosen": -622.863037109375, + "logps/rejected": -405.57403564453125, + "loss": 0.2184, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.631908416748047, + "rewards/margins": 2.175323009490967, + "rewards/rejected": -7.807231426239014, + "step": 12264 + }, + { + "epoch": 1.91, + "learning_rate": 5.151686290782717e-06, + "logits/chosen": -2.954807758331299, + "logits/rejected": -2.606670379638672, + "logps/chosen": -557.3735961914062, + "logps/rejected": -451.75640869140625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3145864009857178, + "rewards/margins": 11.804511070251465, + "rewards/rejected": -12.119096755981445, + "step": 12265 + }, + { + "epoch": 1.91, + "learning_rate": 5.15095285025157e-06, + "logits/chosen": -2.897042751312256, + "logits/rejected": -2.5412256717681885, + "logps/chosen": -423.8598327636719, + "logps/rejected": -635.4413452148438, + "loss": 0.1615, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.965113162994385, + "rewards/margins": 3.654007911682129, + "rewards/rejected": -9.619121551513672, + "step": 12266 + }, + { + "epoch": 1.91, + "learning_rate": 5.150219409720422e-06, + "logits/chosen": -3.025254249572754, + "logits/rejected": -2.621853828430176, + "logps/chosen": -1072.773193359375, + "logps/rejected": -775.6942138671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8500375747680664, + "rewards/margins": 9.010537147521973, + "rewards/rejected": -12.860574722290039, + "step": 12267 + }, + { + "epoch": 1.91, + "learning_rate": 5.1494859691892736e-06, + "logits/chosen": -3.051665782928467, + "logits/rejected": -2.6516358852386475, + "logps/chosen": -146.4743194580078, + "logps/rejected": -183.240966796875, + "loss": 0.0509, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.166628360748291, + "rewards/margins": 3.945319175720215, + "rewards/rejected": -9.111948013305664, + "step": 12268 + }, + { + "epoch": 1.91, + "learning_rate": 5.1487525286581254e-06, + "logits/chosen": -1.5603145360946655, + "logits/rejected": -2.4104862213134766, + "logps/chosen": -133.57981872558594, + "logps/rejected": -395.4347839355469, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.944766044616699, + "rewards/margins": 6.96556282043457, + "rewards/rejected": -13.91032886505127, + "step": 12269 + }, + { + "epoch": 1.91, + "learning_rate": 5.148019088126978e-06, + "logits/chosen": -1.430727481842041, + "logits/rejected": -2.592452049255371, + "logps/chosen": -120.2078628540039, + "logps/rejected": -396.651611328125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.890758514404297, + "rewards/margins": 8.624935150146484, + "rewards/rejected": -12.515693664550781, + "step": 12270 + }, + { + "epoch": 1.91, + "learning_rate": 5.14728564759583e-06, + "logits/chosen": -2.420313835144043, + "logits/rejected": -2.8571760654449463, + "logps/chosen": -276.96380615234375, + "logps/rejected": -362.09716796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.566880226135254, + "rewards/margins": 8.425895690917969, + "rewards/rejected": -14.992774963378906, + "step": 12271 + }, + { + "epoch": 1.91, + "learning_rate": 5.146552207064682e-06, + "logits/chosen": -2.6849637031555176, + "logits/rejected": -2.600872039794922, + "logps/chosen": -111.8937759399414, + "logps/rejected": -172.98248291015625, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.471963882446289, + "rewards/margins": 6.356186866760254, + "rewards/rejected": -10.828150749206543, + "step": 12272 + }, + { + "epoch": 1.91, + "learning_rate": 5.145818766533534e-06, + "logits/chosen": -2.300013780593872, + "logits/rejected": -3.0390701293945312, + "logps/chosen": -128.48094177246094, + "logps/rejected": -382.4826965332031, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.551712512969971, + "rewards/margins": 6.971877098083496, + "rewards/rejected": -12.523590087890625, + "step": 12273 + }, + { + "epoch": 1.91, + "learning_rate": 5.145085326002386e-06, + "logits/chosen": -2.4733850955963135, + "logits/rejected": -2.8131582736968994, + "logps/chosen": -79.54315948486328, + "logps/rejected": -275.28204345703125, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.605901718139648, + "rewards/margins": 7.386685371398926, + "rewards/rejected": -11.992587089538574, + "step": 12274 + }, + { + "epoch": 1.91, + "learning_rate": 5.144351885471238e-06, + "logits/chosen": -2.602055072784424, + "logits/rejected": -2.902013063430786, + "logps/chosen": -717.064453125, + "logps/rejected": -673.442626953125, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3130035400390625, + "rewards/margins": 10.81719970703125, + "rewards/rejected": -14.130203247070312, + "step": 12275 + }, + { + "epoch": 1.91, + "learning_rate": 5.14361844494009e-06, + "logits/chosen": -2.2385878562927246, + "logits/rejected": -2.7858924865722656, + "logps/chosen": -264.3719482421875, + "logps/rejected": -268.6897277832031, + "loss": 0.462, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.356122016906738, + "rewards/margins": 1.6463453769683838, + "rewards/rejected": -9.00246810913086, + "step": 12276 + }, + { + "epoch": 1.91, + "learning_rate": 5.142885004408942e-06, + "logits/chosen": -0.9478872418403625, + "logits/rejected": -2.6403725147247314, + "logps/chosen": -102.25105285644531, + "logps/rejected": -358.9931335449219, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6535420417785645, + "rewards/margins": 8.233621597290039, + "rewards/rejected": -13.887163162231445, + "step": 12277 + }, + { + "epoch": 1.91, + "learning_rate": 5.142151563877794e-06, + "logits/chosen": -3.007949113845825, + "logits/rejected": -3.0970990657806396, + "logps/chosen": -116.83694458007812, + "logps/rejected": -313.60833740234375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.717578887939453, + "rewards/margins": 7.224944114685059, + "rewards/rejected": -11.942523002624512, + "step": 12278 + }, + { + "epoch": 1.91, + "learning_rate": 5.141418123346647e-06, + "logits/chosen": -2.6987645626068115, + "logits/rejected": -2.4440696239471436, + "logps/chosen": -258.854736328125, + "logps/rejected": -313.9727783203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.514229774475098, + "rewards/margins": 7.867445945739746, + "rewards/rejected": -13.381675720214844, + "step": 12279 + }, + { + "epoch": 1.91, + "learning_rate": 5.140684682815499e-06, + "logits/chosen": -3.1070899963378906, + "logits/rejected": -2.313779354095459, + "logps/chosen": -128.48513793945312, + "logps/rejected": -208.96719360351562, + "loss": 0.1089, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.923986911773682, + "rewards/margins": 6.7174835205078125, + "rewards/rejected": -11.641470909118652, + "step": 12280 + }, + { + "epoch": 1.91, + "learning_rate": 5.1399512422843505e-06, + "logits/chosen": -2.889526605606079, + "logits/rejected": -1.4912221431732178, + "logps/chosen": -885.3195190429688, + "logps/rejected": -493.84466552734375, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.334811687469482, + "rewards/margins": 6.174172878265381, + "rewards/rejected": -10.508984565734863, + "step": 12281 + }, + { + "epoch": 1.91, + "learning_rate": 5.139217801753203e-06, + "logits/chosen": -2.7934603691101074, + "logits/rejected": -2.9892566204071045, + "logps/chosen": -95.86857604980469, + "logps/rejected": -184.99697875976562, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6674652099609375, + "rewards/margins": 5.406525611877441, + "rewards/rejected": -12.073990821838379, + "step": 12282 + }, + { + "epoch": 1.91, + "learning_rate": 5.138484361222055e-06, + "logits/chosen": -0.4141658544540405, + "logits/rejected": -3.1810741424560547, + "logps/chosen": -131.21878051757812, + "logps/rejected": -688.678466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.416958808898926, + "rewards/margins": 15.92807388305664, + "rewards/rejected": -23.345033645629883, + "step": 12283 + }, + { + "epoch": 1.91, + "learning_rate": 5.137750920690908e-06, + "logits/chosen": -1.7462821006774902, + "logits/rejected": -2.7769932746887207, + "logps/chosen": -175.37124633789062, + "logps/rejected": -420.511962890625, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.782671928405762, + "rewards/margins": 6.164912223815918, + "rewards/rejected": -11.94758415222168, + "step": 12284 + }, + { + "epoch": 1.91, + "learning_rate": 5.13701748015976e-06, + "logits/chosen": -3.0122902393341064, + "logits/rejected": -2.379657506942749, + "logps/chosen": -651.3873291015625, + "logps/rejected": -478.45330810546875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.382940769195557, + "rewards/margins": 8.714859008789062, + "rewards/rejected": -13.097799301147461, + "step": 12285 + }, + { + "epoch": 1.91, + "learning_rate": 5.136284039628612e-06, + "logits/chosen": -2.735883951187134, + "logits/rejected": -2.9672086238861084, + "logps/chosen": -112.58780670166016, + "logps/rejected": -192.96510314941406, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.246537208557129, + "rewards/margins": 7.175810813903809, + "rewards/rejected": -11.422348022460938, + "step": 12286 + }, + { + "epoch": 1.91, + "learning_rate": 5.1355505990974635e-06, + "logits/chosen": -1.7063720226287842, + "logits/rejected": -2.665151834487915, + "logps/chosen": -178.42706298828125, + "logps/rejected": -473.11474609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.798333168029785, + "rewards/margins": 10.45677375793457, + "rewards/rejected": -16.255107879638672, + "step": 12287 + }, + { + "epoch": 1.91, + "learning_rate": 5.134817158566316e-06, + "logits/chosen": -1.5748631954193115, + "logits/rejected": -2.914074659347534, + "logps/chosen": -125.23420715332031, + "logps/rejected": -350.2115783691406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.695280075073242, + "rewards/margins": 9.20963191986084, + "rewards/rejected": -16.904911041259766, + "step": 12288 + }, + { + "epoch": 1.91, + "learning_rate": 5.134083718035168e-06, + "logits/chosen": -2.5430755615234375, + "logits/rejected": -3.040160894393921, + "logps/chosen": -244.82395935058594, + "logps/rejected": -397.3330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.79095458984375, + "rewards/margins": 11.744827270507812, + "rewards/rejected": -16.535781860351562, + "step": 12289 + }, + { + "epoch": 1.91, + "learning_rate": 5.13335027750402e-06, + "logits/chosen": -2.5647132396698, + "logits/rejected": -2.151880979537964, + "logps/chosen": -514.265380859375, + "logps/rejected": -291.51513671875, + "loss": 3.4642, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.384956359863281, + "rewards/margins": -3.0993502140045166, + "rewards/rejected": -8.285606384277344, + "step": 12290 + }, + { + "epoch": 1.91, + "learning_rate": 5.132616836972872e-06, + "logits/chosen": -2.850689649581909, + "logits/rejected": -2.5882229804992676, + "logps/chosen": -118.63417053222656, + "logps/rejected": -210.92953491210938, + "loss": 0.1251, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0154643058776855, + "rewards/margins": 5.513251304626465, + "rewards/rejected": -9.528716087341309, + "step": 12291 + }, + { + "epoch": 1.91, + "learning_rate": 5.131883396441724e-06, + "logits/chosen": -2.7922985553741455, + "logits/rejected": -2.9657809734344482, + "logps/chosen": -75.58384704589844, + "logps/rejected": -254.68344116210938, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1297407150268555, + "rewards/margins": 9.297418594360352, + "rewards/rejected": -13.427159309387207, + "step": 12292 + }, + { + "epoch": 1.91, + "learning_rate": 5.1311499559105764e-06, + "logits/chosen": -2.3237264156341553, + "logits/rejected": -2.6594831943511963, + "logps/chosen": -247.33688354492188, + "logps/rejected": -385.94427490234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.391035556793213, + "rewards/margins": 10.200395584106445, + "rewards/rejected": -14.5914306640625, + "step": 12293 + }, + { + "epoch": 1.91, + "learning_rate": 5.130416515379428e-06, + "logits/chosen": -2.559937000274658, + "logits/rejected": -2.952439785003662, + "logps/chosen": -278.87408447265625, + "logps/rejected": -441.8509521484375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.301360130310059, + "rewards/margins": 7.026411056518555, + "rewards/rejected": -12.327771186828613, + "step": 12294 + }, + { + "epoch": 1.91, + "learning_rate": 5.12968307484828e-06, + "logits/chosen": -2.9357736110687256, + "logits/rejected": -2.85256290435791, + "logps/chosen": -287.640380859375, + "logps/rejected": -159.45126342773438, + "loss": 0.0913, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.164924144744873, + "rewards/margins": 3.2756507396698, + "rewards/rejected": -10.440574645996094, + "step": 12295 + }, + { + "epoch": 1.91, + "learning_rate": 5.128949634317132e-06, + "logits/chosen": -1.4783899784088135, + "logits/rejected": -1.8783351182937622, + "logps/chosen": -217.0758056640625, + "logps/rejected": -388.1544189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.22672176361084, + "rewards/margins": 10.954758644104004, + "rewards/rejected": -15.181480407714844, + "step": 12296 + }, + { + "epoch": 1.91, + "learning_rate": 5.128216193785985e-06, + "logits/chosen": -3.000284433364868, + "logits/rejected": -1.8587552309036255, + "logps/chosen": -1302.742431640625, + "logps/rejected": -510.3356628417969, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.002798557281494, + "rewards/margins": 5.81973123550415, + "rewards/rejected": -12.822529792785645, + "step": 12297 + }, + { + "epoch": 1.91, + "learning_rate": 5.127482753254837e-06, + "logits/chosen": -2.7900335788726807, + "logits/rejected": -2.998417615890503, + "logps/chosen": -146.62060546875, + "logps/rejected": -374.7767333984375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.18475341796875, + "rewards/margins": 6.60101318359375, + "rewards/rejected": -10.7857666015625, + "step": 12298 + }, + { + "epoch": 1.91, + "learning_rate": 5.1267493127236886e-06, + "logits/chosen": -1.5282593965530396, + "logits/rejected": -2.6381425857543945, + "logps/chosen": -218.52871704101562, + "logps/rejected": -557.6290283203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.792971134185791, + "rewards/margins": 8.037755966186523, + "rewards/rejected": -14.830726623535156, + "step": 12299 + }, + { + "epoch": 1.91, + "learning_rate": 5.126015872192541e-06, + "logits/chosen": -2.1286087036132812, + "logits/rejected": -2.8716020584106445, + "logps/chosen": -131.35650634765625, + "logps/rejected": -166.36090087890625, + "loss": 0.1239, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.838787078857422, + "rewards/margins": 2.4977822303771973, + "rewards/rejected": -11.336568832397461, + "step": 12300 + }, + { + "epoch": 1.91, + "learning_rate": 5.125282431661394e-06, + "logits/chosen": -2.210498094558716, + "logits/rejected": -2.940972089767456, + "logps/chosen": -237.11241149902344, + "logps/rejected": -383.6432800292969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.092672824859619, + "rewards/margins": 8.604190826416016, + "rewards/rejected": -13.696863174438477, + "step": 12301 + }, + { + "epoch": 1.91, + "learning_rate": 5.124548991130246e-06, + "logits/chosen": -2.9726719856262207, + "logits/rejected": -3.0262560844421387, + "logps/chosen": -219.5283203125, + "logps/rejected": -344.9789123535156, + "loss": 0.2899, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.108824729919434, + "rewards/margins": 2.9583518505096436, + "rewards/rejected": -9.067176818847656, + "step": 12302 + }, + { + "epoch": 1.91, + "learning_rate": 5.123815550599098e-06, + "logits/chosen": -1.880135178565979, + "logits/rejected": -3.0866026878356934, + "logps/chosen": -90.4547348022461, + "logps/rejected": -405.20379638671875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.739922523498535, + "rewards/margins": 6.783410549163818, + "rewards/rejected": -11.523332595825195, + "step": 12303 + }, + { + "epoch": 1.91, + "learning_rate": 5.12308211006795e-06, + "logits/chosen": -1.8283225297927856, + "logits/rejected": -2.5115966796875, + "logps/chosen": -220.73057556152344, + "logps/rejected": -455.8836669921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.399832725524902, + "rewards/margins": 9.928647994995117, + "rewards/rejected": -15.32848072052002, + "step": 12304 + }, + { + "epoch": 1.91, + "learning_rate": 5.1223486695368015e-06, + "logits/chosen": -1.4941374063491821, + "logits/rejected": -2.805115222930908, + "logps/chosen": -188.32223510742188, + "logps/rejected": -498.4613342285156, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.035731315612793, + "rewards/margins": 7.822402477264404, + "rewards/rejected": -13.858133316040039, + "step": 12305 + }, + { + "epoch": 1.91, + "learning_rate": 5.121615229005654e-06, + "logits/chosen": -2.9174375534057617, + "logits/rejected": -2.3550291061401367, + "logps/chosen": -562.4043579101562, + "logps/rejected": -404.04315185546875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.917329788208008, + "rewards/margins": 6.352163314819336, + "rewards/rejected": -12.269493103027344, + "step": 12306 + }, + { + "epoch": 1.91, + "learning_rate": 5.120881788474506e-06, + "logits/chosen": -1.116076111793518, + "logits/rejected": -2.373540163040161, + "logps/chosen": -288.7808532714844, + "logps/rejected": -426.87109375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.394252777099609, + "rewards/margins": 8.299904823303223, + "rewards/rejected": -15.694158554077148, + "step": 12307 + }, + { + "epoch": 1.91, + "learning_rate": 5.120148347943358e-06, + "logits/chosen": -2.9082086086273193, + "logits/rejected": -3.0095176696777344, + "logps/chosen": -532.9896240234375, + "logps/rejected": -566.4534301757812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.229464054107666, + "rewards/margins": 10.489181518554688, + "rewards/rejected": -15.718645095825195, + "step": 12308 + }, + { + "epoch": 1.91, + "learning_rate": 5.11941490741221e-06, + "logits/chosen": -2.3371524810791016, + "logits/rejected": -2.6324188709259033, + "logps/chosen": -277.27227783203125, + "logps/rejected": -345.882080078125, + "loss": 1.9269, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.461252212524414, + "rewards/margins": 3.9930949211120605, + "rewards/rejected": -13.454347610473633, + "step": 12309 + }, + { + "epoch": 1.91, + "learning_rate": 5.118681466881063e-06, + "logits/chosen": -1.9704877138137817, + "logits/rejected": -2.9435667991638184, + "logps/chosen": -271.771484375, + "logps/rejected": -560.7064208984375, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.910852432250977, + "rewards/margins": 10.954119682312012, + "rewards/rejected": -15.864971160888672, + "step": 12310 + }, + { + "epoch": 1.91, + "learning_rate": 5.1179480263499145e-06, + "logits/chosen": -2.921844005584717, + "logits/rejected": -3.040567636489868, + "logps/chosen": -61.63137435913086, + "logps/rejected": -175.25437927246094, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.117987155914307, + "rewards/margins": 7.924970626831055, + "rewards/rejected": -12.042957305908203, + "step": 12311 + }, + { + "epoch": 1.91, + "learning_rate": 5.117214585818766e-06, + "logits/chosen": -2.788450241088867, + "logits/rejected": -3.118271589279175, + "logps/chosen": -54.507118225097656, + "logps/rejected": -172.13467407226562, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8050312995910645, + "rewards/margins": 5.7093963623046875, + "rewards/rejected": -9.514427185058594, + "step": 12312 + }, + { + "epoch": 1.91, + "learning_rate": 5.116481145287618e-06, + "logits/chosen": -1.7190014123916626, + "logits/rejected": -2.7667126655578613, + "logps/chosen": -164.47451782226562, + "logps/rejected": -344.6502380371094, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.77879524230957, + "rewards/margins": 8.768561363220215, + "rewards/rejected": -13.547355651855469, + "step": 12313 + }, + { + "epoch": 1.92, + "learning_rate": 5.11574770475647e-06, + "logits/chosen": -3.021576166152954, + "logits/rejected": -2.693366050720215, + "logps/chosen": -465.70513916015625, + "logps/rejected": -577.2103271484375, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.579960823059082, + "rewards/margins": 4.957512855529785, + "rewards/rejected": -11.537473678588867, + "step": 12314 + }, + { + "epoch": 1.92, + "learning_rate": 5.115014264225323e-06, + "logits/chosen": -1.9433428049087524, + "logits/rejected": -2.5485050678253174, + "logps/chosen": -249.95718383789062, + "logps/rejected": -380.4617919921875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.374628067016602, + "rewards/margins": 7.2283148765563965, + "rewards/rejected": -13.602943420410156, + "step": 12315 + }, + { + "epoch": 1.92, + "learning_rate": 5.114280823694175e-06, + "logits/chosen": -2.4869191646575928, + "logits/rejected": -2.719446897506714, + "logps/chosen": -93.09638214111328, + "logps/rejected": -386.0645446777344, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9114718437194824, + "rewards/margins": 7.942680358886719, + "rewards/rejected": -11.85415267944336, + "step": 12316 + }, + { + "epoch": 1.92, + "learning_rate": 5.1135473831630275e-06, + "logits/chosen": -1.063774585723877, + "logits/rejected": -1.8834412097930908, + "logps/chosen": -245.96832275390625, + "logps/rejected": -485.6455078125, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.39548921585083, + "rewards/margins": 10.86947250366211, + "rewards/rejected": -17.26496124267578, + "step": 12317 + }, + { + "epoch": 1.92, + "learning_rate": 5.112813942631879e-06, + "logits/chosen": -2.201115608215332, + "logits/rejected": -2.8040411472320557, + "logps/chosen": -146.63897705078125, + "logps/rejected": -377.9356384277344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9610748291015625, + "rewards/margins": 9.259262084960938, + "rewards/rejected": -16.2203369140625, + "step": 12318 + }, + { + "epoch": 1.92, + "learning_rate": 5.112080502100732e-06, + "logits/chosen": -2.266705274581909, + "logits/rejected": -3.063612461090088, + "logps/chosen": -325.4740295410156, + "logps/rejected": -541.5067749023438, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8789873123168945, + "rewards/margins": 8.680791854858398, + "rewards/rejected": -11.559778213500977, + "step": 12319 + }, + { + "epoch": 1.92, + "learning_rate": 5.111347061569584e-06, + "logits/chosen": -3.1831984519958496, + "logits/rejected": -2.906930685043335, + "logps/chosen": -179.13665771484375, + "logps/rejected": -277.3243713378906, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9649770259857178, + "rewards/margins": 4.172334671020508, + "rewards/rejected": -8.137311935424805, + "step": 12320 + }, + { + "epoch": 1.92, + "learning_rate": 5.110613621038436e-06, + "logits/chosen": -3.0443389415740967, + "logits/rejected": -3.020106792449951, + "logps/chosen": -161.33041381835938, + "logps/rejected": -359.716552734375, + "loss": 0.0569, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.371913909912109, + "rewards/margins": 3.603550910949707, + "rewards/rejected": -8.975464820861816, + "step": 12321 + }, + { + "epoch": 1.92, + "learning_rate": 5.109880180507288e-06, + "logits/chosen": -2.7042832374572754, + "logits/rejected": -3.1442043781280518, + "logps/chosen": -103.12924194335938, + "logps/rejected": -158.3458709716797, + "loss": 0.4205, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.585768699645996, + "rewards/margins": 4.164505958557129, + "rewards/rejected": -9.750274658203125, + "step": 12322 + }, + { + "epoch": 1.92, + "learning_rate": 5.1091467399761396e-06, + "logits/chosen": -2.286681652069092, + "logits/rejected": -2.9320976734161377, + "logps/chosen": -152.5322265625, + "logps/rejected": -304.7061767578125, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.446145534515381, + "rewards/margins": 6.760553359985352, + "rewards/rejected": -11.20669937133789, + "step": 12323 + }, + { + "epoch": 1.92, + "learning_rate": 5.108413299444992e-06, + "logits/chosen": -2.248629093170166, + "logits/rejected": -3.0257229804992676, + "logps/chosen": -196.52584838867188, + "logps/rejected": -457.93792724609375, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.855590343475342, + "rewards/margins": 6.281144142150879, + "rewards/rejected": -14.136734962463379, + "step": 12324 + }, + { + "epoch": 1.92, + "learning_rate": 5.107679858913844e-06, + "logits/chosen": -2.922621965408325, + "logits/rejected": -3.0214200019836426, + "logps/chosen": -566.95751953125, + "logps/rejected": -546.1102294921875, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.365324020385742, + "rewards/margins": 5.095685005187988, + "rewards/rejected": -11.46100902557373, + "step": 12325 + }, + { + "epoch": 1.92, + "learning_rate": 5.106946418382696e-06, + "logits/chosen": -0.93837571144104, + "logits/rejected": -2.5343971252441406, + "logps/chosen": -268.96551513671875, + "logps/rejected": -550.854736328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.635687351226807, + "rewards/margins": 11.8038330078125, + "rewards/rejected": -16.43952178955078, + "step": 12326 + }, + { + "epoch": 1.92, + "learning_rate": 5.106212977851548e-06, + "logits/chosen": -1.8715041875839233, + "logits/rejected": -3.006843328475952, + "logps/chosen": -226.50244140625, + "logps/rejected": -295.5777587890625, + "loss": 0.1081, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.612699508666992, + "rewards/margins": 3.8897311687469482, + "rewards/rejected": -11.50243091583252, + "step": 12327 + }, + { + "epoch": 1.92, + "learning_rate": 5.105479537320401e-06, + "logits/chosen": -1.7230476140975952, + "logits/rejected": -2.737379550933838, + "logps/chosen": -143.69175720214844, + "logps/rejected": -334.15570068359375, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7369232177734375, + "rewards/margins": 5.474145889282227, + "rewards/rejected": -11.211069107055664, + "step": 12328 + }, + { + "epoch": 1.92, + "learning_rate": 5.1047460967892525e-06, + "logits/chosen": -2.849778413772583, + "logits/rejected": -2.9483723640441895, + "logps/chosen": -195.49331665039062, + "logps/rejected": -342.0749206542969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1489193439483643, + "rewards/margins": 9.363567352294922, + "rewards/rejected": -12.512487411499023, + "step": 12329 + }, + { + "epoch": 1.92, + "learning_rate": 5.104012656258104e-06, + "logits/chosen": -1.9520659446716309, + "logits/rejected": -3.0846927165985107, + "logps/chosen": -243.99526977539062, + "logps/rejected": -321.494384765625, + "loss": 3.5765, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.359484672546387, + "rewards/margins": -2.758283853530884, + "rewards/rejected": -7.601201057434082, + "step": 12330 + }, + { + "epoch": 1.92, + "learning_rate": 5.103279215726956e-06, + "logits/chosen": -2.8868932723999023, + "logits/rejected": -2.4908194541931152, + "logps/chosen": -162.76673889160156, + "logps/rejected": -135.87460327148438, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0858869552612305, + "rewards/margins": 4.792718887329102, + "rewards/rejected": -8.878605842590332, + "step": 12331 + }, + { + "epoch": 1.92, + "learning_rate": 5.102545775195808e-06, + "logits/chosen": -2.8124547004699707, + "logits/rejected": -3.010075330734253, + "logps/chosen": -65.79358673095703, + "logps/rejected": -300.40283203125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.659878730773926, + "rewards/margins": 5.811219215393066, + "rewards/rejected": -10.471097946166992, + "step": 12332 + }, + { + "epoch": 1.92, + "learning_rate": 5.101812334664661e-06, + "logits/chosen": -2.2362308502197266, + "logits/rejected": -3.1101882457733154, + "logps/chosen": -300.2581787109375, + "logps/rejected": -453.5062561035156, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.207756996154785, + "rewards/margins": 8.85634994506836, + "rewards/rejected": -12.064106941223145, + "step": 12333 + }, + { + "epoch": 1.92, + "learning_rate": 5.101078894133514e-06, + "logits/chosen": -2.883517265319824, + "logits/rejected": -2.737241506576538, + "logps/chosen": -718.7597045898438, + "logps/rejected": -477.82574462890625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5806429386138916, + "rewards/margins": 6.544306755065918, + "rewards/rejected": -10.12494945526123, + "step": 12334 + }, + { + "epoch": 1.92, + "learning_rate": 5.1003454536023655e-06, + "logits/chosen": -2.911375045776367, + "logits/rejected": -3.136561632156372, + "logps/chosen": -85.89857482910156, + "logps/rejected": -236.779541015625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.842895030975342, + "rewards/margins": 7.492705821990967, + "rewards/rejected": -13.335600852966309, + "step": 12335 + }, + { + "epoch": 1.92, + "learning_rate": 5.099612013071217e-06, + "logits/chosen": -2.9638795852661133, + "logits/rejected": -2.9596376419067383, + "logps/chosen": -108.69131469726562, + "logps/rejected": -299.76715087890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.182534217834473, + "rewards/margins": 8.852555274963379, + "rewards/rejected": -14.035089492797852, + "step": 12336 + }, + { + "epoch": 1.92, + "learning_rate": 5.09887857254007e-06, + "logits/chosen": -1.771584391593933, + "logits/rejected": -2.3797152042388916, + "logps/chosen": -114.49502563476562, + "logps/rejected": -410.634033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5573954582214355, + "rewards/margins": 11.635336875915527, + "rewards/rejected": -17.192731857299805, + "step": 12337 + }, + { + "epoch": 1.92, + "learning_rate": 5.098145132008922e-06, + "logits/chosen": -2.1251962184906006, + "logits/rejected": -2.6635704040527344, + "logps/chosen": -125.30506896972656, + "logps/rejected": -352.29534912109375, + "loss": 0.1914, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.157516479492188, + "rewards/margins": 6.13211727142334, + "rewards/rejected": -15.289633750915527, + "step": 12338 + }, + { + "epoch": 1.92, + "learning_rate": 5.097411691477774e-06, + "logits/chosen": -2.059519052505493, + "logits/rejected": -2.8255114555358887, + "logps/chosen": -138.04971313476562, + "logps/rejected": -244.2209014892578, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.162440299987793, + "rewards/margins": 7.5265302658081055, + "rewards/rejected": -16.6889705657959, + "step": 12339 + }, + { + "epoch": 1.92, + "learning_rate": 5.096678250946626e-06, + "logits/chosen": -3.076761245727539, + "logits/rejected": -3.2343266010284424, + "logps/chosen": -261.4053039550781, + "logps/rejected": -380.7903137207031, + "loss": 0.564, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.216401100158691, + "rewards/margins": 5.125565528869629, + "rewards/rejected": -12.34196662902832, + "step": 12340 + }, + { + "epoch": 1.92, + "learning_rate": 5.0959448104154785e-06, + "logits/chosen": -2.6188364028930664, + "logits/rejected": -2.9779999256134033, + "logps/chosen": -61.47216796875, + "logps/rejected": -231.89352416992188, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.725682735443115, + "rewards/margins": 7.979992389678955, + "rewards/rejected": -12.70567512512207, + "step": 12341 + }, + { + "epoch": 1.92, + "learning_rate": 5.09521136988433e-06, + "logits/chosen": -2.6796205043792725, + "logits/rejected": -2.981214761734009, + "logps/chosen": -293.9953918457031, + "logps/rejected": -441.62884521484375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0822529792785645, + "rewards/margins": 9.683420181274414, + "rewards/rejected": -15.76567268371582, + "step": 12342 + }, + { + "epoch": 1.92, + "learning_rate": 5.094477929353182e-06, + "logits/chosen": -2.778836965560913, + "logits/rejected": -3.086787223815918, + "logps/chosen": -655.6744995117188, + "logps/rejected": -597.1549072265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.885495185852051, + "rewards/margins": 9.137290954589844, + "rewards/rejected": -15.022785186767578, + "step": 12343 + }, + { + "epoch": 1.92, + "learning_rate": 5.093744488822034e-06, + "logits/chosen": -1.6209484338760376, + "logits/rejected": -3.0355706214904785, + "logps/chosen": -248.8619842529297, + "logps/rejected": -223.54635620117188, + "loss": 0.7712, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.918516635894775, + "rewards/margins": 2.903491735458374, + "rewards/rejected": -8.82200813293457, + "step": 12344 + }, + { + "epoch": 1.92, + "learning_rate": 5.093011048290886e-06, + "logits/chosen": -2.8079450130462646, + "logits/rejected": -3.105010986328125, + "logps/chosen": -206.1604461669922, + "logps/rejected": -359.58331298828125, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.724424839019775, + "rewards/margins": 5.462801456451416, + "rewards/rejected": -11.187226295471191, + "step": 12345 + }, + { + "epoch": 1.92, + "learning_rate": 5.092277607759739e-06, + "logits/chosen": -2.187042713165283, + "logits/rejected": -2.6990702152252197, + "logps/chosen": -308.4978942871094, + "logps/rejected": -576.749267578125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.239929676055908, + "rewards/margins": 8.309921264648438, + "rewards/rejected": -13.549850463867188, + "step": 12346 + }, + { + "epoch": 1.92, + "learning_rate": 5.091544167228591e-06, + "logits/chosen": -2.89005446434021, + "logits/rejected": -1.972031593322754, + "logps/chosen": -324.133544921875, + "logps/rejected": -266.05499267578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4401612281799316, + "rewards/margins": 9.458633422851562, + "rewards/rejected": -11.898794174194336, + "step": 12347 + }, + { + "epoch": 1.92, + "learning_rate": 5.0908107266974425e-06, + "logits/chosen": -2.9898605346679688, + "logits/rejected": -2.7343337535858154, + "logps/chosen": -431.17755126953125, + "logps/rejected": -493.2513732910156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.670623779296875, + "rewards/margins": 10.938407897949219, + "rewards/rejected": -14.609031677246094, + "step": 12348 + }, + { + "epoch": 1.92, + "learning_rate": 5.090077286166294e-06, + "logits/chosen": -1.7718209028244019, + "logits/rejected": -1.9820785522460938, + "logps/chosen": -280.1386413574219, + "logps/rejected": -384.192138671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1888909339904785, + "rewards/margins": 9.287847518920898, + "rewards/rejected": -14.476737976074219, + "step": 12349 + }, + { + "epoch": 1.92, + "learning_rate": 5.089343845635147e-06, + "logits/chosen": -3.1547462940216064, + "logits/rejected": -2.708479404449463, + "logps/chosen": -201.40061950683594, + "logps/rejected": -282.4001770019531, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8692479133605957, + "rewards/margins": 8.242936134338379, + "rewards/rejected": -12.112184524536133, + "step": 12350 + }, + { + "epoch": 1.92, + "learning_rate": 5.088610405104e-06, + "logits/chosen": -2.9617929458618164, + "logits/rejected": -2.6421990394592285, + "logps/chosen": -304.83001708984375, + "logps/rejected": -509.9864501953125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.262078285217285, + "rewards/margins": 9.435625076293945, + "rewards/rejected": -16.697702407836914, + "step": 12351 + }, + { + "epoch": 1.92, + "learning_rate": 5.087876964572852e-06, + "logits/chosen": -2.2799320220947266, + "logits/rejected": -2.7796518802642822, + "logps/chosen": -689.1993408203125, + "logps/rejected": -656.9688720703125, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.3687105178833, + "rewards/margins": 5.986795425415039, + "rewards/rejected": -14.355504989624023, + "step": 12352 + }, + { + "epoch": 1.92, + "learning_rate": 5.0871435240417035e-06, + "logits/chosen": -1.6536865234375, + "logits/rejected": -2.853076696395874, + "logps/chosen": -131.7556915283203, + "logps/rejected": -416.2087097167969, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.400106430053711, + "rewards/margins": 8.892762184143066, + "rewards/rejected": -16.292869567871094, + "step": 12353 + }, + { + "epoch": 1.92, + "learning_rate": 5.086410083510555e-06, + "logits/chosen": -2.5262393951416016, + "logits/rejected": -2.8246653079986572, + "logps/chosen": -226.70184326171875, + "logps/rejected": -349.0322265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3767290115356445, + "rewards/margins": 9.631036758422852, + "rewards/rejected": -14.007766723632812, + "step": 12354 + }, + { + "epoch": 1.92, + "learning_rate": 5.085676642979408e-06, + "logits/chosen": -2.3589580059051514, + "logits/rejected": -2.9082841873168945, + "logps/chosen": -393.6109619140625, + "logps/rejected": -339.4817810058594, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.411541938781738, + "rewards/margins": 5.129543304443359, + "rewards/rejected": -9.541084289550781, + "step": 12355 + }, + { + "epoch": 1.92, + "learning_rate": 5.08494320244826e-06, + "logits/chosen": -2.827644109725952, + "logits/rejected": -1.7187092304229736, + "logps/chosen": -351.7987365722656, + "logps/rejected": -290.79547119140625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0598602294921875, + "rewards/margins": 6.992821216583252, + "rewards/rejected": -10.052680969238281, + "step": 12356 + }, + { + "epoch": 1.92, + "learning_rate": 5.084209761917112e-06, + "logits/chosen": -2.7248334884643555, + "logits/rejected": -3.0137460231781006, + "logps/chosen": -163.94979858398438, + "logps/rejected": -198.4388427734375, + "loss": 0.051, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.124454498291016, + "rewards/margins": 3.27422833442688, + "rewards/rejected": -8.398683547973633, + "step": 12357 + }, + { + "epoch": 1.92, + "learning_rate": 5.083476321385964e-06, + "logits/chosen": -2.5645339488983154, + "logits/rejected": -2.92499041557312, + "logps/chosen": -113.58636474609375, + "logps/rejected": -397.2691345214844, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.409335136413574, + "rewards/margins": 6.855371475219727, + "rewards/rejected": -11.2647066116333, + "step": 12358 + }, + { + "epoch": 1.92, + "learning_rate": 5.0827428808548165e-06, + "logits/chosen": -2.937516212463379, + "logits/rejected": -2.369962215423584, + "logps/chosen": -411.5687561035156, + "logps/rejected": -512.4155883789062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.366787910461426, + "rewards/margins": 10.052127838134766, + "rewards/rejected": -16.418916702270508, + "step": 12359 + }, + { + "epoch": 1.92, + "learning_rate": 5.082009440323668e-06, + "logits/chosen": -2.2795755863189697, + "logits/rejected": -2.9046623706817627, + "logps/chosen": -164.81646728515625, + "logps/rejected": -386.8958435058594, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.583934783935547, + "rewards/margins": 7.328301429748535, + "rewards/rejected": -12.912236213684082, + "step": 12360 + }, + { + "epoch": 1.92, + "learning_rate": 5.08127599979252e-06, + "logits/chosen": -2.117115020751953, + "logits/rejected": -2.86568284034729, + "logps/chosen": -144.0741424560547, + "logps/rejected": -279.13958740234375, + "loss": 0.0387, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.414306163787842, + "rewards/margins": 6.330686092376709, + "rewards/rejected": -13.74499225616455, + "step": 12361 + }, + { + "epoch": 1.92, + "learning_rate": 5.080542559261372e-06, + "logits/chosen": -2.9745490550994873, + "logits/rejected": -2.6803107261657715, + "logps/chosen": -441.54730224609375, + "logps/rejected": -599.276611328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.530930995941162, + "rewards/margins": 14.782859802246094, + "rewards/rejected": -19.313791275024414, + "step": 12362 + }, + { + "epoch": 1.92, + "learning_rate": 5.079809118730224e-06, + "logits/chosen": -2.1903347969055176, + "logits/rejected": -3.0675857067108154, + "logps/chosen": -437.1568603515625, + "logps/rejected": -547.5521850585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.837984561920166, + "rewards/margins": 10.93261432647705, + "rewards/rejected": -16.770599365234375, + "step": 12363 + }, + { + "epoch": 1.92, + "learning_rate": 5.079075678199077e-06, + "logits/chosen": -2.433764696121216, + "logits/rejected": -3.0930089950561523, + "logps/chosen": -146.60963439941406, + "logps/rejected": -180.67990112304688, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.800332546234131, + "rewards/margins": 4.9860615730285645, + "rewards/rejected": -11.786394119262695, + "step": 12364 + }, + { + "epoch": 1.92, + "learning_rate": 5.078342237667929e-06, + "logits/chosen": -2.799638032913208, + "logits/rejected": -2.8685426712036133, + "logps/chosen": -102.71726989746094, + "logps/rejected": -114.00140380859375, + "loss": 1.0531, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.49884033203125, + "rewards/margins": 2.157766103744507, + "rewards/rejected": -8.656606674194336, + "step": 12365 + }, + { + "epoch": 1.92, + "learning_rate": 5.0776087971367805e-06, + "logits/chosen": -2.7774739265441895, + "logits/rejected": -3.030121088027954, + "logps/chosen": -156.1930694580078, + "logps/rejected": -345.7098083496094, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.833052158355713, + "rewards/margins": 7.03045654296875, + "rewards/rejected": -12.863508224487305, + "step": 12366 + }, + { + "epoch": 1.92, + "learning_rate": 5.076875356605633e-06, + "logits/chosen": -2.617706775665283, + "logits/rejected": -2.950187921524048, + "logps/chosen": -222.13174438476562, + "logps/rejected": -332.62103271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.046648025512695, + "rewards/margins": 10.629410743713379, + "rewards/rejected": -14.676057815551758, + "step": 12367 + }, + { + "epoch": 1.92, + "learning_rate": 5.076141916074486e-06, + "logits/chosen": -2.8668196201324463, + "logits/rejected": -3.05724835395813, + "logps/chosen": -150.13414001464844, + "logps/rejected": -527.4259033203125, + "loss": 0.0549, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.737957000732422, + "rewards/margins": 6.295998573303223, + "rewards/rejected": -15.033955574035645, + "step": 12368 + }, + { + "epoch": 1.92, + "learning_rate": 5.075408475543338e-06, + "logits/chosen": -2.8666563034057617, + "logits/rejected": -3.0675156116485596, + "logps/chosen": -659.22900390625, + "logps/rejected": -658.99609375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5650551319122314, + "rewards/margins": 7.679285526275635, + "rewards/rejected": -11.244340896606445, + "step": 12369 + }, + { + "epoch": 1.92, + "learning_rate": 5.07467503501219e-06, + "logits/chosen": -2.8158118724823, + "logits/rejected": -2.9624903202056885, + "logps/chosen": -247.07456970214844, + "logps/rejected": -421.911376953125, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.423975467681885, + "rewards/margins": 9.462496757507324, + "rewards/rejected": -14.886472702026367, + "step": 12370 + }, + { + "epoch": 1.92, + "learning_rate": 5.073941594481042e-06, + "logits/chosen": -2.6309995651245117, + "logits/rejected": -2.817441701889038, + "logps/chosen": -314.66705322265625, + "logps/rejected": -443.019775390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.788570404052734, + "rewards/margins": 10.318754196166992, + "rewards/rejected": -17.107324600219727, + "step": 12371 + }, + { + "epoch": 1.92, + "learning_rate": 5.0732081539498935e-06, + "logits/chosen": -2.152695655822754, + "logits/rejected": -2.932171106338501, + "logps/chosen": -256.7968444824219, + "logps/rejected": -506.9117431640625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6968579292297363, + "rewards/margins": 8.90959358215332, + "rewards/rejected": -12.606451034545898, + "step": 12372 + }, + { + "epoch": 1.92, + "learning_rate": 5.072474713418746e-06, + "logits/chosen": -2.561678171157837, + "logits/rejected": -2.997471570968628, + "logps/chosen": -109.45637512207031, + "logps/rejected": -233.62945556640625, + "loss": 0.039, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.206092357635498, + "rewards/margins": 4.660980224609375, + "rewards/rejected": -11.867073059082031, + "step": 12373 + }, + { + "epoch": 1.92, + "learning_rate": 5.071741272887598e-06, + "logits/chosen": -2.695861577987671, + "logits/rejected": -3.0938150882720947, + "logps/chosen": -179.25836181640625, + "logps/rejected": -382.5387268066406, + "loss": 0.6239, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.261634826660156, + "rewards/margins": 4.779716491699219, + "rewards/rejected": -11.041351318359375, + "step": 12374 + }, + { + "epoch": 1.92, + "learning_rate": 5.07100783235645e-06, + "logits/chosen": -2.4742987155914307, + "logits/rejected": -3.0120699405670166, + "logps/chosen": -302.3025207519531, + "logps/rejected": -411.2486877441406, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6497650146484375, + "rewards/margins": 5.046728134155273, + "rewards/rejected": -11.696493148803711, + "step": 12375 + }, + { + "epoch": 1.92, + "learning_rate": 5.070274391825302e-06, + "logits/chosen": -2.8104357719421387, + "logits/rejected": -1.292743444442749, + "logps/chosen": -337.46221923828125, + "logps/rejected": -271.951171875, + "loss": 0.0887, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.723484992980957, + "rewards/margins": 2.433349370956421, + "rewards/rejected": -10.156834602355957, + "step": 12376 + }, + { + "epoch": 1.92, + "learning_rate": 5.0695409512941546e-06, + "logits/chosen": -2.7920501232147217, + "logits/rejected": -3.0153942108154297, + "logps/chosen": -172.04010009765625, + "logps/rejected": -232.7560272216797, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.07692289352417, + "rewards/margins": 6.539112091064453, + "rewards/rejected": -10.616035461425781, + "step": 12377 + }, + { + "epoch": 1.93, + "learning_rate": 5.0688075107630064e-06, + "logits/chosen": -2.904747724533081, + "logits/rejected": -2.2458417415618896, + "logps/chosen": -339.1219482421875, + "logps/rejected": -269.93963623046875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.465354919433594, + "rewards/margins": 5.216533660888672, + "rewards/rejected": -9.681888580322266, + "step": 12378 + }, + { + "epoch": 1.93, + "learning_rate": 5.068074070231858e-06, + "logits/chosen": -2.1689579486846924, + "logits/rejected": -2.927783727645874, + "logps/chosen": -225.5149688720703, + "logps/rejected": -583.5985107421875, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.115144729614258, + "rewards/margins": 8.568890571594238, + "rewards/rejected": -16.68403434753418, + "step": 12379 + }, + { + "epoch": 1.93, + "learning_rate": 5.06734062970071e-06, + "logits/chosen": -2.154618978500366, + "logits/rejected": -2.657071113586426, + "logps/chosen": -80.2470703125, + "logps/rejected": -266.82574462890625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.307275772094727, + "rewards/margins": 7.330865383148193, + "rewards/rejected": -12.638141632080078, + "step": 12380 + }, + { + "epoch": 1.93, + "learning_rate": 5.066607189169562e-06, + "logits/chosen": -2.7685513496398926, + "logits/rejected": -2.8128552436828613, + "logps/chosen": -207.74313354492188, + "logps/rejected": -265.02496337890625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.884260177612305, + "rewards/margins": 5.662087440490723, + "rewards/rejected": -11.546347618103027, + "step": 12381 + }, + { + "epoch": 1.93, + "learning_rate": 5.065873748638415e-06, + "logits/chosen": -2.7772367000579834, + "logits/rejected": -1.4918501377105713, + "logps/chosen": -494.51800537109375, + "logps/rejected": -420.36761474609375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0179643630981445, + "rewards/margins": 10.139500617980957, + "rewards/rejected": -15.157464981079102, + "step": 12382 + }, + { + "epoch": 1.93, + "learning_rate": 5.065140308107267e-06, + "logits/chosen": -2.139047145843506, + "logits/rejected": -2.690941095352173, + "logps/chosen": -218.12652587890625, + "logps/rejected": -382.3709411621094, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.049671173095703, + "rewards/margins": 5.772995471954346, + "rewards/rejected": -11.822667121887207, + "step": 12383 + }, + { + "epoch": 1.93, + "learning_rate": 5.064406867576119e-06, + "logits/chosen": -2.7680110931396484, + "logits/rejected": -2.4453125, + "logps/chosen": -285.6640625, + "logps/rejected": -244.71307373046875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023711204528809, + "rewards/margins": 7.0928144454956055, + "rewards/rejected": -13.116525650024414, + "step": 12384 + }, + { + "epoch": 1.93, + "learning_rate": 5.063673427044971e-06, + "logits/chosen": -2.5034050941467285, + "logits/rejected": -2.8653600215911865, + "logps/chosen": -412.2860107421875, + "logps/rejected": -527.549560546875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.842007637023926, + "rewards/margins": 7.619952201843262, + "rewards/rejected": -12.461959838867188, + "step": 12385 + }, + { + "epoch": 1.93, + "learning_rate": 5.062939986513824e-06, + "logits/chosen": -2.0615439414978027, + "logits/rejected": -2.5304858684539795, + "logps/chosen": -145.35357666015625, + "logps/rejected": -351.6849670410156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.795948028564453, + "rewards/margins": 9.150371551513672, + "rewards/rejected": -15.946319580078125, + "step": 12386 + }, + { + "epoch": 1.93, + "learning_rate": 5.062206545982676e-06, + "logits/chosen": -1.8647211790084839, + "logits/rejected": -2.542649745941162, + "logps/chosen": -243.66201782226562, + "logps/rejected": -418.34075927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.284853458404541, + "rewards/margins": 12.463630676269531, + "rewards/rejected": -17.748485565185547, + "step": 12387 + }, + { + "epoch": 1.93, + "learning_rate": 5.061473105451528e-06, + "logits/chosen": -1.4881707429885864, + "logits/rejected": -2.9165942668914795, + "logps/chosen": -147.37362670898438, + "logps/rejected": -337.70379638671875, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.02186107635498, + "rewards/margins": 6.875952243804932, + "rewards/rejected": -14.897812843322754, + "step": 12388 + }, + { + "epoch": 1.93, + "learning_rate": 5.06073966492038e-06, + "logits/chosen": -2.9647417068481445, + "logits/rejected": -2.7567639350891113, + "logps/chosen": -125.62724304199219, + "logps/rejected": -366.8404846191406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2537732124328613, + "rewards/margins": 8.113616943359375, + "rewards/rejected": -11.367389678955078, + "step": 12389 + }, + { + "epoch": 1.93, + "learning_rate": 5.060006224389232e-06, + "logits/chosen": -2.81428599357605, + "logits/rejected": -2.9112391471862793, + "logps/chosen": -391.8037109375, + "logps/rejected": -494.71429443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.270367622375488, + "rewards/margins": 12.464616775512695, + "rewards/rejected": -16.7349853515625, + "step": 12390 + }, + { + "epoch": 1.93, + "learning_rate": 5.059272783858084e-06, + "logits/chosen": -2.9598865509033203, + "logits/rejected": -1.592466950416565, + "logps/chosen": -429.1217956542969, + "logps/rejected": -344.3419494628906, + "loss": 1.833, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.44186019897461, + "rewards/margins": 2.2773494720458984, + "rewards/rejected": -12.719209671020508, + "step": 12391 + }, + { + "epoch": 1.93, + "learning_rate": 5.058539343326936e-06, + "logits/chosen": -2.733505964279175, + "logits/rejected": -2.9544014930725098, + "logps/chosen": -145.2967529296875, + "logps/rejected": -382.98828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4124040603637695, + "rewards/margins": 10.581528663635254, + "rewards/rejected": -16.993932723999023, + "step": 12392 + }, + { + "epoch": 1.93, + "learning_rate": 5.057805902795788e-06, + "logits/chosen": -2.2290494441986084, + "logits/rejected": -2.640761613845825, + "logps/chosen": -184.5224151611328, + "logps/rejected": -384.82611083984375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.341639041900635, + "rewards/margins": 6.847023010253906, + "rewards/rejected": -13.188661575317383, + "step": 12393 + }, + { + "epoch": 1.93, + "learning_rate": 5.05707246226464e-06, + "logits/chosen": -2.1798198223114014, + "logits/rejected": -2.9912993907928467, + "logps/chosen": -130.19517517089844, + "logps/rejected": -400.33770751953125, + "loss": 1.2876, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.105257987976074, + "rewards/margins": 4.258909225463867, + "rewards/rejected": -12.364167213439941, + "step": 12394 + }, + { + "epoch": 1.93, + "learning_rate": 5.056339021733493e-06, + "logits/chosen": -2.7281274795532227, + "logits/rejected": -2.4618053436279297, + "logps/chosen": -733.063232421875, + "logps/rejected": -543.4688720703125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.863986492156982, + "rewards/margins": 8.416589736938477, + "rewards/rejected": -15.280576705932617, + "step": 12395 + }, + { + "epoch": 1.93, + "learning_rate": 5.0556055812023445e-06, + "logits/chosen": -2.9503068923950195, + "logits/rejected": -2.089580774307251, + "logps/chosen": -437.94287109375, + "logps/rejected": -403.550048828125, + "loss": 1.0149, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.22197151184082, + "rewards/margins": 6.828830242156982, + "rewards/rejected": -15.050801277160645, + "step": 12396 + }, + { + "epoch": 1.93, + "learning_rate": 5.054872140671196e-06, + "logits/chosen": -2.3517608642578125, + "logits/rejected": -1.7392574548721313, + "logps/chosen": -414.7535095214844, + "logps/rejected": -251.0888671875, + "loss": 5.519, + "rewards/accuracies": 0.0, + "rewards/chosen": -14.648935317993164, + "rewards/margins": -5.452408790588379, + "rewards/rejected": -9.196526527404785, + "step": 12397 + }, + { + "epoch": 1.93, + "learning_rate": 5.054138700140048e-06, + "logits/chosen": -2.8632614612579346, + "logits/rejected": -1.683663010597229, + "logps/chosen": -316.96087646484375, + "logps/rejected": -369.2290954589844, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2763214111328125, + "rewards/margins": 6.765406608581543, + "rewards/rejected": -13.041728019714355, + "step": 12398 + }, + { + "epoch": 1.93, + "learning_rate": 5.053405259608901e-06, + "logits/chosen": -1.793824315071106, + "logits/rejected": -2.7496578693389893, + "logps/chosen": -453.6688232421875, + "logps/rejected": -481.73297119140625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7548675537109375, + "rewards/margins": 9.966565132141113, + "rewards/rejected": -15.721431732177734, + "step": 12399 + }, + { + "epoch": 1.93, + "learning_rate": 5.052671819077753e-06, + "logits/chosen": -2.46291446685791, + "logits/rejected": -2.9543371200561523, + "logps/chosen": -292.94781494140625, + "logps/rejected": -602.708984375, + "loss": 1.9962, + "rewards/accuracies": 0.0, + "rewards/chosen": -12.873046875, + "rewards/margins": -1.7851800918579102, + "rewards/rejected": -11.087865829467773, + "step": 12400 + }, + { + "epoch": 1.93, + "learning_rate": 5.0519383785466056e-06, + "logits/chosen": -2.142481565475464, + "logits/rejected": -2.9138379096984863, + "logps/chosen": -213.3284912109375, + "logps/rejected": -367.9700927734375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.603418827056885, + "rewards/margins": 7.245226860046387, + "rewards/rejected": -11.84864616394043, + "step": 12401 + }, + { + "epoch": 1.93, + "learning_rate": 5.0512049380154574e-06, + "logits/chosen": -2.666591167449951, + "logits/rejected": -2.9465270042419434, + "logps/chosen": -204.16549682617188, + "logps/rejected": -261.7463073730469, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.476762294769287, + "rewards/margins": 6.489423751831055, + "rewards/rejected": -10.9661865234375, + "step": 12402 + }, + { + "epoch": 1.93, + "learning_rate": 5.050471497484309e-06, + "logits/chosen": -2.1287217140197754, + "logits/rejected": -2.6426708698272705, + "logps/chosen": -160.35037231445312, + "logps/rejected": -413.1189270019531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.287628173828125, + "rewards/margins": 10.885995864868164, + "rewards/rejected": -16.17362403869629, + "step": 12403 + }, + { + "epoch": 1.93, + "learning_rate": 5.049738056953162e-06, + "logits/chosen": -2.5593149662017822, + "logits/rejected": -2.9171433448791504, + "logps/chosen": -528.2807006835938, + "logps/rejected": -660.2457275390625, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.941664695739746, + "rewards/margins": 8.445206642150879, + "rewards/rejected": -14.386871337890625, + "step": 12404 + }, + { + "epoch": 1.93, + "learning_rate": 5.049004616422014e-06, + "logits/chosen": -1.7833013534545898, + "logits/rejected": -2.731597423553467, + "logps/chosen": -210.48866271972656, + "logps/rejected": -439.1673583984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.067020416259766, + "rewards/margins": 11.672521591186523, + "rewards/rejected": -17.73954200744629, + "step": 12405 + }, + { + "epoch": 1.93, + "learning_rate": 5.048271175890866e-06, + "logits/chosen": -2.192385673522949, + "logits/rejected": -2.7035653591156006, + "logps/chosen": -99.36067962646484, + "logps/rejected": -353.0513610839844, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1143646240234375, + "rewards/margins": 11.40002727508545, + "rewards/rejected": -16.514392852783203, + "step": 12406 + }, + { + "epoch": 1.93, + "learning_rate": 5.047537735359718e-06, + "logits/chosen": -2.057634115219116, + "logits/rejected": -2.4011614322662354, + "logps/chosen": -160.71563720703125, + "logps/rejected": -374.3568115234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.697479248046875, + "rewards/margins": 11.369699478149414, + "rewards/rejected": -20.06717872619629, + "step": 12407 + }, + { + "epoch": 1.93, + "learning_rate": 5.04680429482857e-06, + "logits/chosen": -2.4916329383850098, + "logits/rejected": -2.5639374256134033, + "logps/chosen": -242.17971801757812, + "logps/rejected": -220.56192016601562, + "loss": 0.0532, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.921562194824219, + "rewards/margins": 3.9763057231903076, + "rewards/rejected": -11.897868156433105, + "step": 12408 + }, + { + "epoch": 1.93, + "learning_rate": 5.046070854297422e-06, + "logits/chosen": -2.9915332794189453, + "logits/rejected": -2.989420175552368, + "logps/chosen": -634.963134765625, + "logps/rejected": -374.81085205078125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.989154815673828, + "rewards/margins": 6.162718296051025, + "rewards/rejected": -14.151872634887695, + "step": 12409 + }, + { + "epoch": 1.93, + "learning_rate": 5.045337413766274e-06, + "logits/chosen": -2.82566499710083, + "logits/rejected": -3.004749059677124, + "logps/chosen": -75.5899658203125, + "logps/rejected": -189.19577026367188, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.684782981872559, + "rewards/margins": 6.722675323486328, + "rewards/rejected": -12.407458305358887, + "step": 12410 + }, + { + "epoch": 1.93, + "learning_rate": 5.044603973235126e-06, + "logits/chosen": -1.8985586166381836, + "logits/rejected": -2.955644130706787, + "logps/chosen": -144.38858032226562, + "logps/rejected": -433.6616516113281, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.002835273742676, + "rewards/margins": 8.139695167541504, + "rewards/rejected": -14.14253044128418, + "step": 12411 + }, + { + "epoch": 1.93, + "learning_rate": 5.043870532703978e-06, + "logits/chosen": -1.8039360046386719, + "logits/rejected": -2.870723247528076, + "logps/chosen": -145.18069458007812, + "logps/rejected": -378.76361083984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.791484355926514, + "rewards/margins": 10.516318321228027, + "rewards/rejected": -15.307802200317383, + "step": 12412 + }, + { + "epoch": 1.93, + "learning_rate": 5.043137092172831e-06, + "logits/chosen": -2.5950112342834473, + "logits/rejected": -2.7730202674865723, + "logps/chosen": -118.07389831542969, + "logps/rejected": -401.9306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.789426326751709, + "rewards/margins": 11.573395729064941, + "rewards/rejected": -17.362823486328125, + "step": 12413 + }, + { + "epoch": 1.93, + "learning_rate": 5.0424036516416825e-06, + "logits/chosen": -2.8850598335266113, + "logits/rejected": -0.6877742409706116, + "logps/chosen": -492.8863220214844, + "logps/rejected": -165.4446563720703, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.495014190673828, + "rewards/margins": 8.308311462402344, + "rewards/rejected": -12.803325653076172, + "step": 12414 + }, + { + "epoch": 1.93, + "learning_rate": 5.041670211110534e-06, + "logits/chosen": -2.8147616386413574, + "logits/rejected": -2.811082124710083, + "logps/chosen": -167.1309356689453, + "logps/rejected": -218.65859985351562, + "loss": 0.3285, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.500800609588623, + "rewards/margins": 2.8661293983459473, + "rewards/rejected": -9.36693000793457, + "step": 12415 + }, + { + "epoch": 1.93, + "learning_rate": 5.040936770579386e-06, + "logits/chosen": -1.4089192152023315, + "logits/rejected": -2.4705090522766113, + "logps/chosen": -182.00967407226562, + "logps/rejected": -511.4696044921875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4748430252075195, + "rewards/margins": 12.16212272644043, + "rewards/rejected": -17.636966705322266, + "step": 12416 + }, + { + "epoch": 1.93, + "learning_rate": 5.040203330048239e-06, + "logits/chosen": -2.830203056335449, + "logits/rejected": -2.4714787006378174, + "logps/chosen": -176.81884765625, + "logps/rejected": -259.07305908203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7137467861175537, + "rewards/margins": 8.219193458557129, + "rewards/rejected": -11.932940483093262, + "step": 12417 + }, + { + "epoch": 1.93, + "learning_rate": 5.039469889517092e-06, + "logits/chosen": -2.8433680534362793, + "logits/rejected": -2.553570508956909, + "logps/chosen": -229.4871826171875, + "logps/rejected": -208.78005981445312, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.584042549133301, + "rewards/margins": 7.097764492034912, + "rewards/rejected": -9.681806564331055, + "step": 12418 + }, + { + "epoch": 1.93, + "learning_rate": 5.038736448985944e-06, + "logits/chosen": -2.999204635620117, + "logits/rejected": -2.523515224456787, + "logps/chosen": -628.6239013671875, + "logps/rejected": -556.2493896484375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.366373062133789, + "rewards/margins": 6.8427910804748535, + "rewards/rejected": -15.2091646194458, + "step": 12419 + }, + { + "epoch": 1.93, + "learning_rate": 5.0380030084547955e-06, + "logits/chosen": -2.6766774654388428, + "logits/rejected": -2.760575771331787, + "logps/chosen": -240.7669219970703, + "logps/rejected": -269.0255126953125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3032636642456055, + "rewards/margins": 5.963876724243164, + "rewards/rejected": -10.26714038848877, + "step": 12420 + }, + { + "epoch": 1.93, + "learning_rate": 5.037269567923647e-06, + "logits/chosen": -2.798312187194824, + "logits/rejected": -3.1447463035583496, + "logps/chosen": -176.14935302734375, + "logps/rejected": -342.2555236816406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.715910911560059, + "rewards/margins": 9.821870803833008, + "rewards/rejected": -14.537782669067383, + "step": 12421 + }, + { + "epoch": 1.93, + "learning_rate": 5.0365361273925e-06, + "logits/chosen": -2.8814070224761963, + "logits/rejected": -2.8418827056884766, + "logps/chosen": -208.69485473632812, + "logps/rejected": -353.8062744140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.072502136230469, + "rewards/margins": 9.102243423461914, + "rewards/rejected": -13.174745559692383, + "step": 12422 + }, + { + "epoch": 1.93, + "learning_rate": 5.035802686861352e-06, + "logits/chosen": -2.651986598968506, + "logits/rejected": -2.2174735069274902, + "logps/chosen": -361.0690002441406, + "logps/rejected": -392.7659912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.688508033752441, + "rewards/margins": 11.689981460571289, + "rewards/rejected": -17.378488540649414, + "step": 12423 + }, + { + "epoch": 1.93, + "learning_rate": 5.035069246330204e-06, + "logits/chosen": -1.9786421060562134, + "logits/rejected": -2.931232452392578, + "logps/chosen": -161.79296875, + "logps/rejected": -413.162109375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.22160005569458, + "rewards/margins": 10.013222694396973, + "rewards/rejected": -13.234823226928711, + "step": 12424 + }, + { + "epoch": 1.93, + "learning_rate": 5.034335805799056e-06, + "logits/chosen": -2.8850345611572266, + "logits/rejected": -1.6225173473358154, + "logps/chosen": -462.1679382324219, + "logps/rejected": -310.41607666015625, + "loss": 1.6297, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.534029006958008, + "rewards/margins": 2.0211195945739746, + "rewards/rejected": -9.555148124694824, + "step": 12425 + }, + { + "epoch": 1.93, + "learning_rate": 5.0336023652679084e-06, + "logits/chosen": -2.6043624877929688, + "logits/rejected": -2.6753880977630615, + "logps/chosen": -76.04230499267578, + "logps/rejected": -171.14492797851562, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.319680213928223, + "rewards/margins": 7.591143608093262, + "rewards/rejected": -12.910823822021484, + "step": 12426 + }, + { + "epoch": 1.93, + "learning_rate": 5.03286892473676e-06, + "logits/chosen": -2.7087864875793457, + "logits/rejected": -2.5936217308044434, + "logps/chosen": -181.12759399414062, + "logps/rejected": -153.29287719726562, + "loss": 0.3456, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.121212005615234, + "rewards/margins": 1.4595973491668701, + "rewards/rejected": -8.580809593200684, + "step": 12427 + }, + { + "epoch": 1.93, + "learning_rate": 5.032135484205612e-06, + "logits/chosen": -1.24306058883667, + "logits/rejected": -2.669891119003296, + "logps/chosen": -183.9976348876953, + "logps/rejected": -481.15411376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.757521629333496, + "rewards/margins": 13.040626525878906, + "rewards/rejected": -17.79814910888672, + "step": 12428 + }, + { + "epoch": 1.93, + "learning_rate": 5.031402043674464e-06, + "logits/chosen": -1.2159658670425415, + "logits/rejected": -3.0024869441986084, + "logps/chosen": -251.2168731689453, + "logps/rejected": -580.9383544921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4788315296173096, + "rewards/margins": 9.393610000610352, + "rewards/rejected": -11.872442245483398, + "step": 12429 + }, + { + "epoch": 1.93, + "learning_rate": 5.030668603143316e-06, + "logits/chosen": -2.6139578819274902, + "logits/rejected": -2.7943663597106934, + "logps/chosen": -226.04010009765625, + "logps/rejected": -238.7886199951172, + "loss": 2.7226, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.689369201660156, + "rewards/margins": 2.9387502670288086, + "rewards/rejected": -11.628120422363281, + "step": 12430 + }, + { + "epoch": 1.93, + "learning_rate": 5.029935162612169e-06, + "logits/chosen": -1.634827971458435, + "logits/rejected": -2.9005000591278076, + "logps/chosen": -228.40277099609375, + "logps/rejected": -355.27490234375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.177186965942383, + "rewards/margins": 7.029761791229248, + "rewards/rejected": -14.206949234008789, + "step": 12431 + }, + { + "epoch": 1.93, + "learning_rate": 5.0292017220810206e-06, + "logits/chosen": -2.9946093559265137, + "logits/rejected": -2.4735348224639893, + "logps/chosen": -286.5223693847656, + "logps/rejected": -351.4774169921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.110039710998535, + "rewards/margins": 9.555743217468262, + "rewards/rejected": -15.665782928466797, + "step": 12432 + }, + { + "epoch": 1.93, + "learning_rate": 5.0284682815498724e-06, + "logits/chosen": -2.764064311981201, + "logits/rejected": -2.840365409851074, + "logps/chosen": -323.68963623046875, + "logps/rejected": -431.131591796875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.113499164581299, + "rewards/margins": 9.809627532958984, + "rewards/rejected": -13.923126220703125, + "step": 12433 + }, + { + "epoch": 1.93, + "learning_rate": 5.027734841018725e-06, + "logits/chosen": -2.870123863220215, + "logits/rejected": -1.7951198816299438, + "logps/chosen": -415.9030456542969, + "logps/rejected": -366.3847351074219, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.755209445953369, + "rewards/margins": 8.151397705078125, + "rewards/rejected": -10.906607627868652, + "step": 12434 + }, + { + "epoch": 1.93, + "learning_rate": 5.027001400487578e-06, + "logits/chosen": -2.908804178237915, + "logits/rejected": -1.5902466773986816, + "logps/chosen": -274.7152404785156, + "logps/rejected": -228.09584045410156, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.591979026794434, + "rewards/margins": 7.250920295715332, + "rewards/rejected": -13.842899322509766, + "step": 12435 + }, + { + "epoch": 1.93, + "learning_rate": 5.02626795995643e-06, + "logits/chosen": -2.8891003131866455, + "logits/rejected": -2.036868095397949, + "logps/chosen": -312.1886291503906, + "logps/rejected": -458.3078308105469, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.95877742767334, + "rewards/margins": 8.024399757385254, + "rewards/rejected": -13.983177185058594, + "step": 12436 + }, + { + "epoch": 1.93, + "learning_rate": 5.025534519425282e-06, + "logits/chosen": -2.842339038848877, + "logits/rejected": -2.6760716438293457, + "logps/chosen": -566.9606323242188, + "logps/rejected": -550.2716064453125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.419589519500732, + "rewards/margins": 7.683015823364258, + "rewards/rejected": -15.102605819702148, + "step": 12437 + }, + { + "epoch": 1.93, + "learning_rate": 5.0248010788941335e-06, + "logits/chosen": -2.70802903175354, + "logits/rejected": -2.9944398403167725, + "logps/chosen": -691.9431762695312, + "logps/rejected": -655.105224609375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.637443542480469, + "rewards/margins": 7.5208420753479, + "rewards/rejected": -14.158285140991211, + "step": 12438 + }, + { + "epoch": 1.93, + "learning_rate": 5.024067638362986e-06, + "logits/chosen": -1.6383620500564575, + "logits/rejected": -2.257979154586792, + "logps/chosen": -121.94091796875, + "logps/rejected": -316.9413146972656, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.92734432220459, + "rewards/margins": 7.781701564788818, + "rewards/rejected": -12.70904541015625, + "step": 12439 + }, + { + "epoch": 1.93, + "learning_rate": 5.023334197831838e-06, + "logits/chosen": -2.2313199043273926, + "logits/rejected": -3.076807975769043, + "logps/chosen": -57.899879455566406, + "logps/rejected": -292.8542785644531, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.600703239440918, + "rewards/margins": 6.319418907165527, + "rewards/rejected": -10.920122146606445, + "step": 12440 + }, + { + "epoch": 1.93, + "learning_rate": 5.02260075730069e-06, + "logits/chosen": -2.9127397537231445, + "logits/rejected": -2.625253438949585, + "logps/chosen": -321.0938720703125, + "logps/rejected": -457.1112060546875, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.593889236450195, + "rewards/margins": 8.6644287109375, + "rewards/rejected": -18.258317947387695, + "step": 12441 + }, + { + "epoch": 1.93, + "learning_rate": 5.021867316769542e-06, + "logits/chosen": -1.8346155881881714, + "logits/rejected": -2.8156330585479736, + "logps/chosen": -207.86666870117188, + "logps/rejected": -511.87115478515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.167235374450684, + "rewards/margins": 7.644238471984863, + "rewards/rejected": -14.811473846435547, + "step": 12442 + }, + { + "epoch": 1.94, + "learning_rate": 5.021133876238394e-06, + "logits/chosen": -2.4374306201934814, + "logits/rejected": -2.9350368976593018, + "logps/chosen": -200.00709533691406, + "logps/rejected": -285.0050048828125, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.284475326538086, + "rewards/margins": 4.935800552368164, + "rewards/rejected": -12.22027587890625, + "step": 12443 + }, + { + "epoch": 1.94, + "learning_rate": 5.0204004357072465e-06, + "logits/chosen": -2.3692314624786377, + "logits/rejected": -2.765254259109497, + "logps/chosen": -181.986328125, + "logps/rejected": -422.0714416503906, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.939251899719238, + "rewards/margins": 9.261669158935547, + "rewards/rejected": -15.200920104980469, + "step": 12444 + }, + { + "epoch": 1.94, + "learning_rate": 5.019666995176098e-06, + "logits/chosen": -1.8664250373840332, + "logits/rejected": -2.9097695350646973, + "logps/chosen": -153.66632080078125, + "logps/rejected": -509.2688903808594, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.890639305114746, + "rewards/margins": 8.950322151184082, + "rewards/rejected": -15.840961456298828, + "step": 12445 + }, + { + "epoch": 1.94, + "learning_rate": 5.01893355464495e-06, + "logits/chosen": -2.4257988929748535, + "logits/rejected": -2.9191973209381104, + "logps/chosen": -575.7342529296875, + "logps/rejected": -621.4511108398438, + "loss": 0.0421, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.524332523345947, + "rewards/margins": 4.98812198638916, + "rewards/rejected": -12.512454986572266, + "step": 12446 + }, + { + "epoch": 1.94, + "learning_rate": 5.018200114113802e-06, + "logits/chosen": -2.44761323928833, + "logits/rejected": -3.016376495361328, + "logps/chosen": -104.15544128417969, + "logps/rejected": -525.518798828125, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.371337890625, + "rewards/margins": 3.4172427654266357, + "rewards/rejected": -11.788580894470215, + "step": 12447 + }, + { + "epoch": 1.94, + "learning_rate": 5.017466673582655e-06, + "logits/chosen": -2.2106783390045166, + "logits/rejected": -2.127485513687134, + "logps/chosen": -372.38043212890625, + "logps/rejected": -448.87615966796875, + "loss": 0.3569, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.453404426574707, + "rewards/margins": 7.6559038162231445, + "rewards/rejected": -14.109308242797852, + "step": 12448 + }, + { + "epoch": 1.94, + "learning_rate": 5.016733233051507e-06, + "logits/chosen": -1.8756599426269531, + "logits/rejected": -2.537888765335083, + "logps/chosen": -237.11325073242188, + "logps/rejected": -284.6367492675781, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.31966781616211, + "rewards/margins": 4.180636405944824, + "rewards/rejected": -12.500303268432617, + "step": 12449 + }, + { + "epoch": 1.94, + "learning_rate": 5.015999792520359e-06, + "logits/chosen": -2.4836130142211914, + "logits/rejected": -3.0523855686187744, + "logps/chosen": -120.10430145263672, + "logps/rejected": -279.7642822265625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.925831317901611, + "rewards/margins": 6.079184055328369, + "rewards/rejected": -11.00501537322998, + "step": 12450 + }, + { + "epoch": 1.94, + "learning_rate": 5.015266351989211e-06, + "logits/chosen": -2.4396181106567383, + "logits/rejected": -2.5276663303375244, + "logps/chosen": -111.52561950683594, + "logps/rejected": -235.53463745117188, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.784882068634033, + "rewards/margins": 5.486418724060059, + "rewards/rejected": -10.27130126953125, + "step": 12451 + }, + { + "epoch": 1.94, + "learning_rate": 5.014532911458063e-06, + "logits/chosen": -2.971987724304199, + "logits/rejected": -2.2484354972839355, + "logps/chosen": -212.36013793945312, + "logps/rejected": -465.6016845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.946834564208984, + "rewards/margins": 10.885485649108887, + "rewards/rejected": -16.832321166992188, + "step": 12452 + }, + { + "epoch": 1.94, + "learning_rate": 5.013799470926916e-06, + "logits/chosen": -2.631765127182007, + "logits/rejected": -3.118218183517456, + "logps/chosen": -554.214111328125, + "logps/rejected": -425.3838806152344, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.439058780670166, + "rewards/margins": 7.8417253494262695, + "rewards/rejected": -15.280784606933594, + "step": 12453 + }, + { + "epoch": 1.94, + "learning_rate": 5.013066030395768e-06, + "logits/chosen": -2.2379984855651855, + "logits/rejected": -2.844743251800537, + "logps/chosen": -269.9068298339844, + "logps/rejected": -311.5220947265625, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.255515098571777, + "rewards/margins": 5.692660331726074, + "rewards/rejected": -11.948175430297852, + "step": 12454 + }, + { + "epoch": 1.94, + "learning_rate": 5.01233258986462e-06, + "logits/chosen": -2.5847034454345703, + "logits/rejected": -2.326918125152588, + "logps/chosen": -232.76080322265625, + "logps/rejected": -430.839599609375, + "loss": 0.585, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.55465316772461, + "rewards/margins": 5.585095405578613, + "rewards/rejected": -14.139749526977539, + "step": 12455 + }, + { + "epoch": 1.94, + "learning_rate": 5.0115991493334716e-06, + "logits/chosen": -2.8210628032684326, + "logits/rejected": -3.023904323577881, + "logps/chosen": -243.28587341308594, + "logps/rejected": -408.80303955078125, + "loss": 0.0761, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.651758193969727, + "rewards/margins": 5.073615074157715, + "rewards/rejected": -11.725373268127441, + "step": 12456 + }, + { + "epoch": 1.94, + "learning_rate": 5.010865708802324e-06, + "logits/chosen": -1.2180932760238647, + "logits/rejected": -2.311140298843384, + "logps/chosen": -159.5286102294922, + "logps/rejected": -284.4787292480469, + "loss": 1.7378, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.954717636108398, + "rewards/margins": 0.33514904975891113, + "rewards/rejected": -11.28986644744873, + "step": 12457 + }, + { + "epoch": 1.94, + "learning_rate": 5.010132268271176e-06, + "logits/chosen": -2.9043421745300293, + "logits/rejected": -2.98825740814209, + "logps/chosen": -68.42623901367188, + "logps/rejected": -232.85931396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.298898220062256, + "rewards/margins": 11.534616470336914, + "rewards/rejected": -15.833515167236328, + "step": 12458 + }, + { + "epoch": 1.94, + "learning_rate": 5.009398827740028e-06, + "logits/chosen": -2.096605062484741, + "logits/rejected": -2.697014570236206, + "logps/chosen": -150.9918212890625, + "logps/rejected": -308.632568359375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.448792457580566, + "rewards/margins": 8.093633651733398, + "rewards/rejected": -15.542426109313965, + "step": 12459 + }, + { + "epoch": 1.94, + "learning_rate": 5.00866538720888e-06, + "logits/chosen": -2.1034274101257324, + "logits/rejected": -2.6176021099090576, + "logps/chosen": -243.39041137695312, + "logps/rejected": -383.47857666015625, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.423438549041748, + "rewards/margins": 4.030045032501221, + "rewards/rejected": -11.453483581542969, + "step": 12460 + }, + { + "epoch": 1.94, + "learning_rate": 5.007931946677732e-06, + "logits/chosen": -3.0246529579162598, + "logits/rejected": -3.093909740447998, + "logps/chosen": -276.0218811035156, + "logps/rejected": -298.0089416503906, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.14377498626709, + "rewards/margins": 5.392568588256836, + "rewards/rejected": -13.536344528198242, + "step": 12461 + }, + { + "epoch": 1.94, + "learning_rate": 5.0071985061465845e-06, + "logits/chosen": -2.8354363441467285, + "logits/rejected": -2.569934129714966, + "logps/chosen": -251.10276794433594, + "logps/rejected": -358.34588623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.422937870025635, + "rewards/margins": 9.266744613647461, + "rewards/rejected": -13.689682960510254, + "step": 12462 + }, + { + "epoch": 1.94, + "learning_rate": 5.006465065615436e-06, + "logits/chosen": -2.9064924716949463, + "logits/rejected": -3.028827667236328, + "logps/chosen": -856.6911010742188, + "logps/rejected": -791.6143798828125, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.894379138946533, + "rewards/margins": 8.11515998840332, + "rewards/rejected": -14.009539604187012, + "step": 12463 + }, + { + "epoch": 1.94, + "learning_rate": 5.005731625084288e-06, + "logits/chosen": -1.8355889320373535, + "logits/rejected": -2.7176144123077393, + "logps/chosen": -300.8770751953125, + "logps/rejected": -399.761962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.732087135314941, + "rewards/margins": 11.461063385009766, + "rewards/rejected": -17.193151473999023, + "step": 12464 + }, + { + "epoch": 1.94, + "learning_rate": 5.00499818455314e-06, + "logits/chosen": -2.3575358390808105, + "logits/rejected": -3.1273043155670166, + "logps/chosen": -200.81045532226562, + "logps/rejected": -369.72381591796875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.159627914428711, + "rewards/margins": 7.053900718688965, + "rewards/rejected": -14.213528633117676, + "step": 12465 + }, + { + "epoch": 1.94, + "learning_rate": 5.004264744021993e-06, + "logits/chosen": -2.5453455448150635, + "logits/rejected": -2.1400680541992188, + "logps/chosen": -449.6128845214844, + "logps/rejected": -241.50244140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.684028625488281, + "rewards/margins": 9.050804138183594, + "rewards/rejected": -13.734833717346191, + "step": 12466 + }, + { + "epoch": 1.94, + "learning_rate": 5.003531303490845e-06, + "logits/chosen": -2.843534231185913, + "logits/rejected": -2.8231165409088135, + "logps/chosen": -395.322998046875, + "logps/rejected": -530.5640869140625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.991182804107666, + "rewards/margins": 7.854746341705322, + "rewards/rejected": -13.845929145812988, + "step": 12467 + }, + { + "epoch": 1.94, + "learning_rate": 5.0027978629596975e-06, + "logits/chosen": -2.913478136062622, + "logits/rejected": -1.9777058362960815, + "logps/chosen": -172.25363159179688, + "logps/rejected": -169.6349639892578, + "loss": 0.0258, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.381083011627197, + "rewards/margins": 5.176875114440918, + "rewards/rejected": -10.557958602905273, + "step": 12468 + }, + { + "epoch": 1.94, + "learning_rate": 5.002064422428549e-06, + "logits/chosen": -2.905885696411133, + "logits/rejected": -3.0578291416168213, + "logps/chosen": -296.21527099609375, + "logps/rejected": -209.92327880859375, + "loss": 0.0915, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.13787841796875, + "rewards/margins": 6.466048240661621, + "rewards/rejected": -14.603926658630371, + "step": 12469 + }, + { + "epoch": 1.94, + "learning_rate": 5.001330981897401e-06, + "logits/chosen": -1.521124243736267, + "logits/rejected": -2.7715539932250977, + "logps/chosen": -244.84730529785156, + "logps/rejected": -434.5748596191406, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.579371929168701, + "rewards/margins": 7.682319641113281, + "rewards/rejected": -15.26169204711914, + "step": 12470 + }, + { + "epoch": 1.94, + "learning_rate": 5.000597541366254e-06, + "logits/chosen": -2.3644044399261475, + "logits/rejected": -2.6380505561828613, + "logps/chosen": -188.23382568359375, + "logps/rejected": -464.2891845703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.822397708892822, + "rewards/margins": 9.630776405334473, + "rewards/rejected": -16.453174591064453, + "step": 12471 + }, + { + "epoch": 1.94, + "learning_rate": 4.999864100835106e-06, + "logits/chosen": -2.5797810554504395, + "logits/rejected": -2.8516461849212646, + "logps/chosen": -67.44741821289062, + "logps/rejected": -254.78724670410156, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.221502304077148, + "rewards/margins": 8.042926788330078, + "rewards/rejected": -14.264429092407227, + "step": 12472 + }, + { + "epoch": 1.94, + "learning_rate": 4.999130660303958e-06, + "logits/chosen": -2.4749274253845215, + "logits/rejected": -2.426600933074951, + "logps/chosen": -241.94268798828125, + "logps/rejected": -401.37451171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0540361404418945, + "rewards/margins": 9.576874732971191, + "rewards/rejected": -16.630910873413086, + "step": 12473 + }, + { + "epoch": 1.94, + "learning_rate": 4.99839721977281e-06, + "logits/chosen": -2.7669293880462646, + "logits/rejected": -2.384470224380493, + "logps/chosen": -345.8921203613281, + "logps/rejected": -387.0517883300781, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.976269721984863, + "rewards/margins": 7.918458938598633, + "rewards/rejected": -14.894728660583496, + "step": 12474 + }, + { + "epoch": 1.94, + "learning_rate": 4.997663779241662e-06, + "logits/chosen": -2.9889490604400635, + "logits/rejected": -2.3505022525787354, + "logps/chosen": -419.7568359375, + "logps/rejected": -626.7776489257812, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.523012638092041, + "rewards/margins": 8.537049293518066, + "rewards/rejected": -15.060062408447266, + "step": 12475 + }, + { + "epoch": 1.94, + "learning_rate": 4.996930338710514e-06, + "logits/chosen": -2.6531894207000732, + "logits/rejected": -2.7301666736602783, + "logps/chosen": -399.46136474609375, + "logps/rejected": -365.3263244628906, + "loss": 0.09, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.697122573852539, + "rewards/margins": 3.239981174468994, + "rewards/rejected": -12.937103271484375, + "step": 12476 + }, + { + "epoch": 1.94, + "learning_rate": 4.996196898179366e-06, + "logits/chosen": -2.89646053314209, + "logits/rejected": -2.2939438819885254, + "logps/chosen": -503.77972412109375, + "logps/rejected": -380.158447265625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.00177001953125, + "rewards/margins": 8.539618492126465, + "rewards/rejected": -12.541388511657715, + "step": 12477 + }, + { + "epoch": 1.94, + "learning_rate": 4.995463457648218e-06, + "logits/chosen": -2.6214637756347656, + "logits/rejected": -1.9340946674346924, + "logps/chosen": -301.22955322265625, + "logps/rejected": -332.0462646484375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.930105686187744, + "rewards/margins": 7.747137069702148, + "rewards/rejected": -12.677242279052734, + "step": 12478 + }, + { + "epoch": 1.94, + "learning_rate": 4.99473001711707e-06, + "logits/chosen": -2.8593623638153076, + "logits/rejected": -0.9427589178085327, + "logps/chosen": -425.4886779785156, + "logps/rejected": -121.71543884277344, + "loss": 4.0354, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.715688705444336, + "rewards/margins": -2.05755877494812, + "rewards/rejected": -10.658130645751953, + "step": 12479 + }, + { + "epoch": 1.94, + "learning_rate": 4.993996576585923e-06, + "logits/chosen": -3.0029218196868896, + "logits/rejected": -2.94637393951416, + "logps/chosen": -159.09898376464844, + "logps/rejected": -147.67251586914062, + "loss": 3.1422, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.605746269226074, + "rewards/margins": 0.015054941177368164, + "rewards/rejected": -10.620800971984863, + "step": 12480 + }, + { + "epoch": 1.94, + "learning_rate": 4.9932631360547745e-06, + "logits/chosen": -2.299267530441284, + "logits/rejected": -2.7823801040649414, + "logps/chosen": -376.109619140625, + "logps/rejected": -411.5921630859375, + "loss": 0.3122, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.415106296539307, + "rewards/margins": 6.342439651489258, + "rewards/rejected": -12.757545471191406, + "step": 12481 + }, + { + "epoch": 1.94, + "learning_rate": 4.992529695523626e-06, + "logits/chosen": -3.082481861114502, + "logits/rejected": -2.8815743923187256, + "logps/chosen": -486.0248718261719, + "logps/rejected": -370.45941162109375, + "loss": 1.6111, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.510406017303467, + "rewards/margins": 2.295811176300049, + "rewards/rejected": -9.806217193603516, + "step": 12482 + }, + { + "epoch": 1.94, + "learning_rate": 4.991796254992478e-06, + "logits/chosen": -2.9698448181152344, + "logits/rejected": -2.2537312507629395, + "logps/chosen": -417.8690185546875, + "logps/rejected": -454.15423583984375, + "loss": 1.1336, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.249953269958496, + "rewards/margins": 3.6156444549560547, + "rewards/rejected": -12.86559772491455, + "step": 12483 + }, + { + "epoch": 1.94, + "learning_rate": 4.991062814461331e-06, + "logits/chosen": -2.4484736919403076, + "logits/rejected": -2.4587459564208984, + "logps/chosen": -212.97671508789062, + "logps/rejected": -350.14324951171875, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.200165748596191, + "rewards/margins": 6.577937126159668, + "rewards/rejected": -11.77810287475586, + "step": 12484 + }, + { + "epoch": 1.94, + "learning_rate": 4.990329373930184e-06, + "logits/chosen": -2.377377986907959, + "logits/rejected": -2.9354774951934814, + "logps/chosen": -172.82672119140625, + "logps/rejected": -382.63818359375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5930986404418945, + "rewards/margins": 7.8523359298706055, + "rewards/rejected": -12.4454345703125, + "step": 12485 + }, + { + "epoch": 1.94, + "learning_rate": 4.9895959333990355e-06, + "logits/chosen": -2.896465539932251, + "logits/rejected": -3.0081429481506348, + "logps/chosen": -142.32765197753906, + "logps/rejected": -237.79055786132812, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.148100852966309, + "rewards/margins": 6.9150238037109375, + "rewards/rejected": -11.063125610351562, + "step": 12486 + }, + { + "epoch": 1.94, + "learning_rate": 4.988862492867887e-06, + "logits/chosen": -2.947322368621826, + "logits/rejected": -2.748185873031616, + "logps/chosen": -594.5311279296875, + "logps/rejected": -451.85687255859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.699129104614258, + "rewards/margins": 8.72889518737793, + "rewards/rejected": -13.428024291992188, + "step": 12487 + }, + { + "epoch": 1.94, + "learning_rate": 4.98812905233674e-06, + "logits/chosen": -0.7325335741043091, + "logits/rejected": -2.910510778427124, + "logps/chosen": -118.22794342041016, + "logps/rejected": -615.5695190429688, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.514395713806152, + "rewards/margins": 7.4330925941467285, + "rewards/rejected": -13.947488784790039, + "step": 12488 + }, + { + "epoch": 1.94, + "learning_rate": 4.987395611805592e-06, + "logits/chosen": -2.294628620147705, + "logits/rejected": -2.959458351135254, + "logps/chosen": -266.69879150390625, + "logps/rejected": -590.9957275390625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.63549518585205, + "rewards/margins": 7.383092880249023, + "rewards/rejected": -17.01858901977539, + "step": 12489 + }, + { + "epoch": 1.94, + "learning_rate": 4.986662171274444e-06, + "logits/chosen": -1.524561882019043, + "logits/rejected": -2.911531448364258, + "logps/chosen": -107.86929321289062, + "logps/rejected": -465.6357727050781, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.84438419342041, + "rewards/margins": 8.549924850463867, + "rewards/rejected": -13.394309043884277, + "step": 12490 + }, + { + "epoch": 1.94, + "learning_rate": 4.985928730743296e-06, + "logits/chosen": -2.6173043251037598, + "logits/rejected": -2.971747875213623, + "logps/chosen": -84.0987548828125, + "logps/rejected": -184.5902099609375, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.194446086883545, + "rewards/margins": 5.196645736694336, + "rewards/rejected": -9.391092300415039, + "step": 12491 + }, + { + "epoch": 1.94, + "learning_rate": 4.985195290212148e-06, + "logits/chosen": -0.9584122896194458, + "logits/rejected": -2.378589630126953, + "logps/chosen": -235.1031494140625, + "logps/rejected": -544.3233642578125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.78255558013916, + "rewards/margins": 9.724227905273438, + "rewards/rejected": -16.50678253173828, + "step": 12492 + }, + { + "epoch": 1.94, + "learning_rate": 4.984461849681e-06, + "logits/chosen": -1.8615121841430664, + "logits/rejected": -2.3358538150787354, + "logps/chosen": -122.13853454589844, + "logps/rejected": -347.5333251953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1211748123168945, + "rewards/margins": 10.6981201171875, + "rewards/rejected": -17.819293975830078, + "step": 12493 + }, + { + "epoch": 1.94, + "learning_rate": 4.983728409149852e-06, + "logits/chosen": -2.326810121536255, + "logits/rejected": -2.6523115634918213, + "logps/chosen": -177.95675659179688, + "logps/rejected": -336.62982177734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.952787160873413, + "rewards/margins": 9.154902458190918, + "rewards/rejected": -13.10768985748291, + "step": 12494 + }, + { + "epoch": 1.94, + "learning_rate": 4.982994968618704e-06, + "logits/chosen": -2.5176711082458496, + "logits/rejected": -3.043175220489502, + "logps/chosen": -140.10183715820312, + "logps/rejected": -289.88092041015625, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.362361907958984, + "rewards/margins": 7.514588832855225, + "rewards/rejected": -13.87695026397705, + "step": 12495 + }, + { + "epoch": 1.94, + "learning_rate": 4.982261528087556e-06, + "logits/chosen": -1.91317880153656, + "logits/rejected": -2.8919665813446045, + "logps/chosen": -84.33948516845703, + "logps/rejected": -328.430419921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.381802558898926, + "rewards/margins": 10.388197898864746, + "rewards/rejected": -15.770000457763672, + "step": 12496 + }, + { + "epoch": 1.94, + "learning_rate": 4.981528087556409e-06, + "logits/chosen": -2.209461212158203, + "logits/rejected": -3.0277717113494873, + "logps/chosen": -103.65144348144531, + "logps/rejected": -352.9186706542969, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.418051719665527, + "rewards/margins": 7.145597457885742, + "rewards/rejected": -13.56364917755127, + "step": 12497 + }, + { + "epoch": 1.94, + "learning_rate": 4.980794647025261e-06, + "logits/chosen": -2.019970417022705, + "logits/rejected": -2.5595574378967285, + "logps/chosen": -117.70039367675781, + "logps/rejected": -421.31646728515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.88071870803833, + "rewards/margins": 14.70330810546875, + "rewards/rejected": -18.584026336669922, + "step": 12498 + }, + { + "epoch": 1.94, + "learning_rate": 4.9800612064941125e-06, + "logits/chosen": -2.3906972408294678, + "logits/rejected": -3.0522379875183105, + "logps/chosen": -340.4978942871094, + "logps/rejected": -478.259521484375, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.968392372131348, + "rewards/margins": 5.067145347595215, + "rewards/rejected": -10.035537719726562, + "step": 12499 + }, + { + "epoch": 1.94, + "learning_rate": 4.979327765962964e-06, + "logits/chosen": -3.038109064102173, + "logits/rejected": -3.1103246212005615, + "logps/chosen": -220.35943603515625, + "logps/rejected": -264.8957214355469, + "loss": 0.0461, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2837114334106445, + "rewards/margins": 5.897646903991699, + "rewards/rejected": -10.181358337402344, + "step": 12500 + }, + { + "epoch": 1.94, + "learning_rate": 4.978594325431817e-06, + "logits/chosen": -2.693589687347412, + "logits/rejected": -3.0432181358337402, + "logps/chosen": -158.34791564941406, + "logps/rejected": -259.64678955078125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.998910903930664, + "rewards/margins": 7.317018508911133, + "rewards/rejected": -12.315929412841797, + "step": 12501 + }, + { + "epoch": 1.94, + "learning_rate": 4.97786088490067e-06, + "logits/chosen": -1.932227611541748, + "logits/rejected": -3.0329740047454834, + "logps/chosen": -214.09130859375, + "logps/rejected": -309.8384704589844, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.993640422821045, + "rewards/margins": 6.610593795776367, + "rewards/rejected": -10.604233741760254, + "step": 12502 + }, + { + "epoch": 1.94, + "learning_rate": 4.977127444369522e-06, + "logits/chosen": -2.091968059539795, + "logits/rejected": -2.6761980056762695, + "logps/chosen": -384.64337158203125, + "logps/rejected": -543.82177734375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.314324378967285, + "rewards/margins": 9.752934455871582, + "rewards/rejected": -17.067258834838867, + "step": 12503 + }, + { + "epoch": 1.94, + "learning_rate": 4.976394003838374e-06, + "logits/chosen": -1.7233542203903198, + "logits/rejected": -2.689668655395508, + "logps/chosen": -153.6208953857422, + "logps/rejected": -306.0101013183594, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.074663162231445, + "rewards/margins": 4.962337493896484, + "rewards/rejected": -14.03700065612793, + "step": 12504 + }, + { + "epoch": 1.94, + "learning_rate": 4.9756605633072255e-06, + "logits/chosen": -2.939016103744507, + "logits/rejected": -2.5033063888549805, + "logps/chosen": -752.1630859375, + "logps/rejected": -481.9054870605469, + "loss": 1.8283, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.860162734985352, + "rewards/margins": 2.075986385345459, + "rewards/rejected": -10.936149597167969, + "step": 12505 + }, + { + "epoch": 1.94, + "learning_rate": 4.974927122776078e-06, + "logits/chosen": -2.1222095489501953, + "logits/rejected": -2.9716122150421143, + "logps/chosen": -275.7059631347656, + "logps/rejected": -517.072998046875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.131704807281494, + "rewards/margins": 7.392244338989258, + "rewards/rejected": -12.523948669433594, + "step": 12506 + }, + { + "epoch": 1.95, + "learning_rate": 4.97419368224493e-06, + "logits/chosen": -2.760249614715576, + "logits/rejected": -2.691362142562866, + "logps/chosen": -291.4886779785156, + "logps/rejected": -394.1378173828125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0819573402404785, + "rewards/margins": 8.946391105651855, + "rewards/rejected": -14.028348922729492, + "step": 12507 + }, + { + "epoch": 1.95, + "learning_rate": 4.973460241713782e-06, + "logits/chosen": -2.3733620643615723, + "logits/rejected": -2.6023716926574707, + "logps/chosen": -145.12728881835938, + "logps/rejected": -279.6446838378906, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.735697269439697, + "rewards/margins": 8.416879653930664, + "rewards/rejected": -15.152576446533203, + "step": 12508 + }, + { + "epoch": 1.95, + "learning_rate": 4.972726801182634e-06, + "logits/chosen": -2.562021017074585, + "logits/rejected": -2.929521083831787, + "logps/chosen": -409.0913391113281, + "logps/rejected": -454.7115173339844, + "loss": 0.0992, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2885823249816895, + "rewards/margins": 10.074853897094727, + "rewards/rejected": -15.363435745239258, + "step": 12509 + }, + { + "epoch": 1.95, + "learning_rate": 4.971993360651486e-06, + "logits/chosen": -2.9302589893341064, + "logits/rejected": -2.124225378036499, + "logps/chosen": -409.111328125, + "logps/rejected": -424.7362976074219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.431547164916992, + "rewards/margins": 9.343145370483398, + "rewards/rejected": -15.77469253540039, + "step": 12510 + }, + { + "epoch": 1.95, + "learning_rate": 4.9712599201203384e-06, + "logits/chosen": -2.5116143226623535, + "logits/rejected": -2.8682806491851807, + "logps/chosen": -105.84068298339844, + "logps/rejected": -537.059814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.273412704467773, + "rewards/margins": 13.85476303100586, + "rewards/rejected": -19.128175735473633, + "step": 12511 + }, + { + "epoch": 1.95, + "learning_rate": 4.97052647958919e-06, + "logits/chosen": -1.9002375602722168, + "logits/rejected": -2.78226375579834, + "logps/chosen": -196.64402770996094, + "logps/rejected": -376.5354919433594, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5233564376831055, + "rewards/margins": 6.74688720703125, + "rewards/rejected": -14.270243644714355, + "step": 12512 + }, + { + "epoch": 1.95, + "learning_rate": 4.969793039058042e-06, + "logits/chosen": -2.2404420375823975, + "logits/rejected": -2.7770423889160156, + "logps/chosen": -213.86329650878906, + "logps/rejected": -297.14605712890625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.661794662475586, + "rewards/margins": 6.695633411407471, + "rewards/rejected": -13.357427597045898, + "step": 12513 + }, + { + "epoch": 1.95, + "learning_rate": 4.969059598526894e-06, + "logits/chosen": -2.5300886631011963, + "logits/rejected": -2.9182653427124023, + "logps/chosen": -500.438232421875, + "logps/rejected": -400.7463684082031, + "loss": 2.3812, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.187978744506836, + "rewards/margins": -0.9233734607696533, + "rewards/rejected": -9.264605522155762, + "step": 12514 + }, + { + "epoch": 1.95, + "learning_rate": 4.968326157995747e-06, + "logits/chosen": -2.0360536575317383, + "logits/rejected": -3.0728790760040283, + "logps/chosen": -352.1142272949219, + "logps/rejected": -450.3998718261719, + "loss": 0.0325, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.900269508361816, + "rewards/margins": 5.459846019744873, + "rewards/rejected": -13.360115051269531, + "step": 12515 + }, + { + "epoch": 1.95, + "learning_rate": 4.967592717464599e-06, + "logits/chosen": -2.8915441036224365, + "logits/rejected": -3.1571178436279297, + "logps/chosen": -208.60911560058594, + "logps/rejected": -266.5076599121094, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.429227352142334, + "rewards/margins": 7.109709739685059, + "rewards/rejected": -13.538936614990234, + "step": 12516 + }, + { + "epoch": 1.95, + "learning_rate": 4.9668592769334505e-06, + "logits/chosen": -2.428431510925293, + "logits/rejected": -2.793874502182007, + "logps/chosen": -185.0802001953125, + "logps/rejected": -273.025390625, + "loss": 0.0449, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.568986892700195, + "rewards/margins": 4.021292686462402, + "rewards/rejected": -12.590279579162598, + "step": 12517 + }, + { + "epoch": 1.95, + "learning_rate": 4.966125836402303e-06, + "logits/chosen": -2.981917142868042, + "logits/rejected": -2.3069405555725098, + "logps/chosen": -217.3431396484375, + "logps/rejected": -258.9117736816406, + "loss": 0.2919, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.999566555023193, + "rewards/margins": 3.471285343170166, + "rewards/rejected": -9.47085189819336, + "step": 12518 + }, + { + "epoch": 1.95, + "learning_rate": 4.965392395871155e-06, + "logits/chosen": -2.7689363956451416, + "logits/rejected": -2.6338303089141846, + "logps/chosen": -331.57281494140625, + "logps/rejected": -395.96148681640625, + "loss": 0.0684, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.850349426269531, + "rewards/margins": 4.605978488922119, + "rewards/rejected": -15.456327438354492, + "step": 12519 + }, + { + "epoch": 1.95, + "learning_rate": 4.964658955340008e-06, + "logits/chosen": -2.996178150177002, + "logits/rejected": -3.0686264038085938, + "logps/chosen": -170.33596801757812, + "logps/rejected": -258.68218994140625, + "loss": 0.0538, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.430283069610596, + "rewards/margins": 6.174944877624512, + "rewards/rejected": -12.605228424072266, + "step": 12520 + }, + { + "epoch": 1.95, + "learning_rate": 4.96392551480886e-06, + "logits/chosen": -2.0843186378479004, + "logits/rejected": -2.7496535778045654, + "logps/chosen": -198.88677978515625, + "logps/rejected": -416.3295593261719, + "loss": 0.0258, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.027417182922363, + "rewards/margins": 3.8036980628967285, + "rewards/rejected": -10.83111572265625, + "step": 12521 + }, + { + "epoch": 1.95, + "learning_rate": 4.963192074277712e-06, + "logits/chosen": -2.9956905841827393, + "logits/rejected": -2.9064669609069824, + "logps/chosen": -115.51013946533203, + "logps/rejected": -218.114990234375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6833906173706055, + "rewards/margins": 7.7648773193359375, + "rewards/rejected": -15.448266983032227, + "step": 12522 + }, + { + "epoch": 1.95, + "learning_rate": 4.9624586337465635e-06, + "logits/chosen": -1.531630516052246, + "logits/rejected": -2.7816903591156006, + "logps/chosen": -156.6314697265625, + "logps/rejected": -430.0353088378906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.141999244689941, + "rewards/margins": 12.220104217529297, + "rewards/rejected": -17.362102508544922, + "step": 12523 + }, + { + "epoch": 1.95, + "learning_rate": 4.961725193215416e-06, + "logits/chosen": -2.8850347995758057, + "logits/rejected": -1.9006962776184082, + "logps/chosen": -630.0654296875, + "logps/rejected": -495.6678771972656, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.027454376220703, + "rewards/margins": 9.144546508789062, + "rewards/rejected": -16.172000885009766, + "step": 12524 + }, + { + "epoch": 1.95, + "learning_rate": 4.960991752684268e-06, + "logits/chosen": -3.1855690479278564, + "logits/rejected": -3.2227303981781006, + "logps/chosen": -78.60469055175781, + "logps/rejected": -148.36196899414062, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.563880920410156, + "rewards/margins": 4.7188849449157715, + "rewards/rejected": -9.282766342163086, + "step": 12525 + }, + { + "epoch": 1.95, + "learning_rate": 4.96025831215312e-06, + "logits/chosen": -2.7142162322998047, + "logits/rejected": -3.093869686126709, + "logps/chosen": -333.0459289550781, + "logps/rejected": -483.9215087890625, + "loss": 0.0992, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.900741577148438, + "rewards/margins": 3.483319044113159, + "rewards/rejected": -12.384060859680176, + "step": 12526 + }, + { + "epoch": 1.95, + "learning_rate": 4.959524871621972e-06, + "logits/chosen": -2.792116403579712, + "logits/rejected": -2.133124589920044, + "logps/chosen": -208.45933532714844, + "logps/rejected": -253.58685302734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.038150787353516, + "rewards/margins": 7.715146064758301, + "rewards/rejected": -13.753296852111816, + "step": 12527 + }, + { + "epoch": 1.95, + "learning_rate": 4.958791431090825e-06, + "logits/chosen": -2.3965003490448, + "logits/rejected": -2.7664523124694824, + "logps/chosen": -408.83050537109375, + "logps/rejected": -527.564208984375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.411494255065918, + "rewards/margins": 8.248202323913574, + "rewards/rejected": -15.659696578979492, + "step": 12528 + }, + { + "epoch": 1.95, + "learning_rate": 4.9580579905596765e-06, + "logits/chosen": -2.47339129447937, + "logits/rejected": -2.7998011112213135, + "logps/chosen": -172.6129150390625, + "logps/rejected": -302.8777770996094, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3205413818359375, + "rewards/margins": 7.24530029296875, + "rewards/rejected": -11.565841674804688, + "step": 12529 + }, + { + "epoch": 1.95, + "learning_rate": 4.957324550028528e-06, + "logits/chosen": -2.1168324947357178, + "logits/rejected": -2.747209310531616, + "logps/chosen": -82.0488510131836, + "logps/rejected": -216.41937255859375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.620457649230957, + "rewards/margins": 7.520721435546875, + "rewards/rejected": -11.141179084777832, + "step": 12530 + }, + { + "epoch": 1.95, + "learning_rate": 4.95659110949738e-06, + "logits/chosen": -2.619349718093872, + "logits/rejected": -2.9798471927642822, + "logps/chosen": -69.36493682861328, + "logps/rejected": -260.70208740234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5868072509765625, + "rewards/margins": 9.156326293945312, + "rewards/rejected": -14.743133544921875, + "step": 12531 + }, + { + "epoch": 1.95, + "learning_rate": 4.955857668966232e-06, + "logits/chosen": -3.1884865760803223, + "logits/rejected": -2.052946090698242, + "logps/chosen": -346.91632080078125, + "logps/rejected": -289.9662170410156, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.578883409500122, + "rewards/margins": 7.678075790405273, + "rewards/rejected": -10.256958961486816, + "step": 12532 + }, + { + "epoch": 1.95, + "learning_rate": 4.955124228435085e-06, + "logits/chosen": -2.274127721786499, + "logits/rejected": -2.3653109073638916, + "logps/chosen": -301.614501953125, + "logps/rejected": -418.481201171875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.025078773498535, + "rewards/margins": 7.287068843841553, + "rewards/rejected": -14.31214714050293, + "step": 12533 + }, + { + "epoch": 1.95, + "learning_rate": 4.954390787903937e-06, + "logits/chosen": -1.5518829822540283, + "logits/rejected": -2.7260992527008057, + "logps/chosen": -243.32781982421875, + "logps/rejected": -466.3050231933594, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.034558296203613, + "rewards/margins": 9.587589263916016, + "rewards/rejected": -16.622148513793945, + "step": 12534 + }, + { + "epoch": 1.95, + "learning_rate": 4.9536573473727894e-06, + "logits/chosen": -3.017275094985962, + "logits/rejected": -3.0339269638061523, + "logps/chosen": -141.29615783691406, + "logps/rejected": -260.83612060546875, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.802959442138672, + "rewards/margins": 4.7039690017700195, + "rewards/rejected": -12.506928443908691, + "step": 12535 + }, + { + "epoch": 1.95, + "learning_rate": 4.952923906841641e-06, + "logits/chosen": -3.0226023197174072, + "logits/rejected": -2.9805870056152344, + "logps/chosen": -120.56835174560547, + "logps/rejected": -364.9692687988281, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.733185768127441, + "rewards/margins": 7.436036109924316, + "rewards/rejected": -13.169221878051758, + "step": 12536 + }, + { + "epoch": 1.95, + "learning_rate": 4.952190466310494e-06, + "logits/chosen": -1.4783531427383423, + "logits/rejected": -2.6855759620666504, + "logps/chosen": -325.991943359375, + "logps/rejected": -337.0577087402344, + "loss": 1.5538, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.709103584289551, + "rewards/margins": 2.9957587718963623, + "rewards/rejected": -10.704862594604492, + "step": 12537 + }, + { + "epoch": 1.95, + "learning_rate": 4.951457025779346e-06, + "logits/chosen": -1.2171348333358765, + "logits/rejected": -2.823150157928467, + "logps/chosen": -142.64830017089844, + "logps/rejected": -460.3501281738281, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.591452598571777, + "rewards/margins": 8.070602416992188, + "rewards/rejected": -15.662055015563965, + "step": 12538 + }, + { + "epoch": 1.95, + "learning_rate": 4.950723585248198e-06, + "logits/chosen": -2.7448604106903076, + "logits/rejected": -1.558793067932129, + "logps/chosen": -228.20687866210938, + "logps/rejected": -228.14215087890625, + "loss": 0.0216, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.034576892852783, + "rewards/margins": 5.812203884124756, + "rewards/rejected": -11.846780776977539, + "step": 12539 + }, + { + "epoch": 1.95, + "learning_rate": 4.94999014471705e-06, + "logits/chosen": -2.7455265522003174, + "logits/rejected": -2.6318469047546387, + "logps/chosen": -495.18951416015625, + "logps/rejected": -503.5067138671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.817533016204834, + "rewards/margins": 12.280888557434082, + "rewards/rejected": -17.098421096801758, + "step": 12540 + }, + { + "epoch": 1.95, + "learning_rate": 4.9492567041859016e-06, + "logits/chosen": -1.9555386304855347, + "logits/rejected": -3.0181713104248047, + "logps/chosen": -154.64918518066406, + "logps/rejected": -585.3350830078125, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.187217712402344, + "rewards/margins": 6.573193550109863, + "rewards/rejected": -14.760412216186523, + "step": 12541 + }, + { + "epoch": 1.95, + "learning_rate": 4.948523263654754e-06, + "logits/chosen": -1.6600468158721924, + "logits/rejected": -2.3904755115509033, + "logps/chosen": -154.49362182617188, + "logps/rejected": -593.6653442382812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.962585926055908, + "rewards/margins": 13.345086097717285, + "rewards/rejected": -20.30767250061035, + "step": 12542 + }, + { + "epoch": 1.95, + "learning_rate": 4.947789823123606e-06, + "logits/chosen": -2.080645799636841, + "logits/rejected": -2.7314093112945557, + "logps/chosen": -330.56744384765625, + "logps/rejected": -422.0729675292969, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.643040657043457, + "rewards/margins": 6.353965759277344, + "rewards/rejected": -11.9970064163208, + "step": 12543 + }, + { + "epoch": 1.95, + "learning_rate": 4.947056382592458e-06, + "logits/chosen": -2.5524699687957764, + "logits/rejected": -3.137540817260742, + "logps/chosen": -123.29258728027344, + "logps/rejected": -326.53765869140625, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.422464847564697, + "rewards/margins": 6.0869293212890625, + "rewards/rejected": -11.509393692016602, + "step": 12544 + }, + { + "epoch": 1.95, + "learning_rate": 4.94632294206131e-06, + "logits/chosen": -2.7857747077941895, + "logits/rejected": -2.9172310829162598, + "logps/chosen": -418.2445373535156, + "logps/rejected": -358.2349853515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.275244235992432, + "rewards/margins": 7.610262870788574, + "rewards/rejected": -11.885507583618164, + "step": 12545 + }, + { + "epoch": 1.95, + "learning_rate": 4.945589501530163e-06, + "logits/chosen": -2.9312751293182373, + "logits/rejected": -3.1433706283569336, + "logps/chosen": -181.5048828125, + "logps/rejected": -254.26345825195312, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.595069885253906, + "rewards/margins": 7.6007280349731445, + "rewards/rejected": -12.19579792022705, + "step": 12546 + }, + { + "epoch": 1.95, + "learning_rate": 4.9448560609990145e-06, + "logits/chosen": -2.970557451248169, + "logits/rejected": -3.122498035430908, + "logps/chosen": -506.9656677246094, + "logps/rejected": -546.6925048828125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.63153076171875, + "rewards/margins": 8.730430603027344, + "rewards/rejected": -16.361961364746094, + "step": 12547 + }, + { + "epoch": 1.95, + "learning_rate": 4.944122620467866e-06, + "logits/chosen": -3.144315719604492, + "logits/rejected": -2.3286406993865967, + "logps/chosen": -375.099365234375, + "logps/rejected": -257.3313293457031, + "loss": 3.3529, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.882011413574219, + "rewards/margins": 0.37877535820007324, + "rewards/rejected": -7.260787010192871, + "step": 12548 + }, + { + "epoch": 1.95, + "learning_rate": 4.943389179936718e-06, + "logits/chosen": -2.086780309677124, + "logits/rejected": -3.144465208053589, + "logps/chosen": -72.8277587890625, + "logps/rejected": -296.544677734375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.044034481048584, + "rewards/margins": 7.9533586502075195, + "rewards/rejected": -11.997393608093262, + "step": 12549 + }, + { + "epoch": 1.95, + "learning_rate": 4.94265573940557e-06, + "logits/chosen": -2.5706334114074707, + "logits/rejected": -2.6304030418395996, + "logps/chosen": -82.37510681152344, + "logps/rejected": -260.09710693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.879563331604004, + "rewards/margins": 10.634827613830566, + "rewards/rejected": -16.51439094543457, + "step": 12550 + }, + { + "epoch": 1.95, + "learning_rate": 4.941922298874423e-06, + "logits/chosen": -0.7039976716041565, + "logits/rejected": -2.9237582683563232, + "logps/chosen": -308.80303955078125, + "logps/rejected": -858.62158203125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.135910987854004, + "rewards/margins": 9.289767265319824, + "rewards/rejected": -19.425678253173828, + "step": 12551 + }, + { + "epoch": 1.95, + "learning_rate": 4.941188858343276e-06, + "logits/chosen": -3.012563943862915, + "logits/rejected": -2.564197301864624, + "logps/chosen": -447.7061767578125, + "logps/rejected": -488.189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9245691299438477, + "rewards/margins": 10.658166885375977, + "rewards/rejected": -13.58273696899414, + "step": 12552 + }, + { + "epoch": 1.95, + "learning_rate": 4.9404554178121275e-06, + "logits/chosen": -2.5642237663269043, + "logits/rejected": -2.802217483520508, + "logps/chosen": -238.28619384765625, + "logps/rejected": -327.1008605957031, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.483375549316406, + "rewards/margins": 7.873113632202148, + "rewards/rejected": -15.356489181518555, + "step": 12553 + }, + { + "epoch": 1.95, + "learning_rate": 4.939721977280979e-06, + "logits/chosen": -2.1863105297088623, + "logits/rejected": -2.886691093444824, + "logps/chosen": -189.5425262451172, + "logps/rejected": -332.66888427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.494147300720215, + "rewards/margins": 9.552196502685547, + "rewards/rejected": -15.046344757080078, + "step": 12554 + }, + { + "epoch": 1.95, + "learning_rate": 4.938988536749832e-06, + "logits/chosen": -1.9935111999511719, + "logits/rejected": -2.9567718505859375, + "logps/chosen": -395.2755126953125, + "logps/rejected": -548.4608764648438, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.661309242248535, + "rewards/margins": 5.978339195251465, + "rewards/rejected": -12.6396484375, + "step": 12555 + }, + { + "epoch": 1.95, + "learning_rate": 4.938255096218684e-06, + "logits/chosen": -3.1271698474884033, + "logits/rejected": -2.9881503582000732, + "logps/chosen": -352.08245849609375, + "logps/rejected": -345.296142578125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4083359241485596, + "rewards/margins": 7.627447605133057, + "rewards/rejected": -11.035783767700195, + "step": 12556 + }, + { + "epoch": 1.95, + "learning_rate": 4.937521655687536e-06, + "logits/chosen": -2.8779876232147217, + "logits/rejected": -3.010915756225586, + "logps/chosen": -396.2362976074219, + "logps/rejected": -406.7689514160156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8813562393188477, + "rewards/margins": 12.046499252319336, + "rewards/rejected": -14.9278564453125, + "step": 12557 + }, + { + "epoch": 1.95, + "learning_rate": 4.936788215156388e-06, + "logits/chosen": -2.841282367706299, + "logits/rejected": -1.8779305219650269, + "logps/chosen": -278.3337097167969, + "logps/rejected": -264.4105529785156, + "loss": 0.0705, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.868528366088867, + "rewards/margins": 5.456239700317383, + "rewards/rejected": -12.32476806640625, + "step": 12558 + }, + { + "epoch": 1.95, + "learning_rate": 4.93605477462524e-06, + "logits/chosen": -1.809295415878296, + "logits/rejected": -2.8236560821533203, + "logps/chosen": -169.1650390625, + "logps/rejected": -375.9676513671875, + "loss": 0.6254, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.853884696960449, + "rewards/margins": 5.557591438293457, + "rewards/rejected": -13.411476135253906, + "step": 12559 + }, + { + "epoch": 1.95, + "learning_rate": 4.935321334094092e-06, + "logits/chosen": -2.9332919120788574, + "logits/rejected": -2.9815714359283447, + "logps/chosen": -80.3179702758789, + "logps/rejected": -208.90853881835938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.95332145690918, + "rewards/margins": 9.1315336227417, + "rewards/rejected": -14.084855079650879, + "step": 12560 + }, + { + "epoch": 1.95, + "learning_rate": 4.934587893562944e-06, + "logits/chosen": -2.3061373233795166, + "logits/rejected": -2.8762831687927246, + "logps/chosen": -422.5398864746094, + "logps/rejected": -431.2955017089844, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5377349853515625, + "rewards/margins": 6.630189418792725, + "rewards/rejected": -13.167924880981445, + "step": 12561 + }, + { + "epoch": 1.95, + "learning_rate": 4.933854453031796e-06, + "logits/chosen": -2.831636667251587, + "logits/rejected": -3.0414669513702393, + "logps/chosen": -129.64068603515625, + "logps/rejected": -293.0224609375, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.633602142333984, + "rewards/margins": 4.92665958404541, + "rewards/rejected": -11.560261726379395, + "step": 12562 + }, + { + "epoch": 1.95, + "learning_rate": 4.933121012500648e-06, + "logits/chosen": -1.9960427284240723, + "logits/rejected": -3.1005918979644775, + "logps/chosen": -98.47616577148438, + "logps/rejected": -400.3077697753906, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.043705463409424, + "rewards/margins": 6.639969348907471, + "rewards/rejected": -13.683674812316895, + "step": 12563 + }, + { + "epoch": 1.95, + "learning_rate": 4.932387571969501e-06, + "logits/chosen": -1.4252738952636719, + "logits/rejected": -2.9275107383728027, + "logps/chosen": -178.29400634765625, + "logps/rejected": -460.4991455078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.906460762023926, + "rewards/margins": 12.401456832885742, + "rewards/rejected": -19.307918548583984, + "step": 12564 + }, + { + "epoch": 1.95, + "learning_rate": 4.9316541314383526e-06, + "logits/chosen": -3.0595712661743164, + "logits/rejected": -2.882874011993408, + "logps/chosen": -218.55160522460938, + "logps/rejected": -375.67303466796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7481434345245361, + "rewards/margins": 11.678414344787598, + "rewards/rejected": -13.426557540893555, + "step": 12565 + }, + { + "epoch": 1.95, + "learning_rate": 4.9309206909072044e-06, + "logits/chosen": -3.0978829860687256, + "logits/rejected": -2.823551893234253, + "logps/chosen": -400.13726806640625, + "logps/rejected": -362.71209716796875, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.957588195800781, + "rewards/margins": 5.706436634063721, + "rewards/rejected": -11.664024353027344, + "step": 12566 + }, + { + "epoch": 1.95, + "learning_rate": 4.930187250376056e-06, + "logits/chosen": -2.8576295375823975, + "logits/rejected": -2.843045949935913, + "logps/chosen": -771.5082397460938, + "logps/rejected": -572.0333862304688, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.205907821655273, + "rewards/margins": 5.829831600189209, + "rewards/rejected": -13.03573989868164, + "step": 12567 + }, + { + "epoch": 1.95, + "learning_rate": 4.929453809844909e-06, + "logits/chosen": -2.758591413497925, + "logits/rejected": -3.0887672901153564, + "logps/chosen": -85.41874694824219, + "logps/rejected": -230.47994995117188, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.528810501098633, + "rewards/margins": 5.694418907165527, + "rewards/rejected": -11.22322940826416, + "step": 12568 + }, + { + "epoch": 1.95, + "learning_rate": 4.928720369313762e-06, + "logits/chosen": -2.8222858905792236, + "logits/rejected": -2.4036169052124023, + "logps/chosen": -443.68902587890625, + "logps/rejected": -383.01123046875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.205388069152832, + "rewards/margins": 6.30864143371582, + "rewards/rejected": -15.514028549194336, + "step": 12569 + }, + { + "epoch": 1.95, + "learning_rate": 4.927986928782614e-06, + "logits/chosen": -2.9731557369232178, + "logits/rejected": -1.8691504001617432, + "logps/chosen": -381.76531982421875, + "logps/rejected": -165.35037231445312, + "loss": 1.4368, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.617125034332275, + "rewards/margins": 3.4298818111419678, + "rewards/rejected": -10.047006607055664, + "step": 12570 + }, + { + "epoch": 1.96, + "learning_rate": 4.9272534882514655e-06, + "logits/chosen": -2.329411029815674, + "logits/rejected": -2.6610963344573975, + "logps/chosen": -130.0662384033203, + "logps/rejected": -460.2745361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.360426902770996, + "rewards/margins": 11.912677764892578, + "rewards/rejected": -18.27310562133789, + "step": 12571 + }, + { + "epoch": 1.96, + "learning_rate": 4.926520047720317e-06, + "logits/chosen": -1.860565423965454, + "logits/rejected": -2.8585715293884277, + "logps/chosen": -141.84352111816406, + "logps/rejected": -365.84722900390625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.438052177429199, + "rewards/margins": 8.073671340942383, + "rewards/rejected": -15.511723518371582, + "step": 12572 + }, + { + "epoch": 1.96, + "learning_rate": 4.92578660718917e-06, + "logits/chosen": -2.7635414600372314, + "logits/rejected": -1.9720430374145508, + "logps/chosen": -318.06866455078125, + "logps/rejected": -376.03900146484375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.94927978515625, + "rewards/margins": 9.588937759399414, + "rewards/rejected": -16.538217544555664, + "step": 12573 + }, + { + "epoch": 1.96, + "learning_rate": 4.925053166658022e-06, + "logits/chosen": -3.078357219696045, + "logits/rejected": -3.144146680831909, + "logps/chosen": -89.1190414428711, + "logps/rejected": -272.400390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.130788803100586, + "rewards/margins": 10.418757438659668, + "rewards/rejected": -12.549546241760254, + "step": 12574 + }, + { + "epoch": 1.96, + "learning_rate": 4.924319726126874e-06, + "logits/chosen": -2.382415771484375, + "logits/rejected": -2.939661979675293, + "logps/chosen": -187.91488647460938, + "logps/rejected": -355.96270751953125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7151944637298584, + "rewards/margins": 8.109879493713379, + "rewards/rejected": -11.825074195861816, + "step": 12575 + }, + { + "epoch": 1.96, + "learning_rate": 4.923586285595726e-06, + "logits/chosen": -1.7112423181533813, + "logits/rejected": -2.899834632873535, + "logps/chosen": -118.1954345703125, + "logps/rejected": -450.9377136230469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.917115211486816, + "rewards/margins": 11.187374114990234, + "rewards/rejected": -18.104488372802734, + "step": 12576 + }, + { + "epoch": 1.96, + "learning_rate": 4.9228528450645785e-06, + "logits/chosen": -1.0565721988677979, + "logits/rejected": -2.9013547897338867, + "logps/chosen": -106.527587890625, + "logps/rejected": -444.995849609375, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.483221054077148, + "rewards/margins": 6.282635688781738, + "rewards/rejected": -11.765857696533203, + "step": 12577 + }, + { + "epoch": 1.96, + "learning_rate": 4.92211940453343e-06, + "logits/chosen": -2.32185959815979, + "logits/rejected": -2.537445306777954, + "logps/chosen": -207.2336883544922, + "logps/rejected": -530.6095581054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.79475212097168, + "rewards/margins": 11.90373706817627, + "rewards/rejected": -17.698490142822266, + "step": 12578 + }, + { + "epoch": 1.96, + "learning_rate": 4.921385964002282e-06, + "logits/chosen": -2.5496041774749756, + "logits/rejected": -2.514632225036621, + "logps/chosen": -242.17208862304688, + "logps/rejected": -225.04135131835938, + "loss": 0.7082, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.948143005371094, + "rewards/margins": 3.8639187812805176, + "rewards/rejected": -12.81206226348877, + "step": 12579 + }, + { + "epoch": 1.96, + "learning_rate": 4.920652523471134e-06, + "logits/chosen": -2.9560234546661377, + "logits/rejected": -2.7907636165618896, + "logps/chosen": -816.7669067382812, + "logps/rejected": -608.112548828125, + "loss": 0.8763, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.244560241699219, + "rewards/margins": 1.256103277206421, + "rewards/rejected": -9.500663757324219, + "step": 12580 + }, + { + "epoch": 1.96, + "learning_rate": 4.919919082939986e-06, + "logits/chosen": -2.05743670463562, + "logits/rejected": -2.5317180156707764, + "logps/chosen": -389.055908203125, + "logps/rejected": -625.9022216796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.541224479675293, + "rewards/margins": 10.010625839233398, + "rewards/rejected": -17.551851272583008, + "step": 12581 + }, + { + "epoch": 1.96, + "learning_rate": 4.919185642408839e-06, + "logits/chosen": -1.6843305826187134, + "logits/rejected": -2.8661675453186035, + "logps/chosen": -297.9359130859375, + "logps/rejected": -617.1302490234375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.227970123291016, + "rewards/margins": 6.465964317321777, + "rewards/rejected": -13.69393539428711, + "step": 12582 + }, + { + "epoch": 1.96, + "learning_rate": 4.918452201877691e-06, + "logits/chosen": -3.1182045936584473, + "logits/rejected": -2.6798386573791504, + "logps/chosen": -260.6309814453125, + "logps/rejected": -213.38583374023438, + "loss": 0.2235, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.480451583862305, + "rewards/margins": 2.173623561859131, + "rewards/rejected": -11.654074668884277, + "step": 12583 + }, + { + "epoch": 1.96, + "learning_rate": 4.9177187613465425e-06, + "logits/chosen": -2.999983072280884, + "logits/rejected": -2.527188539505005, + "logps/chosen": -462.1903381347656, + "logps/rejected": -560.9890747070312, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5926103591918945, + "rewards/margins": 9.553179740905762, + "rewards/rejected": -14.145790100097656, + "step": 12584 + }, + { + "epoch": 1.96, + "learning_rate": 4.916985320815395e-06, + "logits/chosen": -0.8082779049873352, + "logits/rejected": -2.847032070159912, + "logps/chosen": -106.9332046508789, + "logps/rejected": -419.09332275390625, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.491952896118164, + "rewards/margins": 6.63047981262207, + "rewards/rejected": -13.122432708740234, + "step": 12585 + }, + { + "epoch": 1.96, + "learning_rate": 4.916251880284248e-06, + "logits/chosen": -2.3945817947387695, + "logits/rejected": -2.767092704772949, + "logps/chosen": -104.35081481933594, + "logps/rejected": -296.57391357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.472231864929199, + "rewards/margins": 10.62666130065918, + "rewards/rejected": -15.098894119262695, + "step": 12586 + }, + { + "epoch": 1.96, + "learning_rate": 4.9155184397531e-06, + "logits/chosen": -2.808577060699463, + "logits/rejected": -2.9736487865448, + "logps/chosen": -190.46896362304688, + "logps/rejected": -324.3460388183594, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.823276519775391, + "rewards/margins": 9.022080421447754, + "rewards/rejected": -13.845357894897461, + "step": 12587 + }, + { + "epoch": 1.96, + "learning_rate": 4.914784999221952e-06, + "logits/chosen": -2.9067788124084473, + "logits/rejected": -2.183863878250122, + "logps/chosen": -204.5327911376953, + "logps/rejected": -410.16162109375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.794097900390625, + "rewards/margins": 11.773451805114746, + "rewards/rejected": -17.567550659179688, + "step": 12588 + }, + { + "epoch": 1.96, + "learning_rate": 4.9140515586908036e-06, + "logits/chosen": -2.800008535385132, + "logits/rejected": -2.674901008605957, + "logps/chosen": -535.6807861328125, + "logps/rejected": -387.70013427734375, + "loss": 2.5135, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.869102478027344, + "rewards/margins": 2.6513898372650146, + "rewards/rejected": -12.520492553710938, + "step": 12589 + }, + { + "epoch": 1.96, + "learning_rate": 4.9133181181596554e-06, + "logits/chosen": -2.7333524227142334, + "logits/rejected": -3.04732608795166, + "logps/chosen": -230.6417236328125, + "logps/rejected": -499.5932922363281, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.068563461303711, + "rewards/margins": 4.66823673248291, + "rewards/rejected": -12.736801147460938, + "step": 12590 + }, + { + "epoch": 1.96, + "learning_rate": 4.912584677628508e-06, + "logits/chosen": -2.5070652961730957, + "logits/rejected": -1.738616943359375, + "logps/chosen": -169.55136108398438, + "logps/rejected": -229.9564666748047, + "loss": 0.0269, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.97092866897583, + "rewards/margins": 5.423363208770752, + "rewards/rejected": -11.394291877746582, + "step": 12591 + }, + { + "epoch": 1.96, + "learning_rate": 4.91185123709736e-06, + "logits/chosen": -2.1717212200164795, + "logits/rejected": -2.902878522872925, + "logps/chosen": -127.48089599609375, + "logps/rejected": -474.1510009765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.313020706176758, + "rewards/margins": 11.354171752929688, + "rewards/rejected": -16.667192459106445, + "step": 12592 + }, + { + "epoch": 1.96, + "learning_rate": 4.911117796566212e-06, + "logits/chosen": -2.322587490081787, + "logits/rejected": -2.9532699584960938, + "logps/chosen": -107.16824340820312, + "logps/rejected": -427.93743896484375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.678213119506836, + "rewards/margins": 10.63202953338623, + "rewards/rejected": -16.31024169921875, + "step": 12593 + }, + { + "epoch": 1.96, + "learning_rate": 4.910384356035064e-06, + "logits/chosen": -2.8013620376586914, + "logits/rejected": -2.9895496368408203, + "logps/chosen": -107.25840759277344, + "logps/rejected": -265.4325256347656, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.649672031402588, + "rewards/margins": 7.835573673248291, + "rewards/rejected": -10.485245704650879, + "step": 12594 + }, + { + "epoch": 1.96, + "learning_rate": 4.9096509155039165e-06, + "logits/chosen": -2.905778408050537, + "logits/rejected": -2.702223777770996, + "logps/chosen": -498.0672912597656, + "logps/rejected": -543.260009765625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.487902641296387, + "rewards/margins": 9.861433982849121, + "rewards/rejected": -14.349336624145508, + "step": 12595 + }, + { + "epoch": 1.96, + "learning_rate": 4.908917474972768e-06, + "logits/chosen": -3.1226422786712646, + "logits/rejected": -2.617079019546509, + "logps/chosen": -132.3726348876953, + "logps/rejected": -204.56695556640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0630593299865723, + "rewards/margins": 8.586000442504883, + "rewards/rejected": -10.649059295654297, + "step": 12596 + }, + { + "epoch": 1.96, + "learning_rate": 4.90818403444162e-06, + "logits/chosen": -2.932239532470703, + "logits/rejected": -2.502017021179199, + "logps/chosen": -330.54095458984375, + "logps/rejected": -170.8499755859375, + "loss": 0.3582, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.338700771331787, + "rewards/margins": 2.9028449058532715, + "rewards/rejected": -10.241545677185059, + "step": 12597 + }, + { + "epoch": 1.96, + "learning_rate": 4.907450593910472e-06, + "logits/chosen": -2.285006284713745, + "logits/rejected": -2.956686496734619, + "logps/chosen": -227.27224731445312, + "logps/rejected": -497.92303466796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.665560722351074, + "rewards/margins": 10.055593490600586, + "rewards/rejected": -13.721153259277344, + "step": 12598 + }, + { + "epoch": 1.96, + "learning_rate": 4.906717153379324e-06, + "logits/chosen": -2.932445764541626, + "logits/rejected": -2.7491085529327393, + "logps/chosen": -216.3953857421875, + "logps/rejected": -398.8956604003906, + "loss": 0.2603, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.450542449951172, + "rewards/margins": 4.154644966125488, + "rewards/rejected": -13.60518741607666, + "step": 12599 + }, + { + "epoch": 1.96, + "learning_rate": 4.905983712848177e-06, + "logits/chosen": -1.3573846817016602, + "logits/rejected": -2.896655797958374, + "logps/chosen": -167.926025390625, + "logps/rejected": -414.61883544921875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9578752517700195, + "rewards/margins": 7.896666526794434, + "rewards/rejected": -14.854541778564453, + "step": 12600 + }, + { + "epoch": 1.96, + "learning_rate": 4.905250272317029e-06, + "logits/chosen": -3.0198419094085693, + "logits/rejected": -2.411752939224243, + "logps/chosen": -316.4537353515625, + "logps/rejected": -316.649658203125, + "loss": 0.7256, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.166635036468506, + "rewards/margins": 5.974295616149902, + "rewards/rejected": -11.140931129455566, + "step": 12601 + }, + { + "epoch": 1.96, + "learning_rate": 4.904516831785881e-06, + "logits/chosen": -3.0623531341552734, + "logits/rejected": -2.7587292194366455, + "logps/chosen": -283.89666748046875, + "logps/rejected": -386.0718994140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.79616641998291, + "rewards/margins": 11.832468032836914, + "rewards/rejected": -16.62863540649414, + "step": 12602 + }, + { + "epoch": 1.96, + "learning_rate": 4.903783391254733e-06, + "logits/chosen": -2.2890853881835938, + "logits/rejected": -2.3139781951904297, + "logps/chosen": -617.4417724609375, + "logps/rejected": -860.2442626953125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.076838493347168, + "rewards/margins": 12.116975784301758, + "rewards/rejected": -19.19381332397461, + "step": 12603 + }, + { + "epoch": 1.96, + "learning_rate": 4.903049950723586e-06, + "logits/chosen": -2.3742012977600098, + "logits/rejected": -2.9293289184570312, + "logps/chosen": -160.2078399658203, + "logps/rejected": -488.2969970703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.652048587799072, + "rewards/margins": 14.853826522827148, + "rewards/rejected": -19.505874633789062, + "step": 12604 + }, + { + "epoch": 1.96, + "learning_rate": 4.902316510192438e-06, + "logits/chosen": -2.652942419052124, + "logits/rejected": -2.0051000118255615, + "logps/chosen": -245.52096557617188, + "logps/rejected": -315.21270751953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.978761672973633, + "rewards/margins": 8.010120391845703, + "rewards/rejected": -11.988882064819336, + "step": 12605 + }, + { + "epoch": 1.96, + "learning_rate": 4.90158306966129e-06, + "logits/chosen": -1.0582083463668823, + "logits/rejected": -2.8333911895751953, + "logps/chosen": -134.58106994628906, + "logps/rejected": -341.0579528808594, + "loss": 0.2111, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.08670425415039, + "rewards/margins": 4.205307960510254, + "rewards/rejected": -14.292013168334961, + "step": 12606 + }, + { + "epoch": 1.96, + "learning_rate": 4.900849629130142e-06, + "logits/chosen": -2.8310554027557373, + "logits/rejected": -3.1636056900024414, + "logps/chosen": -188.71493530273438, + "logps/rejected": -328.390869140625, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.168428421020508, + "rewards/margins": 4.490967750549316, + "rewards/rejected": -10.659396171569824, + "step": 12607 + }, + { + "epoch": 1.96, + "learning_rate": 4.9001161885989935e-06, + "logits/chosen": -2.9079749584198, + "logits/rejected": -1.9283969402313232, + "logps/chosen": -449.6310119628906, + "logps/rejected": -545.0340576171875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.053276062011719, + "rewards/margins": 8.208158493041992, + "rewards/rejected": -16.26143455505371, + "step": 12608 + }, + { + "epoch": 1.96, + "learning_rate": 4.899382748067846e-06, + "logits/chosen": -3.029921770095825, + "logits/rejected": -2.7846486568450928, + "logps/chosen": -346.6946105957031, + "logps/rejected": -233.69143676757812, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.449798583984375, + "rewards/margins": 4.771234512329102, + "rewards/rejected": -13.221033096313477, + "step": 12609 + }, + { + "epoch": 1.96, + "learning_rate": 4.898649307536698e-06, + "logits/chosen": -2.1367855072021484, + "logits/rejected": -2.7920005321502686, + "logps/chosen": -496.82061767578125, + "logps/rejected": -689.5406494140625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.152717590332031, + "rewards/margins": 7.090768814086914, + "rewards/rejected": -13.243486404418945, + "step": 12610 + }, + { + "epoch": 1.96, + "learning_rate": 4.89791586700555e-06, + "logits/chosen": -2.3716440200805664, + "logits/rejected": -2.5615475177764893, + "logps/chosen": -144.5242919921875, + "logps/rejected": -326.92266845703125, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.158365249633789, + "rewards/margins": 10.427547454833984, + "rewards/rejected": -17.585912704467773, + "step": 12611 + }, + { + "epoch": 1.96, + "learning_rate": 4.897182426474402e-06, + "logits/chosen": -1.6799697875976562, + "logits/rejected": -2.8086252212524414, + "logps/chosen": -134.79135131835938, + "logps/rejected": -448.0915832519531, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.066770553588867, + "rewards/margins": 10.433040618896484, + "rewards/rejected": -16.49981117248535, + "step": 12612 + }, + { + "epoch": 1.96, + "learning_rate": 4.896448985943255e-06, + "logits/chosen": -2.6181676387786865, + "logits/rejected": -2.173214912414551, + "logps/chosen": -220.58462524414062, + "logps/rejected": -165.08401489257812, + "loss": 0.3577, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.844860076904297, + "rewards/margins": 1.324573278427124, + "rewards/rejected": -11.16943359375, + "step": 12613 + }, + { + "epoch": 1.96, + "learning_rate": 4.8957155454121065e-06, + "logits/chosen": -2.8417060375213623, + "logits/rejected": -2.7887113094329834, + "logps/chosen": -548.8079833984375, + "logps/rejected": -395.1680603027344, + "loss": 3.7098, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.959447860717773, + "rewards/margins": -2.2065274715423584, + "rewards/rejected": -8.752921104431152, + "step": 12614 + }, + { + "epoch": 1.96, + "learning_rate": 4.894982104880958e-06, + "logits/chosen": -1.4668430089950562, + "logits/rejected": -2.449167490005493, + "logps/chosen": -271.0210876464844, + "logps/rejected": -389.7173156738281, + "loss": 0.0949, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.117403984069824, + "rewards/margins": 5.413600921630859, + "rewards/rejected": -13.531003952026367, + "step": 12615 + }, + { + "epoch": 1.96, + "learning_rate": 4.89424866434981e-06, + "logits/chosen": -2.8298614025115967, + "logits/rejected": -2.243673324584961, + "logps/chosen": -158.3112030029297, + "logps/rejected": -427.1883850097656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.809279680252075, + "rewards/margins": 11.664663314819336, + "rewards/rejected": -15.473943710327148, + "step": 12616 + }, + { + "epoch": 1.96, + "learning_rate": 4.893515223818662e-06, + "logits/chosen": -1.8064357042312622, + "logits/rejected": -2.8020832538604736, + "logps/chosen": -273.66265869140625, + "logps/rejected": -446.13665771484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.649087905883789, + "rewards/margins": 10.123197555541992, + "rewards/rejected": -16.77228546142578, + "step": 12617 + }, + { + "epoch": 1.96, + "learning_rate": 4.892781783287515e-06, + "logits/chosen": -2.4422125816345215, + "logits/rejected": -2.6792845726013184, + "logps/chosen": -231.0546875, + "logps/rejected": -401.4552001953125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.410301208496094, + "rewards/margins": 6.971972942352295, + "rewards/rejected": -12.382274627685547, + "step": 12618 + }, + { + "epoch": 1.96, + "learning_rate": 4.8920483427563675e-06, + "logits/chosen": -2.126354217529297, + "logits/rejected": -3.146148920059204, + "logps/chosen": -267.0587158203125, + "logps/rejected": -575.5010986328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.468637466430664, + "rewards/margins": 11.12100887298584, + "rewards/rejected": -16.589645385742188, + "step": 12619 + }, + { + "epoch": 1.96, + "learning_rate": 4.891314902225219e-06, + "logits/chosen": -2.131215810775757, + "logits/rejected": -3.096933603286743, + "logps/chosen": -114.76677703857422, + "logps/rejected": -288.121826171875, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.405986785888672, + "rewards/margins": 5.189139366149902, + "rewards/rejected": -10.595126152038574, + "step": 12620 + }, + { + "epoch": 1.96, + "learning_rate": 4.890581461694071e-06, + "logits/chosen": -1.418538212776184, + "logits/rejected": -3.0260775089263916, + "logps/chosen": -87.99034118652344, + "logps/rejected": -280.8216552734375, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.02952766418457, + "rewards/margins": 4.810003280639648, + "rewards/rejected": -12.839530944824219, + "step": 12621 + }, + { + "epoch": 1.96, + "learning_rate": 4.889848021162924e-06, + "logits/chosen": -2.957082509994507, + "logits/rejected": -2.883746862411499, + "logps/chosen": -145.24655151367188, + "logps/rejected": -223.32919311523438, + "loss": 2.8174, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.705227851867676, + "rewards/margins": 1.3694145679473877, + "rewards/rejected": -10.074642181396484, + "step": 12622 + }, + { + "epoch": 1.96, + "learning_rate": 4.889114580631776e-06, + "logits/chosen": -1.779317021369934, + "logits/rejected": -3.0522119998931885, + "logps/chosen": -163.98719787597656, + "logps/rejected": -434.53131103515625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4680888652801514, + "rewards/margins": 9.335392951965332, + "rewards/rejected": -11.803482055664062, + "step": 12623 + }, + { + "epoch": 1.96, + "learning_rate": 4.888381140100628e-06, + "logits/chosen": -2.9125046730041504, + "logits/rejected": -2.798675537109375, + "logps/chosen": -157.8351593017578, + "logps/rejected": -287.498291015625, + "loss": 0.5904, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.835466384887695, + "rewards/margins": 1.7550654411315918, + "rewards/rejected": -12.590531349182129, + "step": 12624 + }, + { + "epoch": 1.96, + "learning_rate": 4.88764769956948e-06, + "logits/chosen": -2.9651193618774414, + "logits/rejected": -2.187736988067627, + "logps/chosen": -227.223388671875, + "logps/rejected": -199.4807586669922, + "loss": 0.026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9625205993652344, + "rewards/margins": 4.128166198730469, + "rewards/rejected": -8.090686798095703, + "step": 12625 + }, + { + "epoch": 1.96, + "learning_rate": 4.886914259038332e-06, + "logits/chosen": -3.000913619995117, + "logits/rejected": -3.1024057865142822, + "logps/chosen": -101.13729858398438, + "logps/rejected": -236.6399383544922, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.057217597961426, + "rewards/margins": 7.256977558135986, + "rewards/rejected": -12.31419563293457, + "step": 12626 + }, + { + "epoch": 1.96, + "learning_rate": 4.886180818507184e-06, + "logits/chosen": -1.7372653484344482, + "logits/rejected": -2.840928792953491, + "logps/chosen": -183.33160400390625, + "logps/rejected": -391.14447021484375, + "loss": 0.9396, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.354786396026611, + "rewards/margins": 3.4746477603912354, + "rewards/rejected": -9.829434394836426, + "step": 12627 + }, + { + "epoch": 1.96, + "learning_rate": 4.885447377976036e-06, + "logits/chosen": -1.882014513015747, + "logits/rejected": -3.0831689834594727, + "logps/chosen": -227.1126708984375, + "logps/rejected": -578.9779052734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4408297538757324, + "rewards/margins": 9.546900749206543, + "rewards/rejected": -12.987730026245117, + "step": 12628 + }, + { + "epoch": 1.96, + "learning_rate": 4.884713937444888e-06, + "logits/chosen": -3.110722303390503, + "logits/rejected": -3.1765167713165283, + "logps/chosen": -115.93704223632812, + "logps/rejected": -244.671875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.817746162414551, + "rewards/margins": 7.990772247314453, + "rewards/rejected": -10.808518409729004, + "step": 12629 + }, + { + "epoch": 1.96, + "learning_rate": 4.88398049691374e-06, + "logits/chosen": -0.8251755833625793, + "logits/rejected": -2.6236140727996826, + "logps/chosen": -109.66285705566406, + "logps/rejected": -465.04412841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.788697242736816, + "rewards/margins": 12.532793045043945, + "rewards/rejected": -17.321491241455078, + "step": 12630 + }, + { + "epoch": 1.96, + "learning_rate": 4.883247056382593e-06, + "logits/chosen": -3.0567092895507812, + "logits/rejected": -2.9286465644836426, + "logps/chosen": -112.43025207519531, + "logps/rejected": -453.99786376953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.323904991149902, + "rewards/margins": 9.065671920776367, + "rewards/rejected": -14.389577865600586, + "step": 12631 + }, + { + "epoch": 1.96, + "learning_rate": 4.8825136158514445e-06, + "logits/chosen": -2.8599934577941895, + "logits/rejected": -2.0052638053894043, + "logps/chosen": -171.79440307617188, + "logps/rejected": -161.26075744628906, + "loss": 1.0402, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.550527572631836, + "rewards/margins": 1.1979122161865234, + "rewards/rejected": -7.748439788818359, + "step": 12632 + }, + { + "epoch": 1.96, + "learning_rate": 4.881780175320296e-06, + "logits/chosen": -3.1527743339538574, + "logits/rejected": -2.695159435272217, + "logps/chosen": -1104.3128662109375, + "logps/rejected": -692.3407592773438, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3709990978240967, + "rewards/margins": 9.217745780944824, + "rewards/rejected": -11.5887451171875, + "step": 12633 + }, + { + "epoch": 1.96, + "learning_rate": 4.881046734789148e-06, + "logits/chosen": -2.6482789516448975, + "logits/rejected": -2.7766189575195312, + "logps/chosen": -172.90379333496094, + "logps/rejected": -259.77337646484375, + "loss": 0.081, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.74822998046875, + "rewards/margins": 5.7732110023498535, + "rewards/rejected": -12.521440505981445, + "step": 12634 + }, + { + "epoch": 1.97, + "learning_rate": 4.880313294258001e-06, + "logits/chosen": -2.799335479736328, + "logits/rejected": -2.6614136695861816, + "logps/chosen": -307.74542236328125, + "logps/rejected": -460.341796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1533403396606445, + "rewards/margins": 9.425504684448242, + "rewards/rejected": -13.578845977783203, + "step": 12635 + }, + { + "epoch": 1.97, + "learning_rate": 4.879579853726854e-06, + "logits/chosen": -2.7629239559173584, + "logits/rejected": -2.115684986114502, + "logps/chosen": -482.38128662109375, + "logps/rejected": -511.7698974609375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.015013694763184, + "rewards/margins": 8.578079223632812, + "rewards/rejected": -16.59309196472168, + "step": 12636 + }, + { + "epoch": 1.97, + "learning_rate": 4.878846413195706e-06, + "logits/chosen": -2.244375705718994, + "logits/rejected": -3.0935750007629395, + "logps/chosen": -145.168701171875, + "logps/rejected": -276.02191162109375, + "loss": 0.586, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.81911849975586, + "rewards/margins": 0.2873528003692627, + "rewards/rejected": -9.106471061706543, + "step": 12637 + }, + { + "epoch": 1.97, + "learning_rate": 4.8781129726645575e-06, + "logits/chosen": -1.7516343593597412, + "logits/rejected": -2.8285720348358154, + "logps/chosen": -259.70367431640625, + "logps/rejected": -358.9173278808594, + "loss": 0.0618, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.114841461181641, + "rewards/margins": 4.97646427154541, + "rewards/rejected": -11.09130573272705, + "step": 12638 + }, + { + "epoch": 1.97, + "learning_rate": 4.877379532133409e-06, + "logits/chosen": -3.1089746952056885, + "logits/rejected": -2.965613603591919, + "logps/chosen": -117.58613586425781, + "logps/rejected": -194.1541748046875, + "loss": 0.0307, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.648080825805664, + "rewards/margins": 3.4984779357910156, + "rewards/rejected": -9.14655876159668, + "step": 12639 + }, + { + "epoch": 1.97, + "learning_rate": 4.876646091602262e-06, + "logits/chosen": -2.7104926109313965, + "logits/rejected": -1.0261931419372559, + "logps/chosen": -347.5076904296875, + "logps/rejected": -134.57272338867188, + "loss": 1.1421, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.739232063293457, + "rewards/margins": -0.21692156791687012, + "rewards/rejected": -6.522310256958008, + "step": 12640 + }, + { + "epoch": 1.97, + "learning_rate": 4.875912651071114e-06, + "logits/chosen": -2.6879489421844482, + "logits/rejected": -2.458332061767578, + "logps/chosen": -191.7415771484375, + "logps/rejected": -408.9104919433594, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.260780334472656, + "rewards/margins": 9.35300064086914, + "rewards/rejected": -15.613780975341797, + "step": 12641 + }, + { + "epoch": 1.97, + "learning_rate": 4.875179210539966e-06, + "logits/chosen": -2.632828950881958, + "logits/rejected": -2.4136013984680176, + "logps/chosen": -222.94482421875, + "logps/rejected": -235.5005340576172, + "loss": 1.8058, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.098073959350586, + "rewards/margins": -0.06634974479675293, + "rewards/rejected": -9.031723976135254, + "step": 12642 + }, + { + "epoch": 1.97, + "learning_rate": 4.874445770008818e-06, + "logits/chosen": -2.823301315307617, + "logits/rejected": -2.9704947471618652, + "logps/chosen": -466.60888671875, + "logps/rejected": -387.5545959472656, + "loss": 0.1686, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3288445472717285, + "rewards/margins": 5.7950439453125, + "rewards/rejected": -11.12388801574707, + "step": 12643 + }, + { + "epoch": 1.97, + "learning_rate": 4.8737123294776704e-06, + "logits/chosen": -2.676616907119751, + "logits/rejected": -3.0595412254333496, + "logps/chosen": -111.93544006347656, + "logps/rejected": -240.17843627929688, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1626830101013184, + "rewards/margins": 8.104436874389648, + "rewards/rejected": -11.267120361328125, + "step": 12644 + }, + { + "epoch": 1.97, + "learning_rate": 4.872978888946522e-06, + "logits/chosen": -1.8826857805252075, + "logits/rejected": -3.1262638568878174, + "logps/chosen": -301.809326171875, + "logps/rejected": -450.01690673828125, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.260491371154785, + "rewards/margins": 5.988861560821533, + "rewards/rejected": -12.249353408813477, + "step": 12645 + }, + { + "epoch": 1.97, + "learning_rate": 4.872245448415374e-06, + "logits/chosen": -3.0481154918670654, + "logits/rejected": -2.96110463142395, + "logps/chosen": -127.68511962890625, + "logps/rejected": -313.50732421875, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.432084083557129, + "rewards/margins": 6.644599914550781, + "rewards/rejected": -11.07668399810791, + "step": 12646 + }, + { + "epoch": 1.97, + "learning_rate": 4.871512007884226e-06, + "logits/chosen": -0.5836163759231567, + "logits/rejected": -2.915283679962158, + "logps/chosen": -138.47610473632812, + "logps/rejected": -682.6279296875, + "loss": 0.0325, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.849713325500488, + "rewards/margins": 6.027132511138916, + "rewards/rejected": -13.876846313476562, + "step": 12647 + }, + { + "epoch": 1.97, + "learning_rate": 4.870778567353078e-06, + "logits/chosen": -3.138481616973877, + "logits/rejected": -3.083545446395874, + "logps/chosen": -191.63961791992188, + "logps/rejected": -304.51971435546875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.435351371765137, + "rewards/margins": 7.517336368560791, + "rewards/rejected": -12.952688217163086, + "step": 12648 + }, + { + "epoch": 1.97, + "learning_rate": 4.870045126821931e-06, + "logits/chosen": -2.5921459197998047, + "logits/rejected": -2.981239080429077, + "logps/chosen": -581.354736328125, + "logps/rejected": -493.11248779296875, + "loss": 1.2974, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.521430969238281, + "rewards/margins": 2.6703171730041504, + "rewards/rejected": -8.191747665405273, + "step": 12649 + }, + { + "epoch": 1.97, + "learning_rate": 4.8693116862907825e-06, + "logits/chosen": -2.9908549785614014, + "logits/rejected": -2.6357176303863525, + "logps/chosen": -203.50094604492188, + "logps/rejected": -180.23162841796875, + "loss": 0.748, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.98716926574707, + "rewards/margins": -0.07517433166503906, + "rewards/rejected": -7.911994934082031, + "step": 12650 + }, + { + "epoch": 1.97, + "learning_rate": 4.8685782457596344e-06, + "logits/chosen": -3.0849971771240234, + "logits/rejected": -3.0081043243408203, + "logps/chosen": -392.6581115722656, + "logps/rejected": -494.13433837890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1613454818725586, + "rewards/margins": 8.947641372680664, + "rewards/rejected": -12.108987808227539, + "step": 12651 + }, + { + "epoch": 1.97, + "learning_rate": 4.867844805228487e-06, + "logits/chosen": -2.064326763153076, + "logits/rejected": -2.8499324321746826, + "logps/chosen": -76.30046844482422, + "logps/rejected": -280.8486022949219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.161935567855835, + "rewards/margins": 11.285028457641602, + "rewards/rejected": -13.446964263916016, + "step": 12652 + }, + { + "epoch": 1.97, + "learning_rate": 4.86711136469734e-06, + "logits/chosen": -1.4625896215438843, + "logits/rejected": -2.8146297931671143, + "logps/chosen": -121.62080383300781, + "logps/rejected": -304.80108642578125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.072497367858887, + "rewards/margins": 8.00575065612793, + "rewards/rejected": -15.078248977661133, + "step": 12653 + }, + { + "epoch": 1.97, + "learning_rate": 4.866377924166192e-06, + "logits/chosen": -2.868168354034424, + "logits/rejected": -2.3658559322357178, + "logps/chosen": -234.72634887695312, + "logps/rejected": -204.01869201660156, + "loss": 0.502, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.379130840301514, + "rewards/margins": 4.648942470550537, + "rewards/rejected": -10.02807331085205, + "step": 12654 + }, + { + "epoch": 1.97, + "learning_rate": 4.865644483635044e-06, + "logits/chosen": -2.7271652221679688, + "logits/rejected": -2.974440097808838, + "logps/chosen": -195.36363220214844, + "logps/rejected": -199.35897827148438, + "loss": 1.7752, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.59461498260498, + "rewards/margins": 1.2688415050506592, + "rewards/rejected": -9.863456726074219, + "step": 12655 + }, + { + "epoch": 1.97, + "learning_rate": 4.8649110431038955e-06, + "logits/chosen": -2.7481861114501953, + "logits/rejected": -2.975229501724243, + "logps/chosen": -612.9660034179688, + "logps/rejected": -507.7052917480469, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.307121753692627, + "rewards/margins": 6.036324501037598, + "rewards/rejected": -10.343446731567383, + "step": 12656 + }, + { + "epoch": 1.97, + "learning_rate": 4.864177602572747e-06, + "logits/chosen": -1.5066689252853394, + "logits/rejected": -2.646008014678955, + "logps/chosen": -185.18365478515625, + "logps/rejected": -427.909912109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.276598930358887, + "rewards/margins": 9.057317733764648, + "rewards/rejected": -14.333917617797852, + "step": 12657 + }, + { + "epoch": 1.97, + "learning_rate": 4.8634441620416e-06, + "logits/chosen": -2.3969855308532715, + "logits/rejected": -2.626859664916992, + "logps/chosen": -283.74273681640625, + "logps/rejected": -303.2229309082031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6186065673828125, + "rewards/margins": 8.806883811950684, + "rewards/rejected": -14.42548942565918, + "step": 12658 + }, + { + "epoch": 1.97, + "learning_rate": 4.862710721510452e-06, + "logits/chosen": -2.700076103210449, + "logits/rejected": -2.074547529220581, + "logps/chosen": -254.69276428222656, + "logps/rejected": -259.61907958984375, + "loss": 0.0536, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.445936679840088, + "rewards/margins": 4.673851013183594, + "rewards/rejected": -12.119787216186523, + "step": 12659 + }, + { + "epoch": 1.97, + "learning_rate": 4.861977280979304e-06, + "logits/chosen": -2.1332991123199463, + "logits/rejected": -3.0616776943206787, + "logps/chosen": -137.8341064453125, + "logps/rejected": -437.6093444824219, + "loss": 0.0324, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.796242713928223, + "rewards/margins": 4.901179313659668, + "rewards/rejected": -11.69742202758789, + "step": 12660 + }, + { + "epoch": 1.97, + "learning_rate": 4.861243840448156e-06, + "logits/chosen": -2.8619203567504883, + "logits/rejected": -2.656785488128662, + "logps/chosen": -264.7794189453125, + "logps/rejected": -276.90325927734375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7141380310058594, + "rewards/margins": 8.115312576293945, + "rewards/rejected": -10.829450607299805, + "step": 12661 + }, + { + "epoch": 1.97, + "learning_rate": 4.8605103999170085e-06, + "logits/chosen": -2.9871745109558105, + "logits/rejected": -0.7617807388305664, + "logps/chosen": -593.9451904296875, + "logps/rejected": -151.47552490234375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012194827198982239, + "rewards/margins": 6.4850969314575195, + "rewards/rejected": -6.4972920417785645, + "step": 12662 + }, + { + "epoch": 1.97, + "learning_rate": 4.85977695938586e-06, + "logits/chosen": -2.9484972953796387, + "logits/rejected": -2.1678121089935303, + "logps/chosen": -228.0513916015625, + "logps/rejected": -311.62261962890625, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.424994468688965, + "rewards/margins": 6.896543502807617, + "rewards/rejected": -12.321537971496582, + "step": 12663 + }, + { + "epoch": 1.97, + "learning_rate": 4.859043518854712e-06, + "logits/chosen": -2.3111443519592285, + "logits/rejected": -2.660910129547119, + "logps/chosen": -348.42108154296875, + "logps/rejected": -463.230224609375, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.377349853515625, + "rewards/margins": 5.420620918273926, + "rewards/rejected": -12.79797077178955, + "step": 12664 + }, + { + "epoch": 1.97, + "learning_rate": 4.858310078323564e-06, + "logits/chosen": -2.2789835929870605, + "logits/rejected": -2.949706554412842, + "logps/chosen": -239.59042358398438, + "logps/rejected": -402.538818359375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.119431018829346, + "rewards/margins": 9.671689987182617, + "rewards/rejected": -13.791120529174805, + "step": 12665 + }, + { + "epoch": 1.97, + "learning_rate": 4.857576637792416e-06, + "logits/chosen": -1.365574836730957, + "logits/rejected": -2.247070789337158, + "logps/chosen": -110.46742248535156, + "logps/rejected": -286.18280029296875, + "loss": 0.0284, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.185190677642822, + "rewards/margins": 3.5658645629882812, + "rewards/rejected": -9.751054763793945, + "step": 12666 + }, + { + "epoch": 1.97, + "learning_rate": 4.856843197261269e-06, + "logits/chosen": -1.6518067121505737, + "logits/rejected": -2.89780855178833, + "logps/chosen": -71.11128234863281, + "logps/rejected": -181.51840209960938, + "loss": 0.2119, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3805341720581055, + "rewards/margins": 3.944136142730713, + "rewards/rejected": -9.324670791625977, + "step": 12667 + }, + { + "epoch": 1.97, + "learning_rate": 4.856109756730121e-06, + "logits/chosen": -2.5110678672790527, + "logits/rejected": -2.9940710067749023, + "logps/chosen": -168.65032958984375, + "logps/rejected": -373.65960693359375, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.441730976104736, + "rewards/margins": 7.716267108917236, + "rewards/rejected": -13.157998085021973, + "step": 12668 + }, + { + "epoch": 1.97, + "learning_rate": 4.855376316198973e-06, + "logits/chosen": -1.6377947330474854, + "logits/rejected": -2.900495767593384, + "logps/chosen": -142.5452117919922, + "logps/rejected": -299.2952880859375, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.714330673217773, + "rewards/margins": 5.466909408569336, + "rewards/rejected": -10.18124008178711, + "step": 12669 + }, + { + "epoch": 1.97, + "learning_rate": 4.854642875667825e-06, + "logits/chosen": -2.650815486907959, + "logits/rejected": -3.098076581954956, + "logps/chosen": -191.06585693359375, + "logps/rejected": -230.08956909179688, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.292728424072266, + "rewards/margins": 4.609450817108154, + "rewards/rejected": -9.902179718017578, + "step": 12670 + }, + { + "epoch": 1.97, + "learning_rate": 4.853909435136678e-06, + "logits/chosen": -2.6703109741210938, + "logits/rejected": -2.988935708999634, + "logps/chosen": -275.17938232421875, + "logps/rejected": -453.7363586425781, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.42514181137085, + "rewards/margins": 6.582399368286133, + "rewards/rejected": -13.007540702819824, + "step": 12671 + }, + { + "epoch": 1.97, + "learning_rate": 4.85317599460553e-06, + "logits/chosen": -2.70627498626709, + "logits/rejected": -3.2131128311157227, + "logps/chosen": -182.09902954101562, + "logps/rejected": -374.6159973144531, + "loss": 0.4664, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.522469997406006, + "rewards/margins": 2.5253360271453857, + "rewards/rejected": -8.047805786132812, + "step": 12672 + }, + { + "epoch": 1.97, + "learning_rate": 4.852442554074382e-06, + "logits/chosen": -2.6582677364349365, + "logits/rejected": -2.9787352085113525, + "logps/chosen": -138.525146484375, + "logps/rejected": -324.9041442871094, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.069488525390625, + "rewards/margins": 7.00115442276001, + "rewards/rejected": -14.070642471313477, + "step": 12673 + }, + { + "epoch": 1.97, + "learning_rate": 4.8517091135432336e-06, + "logits/chosen": -2.8889427185058594, + "logits/rejected": -2.544680595397949, + "logps/chosen": -215.68911743164062, + "logps/rejected": -159.00906372070312, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.231957912445068, + "rewards/margins": 6.649614334106445, + "rewards/rejected": -10.881572723388672, + "step": 12674 + }, + { + "epoch": 1.97, + "learning_rate": 4.850975673012086e-06, + "logits/chosen": -2.058779001235962, + "logits/rejected": -2.9747233390808105, + "logps/chosen": -207.14169311523438, + "logps/rejected": -336.6393737792969, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.828273296356201, + "rewards/margins": 7.496535778045654, + "rewards/rejected": -11.324809074401855, + "step": 12675 + }, + { + "epoch": 1.97, + "learning_rate": 4.850242232480938e-06, + "logits/chosen": -2.6752431392669678, + "logits/rejected": -3.0673205852508545, + "logps/chosen": -106.27619934082031, + "logps/rejected": -390.18670654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9787487983703613, + "rewards/margins": 11.591604232788086, + "rewards/rejected": -15.570352554321289, + "step": 12676 + }, + { + "epoch": 1.97, + "learning_rate": 4.84950879194979e-06, + "logits/chosen": -2.727267026901245, + "logits/rejected": -3.1055212020874023, + "logps/chosen": -351.2535095214844, + "logps/rejected": -503.45465087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.370818138122559, + "rewards/margins": 11.714515686035156, + "rewards/rejected": -18.08533477783203, + "step": 12677 + }, + { + "epoch": 1.97, + "learning_rate": 4.848775351418642e-06, + "logits/chosen": -2.839097261428833, + "logits/rejected": -2.5156941413879395, + "logps/chosen": -356.0816650390625, + "logps/rejected": -387.6488952636719, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.951519012451172, + "rewards/margins": 8.097175598144531, + "rewards/rejected": -13.048694610595703, + "step": 12678 + }, + { + "epoch": 1.97, + "learning_rate": 4.848041910887494e-06, + "logits/chosen": -2.111455202102661, + "logits/rejected": -2.8844051361083984, + "logps/chosen": -148.60256958007812, + "logps/rejected": -703.3729248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.252622127532959, + "rewards/margins": 14.598312377929688, + "rewards/rejected": -18.850934982299805, + "step": 12679 + }, + { + "epoch": 1.97, + "learning_rate": 4.8473084703563465e-06, + "logits/chosen": -2.934696912765503, + "logits/rejected": -1.088126540184021, + "logps/chosen": -667.2610473632812, + "logps/rejected": -372.265625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2540693283081055, + "rewards/margins": 8.043604850769043, + "rewards/rejected": -15.297674179077148, + "step": 12680 + }, + { + "epoch": 1.97, + "learning_rate": 4.846575029825198e-06, + "logits/chosen": -3.183704376220703, + "logits/rejected": -2.912931442260742, + "logps/chosen": -532.8123779296875, + "logps/rejected": -405.61883544921875, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3307037353515625, + "rewards/margins": 5.117259979248047, + "rewards/rejected": -10.44796371459961, + "step": 12681 + }, + { + "epoch": 1.97, + "learning_rate": 4.84584158929405e-06, + "logits/chosen": -2.5418994426727295, + "logits/rejected": -2.820345401763916, + "logps/chosen": -74.97555541992188, + "logps/rejected": -224.99850463867188, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.145362854003906, + "rewards/margins": 6.6978864669799805, + "rewards/rejected": -10.843250274658203, + "step": 12682 + }, + { + "epoch": 1.97, + "learning_rate": 4.845108148762902e-06, + "logits/chosen": -2.21744441986084, + "logits/rejected": -1.1580462455749512, + "logps/chosen": -306.8486022949219, + "logps/rejected": -310.8857421875, + "loss": 0.9644, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.26955509185791, + "rewards/margins": 6.885771751403809, + "rewards/rejected": -13.155326843261719, + "step": 12683 + }, + { + "epoch": 1.97, + "learning_rate": 4.844374708231755e-06, + "logits/chosen": -2.711146116256714, + "logits/rejected": -2.690697431564331, + "logps/chosen": -78.46968078613281, + "logps/rejected": -276.12652587890625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.213017702102661, + "rewards/margins": 10.573205947875977, + "rewards/rejected": -13.786223411560059, + "step": 12684 + }, + { + "epoch": 1.97, + "learning_rate": 4.843641267700607e-06, + "logits/chosen": -1.3241567611694336, + "logits/rejected": -2.839829921722412, + "logps/chosen": -108.96627807617188, + "logps/rejected": -313.4405212402344, + "loss": 0.0887, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.372085571289062, + "rewards/margins": 4.2399001121521, + "rewards/rejected": -12.61198616027832, + "step": 12685 + }, + { + "epoch": 1.97, + "learning_rate": 4.842907827169459e-06, + "logits/chosen": -1.4995993375778198, + "logits/rejected": -2.842066764831543, + "logps/chosen": -145.19581604003906, + "logps/rejected": -483.3703918457031, + "loss": 0.0402, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.067286968231201, + "rewards/margins": 6.223045349121094, + "rewards/rejected": -12.290332794189453, + "step": 12686 + }, + { + "epoch": 1.97, + "learning_rate": 4.842174386638311e-06, + "logits/chosen": -1.6734613180160522, + "logits/rejected": -2.3587400913238525, + "logps/chosen": -301.16290283203125, + "logps/rejected": -672.935302734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2341766357421875, + "rewards/margins": 14.576640129089355, + "rewards/rejected": -18.81081771850586, + "step": 12687 + }, + { + "epoch": 1.97, + "learning_rate": 4.841440946107163e-06, + "logits/chosen": -2.9794344902038574, + "logits/rejected": -2.405653715133667, + "logps/chosen": -364.1811218261719, + "logps/rejected": -425.4630432128906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.541849613189697, + "rewards/margins": 10.154852867126465, + "rewards/rejected": -15.69670295715332, + "step": 12688 + }, + { + "epoch": 1.97, + "learning_rate": 4.840707505576016e-06, + "logits/chosen": -2.8482086658477783, + "logits/rejected": -2.17610502243042, + "logps/chosen": -217.85794067382812, + "logps/rejected": -481.90264892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.190549850463867, + "rewards/margins": 10.743090629577637, + "rewards/rejected": -18.93364143371582, + "step": 12689 + }, + { + "epoch": 1.97, + "learning_rate": 4.839974065044868e-06, + "logits/chosen": -2.8941397666931152, + "logits/rejected": -2.9968483448028564, + "logps/chosen": -440.5117492675781, + "logps/rejected": -460.5009460449219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.643399238586426, + "rewards/margins": 10.302300453186035, + "rewards/rejected": -14.945699691772461, + "step": 12690 + }, + { + "epoch": 1.97, + "learning_rate": 4.83924062451372e-06, + "logits/chosen": -2.3243095874786377, + "logits/rejected": -2.748870849609375, + "logps/chosen": -202.57566833496094, + "logps/rejected": -723.53271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.211040496826172, + "rewards/margins": 14.333436012268066, + "rewards/rejected": -19.544477462768555, + "step": 12691 + }, + { + "epoch": 1.97, + "learning_rate": 4.838507183982572e-06, + "logits/chosen": -2.7787373065948486, + "logits/rejected": -2.0842068195343018, + "logps/chosen": -197.76925659179688, + "logps/rejected": -274.45526123046875, + "loss": 0.0451, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.583230018615723, + "rewards/margins": 5.702866554260254, + "rewards/rejected": -11.286096572875977, + "step": 12692 + }, + { + "epoch": 1.97, + "learning_rate": 4.837773743451424e-06, + "logits/chosen": -1.8172993659973145, + "logits/rejected": -2.9349420070648193, + "logps/chosen": -302.7220764160156, + "logps/rejected": -510.86419677734375, + "loss": 0.0319, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.367295742034912, + "rewards/margins": 5.585160732269287, + "rewards/rejected": -10.9524564743042, + "step": 12693 + }, + { + "epoch": 1.97, + "learning_rate": 4.837040302920276e-06, + "logits/chosen": -2.9073352813720703, + "logits/rejected": -2.8987114429473877, + "logps/chosen": -275.322998046875, + "logps/rejected": -394.46234130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.413618564605713, + "rewards/margins": 10.617696762084961, + "rewards/rejected": -12.031314849853516, + "step": 12694 + }, + { + "epoch": 1.97, + "learning_rate": 4.836306862389128e-06, + "logits/chosen": -2.199021339416504, + "logits/rejected": -3.1282827854156494, + "logps/chosen": -104.52831268310547, + "logps/rejected": -381.91162109375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.455598831176758, + "rewards/margins": 7.3876423835754395, + "rewards/rejected": -11.843240737915039, + "step": 12695 + }, + { + "epoch": 1.97, + "learning_rate": 4.83557342185798e-06, + "logits/chosen": -2.4314606189727783, + "logits/rejected": -2.8285436630249023, + "logps/chosen": -239.4758758544922, + "logps/rejected": -271.81915283203125, + "loss": 0.0855, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.037339210510254, + "rewards/margins": 4.16332483291626, + "rewards/rejected": -10.200664520263672, + "step": 12696 + }, + { + "epoch": 1.97, + "learning_rate": 4.834839981326832e-06, + "logits/chosen": -2.6191959381103516, + "logits/rejected": -2.70574688911438, + "logps/chosen": -102.16419982910156, + "logps/rejected": -238.6699981689453, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.41952657699585, + "rewards/margins": 5.582313537597656, + "rewards/rejected": -11.001840591430664, + "step": 12697 + }, + { + "epoch": 1.97, + "learning_rate": 4.8341065407956846e-06, + "logits/chosen": -3.0057497024536133, + "logits/rejected": -1.7625501155853271, + "logps/chosen": -691.1507568359375, + "logps/rejected": -520.6488037109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.114422798156738, + "rewards/margins": 10.386247634887695, + "rewards/rejected": -17.500669479370117, + "step": 12698 + }, + { + "epoch": 1.97, + "learning_rate": 4.8333731002645364e-06, + "logits/chosen": -2.2705624103546143, + "logits/rejected": -2.9599485397338867, + "logps/chosen": -393.9990539550781, + "logps/rejected": -777.47314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.995603561401367, + "rewards/margins": 13.336668968200684, + "rewards/rejected": -17.332273483276367, + "step": 12699 + }, + { + "epoch": 1.98, + "learning_rate": 4.832639659733388e-06, + "logits/chosen": -2.023367404937744, + "logits/rejected": -2.96394681930542, + "logps/chosen": -275.6995849609375, + "logps/rejected": -467.53778076171875, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.232393264770508, + "rewards/margins": 5.193009853363037, + "rewards/rejected": -11.425403594970703, + "step": 12700 + }, + { + "epoch": 1.98, + "learning_rate": 4.83190621920224e-06, + "logits/chosen": -2.5514416694641113, + "logits/rejected": -2.8261656761169434, + "logps/chosen": -231.3531036376953, + "logps/rejected": -224.84474182128906, + "loss": 2.9572, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.671710014343262, + "rewards/margins": 1.0306196212768555, + "rewards/rejected": -9.702329635620117, + "step": 12701 + }, + { + "epoch": 1.98, + "learning_rate": 4.831172778671093e-06, + "logits/chosen": -2.072385787963867, + "logits/rejected": -2.773580551147461, + "logps/chosen": -196.23086547851562, + "logps/rejected": -427.7419128417969, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.707731246948242, + "rewards/margins": 8.27041244506836, + "rewards/rejected": -16.9781436920166, + "step": 12702 + }, + { + "epoch": 1.98, + "learning_rate": 4.830439338139945e-06, + "logits/chosen": -2.9592769145965576, + "logits/rejected": -2.348252534866333, + "logps/chosen": -321.1667785644531, + "logps/rejected": -365.6181945800781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.841479778289795, + "rewards/margins": 11.651229858398438, + "rewards/rejected": -15.49271011352539, + "step": 12703 + }, + { + "epoch": 1.98, + "learning_rate": 4.8297058976087975e-06, + "logits/chosen": -2.651698112487793, + "logits/rejected": -2.853930950164795, + "logps/chosen": -221.52032470703125, + "logps/rejected": -330.45965576171875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.849644184112549, + "rewards/margins": 7.809512615203857, + "rewards/rejected": -12.659156799316406, + "step": 12704 + }, + { + "epoch": 1.98, + "learning_rate": 4.828972457077649e-06, + "logits/chosen": -2.6943070888519287, + "logits/rejected": -3.027841091156006, + "logps/chosen": -187.24240112304688, + "logps/rejected": -249.1219482421875, + "loss": 0.7573, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.032770156860352, + "rewards/margins": 3.6433825492858887, + "rewards/rejected": -11.676152229309082, + "step": 12705 + }, + { + "epoch": 1.98, + "learning_rate": 4.828239016546501e-06, + "logits/chosen": -2.9811437129974365, + "logits/rejected": -2.328981637954712, + "logps/chosen": -332.5790710449219, + "logps/rejected": -310.4986572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5839637517929077, + "rewards/margins": 9.57661247253418, + "rewards/rejected": -11.160575866699219, + "step": 12706 + }, + { + "epoch": 1.98, + "learning_rate": 4.827505576015354e-06, + "logits/chosen": -2.7406837940216064, + "logits/rejected": -2.9293298721313477, + "logps/chosen": -220.3865966796875, + "logps/rejected": -384.21185302734375, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.909939765930176, + "rewards/margins": 6.803767204284668, + "rewards/rejected": -12.713706970214844, + "step": 12707 + }, + { + "epoch": 1.98, + "learning_rate": 4.826772135484206e-06, + "logits/chosen": -2.5882320404052734, + "logits/rejected": -3.0429859161376953, + "logps/chosen": -309.4358215332031, + "logps/rejected": -424.4943542480469, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.014538764953613, + "rewards/margins": 7.887531280517578, + "rewards/rejected": -13.902069091796875, + "step": 12708 + }, + { + "epoch": 1.98, + "learning_rate": 4.826038694953058e-06, + "logits/chosen": -2.408465623855591, + "logits/rejected": -3.081676483154297, + "logps/chosen": -130.3988800048828, + "logps/rejected": -207.1676025390625, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5101165771484375, + "rewards/margins": 4.96600341796875, + "rewards/rejected": -11.476119995117188, + "step": 12709 + }, + { + "epoch": 1.98, + "learning_rate": 4.82530525442191e-06, + "logits/chosen": -2.9102835655212402, + "logits/rejected": -2.415868043899536, + "logps/chosen": -471.3348388671875, + "logps/rejected": -467.2643737792969, + "loss": 0.9232, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.10833740234375, + "rewards/margins": 1.0252184867858887, + "rewards/rejected": -8.133556365966797, + "step": 12710 + }, + { + "epoch": 1.98, + "learning_rate": 4.824571813890762e-06, + "logits/chosen": -0.9323347210884094, + "logits/rejected": -1.5913516283035278, + "logps/chosen": -371.687255859375, + "logps/rejected": -381.72601318359375, + "loss": 1.5053, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.085295677185059, + "rewards/margins": 4.115650653839111, + "rewards/rejected": -12.200946807861328, + "step": 12711 + }, + { + "epoch": 1.98, + "learning_rate": 4.823838373359614e-06, + "logits/chosen": -2.262880325317383, + "logits/rejected": -2.8633737564086914, + "logps/chosen": -193.82754516601562, + "logps/rejected": -189.86126708984375, + "loss": 0.498, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.9862284660339355, + "rewards/margins": 1.6693408489227295, + "rewards/rejected": -8.655569076538086, + "step": 12712 + }, + { + "epoch": 1.98, + "learning_rate": 4.823104932828466e-06, + "logits/chosen": -2.9864320755004883, + "logits/rejected": -2.5135059356689453, + "logps/chosen": -167.50521850585938, + "logps/rejected": -352.3206481933594, + "loss": 3.2911, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.27703857421875, + "rewards/margins": 1.9921467304229736, + "rewards/rejected": -10.269185066223145, + "step": 12713 + }, + { + "epoch": 1.98, + "learning_rate": 4.822371492297318e-06, + "logits/chosen": -2.8698437213897705, + "logits/rejected": -2.762298345565796, + "logps/chosen": -205.1719970703125, + "logps/rejected": -274.8792724609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.746394157409668, + "rewards/margins": 9.531105041503906, + "rewards/rejected": -15.277499198913574, + "step": 12714 + }, + { + "epoch": 1.98, + "learning_rate": 4.821638051766171e-06, + "logits/chosen": -1.9113515615463257, + "logits/rejected": -2.9614951610565186, + "logps/chosen": -168.016357421875, + "logps/rejected": -301.01983642578125, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.348056793212891, + "rewards/margins": 5.058375358581543, + "rewards/rejected": -9.406432151794434, + "step": 12715 + }, + { + "epoch": 1.98, + "learning_rate": 4.820904611235023e-06, + "logits/chosen": -2.9010932445526123, + "logits/rejected": -2.431736946105957, + "logps/chosen": -749.3593139648438, + "logps/rejected": -475.12237548828125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.602079153060913, + "rewards/margins": 7.4275970458984375, + "rewards/rejected": -11.02967643737793, + "step": 12716 + }, + { + "epoch": 1.98, + "learning_rate": 4.8201711707038745e-06, + "logits/chosen": -2.47663950920105, + "logits/rejected": -2.9871201515197754, + "logps/chosen": -113.04989624023438, + "logps/rejected": -579.536376953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.08465051651001, + "rewards/margins": 11.067846298217773, + "rewards/rejected": -15.152496337890625, + "step": 12717 + }, + { + "epoch": 1.98, + "learning_rate": 4.819437730172726e-06, + "logits/chosen": -2.5967748165130615, + "logits/rejected": -2.7722597122192383, + "logps/chosen": -154.3182373046875, + "logps/rejected": -225.47853088378906, + "loss": 0.039, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.159478187561035, + "rewards/margins": 4.252681732177734, + "rewards/rejected": -12.41215991973877, + "step": 12718 + }, + { + "epoch": 1.98, + "learning_rate": 4.818704289641578e-06, + "logits/chosen": -2.5268242359161377, + "logits/rejected": -2.90956974029541, + "logps/chosen": -254.2264404296875, + "logps/rejected": -277.6929931640625, + "loss": 0.2277, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5422468185424805, + "rewards/margins": 5.157533168792725, + "rewards/rejected": -9.699779510498047, + "step": 12719 + }, + { + "epoch": 1.98, + "learning_rate": 4.817970849110431e-06, + "logits/chosen": -1.8905688524246216, + "logits/rejected": -2.7632999420166016, + "logps/chosen": -142.70492553710938, + "logps/rejected": -384.8916015625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1797590255737305, + "rewards/margins": 8.435731887817383, + "rewards/rejected": -12.615490913391113, + "step": 12720 + }, + { + "epoch": 1.98, + "learning_rate": 4.817237408579284e-06, + "logits/chosen": -2.366672992706299, + "logits/rejected": -2.8932559490203857, + "logps/chosen": -202.20645141601562, + "logps/rejected": -416.54522705078125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9846848249435425, + "rewards/margins": 11.525176048278809, + "rewards/rejected": -13.50986099243164, + "step": 12721 + }, + { + "epoch": 1.98, + "learning_rate": 4.8165039680481356e-06, + "logits/chosen": -2.8083932399749756, + "logits/rejected": -2.314605236053467, + "logps/chosen": -220.93739318847656, + "logps/rejected": -181.86306762695312, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.608002185821533, + "rewards/margins": 4.357943534851074, + "rewards/rejected": -8.965946197509766, + "step": 12722 + }, + { + "epoch": 1.98, + "learning_rate": 4.8157705275169874e-06, + "logits/chosen": -2.063891649246216, + "logits/rejected": -2.667085647583008, + "logps/chosen": -253.94137573242188, + "logps/rejected": -361.5543212890625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.144601821899414, + "rewards/margins": 7.418928146362305, + "rewards/rejected": -13.563529968261719, + "step": 12723 + }, + { + "epoch": 1.98, + "learning_rate": 4.81503708698584e-06, + "logits/chosen": -2.9726216793060303, + "logits/rejected": -3.074416399002075, + "logps/chosen": -111.47486877441406, + "logps/rejected": -163.36160278320312, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.203043460845947, + "rewards/margins": 7.636785507202148, + "rewards/rejected": -13.839828491210938, + "step": 12724 + }, + { + "epoch": 1.98, + "learning_rate": 4.814303646454692e-06, + "logits/chosen": -2.8854193687438965, + "logits/rejected": -3.0946993827819824, + "logps/chosen": -66.56385803222656, + "logps/rejected": -238.55335998535156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7869253158569336, + "rewards/margins": 8.567550659179688, + "rewards/rejected": -11.354476928710938, + "step": 12725 + }, + { + "epoch": 1.98, + "learning_rate": 4.813570205923544e-06, + "logits/chosen": -2.1779348850250244, + "logits/rejected": -2.6029469966888428, + "logps/chosen": -144.55496215820312, + "logps/rejected": -262.5661315917969, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.127798080444336, + "rewards/margins": 5.8209452629089355, + "rewards/rejected": -12.94874382019043, + "step": 12726 + }, + { + "epoch": 1.98, + "learning_rate": 4.812836765392396e-06, + "logits/chosen": -2.6358397006988525, + "logits/rejected": -3.0049898624420166, + "logps/chosen": -160.89486694335938, + "logps/rejected": -349.4566650390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.859672546386719, + "rewards/margins": 9.89199447631836, + "rewards/rejected": -14.751667022705078, + "step": 12727 + }, + { + "epoch": 1.98, + "learning_rate": 4.812103324861248e-06, + "logits/chosen": -1.9129891395568848, + "logits/rejected": -2.949767827987671, + "logps/chosen": -103.40568542480469, + "logps/rejected": -348.6025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.189600944519043, + "rewards/margins": 11.395015716552734, + "rewards/rejected": -15.584616661071777, + "step": 12728 + }, + { + "epoch": 1.98, + "learning_rate": 4.8113698843301e-06, + "logits/chosen": -2.297745943069458, + "logits/rejected": -2.8263182640075684, + "logps/chosen": -163.25946044921875, + "logps/rejected": -249.5108642578125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.019040584564209, + "rewards/margins": 6.716994762420654, + "rewards/rejected": -11.736035346984863, + "step": 12729 + }, + { + "epoch": 1.98, + "learning_rate": 4.810636443798952e-06, + "logits/chosen": -2.9357171058654785, + "logits/rejected": -3.017153263092041, + "logps/chosen": -317.43035888671875, + "logps/rejected": -338.608154296875, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.846189022064209, + "rewards/margins": 5.365986347198486, + "rewards/rejected": -12.212175369262695, + "step": 12730 + }, + { + "epoch": 1.98, + "learning_rate": 4.809903003267804e-06, + "logits/chosen": -2.8797013759613037, + "logits/rejected": -2.7274792194366455, + "logps/chosen": -166.0993194580078, + "logps/rejected": -310.3985595703125, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.792450904846191, + "rewards/margins": 4.741034507751465, + "rewards/rejected": -11.533485412597656, + "step": 12731 + }, + { + "epoch": 1.98, + "learning_rate": 4.809169562736656e-06, + "logits/chosen": -2.595813274383545, + "logits/rejected": -2.9768412113189697, + "logps/chosen": -351.2134704589844, + "logps/rejected": -419.7963562011719, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.404729843139648, + "rewards/margins": 7.850110054016113, + "rewards/rejected": -13.254838943481445, + "step": 12732 + }, + { + "epoch": 1.98, + "learning_rate": 4.808436122205509e-06, + "logits/chosen": -2.853707790374756, + "logits/rejected": -2.9915764331817627, + "logps/chosen": -256.6741027832031, + "logps/rejected": -437.16064453125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.312260627746582, + "rewards/margins": 8.52212142944336, + "rewards/rejected": -12.834383010864258, + "step": 12733 + }, + { + "epoch": 1.98, + "learning_rate": 4.807702681674361e-06, + "logits/chosen": -2.876298666000366, + "logits/rejected": -2.959447145462036, + "logps/chosen": -176.03176879882812, + "logps/rejected": -290.7409362792969, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.424452781677246, + "rewards/margins": 8.379295349121094, + "rewards/rejected": -12.80374813079834, + "step": 12734 + }, + { + "epoch": 1.98, + "learning_rate": 4.8069692411432125e-06, + "logits/chosen": -2.5958666801452637, + "logits/rejected": -2.6737959384918213, + "logps/chosen": -188.8585205078125, + "logps/rejected": -441.2440490722656, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9286060333251953, + "rewards/margins": 10.88960075378418, + "rewards/rejected": -14.818206787109375, + "step": 12735 + }, + { + "epoch": 1.98, + "learning_rate": 4.806235800612064e-06, + "logits/chosen": -1.6117961406707764, + "logits/rejected": -2.9880080223083496, + "logps/chosen": -57.598663330078125, + "logps/rejected": -233.2415008544922, + "loss": 0.0323, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.544710636138916, + "rewards/margins": 5.015822887420654, + "rewards/rejected": -9.56053352355957, + "step": 12736 + }, + { + "epoch": 1.98, + "learning_rate": 4.805502360080917e-06, + "logits/chosen": -2.921365976333618, + "logits/rejected": -2.61714506149292, + "logps/chosen": -473.9149169921875, + "logps/rejected": -482.64752197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.468222141265869, + "rewards/margins": 13.115358352661133, + "rewards/rejected": -16.583580017089844, + "step": 12737 + }, + { + "epoch": 1.98, + "learning_rate": 4.80476891954977e-06, + "logits/chosen": -3.0584187507629395, + "logits/rejected": -2.44140362739563, + "logps/chosen": -628.1898803710938, + "logps/rejected": -529.7275390625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.121920585632324, + "rewards/margins": 6.2244110107421875, + "rewards/rejected": -12.346332550048828, + "step": 12738 + }, + { + "epoch": 1.98, + "learning_rate": 4.804035479018622e-06, + "logits/chosen": -1.9623321294784546, + "logits/rejected": -2.8059799671173096, + "logps/chosen": -239.0453643798828, + "logps/rejected": -487.3631591796875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7308502197265625, + "rewards/margins": 8.563847541809082, + "rewards/rejected": -14.294697761535645, + "step": 12739 + }, + { + "epoch": 1.98, + "learning_rate": 4.803302038487474e-06, + "logits/chosen": -2.621941328048706, + "logits/rejected": -2.201568365097046, + "logps/chosen": -110.113525390625, + "logps/rejected": -494.170654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.686549425125122, + "rewards/margins": 11.741933822631836, + "rewards/rejected": -15.428483963012695, + "step": 12740 + }, + { + "epoch": 1.98, + "learning_rate": 4.8025685979563255e-06, + "logits/chosen": -1.826583981513977, + "logits/rejected": -2.811248779296875, + "logps/chosen": -149.46339416503906, + "logps/rejected": -481.4686279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.736462593078613, + "rewards/margins": 12.109674453735352, + "rewards/rejected": -18.84613800048828, + "step": 12741 + }, + { + "epoch": 1.98, + "learning_rate": 4.801835157425178e-06, + "logits/chosen": -1.6334010362625122, + "logits/rejected": -2.758837938308716, + "logps/chosen": -235.5590057373047, + "logps/rejected": -428.56011962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.914738655090332, + "rewards/margins": 13.7334566116333, + "rewards/rejected": -17.648195266723633, + "step": 12742 + }, + { + "epoch": 1.98, + "learning_rate": 4.80110171689403e-06, + "logits/chosen": -1.7271307706832886, + "logits/rejected": -2.903608560562134, + "logps/chosen": -224.79910278320312, + "logps/rejected": -494.8145751953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.695754051208496, + "rewards/margins": 9.198932647705078, + "rewards/rejected": -15.89468765258789, + "step": 12743 + }, + { + "epoch": 1.98, + "learning_rate": 4.800368276362882e-06, + "logits/chosen": -3.0105140209198, + "logits/rejected": -2.0683352947235107, + "logps/chosen": -280.7881774902344, + "logps/rejected": -353.68707275390625, + "loss": 0.8551, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.978133201599121, + "rewards/margins": 3.6676297187805176, + "rewards/rejected": -10.645763397216797, + "step": 12744 + }, + { + "epoch": 1.98, + "learning_rate": 4.799634835831734e-06, + "logits/chosen": -2.407968759536743, + "logits/rejected": -3.0158369541168213, + "logps/chosen": -93.91259002685547, + "logps/rejected": -312.3994140625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2565605640411377, + "rewards/margins": 7.462730407714844, + "rewards/rejected": -10.719291687011719, + "step": 12745 + }, + { + "epoch": 1.98, + "learning_rate": 4.798901395300586e-06, + "logits/chosen": -2.6973717212677, + "logits/rejected": -3.103480577468872, + "logps/chosen": -74.12368774414062, + "logps/rejected": -288.0964050292969, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.633228302001953, + "rewards/margins": 11.47270393371582, + "rewards/rejected": -15.105932235717773, + "step": 12746 + }, + { + "epoch": 1.98, + "learning_rate": 4.7981679547694385e-06, + "logits/chosen": -2.928083896636963, + "logits/rejected": -2.010762929916382, + "logps/chosen": -325.8952331542969, + "logps/rejected": -295.1964111328125, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.4408597946167, + "rewards/margins": 4.657999515533447, + "rewards/rejected": -15.098859786987305, + "step": 12747 + }, + { + "epoch": 1.98, + "learning_rate": 4.79743451423829e-06, + "logits/chosen": -2.247671604156494, + "logits/rejected": -2.9263017177581787, + "logps/chosen": -84.95117950439453, + "logps/rejected": -240.0536346435547, + "loss": 0.0917, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.897324562072754, + "rewards/margins": 5.2183518409729, + "rewards/rejected": -12.115676879882812, + "step": 12748 + }, + { + "epoch": 1.98, + "learning_rate": 4.796701073707142e-06, + "logits/chosen": -0.9733361601829529, + "logits/rejected": -1.8091251850128174, + "logps/chosen": -254.34844970703125, + "logps/rejected": -519.5844116210938, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.009774684906006, + "rewards/margins": 10.453288078308105, + "rewards/rejected": -13.46306324005127, + "step": 12749 + }, + { + "epoch": 1.98, + "learning_rate": 4.795967633175994e-06, + "logits/chosen": -2.7705020904541016, + "logits/rejected": -1.7410584688186646, + "logps/chosen": -215.28900146484375, + "logps/rejected": -202.8623046875, + "loss": 0.0707, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.015539169311523, + "rewards/margins": 5.509620666503906, + "rewards/rejected": -10.52515983581543, + "step": 12750 + }, + { + "epoch": 1.98, + "learning_rate": 4.795234192644847e-06, + "logits/chosen": -3.0120182037353516, + "logits/rejected": -3.056985855102539, + "logps/chosen": -57.804664611816406, + "logps/rejected": -265.7545166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.399838924407959, + "rewards/margins": 10.984634399414062, + "rewards/rejected": -14.38447380065918, + "step": 12751 + }, + { + "epoch": 1.98, + "learning_rate": 4.794500752113699e-06, + "logits/chosen": -2.6914725303649902, + "logits/rejected": -3.0208353996276855, + "logps/chosen": -146.6279296875, + "logps/rejected": -313.4110107421875, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.384086608886719, + "rewards/margins": 5.944215774536133, + "rewards/rejected": -10.328302383422852, + "step": 12752 + }, + { + "epoch": 1.98, + "learning_rate": 4.7937673115825506e-06, + "logits/chosen": -1.7768921852111816, + "logits/rejected": -2.4709713459014893, + "logps/chosen": -440.48004150390625, + "logps/rejected": -399.5322570800781, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.852353096008301, + "rewards/margins": 8.432822227478027, + "rewards/rejected": -13.285175323486328, + "step": 12753 + }, + { + "epoch": 1.98, + "learning_rate": 4.793033871051403e-06, + "logits/chosen": -2.451221227645874, + "logits/rejected": -1.3105794191360474, + "logps/chosen": -249.87704467773438, + "logps/rejected": -84.10014343261719, + "loss": 4.4086, + "rewards/accuracies": 0.0, + "rewards/chosen": -11.684696197509766, + "rewards/margins": -4.337624549865723, + "rewards/rejected": -7.347071170806885, + "step": 12754 + }, + { + "epoch": 1.98, + "learning_rate": 4.792300430520255e-06, + "logits/chosen": -2.988704204559326, + "logits/rejected": -3.192054033279419, + "logps/chosen": -161.29409790039062, + "logps/rejected": -487.5285949707031, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.868860721588135, + "rewards/margins": 10.270086288452148, + "rewards/rejected": -16.138946533203125, + "step": 12755 + }, + { + "epoch": 1.98, + "learning_rate": 4.791566989989108e-06, + "logits/chosen": -2.985910415649414, + "logits/rejected": -2.902012825012207, + "logps/chosen": -207.9149932861328, + "logps/rejected": -205.53176879882812, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.029125213623047, + "rewards/margins": 7.423120498657227, + "rewards/rejected": -12.452245712280273, + "step": 12756 + }, + { + "epoch": 1.98, + "learning_rate": 4.79083354945796e-06, + "logits/chosen": -2.5512495040893555, + "logits/rejected": -2.908167600631714, + "logps/chosen": -135.0827178955078, + "logps/rejected": -256.31256103515625, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.27236795425415, + "rewards/margins": 5.455694675445557, + "rewards/rejected": -11.728062629699707, + "step": 12757 + }, + { + "epoch": 1.98, + "learning_rate": 4.790100108926812e-06, + "logits/chosen": -1.8291404247283936, + "logits/rejected": -3.020160436630249, + "logps/chosen": -183.56402587890625, + "logps/rejected": -465.5973205566406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.032898426055908, + "rewards/margins": 10.065420150756836, + "rewards/rejected": -12.098318099975586, + "step": 12758 + }, + { + "epoch": 1.98, + "learning_rate": 4.7893666683956635e-06, + "logits/chosen": -2.236295461654663, + "logits/rejected": -2.5287115573883057, + "logps/chosen": -127.9191665649414, + "logps/rejected": -282.0179748535156, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7236456871032715, + "rewards/margins": 9.309416770935059, + "rewards/rejected": -14.033061981201172, + "step": 12759 + }, + { + "epoch": 1.98, + "learning_rate": 4.788633227864516e-06, + "logits/chosen": -3.1005449295043945, + "logits/rejected": -3.169532537460327, + "logps/chosen": -362.2273254394531, + "logps/rejected": -251.81820678710938, + "loss": 0.1985, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.08535099029541, + "rewards/margins": 6.323753356933594, + "rewards/rejected": -12.409104347229004, + "step": 12760 + }, + { + "epoch": 1.98, + "learning_rate": 4.787899787333368e-06, + "logits/chosen": -2.887148857116699, + "logits/rejected": -3.0001797676086426, + "logps/chosen": -252.20521545410156, + "logps/rejected": -131.20907592773438, + "loss": 0.9127, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.513157367706299, + "rewards/margins": 1.6638197898864746, + "rewards/rejected": -8.176977157592773, + "step": 12761 + }, + { + "epoch": 1.98, + "learning_rate": 4.78716634680222e-06, + "logits/chosen": -2.8834848403930664, + "logits/rejected": -2.8209550380706787, + "logps/chosen": -117.98177337646484, + "logps/rejected": -411.7357177734375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.133402347564697, + "rewards/margins": 10.044317245483398, + "rewards/rejected": -15.177719116210938, + "step": 12762 + }, + { + "epoch": 1.98, + "learning_rate": 4.786432906271072e-06, + "logits/chosen": -2.4994728565216064, + "logits/rejected": -3.0112967491149902, + "logps/chosen": -637.7388305664062, + "logps/rejected": -570.9252319335938, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6862683296203613, + "rewards/margins": 7.218698501586914, + "rewards/rejected": -10.904966354370117, + "step": 12763 + }, + { + "epoch": 1.99, + "learning_rate": 4.785699465739925e-06, + "logits/chosen": -2.6831257343292236, + "logits/rejected": -2.8801348209381104, + "logps/chosen": -630.1437377929688, + "logps/rejected": -787.3510131835938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.096119403839111, + "rewards/margins": 9.681673049926758, + "rewards/rejected": -15.777792930603027, + "step": 12764 + }, + { + "epoch": 1.99, + "learning_rate": 4.7849660252087765e-06, + "logits/chosen": -2.8867123126983643, + "logits/rejected": -2.35788631439209, + "logps/chosen": -749.9546508789062, + "logps/rejected": -627.4298095703125, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.008793830871582, + "rewards/margins": 6.462824821472168, + "rewards/rejected": -10.47161865234375, + "step": 12765 + }, + { + "epoch": 1.99, + "learning_rate": 4.784232584677628e-06, + "logits/chosen": -2.0023200511932373, + "logits/rejected": -2.9268884658813477, + "logps/chosen": -122.11972045898438, + "logps/rejected": -455.0986022949219, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.086978435516357, + "rewards/margins": 10.066680908203125, + "rewards/rejected": -15.15365982055664, + "step": 12766 + }, + { + "epoch": 1.99, + "learning_rate": 4.78349914414648e-06, + "logits/chosen": -2.904425621032715, + "logits/rejected": -2.614542245864868, + "logps/chosen": -338.564453125, + "logps/rejected": -354.2103271484375, + "loss": 0.8085, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.5623650550842285, + "rewards/margins": 6.1979079246521, + "rewards/rejected": -13.760272979736328, + "step": 12767 + }, + { + "epoch": 1.99, + "learning_rate": 4.782765703615332e-06, + "logits/chosen": -3.107883930206299, + "logits/rejected": -2.4893264770507812, + "logps/chosen": -483.15179443359375, + "logps/rejected": -386.28546142578125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.626240253448486, + "rewards/margins": 7.139334678649902, + "rewards/rejected": -13.76557445526123, + "step": 12768 + }, + { + "epoch": 1.99, + "learning_rate": 4.782032263084185e-06, + "logits/chosen": -2.8735544681549072, + "logits/rejected": -2.0890116691589355, + "logps/chosen": -435.87237548828125, + "logps/rejected": -486.933837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.873997449874878, + "rewards/margins": 11.618245124816895, + "rewards/rejected": -14.492242813110352, + "step": 12769 + }, + { + "epoch": 1.99, + "learning_rate": 4.781298822553037e-06, + "logits/chosen": -2.31235671043396, + "logits/rejected": -2.948147773742676, + "logps/chosen": -164.66989135742188, + "logps/rejected": -369.0993957519531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.917132616043091, + "rewards/margins": 11.127817153930664, + "rewards/rejected": -14.044949531555176, + "step": 12770 + }, + { + "epoch": 1.99, + "learning_rate": 4.7805653820218895e-06, + "logits/chosen": -2.7528536319732666, + "logits/rejected": -2.315387725830078, + "logps/chosen": -303.27685546875, + "logps/rejected": -569.36474609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.10654354095459, + "rewards/margins": 10.162687301635742, + "rewards/rejected": -16.26923179626465, + "step": 12771 + }, + { + "epoch": 1.99, + "learning_rate": 4.779831941490741e-06, + "logits/chosen": -1.230843424797058, + "logits/rejected": -1.7270236015319824, + "logps/chosen": -46.21685028076172, + "logps/rejected": -188.28732299804688, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.552248477935791, + "rewards/margins": 7.312982082366943, + "rewards/rejected": -9.865230560302734, + "step": 12772 + }, + { + "epoch": 1.99, + "learning_rate": 4.779098500959594e-06, + "logits/chosen": -1.608007550239563, + "logits/rejected": -3.037493944168091, + "logps/chosen": -219.98147583007812, + "logps/rejected": -448.0049743652344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.641240119934082, + "rewards/margins": 12.269495010375977, + "rewards/rejected": -15.910734176635742, + "step": 12773 + }, + { + "epoch": 1.99, + "learning_rate": 4.778365060428446e-06, + "logits/chosen": -3.0086801052093506, + "logits/rejected": -2.9578959941864014, + "logps/chosen": -364.24383544921875, + "logps/rejected": -229.15399169921875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.819274425506592, + "rewards/margins": 7.283142566680908, + "rewards/rejected": -12.1024169921875, + "step": 12774 + }, + { + "epoch": 1.99, + "learning_rate": 4.777631619897298e-06, + "logits/chosen": -1.6483675241470337, + "logits/rejected": -2.527031183242798, + "logps/chosen": -140.40187072753906, + "logps/rejected": -312.60528564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.86442494392395, + "rewards/margins": 10.168556213378906, + "rewards/rejected": -13.032980918884277, + "step": 12775 + }, + { + "epoch": 1.99, + "learning_rate": 4.77689817936615e-06, + "logits/chosen": -2.84212064743042, + "logits/rejected": -2.734895944595337, + "logps/chosen": -447.83026123046875, + "logps/rejected": -349.52044677734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7042908668518066, + "rewards/margins": 8.048537254333496, + "rewards/rejected": -11.752827644348145, + "step": 12776 + }, + { + "epoch": 1.99, + "learning_rate": 4.776164738835002e-06, + "logits/chosen": -2.7846903800964355, + "logits/rejected": -2.914079189300537, + "logps/chosen": -364.869873046875, + "logps/rejected": -309.22705078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.351243495941162, + "rewards/margins": 8.317630767822266, + "rewards/rejected": -12.66887378692627, + "step": 12777 + }, + { + "epoch": 1.99, + "learning_rate": 4.775431298303854e-06, + "logits/chosen": -2.6007585525512695, + "logits/rejected": -2.588613510131836, + "logps/chosen": -249.8463897705078, + "logps/rejected": -480.5954895019531, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.728384017944336, + "rewards/margins": 9.169303894042969, + "rewards/rejected": -13.897687911987305, + "step": 12778 + }, + { + "epoch": 1.99, + "learning_rate": 4.774697857772706e-06, + "logits/chosen": -2.781433582305908, + "logits/rejected": -1.7412863969802856, + "logps/chosen": -171.66140747070312, + "logps/rejected": -123.32566833496094, + "loss": 0.2218, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.254189491271973, + "rewards/margins": 1.5579171180725098, + "rewards/rejected": -7.812106132507324, + "step": 12779 + }, + { + "epoch": 1.99, + "learning_rate": 4.773964417241558e-06, + "logits/chosen": -2.6666932106018066, + "logits/rejected": -2.921542167663574, + "logps/chosen": -101.05270385742188, + "logps/rejected": -341.9883728027344, + "loss": 0.431, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.384384632110596, + "rewards/margins": 3.459056854248047, + "rewards/rejected": -9.843441009521484, + "step": 12780 + }, + { + "epoch": 1.99, + "learning_rate": 4.77323097671041e-06, + "logits/chosen": -3.2016420364379883, + "logits/rejected": -2.9128518104553223, + "logps/chosen": -347.37115478515625, + "logps/rejected": -370.263427734375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.630407810211182, + "rewards/margins": 6.150615215301514, + "rewards/rejected": -11.781023025512695, + "step": 12781 + }, + { + "epoch": 1.99, + "learning_rate": 4.772497536179263e-06, + "logits/chosen": -2.325305938720703, + "logits/rejected": -2.8774328231811523, + "logps/chosen": -83.95726013183594, + "logps/rejected": -212.48941040039062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.14210319519043, + "rewards/margins": 8.553874969482422, + "rewards/rejected": -12.695977210998535, + "step": 12782 + }, + { + "epoch": 1.99, + "learning_rate": 4.7717640956481145e-06, + "logits/chosen": -2.6701905727386475, + "logits/rejected": -2.8893015384674072, + "logps/chosen": -151.02891540527344, + "logps/rejected": -251.7003173828125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.255096673965454, + "rewards/margins": 6.950198173522949, + "rewards/rejected": -10.205294609069824, + "step": 12783 + }, + { + "epoch": 1.99, + "learning_rate": 4.771030655116966e-06, + "logits/chosen": -1.8895808458328247, + "logits/rejected": -2.664515256881714, + "logps/chosen": -231.42747497558594, + "logps/rejected": -367.672607421875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.751138687133789, + "rewards/margins": 7.74630069732666, + "rewards/rejected": -12.497438430786133, + "step": 12784 + }, + { + "epoch": 1.99, + "learning_rate": 4.770297214585818e-06, + "logits/chosen": -1.993670105934143, + "logits/rejected": -2.8188183307647705, + "logps/chosen": -285.5245361328125, + "logps/rejected": -560.7183837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.814681529998779, + "rewards/margins": 11.430582046508789, + "rewards/rejected": -17.245264053344727, + "step": 12785 + }, + { + "epoch": 1.99, + "learning_rate": 4.76956377405467e-06, + "logits/chosen": -2.151324987411499, + "logits/rejected": -3.0531303882598877, + "logps/chosen": -208.16156005859375, + "logps/rejected": -514.6666259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.701896667480469, + "rewards/margins": 12.07516860961914, + "rewards/rejected": -16.77706527709961, + "step": 12786 + }, + { + "epoch": 1.99, + "learning_rate": 4.768830333523523e-06, + "logits/chosen": -2.400344133377075, + "logits/rejected": -2.855393171310425, + "logps/chosen": -298.16864013671875, + "logps/rejected": -436.62286376953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.964390754699707, + "rewards/margins": 8.021465301513672, + "rewards/rejected": -11.985856056213379, + "step": 12787 + }, + { + "epoch": 1.99, + "learning_rate": 4.768096892992376e-06, + "logits/chosen": -2.922487258911133, + "logits/rejected": -1.517059087753296, + "logps/chosen": -263.4178161621094, + "logps/rejected": -138.57574462890625, + "loss": 0.1062, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2208945751190186, + "rewards/margins": 2.2889819145202637, + "rewards/rejected": -5.509876251220703, + "step": 12788 + }, + { + "epoch": 1.99, + "learning_rate": 4.7673634524612275e-06, + "logits/chosen": -2.968454122543335, + "logits/rejected": -2.224761486053467, + "logps/chosen": -409.1291809082031, + "logps/rejected": -357.01220703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.048096656799316, + "rewards/margins": 10.253318786621094, + "rewards/rejected": -18.301414489746094, + "step": 12789 + }, + { + "epoch": 1.99, + "learning_rate": 4.766630011930079e-06, + "logits/chosen": -2.6413118839263916, + "logits/rejected": -3.0259339809417725, + "logps/chosen": -206.34254455566406, + "logps/rejected": -335.83709716796875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.665854454040527, + "rewards/margins": 7.804184436798096, + "rewards/rejected": -12.470039367675781, + "step": 12790 + }, + { + "epoch": 1.99, + "learning_rate": 4.765896571398932e-06, + "logits/chosen": -2.2094173431396484, + "logits/rejected": -2.8563153743743896, + "logps/chosen": -216.71405029296875, + "logps/rejected": -468.7017517089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.960482597351074, + "rewards/margins": 12.372527122497559, + "rewards/rejected": -17.333009719848633, + "step": 12791 + }, + { + "epoch": 1.99, + "learning_rate": 4.765163130867784e-06, + "logits/chosen": -2.962176561355591, + "logits/rejected": -2.9902539253234863, + "logps/chosen": -151.97763061523438, + "logps/rejected": -280.2513427734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1399102210998535, + "rewards/margins": 8.147115707397461, + "rewards/rejected": -11.287025451660156, + "step": 12792 + }, + { + "epoch": 1.99, + "learning_rate": 4.764429690336636e-06, + "logits/chosen": -1.869643211364746, + "logits/rejected": -2.9186253547668457, + "logps/chosen": -136.21112060546875, + "logps/rejected": -384.15234375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.788515090942383, + "rewards/margins": 10.16722297668457, + "rewards/rejected": -14.955738067626953, + "step": 12793 + }, + { + "epoch": 1.99, + "learning_rate": 4.763696249805488e-06, + "logits/chosen": -1.8016647100448608, + "logits/rejected": -2.5868260860443115, + "logps/chosen": -66.42520904541016, + "logps/rejected": -246.9300079345703, + "loss": 0.0513, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.437372207641602, + "rewards/margins": 3.688788414001465, + "rewards/rejected": -9.126160621643066, + "step": 12794 + }, + { + "epoch": 1.99, + "learning_rate": 4.76296280927434e-06, + "logits/chosen": -1.9036113023757935, + "logits/rejected": -2.880845546722412, + "logps/chosen": -308.8485107421875, + "logps/rejected": -421.543212890625, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.194167613983154, + "rewards/margins": 6.78935432434082, + "rewards/rejected": -12.983522415161133, + "step": 12795 + }, + { + "epoch": 1.99, + "learning_rate": 4.762229368743192e-06, + "logits/chosen": -3.0519156455993652, + "logits/rejected": -3.13877010345459, + "logps/chosen": -67.36736297607422, + "logps/rejected": -180.59588623046875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7912182807922363, + "rewards/margins": 6.541831970214844, + "rewards/rejected": -9.333049774169922, + "step": 12796 + }, + { + "epoch": 1.99, + "learning_rate": 4.761495928212044e-06, + "logits/chosen": -2.969407081604004, + "logits/rejected": -2.9901187419891357, + "logps/chosen": -297.5213317871094, + "logps/rejected": -347.04052734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.419953346252441, + "rewards/margins": 9.929314613342285, + "rewards/rejected": -15.349267959594727, + "step": 12797 + }, + { + "epoch": 1.99, + "learning_rate": 4.760762487680896e-06, + "logits/chosen": -2.922699451446533, + "logits/rejected": -1.6309159994125366, + "logps/chosen": -360.5143737792969, + "logps/rejected": -362.6160583496094, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.570937156677246, + "rewards/margins": 6.856551170349121, + "rewards/rejected": -11.427488327026367, + "step": 12798 + }, + { + "epoch": 1.99, + "learning_rate": 4.760029047149748e-06, + "logits/chosen": -2.7116148471832275, + "logits/rejected": -2.9821536540985107, + "logps/chosen": -558.6810302734375, + "logps/rejected": -421.12982177734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1151227951049805, + "rewards/margins": 8.455135345458984, + "rewards/rejected": -14.570257186889648, + "step": 12799 + }, + { + "epoch": 1.99, + "learning_rate": 4.759295606618601e-06, + "logits/chosen": -2.96980357170105, + "logits/rejected": -1.2741106748580933, + "logps/chosen": -830.8788452148438, + "logps/rejected": -434.52044677734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7104458808898926, + "rewards/margins": 10.905447959899902, + "rewards/rejected": -13.615894317626953, + "step": 12800 + }, + { + "epoch": 1.99, + "learning_rate": 4.758562166087453e-06, + "logits/chosen": -2.135497808456421, + "logits/rejected": -2.4924981594085693, + "logps/chosen": -467.03668212890625, + "logps/rejected": -501.5206298828125, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.491652011871338, + "rewards/margins": 7.822821140289307, + "rewards/rejected": -13.314473152160645, + "step": 12801 + }, + { + "epoch": 1.99, + "learning_rate": 4.7578287255563045e-06, + "logits/chosen": -2.812718629837036, + "logits/rejected": -2.1208412647247314, + "logps/chosen": -185.8875274658203, + "logps/rejected": -184.03350830078125, + "loss": 0.0506, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7747721672058105, + "rewards/margins": 2.9713711738586426, + "rewards/rejected": -10.746143341064453, + "step": 12802 + }, + { + "epoch": 1.99, + "learning_rate": 4.757095285025156e-06, + "logits/chosen": -2.643449544906616, + "logits/rejected": -3.079066753387451, + "logps/chosen": -106.2036361694336, + "logps/rejected": -222.2591552734375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7072982788085938, + "rewards/margins": 6.326639175415039, + "rewards/rejected": -10.033937454223633, + "step": 12803 + }, + { + "epoch": 1.99, + "learning_rate": 4.756361844494009e-06, + "logits/chosen": -2.7826850414276123, + "logits/rejected": -3.0931944847106934, + "logps/chosen": -235.85862731933594, + "logps/rejected": -471.109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.367605209350586, + "rewards/margins": 11.872316360473633, + "rewards/rejected": -17.23992156982422, + "step": 12804 + }, + { + "epoch": 1.99, + "learning_rate": 4.755628403962862e-06, + "logits/chosen": -2.5194694995880127, + "logits/rejected": -3.0144219398498535, + "logps/chosen": -332.3865966796875, + "logps/rejected": -441.55596923828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.651317596435547, + "rewards/margins": 10.116533279418945, + "rewards/rejected": -16.767850875854492, + "step": 12805 + }, + { + "epoch": 1.99, + "learning_rate": 4.754894963431714e-06, + "logits/chosen": -2.9468276500701904, + "logits/rejected": -3.035416603088379, + "logps/chosen": -102.1229248046875, + "logps/rejected": -196.40414428710938, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.301647186279297, + "rewards/margins": 5.614645957946777, + "rewards/rejected": -9.916292190551758, + "step": 12806 + }, + { + "epoch": 1.99, + "learning_rate": 4.7541615229005656e-06, + "logits/chosen": -2.6499645709991455, + "logits/rejected": -2.7957470417022705, + "logps/chosen": -129.52838134765625, + "logps/rejected": -294.6512451171875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.814358711242676, + "rewards/margins": 6.527535915374756, + "rewards/rejected": -11.341894149780273, + "step": 12807 + }, + { + "epoch": 1.99, + "learning_rate": 4.7534280823694174e-06, + "logits/chosen": -2.3791959285736084, + "logits/rejected": -2.86216139793396, + "logps/chosen": -222.8210906982422, + "logps/rejected": -407.86810302734375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.118961334228516, + "rewards/margins": 8.440879821777344, + "rewards/rejected": -12.55984115600586, + "step": 12808 + }, + { + "epoch": 1.99, + "learning_rate": 4.75269464183827e-06, + "logits/chosen": -2.211434841156006, + "logits/rejected": -2.785208225250244, + "logps/chosen": -132.72320556640625, + "logps/rejected": -337.7565612792969, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.650079250335693, + "rewards/margins": 6.488298416137695, + "rewards/rejected": -11.138378143310547, + "step": 12809 + }, + { + "epoch": 1.99, + "learning_rate": 4.751961201307122e-06, + "logits/chosen": -1.6178518533706665, + "logits/rejected": -2.4928338527679443, + "logps/chosen": -201.85421752929688, + "logps/rejected": -477.7695617675781, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.942401885986328, + "rewards/margins": 12.631967544555664, + "rewards/rejected": -17.574369430541992, + "step": 12810 + }, + { + "epoch": 1.99, + "learning_rate": 4.751227760775974e-06, + "logits/chosen": -2.8685641288757324, + "logits/rejected": -3.02449631690979, + "logps/chosen": -269.0810852050781, + "logps/rejected": -364.7557373046875, + "loss": 2.2455, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.214659690856934, + "rewards/margins": 1.7507121562957764, + "rewards/rejected": -11.965372085571289, + "step": 12811 + }, + { + "epoch": 1.99, + "learning_rate": 4.750494320244826e-06, + "logits/chosen": -3.0545296669006348, + "logits/rejected": -2.995342254638672, + "logps/chosen": -187.1681671142578, + "logps/rejected": -208.84320068359375, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.93030309677124, + "rewards/margins": 4.213839530944824, + "rewards/rejected": -10.144142150878906, + "step": 12812 + }, + { + "epoch": 1.99, + "learning_rate": 4.7497608797136785e-06, + "logits/chosen": -2.8893678188323975, + "logits/rejected": -3.0533740520477295, + "logps/chosen": -276.2113952636719, + "logps/rejected": -203.27883911132812, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.300981521606445, + "rewards/margins": 5.290072917938232, + "rewards/rejected": -9.59105396270752, + "step": 12813 + }, + { + "epoch": 1.99, + "learning_rate": 4.74902743918253e-06, + "logits/chosen": -1.1799769401550293, + "logits/rejected": -3.0803756713867188, + "logps/chosen": -159.09725952148438, + "logps/rejected": -539.7259521484375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.613194465637207, + "rewards/margins": 6.859074592590332, + "rewards/rejected": -15.472269058227539, + "step": 12814 + }, + { + "epoch": 1.99, + "learning_rate": 4.748293998651382e-06, + "logits/chosen": -2.2667391300201416, + "logits/rejected": -3.027191162109375, + "logps/chosen": -349.14813232421875, + "logps/rejected": -576.1588745117188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.451653480529785, + "rewards/margins": 11.676923751831055, + "rewards/rejected": -14.128576278686523, + "step": 12815 + }, + { + "epoch": 1.99, + "learning_rate": 4.747560558120234e-06, + "logits/chosen": -2.1222314834594727, + "logits/rejected": -2.8536720275878906, + "logps/chosen": -129.10037231445312, + "logps/rejected": -228.30023193359375, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.42396354675293, + "rewards/margins": 3.7664546966552734, + "rewards/rejected": -12.190418243408203, + "step": 12816 + }, + { + "epoch": 1.99, + "learning_rate": 4.746827117589086e-06, + "logits/chosen": -2.9380972385406494, + "logits/rejected": -3.0465564727783203, + "logps/chosen": -46.77177429199219, + "logps/rejected": -201.0829620361328, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6429595947265625, + "rewards/margins": 9.34480094909668, + "rewards/rejected": -12.987760543823242, + "step": 12817 + }, + { + "epoch": 1.99, + "learning_rate": 4.746093677057939e-06, + "logits/chosen": -2.719940423965454, + "logits/rejected": -2.4418163299560547, + "logps/chosen": -298.223388671875, + "logps/rejected": -324.7052307128906, + "loss": 1.3256, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.227836608886719, + "rewards/margins": 1.4214038848876953, + "rewards/rejected": -9.649240493774414, + "step": 12818 + }, + { + "epoch": 1.99, + "learning_rate": 4.745360236526791e-06, + "logits/chosen": -2.905142307281494, + "logits/rejected": -2.9446706771850586, + "logps/chosen": -163.0492706298828, + "logps/rejected": -327.3424072265625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.874841213226318, + "rewards/margins": 7.4120073318481445, + "rewards/rejected": -12.286848068237305, + "step": 12819 + }, + { + "epoch": 1.99, + "learning_rate": 4.7446267959956425e-06, + "logits/chosen": -2.401380777359009, + "logits/rejected": -2.775400400161743, + "logps/chosen": -454.8055725097656, + "logps/rejected": -526.832763671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8428802490234375, + "rewards/margins": 9.048441886901855, + "rewards/rejected": -13.89132308959961, + "step": 12820 + }, + { + "epoch": 1.99, + "learning_rate": 4.743893355464495e-06, + "logits/chosen": -1.7604756355285645, + "logits/rejected": -2.617135524749756, + "logps/chosen": -101.9920425415039, + "logps/rejected": -349.3066711425781, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.017239093780518, + "rewards/margins": 8.758068084716797, + "rewards/rejected": -15.775306701660156, + "step": 12821 + }, + { + "epoch": 1.99, + "learning_rate": 4.743159914933348e-06, + "logits/chosen": -2.8011088371276855, + "logits/rejected": -1.6160438060760498, + "logps/chosen": -203.59437561035156, + "logps/rejected": -187.34725952148438, + "loss": 0.4381, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.215456008911133, + "rewards/margins": 3.5901103019714355, + "rewards/rejected": -12.80556583404541, + "step": 12822 + }, + { + "epoch": 1.99, + "learning_rate": 4.7424264744022e-06, + "logits/chosen": -2.6678693294525146, + "logits/rejected": -3.0397353172302246, + "logps/chosen": -120.9096908569336, + "logps/rejected": -322.3678894042969, + "loss": 0.1112, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5867509841918945, + "rewards/margins": 4.618454456329346, + "rewards/rejected": -11.205205917358398, + "step": 12823 + }, + { + "epoch": 1.99, + "learning_rate": 4.741693033871052e-06, + "logits/chosen": -1.517186164855957, + "logits/rejected": -2.8304827213287354, + "logps/chosen": -115.09211730957031, + "logps/rejected": -305.8011474609375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.375253200531006, + "rewards/margins": 8.578304290771484, + "rewards/rejected": -14.953557968139648, + "step": 12824 + }, + { + "epoch": 1.99, + "learning_rate": 4.740959593339904e-06, + "logits/chosen": -3.009977102279663, + "logits/rejected": -2.8206000328063965, + "logps/chosen": -263.55572509765625, + "logps/rejected": -420.9465026855469, + "loss": 0.0454, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.72527551651001, + "rewards/margins": 9.174434661865234, + "rewards/rejected": -13.899709701538086, + "step": 12825 + }, + { + "epoch": 1.99, + "learning_rate": 4.7402261528087555e-06, + "logits/chosen": -2.999516248703003, + "logits/rejected": -2.113137722015381, + "logps/chosen": -233.01846313476562, + "logps/rejected": -189.7369384765625, + "loss": 0.1971, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.593412399291992, + "rewards/margins": 5.871339321136475, + "rewards/rejected": -9.464752197265625, + "step": 12826 + }, + { + "epoch": 1.99, + "learning_rate": 4.739492712277608e-06, + "logits/chosen": -2.3884615898132324, + "logits/rejected": -2.8649091720581055, + "logps/chosen": -103.1280288696289, + "logps/rejected": -220.18887329101562, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.342191696166992, + "rewards/margins": 6.360807418823242, + "rewards/rejected": -10.702999114990234, + "step": 12827 + }, + { + "epoch": 2.0, + "learning_rate": 4.73875927174646e-06, + "logits/chosen": -1.6419062614440918, + "logits/rejected": -2.9644131660461426, + "logps/chosen": -284.1050720214844, + "logps/rejected": -520.2357177734375, + "loss": 0.1378, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.790932655334473, + "rewards/margins": 5.665233612060547, + "rewards/rejected": -11.45616626739502, + "step": 12828 + }, + { + "epoch": 2.0, + "learning_rate": 4.738025831215312e-06, + "logits/chosen": -2.931001663208008, + "logits/rejected": -2.792318344116211, + "logps/chosen": -473.1026611328125, + "logps/rejected": -861.9320678710938, + "loss": 0.0807, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.288311958312988, + "rewards/margins": 2.5180442333221436, + "rewards/rejected": -12.806356430053711, + "step": 12829 + }, + { + "epoch": 2.0, + "learning_rate": 4.737292390684164e-06, + "logits/chosen": -1.883080005645752, + "logits/rejected": -2.902723550796509, + "logps/chosen": -124.56837463378906, + "logps/rejected": -421.0782470703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1902689933776855, + "rewards/margins": 10.889944076538086, + "rewards/rejected": -15.08021354675293, + "step": 12830 + }, + { + "epoch": 2.0, + "learning_rate": 4.7365589501530166e-06, + "logits/chosen": -2.630485773086548, + "logits/rejected": -2.4434163570404053, + "logps/chosen": -203.57882690429688, + "logps/rejected": -445.6797180175781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.93694543838501, + "rewards/margins": 11.785005569458008, + "rewards/rejected": -17.72195053100586, + "step": 12831 + }, + { + "epoch": 2.0, + "learning_rate": 4.7358255096218684e-06, + "logits/chosen": -2.8297810554504395, + "logits/rejected": -2.6122519969940186, + "logps/chosen": -480.7049865722656, + "logps/rejected": -445.6156005859375, + "loss": 0.0575, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7038164138793945, + "rewards/margins": 6.712686538696289, + "rewards/rejected": -12.416502952575684, + "step": 12832 + }, + { + "epoch": 2.0, + "learning_rate": 4.73509206909072e-06, + "logits/chosen": -1.974197506904602, + "logits/rejected": -2.674584150314331, + "logps/chosen": -175.71615600585938, + "logps/rejected": -296.6540222167969, + "loss": 1.6767, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.177196502685547, + "rewards/margins": 2.62446665763855, + "rewards/rejected": -11.801663398742676, + "step": 12833 + }, + { + "epoch": 2.0, + "learning_rate": 4.734358628559572e-06, + "logits/chosen": -1.8515044450759888, + "logits/rejected": -2.4476945400238037, + "logps/chosen": -213.83935546875, + "logps/rejected": -388.89453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.135054588317871, + "rewards/margins": 8.620453834533691, + "rewards/rejected": -14.755508422851562, + "step": 12834 + }, + { + "epoch": 2.0, + "learning_rate": 4.733625188028424e-06, + "logits/chosen": -2.5420727729797363, + "logits/rejected": -2.7608206272125244, + "logps/chosen": -142.4905242919922, + "logps/rejected": -348.341796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.534381866455078, + "rewards/margins": 9.162589073181152, + "rewards/rejected": -13.696969985961914, + "step": 12835 + }, + { + "epoch": 2.0, + "learning_rate": 4.732891747497277e-06, + "logits/chosen": -1.976729154586792, + "logits/rejected": -2.7484354972839355, + "logps/chosen": -300.17022705078125, + "logps/rejected": -351.87933349609375, + "loss": 0.0637, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.637179374694824, + "rewards/margins": 4.883920669555664, + "rewards/rejected": -10.521100044250488, + "step": 12836 + }, + { + "epoch": 2.0, + "learning_rate": 4.732158306966129e-06, + "logits/chosen": -2.7885520458221436, + "logits/rejected": -2.5167200565338135, + "logps/chosen": -212.3681182861328, + "logps/rejected": -396.5142822265625, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.577136039733887, + "rewards/margins": 5.849061965942383, + "rewards/rejected": -13.42619800567627, + "step": 12837 + }, + { + "epoch": 2.0, + "learning_rate": 4.731424866434981e-06, + "logits/chosen": -2.74552059173584, + "logits/rejected": -2.9454872608184814, + "logps/chosen": -125.45052337646484, + "logps/rejected": -205.3489990234375, + "loss": 0.0575, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.811361789703369, + "rewards/margins": 2.8408358097076416, + "rewards/rejected": -9.65219783782959, + "step": 12838 + }, + { + "epoch": 2.0, + "learning_rate": 4.730691425903833e-06, + "logits/chosen": -1.8325897455215454, + "logits/rejected": -2.6792125701904297, + "logps/chosen": -103.09980773925781, + "logps/rejected": -250.07675170898438, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.989821434020996, + "rewards/margins": 6.892472743988037, + "rewards/rejected": -12.882293701171875, + "step": 12839 + }, + { + "epoch": 2.0, + "learning_rate": 4.729957985372686e-06, + "logits/chosen": -2.981534004211426, + "logits/rejected": -2.1454010009765625, + "logps/chosen": -438.7248229980469, + "logps/rejected": -548.96826171875, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.302104949951172, + "rewards/margins": 5.808921813964844, + "rewards/rejected": -13.111026763916016, + "step": 12840 + }, + { + "epoch": 2.0, + "learning_rate": 4.729224544841538e-06, + "logits/chosen": -3.1698389053344727, + "logits/rejected": -2.570680618286133, + "logps/chosen": -170.37181091308594, + "logps/rejected": -50.33477783203125, + "loss": 2.1394, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.796986103057861, + "rewards/margins": -1.9995378255844116, + "rewards/rejected": -3.79744815826416, + "step": 12841 + }, + { + "epoch": 2.0, + "learning_rate": 4.72849110431039e-06, + "logits/chosen": -3.0521020889282227, + "logits/rejected": -3.1082587242126465, + "logps/chosen": -46.910945892333984, + "logps/rejected": -212.64486694335938, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8639469146728516, + "rewards/margins": 8.978184700012207, + "rewards/rejected": -11.842132568359375, + "step": 12842 + }, + { + "epoch": 2.0, + "learning_rate": 4.727757663779242e-06, + "logits/chosen": -2.748305082321167, + "logits/rejected": -2.111046075820923, + "logps/chosen": -442.31768798828125, + "logps/rejected": -374.1275634765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.213377475738525, + "rewards/margins": 9.057321548461914, + "rewards/rejected": -13.270698547363281, + "step": 12843 + }, + { + "epoch": 2.0, + "learning_rate": 4.7270242232480935e-06, + "logits/chosen": -2.998814582824707, + "logits/rejected": -3.023080348968506, + "logps/chosen": -422.68951416015625, + "logps/rejected": -442.58502197265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.408197402954102, + "rewards/margins": 8.621297836303711, + "rewards/rejected": -14.029495239257812, + "step": 12844 + }, + { + "epoch": 2.0, + "learning_rate": 4.726290782716946e-06, + "logits/chosen": -0.5662268400192261, + "logits/rejected": -1.828059196472168, + "logps/chosen": -199.8441925048828, + "logps/rejected": -509.5114440917969, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.840466499328613, + "rewards/margins": 11.219036102294922, + "rewards/rejected": -17.05950355529785, + "step": 12845 + }, + { + "epoch": 2.0, + "learning_rate": 4.725557342185798e-06, + "logits/chosen": -2.9466798305511475, + "logits/rejected": -2.162717819213867, + "logps/chosen": -388.83929443359375, + "logps/rejected": -530.9060668945312, + "loss": 0.1743, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.237774848937988, + "rewards/margins": 4.029858112335205, + "rewards/rejected": -12.267633438110352, + "step": 12846 + }, + { + "epoch": 2.0, + "learning_rate": 4.72482390165465e-06, + "logits/chosen": -3.184476613998413, + "logits/rejected": -3.092022657394409, + "logps/chosen": -200.93092346191406, + "logps/rejected": -349.38232421875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.563892364501953, + "rewards/margins": 6.746404647827148, + "rewards/rejected": -12.310297012329102, + "step": 12847 + }, + { + "epoch": 2.0, + "learning_rate": 4.724090461123502e-06, + "logits/chosen": -1.9125614166259766, + "logits/rejected": -2.869094133377075, + "logps/chosen": -102.41357421875, + "logps/rejected": -320.7115173339844, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.272484302520752, + "rewards/margins": 5.855632305145264, + "rewards/rejected": -12.128116607666016, + "step": 12848 + }, + { + "epoch": 2.0, + "learning_rate": 4.723357020592355e-06, + "logits/chosen": -2.800908327102661, + "logits/rejected": -2.3675472736358643, + "logps/chosen": -322.4159240722656, + "logps/rejected": -306.65216064453125, + "loss": 0.2184, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.080556869506836, + "rewards/margins": 4.155730247497559, + "rewards/rejected": -10.236287117004395, + "step": 12849 + }, + { + "epoch": 2.0, + "learning_rate": 4.7226235800612065e-06, + "logits/chosen": -2.0575265884399414, + "logits/rejected": -2.0889194011688232, + "logps/chosen": -656.0518798828125, + "logps/rejected": -378.0720520019531, + "loss": 1.1246, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.616771697998047, + "rewards/margins": 3.0708489418029785, + "rewards/rejected": -11.687620162963867, + "step": 12850 + }, + { + "epoch": 2.0, + "learning_rate": 4.721890139530058e-06, + "logits/chosen": -1.113191843032837, + "logits/rejected": -1.4386732578277588, + "logps/chosen": -244.60256958007812, + "logps/rejected": -517.6273803710938, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.451367378234863, + "rewards/margins": 11.103189468383789, + "rewards/rejected": -16.554555892944336, + "step": 12851 + }, + { + "epoch": 2.0, + "learning_rate": 4.72115669899891e-06, + "logits/chosen": -1.978090524673462, + "logits/rejected": -2.732285261154175, + "logps/chosen": -213.2322540283203, + "logps/rejected": -329.259765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.127571105957031, + "rewards/margins": 8.61071491241455, + "rewards/rejected": -13.738286018371582, + "step": 12852 + }, + { + "epoch": 2.0, + "learning_rate": 4.720423258467762e-06, + "logits/chosen": -2.5775179862976074, + "logits/rejected": -2.8692216873168945, + "logps/chosen": -585.6803588867188, + "logps/rejected": -555.952392578125, + "loss": 1.9483, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.861774444580078, + "rewards/margins": 2.12204647064209, + "rewards/rejected": -10.983819961547852, + "step": 12853 + }, + { + "epoch": 2.0, + "learning_rate": 4.719689817936615e-06, + "logits/chosen": -1.8701066970825195, + "logits/rejected": -3.0237245559692383, + "logps/chosen": -112.0942153930664, + "logps/rejected": -490.14385986328125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.039932727813721, + "rewards/margins": 9.453472137451172, + "rewards/rejected": -16.493404388427734, + "step": 12854 + }, + { + "epoch": 2.0, + "learning_rate": 4.7189563774054676e-06, + "logits/chosen": -3.012453317642212, + "logits/rejected": -3.0073440074920654, + "logps/chosen": -129.58998107910156, + "logps/rejected": -120.29681396484375, + "loss": 0.0719, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9132585525512695, + "rewards/margins": 2.7741336822509766, + "rewards/rejected": -7.687392234802246, + "step": 12855 + }, + { + "epoch": 2.0, + "learning_rate": 4.7182229368743194e-06, + "logits/chosen": -2.8358707427978516, + "logits/rejected": -2.4549176692962646, + "logps/chosen": -202.23484802246094, + "logps/rejected": -213.25437927246094, + "loss": 0.4533, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.417184829711914, + "rewards/margins": 2.3428874015808105, + "rewards/rejected": -11.760071754455566, + "step": 12856 + }, + { + "epoch": 2.0, + "learning_rate": 4.717489496343171e-06, + "logits/chosen": -3.0669937133789062, + "logits/rejected": -3.0112545490264893, + "logps/chosen": -320.5787353515625, + "logps/rejected": -243.07464599609375, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.174210548400879, + "rewards/margins": 5.243340969085693, + "rewards/rejected": -11.417551040649414, + "step": 12857 + }, + { + "epoch": 2.0, + "learning_rate": 4.716756055812024e-06, + "logits/chosen": -2.048733711242676, + "logits/rejected": -2.945880651473999, + "logps/chosen": -98.31240844726562, + "logps/rejected": -273.1309814453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.703188896179199, + "rewards/margins": 8.273125648498535, + "rewards/rejected": -12.976314544677734, + "step": 12858 + }, + { + "epoch": 2.0, + "learning_rate": 4.716022615280876e-06, + "logits/chosen": -2.7388405799865723, + "logits/rejected": -2.9259145259857178, + "logps/chosen": -280.8453674316406, + "logps/rejected": -465.2986145019531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.330745816230774, + "rewards/margins": 13.905767440795898, + "rewards/rejected": -15.236513137817383, + "step": 12859 + }, + { + "epoch": 2.0, + "learning_rate": 4.715289174749728e-06, + "logits/chosen": -2.9987480640411377, + "logits/rejected": -2.6441376209259033, + "logps/chosen": -1167.6846923828125, + "logps/rejected": -701.3145751953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.652795314788818, + "rewards/margins": 8.596521377563477, + "rewards/rejected": -13.249316215515137, + "step": 12860 + }, + { + "epoch": 2.0, + "learning_rate": 4.71455573421858e-06, + "logits/chosen": -2.717709541320801, + "logits/rejected": -1.9979896545410156, + "logps/chosen": -205.96307373046875, + "logps/rejected": -278.2420654296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.248692035675049, + "rewards/margins": 10.941576957702637, + "rewards/rejected": -14.190268516540527, + "step": 12861 + }, + { + "epoch": 2.0, + "learning_rate": 4.713822293687432e-06, + "logits/chosen": -2.075479745864868, + "logits/rejected": -2.996056318283081, + "logps/chosen": -263.7850036621094, + "logps/rejected": -364.9710693359375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.586308479309082, + "rewards/margins": 7.718759536743164, + "rewards/rejected": -14.305068969726562, + "step": 12862 + }, + { + "epoch": 2.0, + "learning_rate": 4.713088853156284e-06, + "logits/chosen": -2.268754243850708, + "logits/rejected": -2.862607955932617, + "logps/chosen": -470.29473876953125, + "logps/rejected": -594.3661499023438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.17302131652832, + "rewards/margins": 12.395076751708984, + "rewards/rejected": -20.568098068237305, + "step": 12863 + }, + { + "epoch": 2.0, + "learning_rate": 4.712355412625136e-06, + "logits/chosen": -1.8960117101669312, + "logits/rejected": -2.7314822673797607, + "logps/chosen": -218.932861328125, + "logps/rejected": -479.45709228515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.808629512786865, + "rewards/margins": 8.393949508666992, + "rewards/rejected": -16.202579498291016, + "step": 12864 + }, + { + "epoch": 2.0, + "learning_rate": 4.711621972093988e-06, + "logits/chosen": -2.950762987136841, + "logits/rejected": -2.6339218616485596, + "logps/chosen": -418.78955078125, + "logps/rejected": -686.7927856445312, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.716208457946777, + "rewards/margins": 10.050019264221191, + "rewards/rejected": -14.766227722167969, + "step": 12865 + }, + { + "epoch": 2.0, + "learning_rate": 4.71088853156284e-06, + "logits/chosen": -2.7857916355133057, + "logits/rejected": -2.931333541870117, + "logps/chosen": -242.37167358398438, + "logps/rejected": -338.4271545410156, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.189009666442871, + "rewards/margins": 7.472068786621094, + "rewards/rejected": -11.661078453063965, + "step": 12866 + }, + { + "epoch": 2.0, + "learning_rate": 4.710155091031693e-06, + "logits/chosen": -1.64936363697052, + "logits/rejected": -2.7393083572387695, + "logps/chosen": -124.80776977539062, + "logps/rejected": -439.71136474609375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.087366104125977, + "rewards/margins": 11.913949012756348, + "rewards/rejected": -18.00131607055664, + "step": 12867 + }, + { + "epoch": 2.0, + "learning_rate": 4.7094216505005445e-06, + "logits/chosen": -2.3232805728912354, + "logits/rejected": -2.991790771484375, + "logps/chosen": -51.003929138183594, + "logps/rejected": -391.4114685058594, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.040524005889893, + "rewards/margins": 10.554197311401367, + "rewards/rejected": -14.594720840454102, + "step": 12868 + }, + { + "epoch": 2.0, + "learning_rate": 4.708688209969396e-06, + "logits/chosen": -2.8467164039611816, + "logits/rejected": -2.610513687133789, + "logps/chosen": -421.8407287597656, + "logps/rejected": -436.1696472167969, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.763983249664307, + "rewards/margins": 10.208456039428711, + "rewards/rejected": -15.972439765930176, + "step": 12869 + }, + { + "epoch": 2.0, + "learning_rate": 4.707954769438248e-06, + "logits/chosen": -2.834794759750366, + "logits/rejected": -2.922018051147461, + "logps/chosen": -446.94525146484375, + "logps/rejected": -557.0821533203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.739004135131836, + "rewards/margins": 13.806206703186035, + "rewards/rejected": -17.545209884643555, + "step": 12870 + }, + { + "epoch": 2.0, + "learning_rate": 4.707221328907101e-06, + "logits/chosen": -1.196860432624817, + "logits/rejected": -2.6218395233154297, + "logps/chosen": -158.175048828125, + "logps/rejected": -409.27593994140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.361205101013184, + "rewards/margins": 10.053672790527344, + "rewards/rejected": -15.414877891540527, + "step": 12871 + }, + { + "epoch": 2.0, + "learning_rate": 4.706487888375954e-06, + "logits/chosen": -2.703336000442505, + "logits/rejected": -2.8432934284210205, + "logps/chosen": -330.0540466308594, + "logps/rejected": -389.44818115234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.788824558258057, + "rewards/margins": 10.77662467956543, + "rewards/rejected": -15.565449714660645, + "step": 12872 + }, + { + "epoch": 2.0, + "learning_rate": 4.705754447844806e-06, + "logits/chosen": -2.196160316467285, + "logits/rejected": -3.2270753383636475, + "logps/chosen": -318.53759765625, + "logps/rejected": -400.4007873535156, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.731037139892578, + "rewards/margins": 7.660995960235596, + "rewards/rejected": -10.392032623291016, + "step": 12873 + }, + { + "epoch": 2.0, + "learning_rate": 4.7050210073136575e-06, + "logits/chosen": -3.2223525047302246, + "logits/rejected": -3.0565402507781982, + "logps/chosen": -132.07073974609375, + "logps/rejected": -135.10552978515625, + "loss": 0.4476, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.173868179321289, + "rewards/margins": 3.604379892349243, + "rewards/rejected": -8.778247833251953, + "step": 12874 + }, + { + "epoch": 2.0, + "learning_rate": 4.704287566782509e-06, + "logits/chosen": -3.0770533084869385, + "logits/rejected": -3.0006635189056396, + "logps/chosen": -160.25244140625, + "logps/rejected": -539.0220336914062, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.85444974899292, + "rewards/margins": 5.127756595611572, + "rewards/rejected": -10.982206344604492, + "step": 12875 + }, + { + "epoch": 2.0, + "learning_rate": 4.703554126251362e-06, + "logits/chosen": -1.5878037214279175, + "logits/rejected": -2.756033182144165, + "logps/chosen": -175.26275634765625, + "logps/rejected": -533.3494873046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.972268104553223, + "rewards/margins": 11.896465301513672, + "rewards/rejected": -17.868732452392578, + "step": 12876 + }, + { + "epoch": 2.0, + "learning_rate": 4.702820685720214e-06, + "logits/chosen": -3.100865125656128, + "logits/rejected": -1.4714138507843018, + "logps/chosen": -303.41864013671875, + "logps/rejected": -237.185302734375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.447691440582275, + "rewards/margins": 7.661408424377441, + "rewards/rejected": -12.109100341796875, + "step": 12877 + }, + { + "epoch": 2.0, + "learning_rate": 4.702087245189066e-06, + "logits/chosen": -2.081151008605957, + "logits/rejected": -2.652614116668701, + "logps/chosen": -155.87350463867188, + "logps/rejected": -213.89297485351562, + "loss": 1.9846, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.377927780151367, + "rewards/margins": 1.545142412185669, + "rewards/rejected": -9.923069953918457, + "step": 12878 + }, + { + "epoch": 2.0, + "learning_rate": 4.701353804657918e-06, + "logits/chosen": -2.0180981159210205, + "logits/rejected": -3.0287251472473145, + "logps/chosen": -83.55729675292969, + "logps/rejected": -384.05267333984375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.727922439575195, + "rewards/margins": 7.238873481750488, + "rewards/rejected": -11.966794967651367, + "step": 12879 + }, + { + "epoch": 2.0, + "learning_rate": 4.7006203641267705e-06, + "logits/chosen": -2.0651416778564453, + "logits/rejected": -3.0513243675231934, + "logps/chosen": -376.0989990234375, + "logps/rejected": -497.0523681640625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.375605583190918, + "rewards/margins": 7.537863731384277, + "rewards/rejected": -12.913469314575195, + "step": 12880 + }, + { + "epoch": 2.0, + "learning_rate": 4.699886923595622e-06, + "logits/chosen": -3.112957239151001, + "logits/rejected": -3.1102514266967773, + "logps/chosen": -506.9622802734375, + "logps/rejected": -367.36444091796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.446505069732666, + "rewards/margins": 8.527220726013184, + "rewards/rejected": -12.973725318908691, + "step": 12881 + }, + { + "epoch": 2.0, + "learning_rate": 4.699153483064474e-06, + "logits/chosen": -2.427687883377075, + "logits/rejected": -2.7208337783813477, + "logps/chosen": -332.6923522949219, + "logps/rejected": -288.6107177734375, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.132845878601074, + "rewards/margins": 4.667134761810303, + "rewards/rejected": -8.799981117248535, + "step": 12882 + }, + { + "epoch": 2.0, + "learning_rate": 4.698420042533326e-06, + "logits/chosen": -2.6678853034973145, + "logits/rejected": -2.7793962955474854, + "logps/chosen": -158.4002227783203, + "logps/rejected": -260.51336669921875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7155601978302, + "rewards/margins": 8.240053176879883, + "rewards/rejected": -10.95561408996582, + "step": 12883 + }, + { + "epoch": 2.0, + "learning_rate": 4.697686602002178e-06, + "logits/chosen": -2.1757712364196777, + "logits/rejected": -2.870152711868286, + "logps/chosen": -296.66741943359375, + "logps/rejected": -459.249267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.233034610748291, + "rewards/margins": 12.533666610717773, + "rewards/rejected": -15.766700744628906, + "step": 12884 + }, + { + "epoch": 2.0, + "learning_rate": 4.696953161471031e-06, + "logits/chosen": -2.420517921447754, + "logits/rejected": -2.9673118591308594, + "logps/chosen": -193.16317749023438, + "logps/rejected": -271.3272705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0178747177124023, + "rewards/margins": 9.391748428344727, + "rewards/rejected": -12.409624099731445, + "step": 12885 + }, + { + "epoch": 2.0, + "learning_rate": 4.6962197209398826e-06, + "logits/chosen": -1.245517611503601, + "logits/rejected": -3.014979839324951, + "logps/chosen": -141.54110717773438, + "logps/rejected": -530.87890625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.815944671630859, + "rewards/margins": 8.968961715698242, + "rewards/rejected": -16.7849063873291, + "step": 12886 + }, + { + "epoch": 2.0, + "learning_rate": 4.6954862804087345e-06, + "logits/chosen": -2.896449327468872, + "logits/rejected": -1.7292900085449219, + "logps/chosen": -720.804931640625, + "logps/rejected": -444.8328552246094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.093344211578369, + "rewards/margins": 10.296707153320312, + "rewards/rejected": -13.390050888061523, + "step": 12887 + }, + { + "epoch": 2.0, + "learning_rate": 4.694752839877587e-06, + "logits/chosen": -2.3567652702331543, + "logits/rejected": -2.8996455669403076, + "logps/chosen": -114.5912094116211, + "logps/rejected": -279.7332763671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.68129825592041, + "rewards/margins": 7.9529876708984375, + "rewards/rejected": -13.634284973144531, + "step": 12888 + }, + { + "epoch": 2.0, + "learning_rate": 4.69401939934644e-06, + "logits/chosen": -2.1341066360473633, + "logits/rejected": -3.0652031898498535, + "logps/chosen": -91.60576629638672, + "logps/rejected": -379.99554443359375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5868945121765137, + "rewards/margins": 6.652309417724609, + "rewards/rejected": -9.239204406738281, + "step": 12889 + }, + { + "epoch": 2.0, + "learning_rate": 4.693285958815292e-06, + "logits/chosen": -3.0061118602752686, + "logits/rejected": -2.6209449768066406, + "logps/chosen": -287.37750244140625, + "logps/rejected": -346.9085388183594, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.628556251525879, + "rewards/margins": 9.936274528503418, + "rewards/rejected": -12.564830780029297, + "step": 12890 + }, + { + "epoch": 2.0, + "learning_rate": 4.692552518284144e-06, + "logits/chosen": -1.7377562522888184, + "logits/rejected": -2.5984315872192383, + "logps/chosen": -261.0523376464844, + "logps/rejected": -453.21728515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.801606178283691, + "rewards/margins": 10.769277572631836, + "rewards/rejected": -15.570882797241211, + "step": 12891 + }, + { + "epoch": 2.0, + "learning_rate": 4.6918190777529955e-06, + "logits/chosen": -1.841163992881775, + "logits/rejected": -2.831611156463623, + "logps/chosen": -95.95288848876953, + "logps/rejected": -303.5604248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.219407081604004, + "rewards/margins": 10.861106872558594, + "rewards/rejected": -15.080514907836914, + "step": 12892 + }, + { + "epoch": 2.01, + "learning_rate": 4.691085637221847e-06, + "logits/chosen": -2.4131062030792236, + "logits/rejected": -3.038388729095459, + "logps/chosen": -61.98860549926758, + "logps/rejected": -351.1319274902344, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.581503868103027, + "rewards/margins": 8.563175201416016, + "rewards/rejected": -13.14468002319336, + "step": 12893 + }, + { + "epoch": 2.01, + "learning_rate": 4.6903521966907e-06, + "logits/chosen": -1.9544938802719116, + "logits/rejected": -3.006220579147339, + "logps/chosen": -129.09475708007812, + "logps/rejected": -416.6836853027344, + "loss": 0.0359, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.864874839782715, + "rewards/margins": 8.208953857421875, + "rewards/rejected": -16.073827743530273, + "step": 12894 + }, + { + "epoch": 2.01, + "learning_rate": 4.689618756159552e-06, + "logits/chosen": -2.0749316215515137, + "logits/rejected": -3.130892038345337, + "logps/chosen": -83.91986083984375, + "logps/rejected": -256.3432312011719, + "loss": 0.0573, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6776885986328125, + "rewards/margins": 6.01017427444458, + "rewards/rejected": -12.687862396240234, + "step": 12895 + }, + { + "epoch": 2.01, + "learning_rate": 4.688885315628404e-06, + "logits/chosen": -2.3534772396087646, + "logits/rejected": -2.9925873279571533, + "logps/chosen": -258.96484375, + "logps/rejected": -510.19287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.435990333557129, + "rewards/margins": 12.506742477416992, + "rewards/rejected": -15.942731857299805, + "step": 12896 + }, + { + "epoch": 2.01, + "learning_rate": 4.688151875097256e-06, + "logits/chosen": -1.619678020477295, + "logits/rejected": -2.9409303665161133, + "logps/chosen": -168.20726013183594, + "logps/rejected": -499.47650146484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.537816047668457, + "rewards/margins": 9.938760757446289, + "rewards/rejected": -15.476576805114746, + "step": 12897 + }, + { + "epoch": 2.01, + "learning_rate": 4.6874184345661085e-06, + "logits/chosen": -2.999685049057007, + "logits/rejected": -2.882035970687866, + "logps/chosen": -131.4697265625, + "logps/rejected": -199.82379150390625, + "loss": 0.967, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.693431854248047, + "rewards/margins": 2.6042308807373047, + "rewards/rejected": -11.297662734985352, + "step": 12898 + }, + { + "epoch": 2.01, + "learning_rate": 4.68668499403496e-06, + "logits/chosen": -1.689723014831543, + "logits/rejected": -2.9852874279022217, + "logps/chosen": -102.3758544921875, + "logps/rejected": -273.61370849609375, + "loss": 0.0648, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.796998023986816, + "rewards/margins": 9.26356315612793, + "rewards/rejected": -14.060562133789062, + "step": 12899 + }, + { + "epoch": 2.01, + "learning_rate": 4.685951553503812e-06, + "logits/chosen": -1.98209810256958, + "logits/rejected": -1.6550883054733276, + "logps/chosen": -306.63031005859375, + "logps/rejected": -245.8784637451172, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.26699161529541, + "rewards/margins": 7.251493453979492, + "rewards/rejected": -12.518486022949219, + "step": 12900 + }, + { + "epoch": 2.01, + "learning_rate": 4.685218112972664e-06, + "logits/chosen": -2.783599376678467, + "logits/rejected": -2.9335994720458984, + "logps/chosen": -254.81820678710938, + "logps/rejected": -357.68603515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2502880096435547, + "rewards/margins": 8.81261920928955, + "rewards/rejected": -12.062907218933105, + "step": 12901 + }, + { + "epoch": 2.01, + "learning_rate": 4.684484672441517e-06, + "logits/chosen": -2.7392044067382812, + "logits/rejected": -3.033405065536499, + "logps/chosen": -65.83247375488281, + "logps/rejected": -258.7968444824219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.422276496887207, + "rewards/margins": 9.095559120178223, + "rewards/rejected": -13.51783561706543, + "step": 12902 + }, + { + "epoch": 2.01, + "learning_rate": 4.683751231910369e-06, + "logits/chosen": -1.9017157554626465, + "logits/rejected": -3.0441346168518066, + "logps/chosen": -168.71685791015625, + "logps/rejected": -414.3342590332031, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.981606483459473, + "rewards/margins": 7.423364639282227, + "rewards/rejected": -12.404972076416016, + "step": 12903 + }, + { + "epoch": 2.01, + "learning_rate": 4.683017791379221e-06, + "logits/chosen": -2.1548423767089844, + "logits/rejected": -2.379696846008301, + "logps/chosen": -226.37350463867188, + "logps/rejected": -407.8040771484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.226545333862305, + "rewards/margins": 10.680603981018066, + "rewards/rejected": -15.907148361206055, + "step": 12904 + }, + { + "epoch": 2.01, + "learning_rate": 4.682284350848073e-06, + "logits/chosen": -2.8122782707214355, + "logits/rejected": -2.981502056121826, + "logps/chosen": -887.723876953125, + "logps/rejected": -803.3870849609375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.750309944152832, + "rewards/margins": 6.228654384613037, + "rewards/rejected": -11.978963851928711, + "step": 12905 + }, + { + "epoch": 2.01, + "learning_rate": 4.681550910316925e-06, + "logits/chosen": -2.892646551132202, + "logits/rejected": -3.0605719089508057, + "logps/chosen": -197.12905883789062, + "logps/rejected": -296.83282470703125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.183285713195801, + "rewards/margins": 8.464902877807617, + "rewards/rejected": -10.648189544677734, + "step": 12906 + }, + { + "epoch": 2.01, + "learning_rate": 4.680817469785778e-06, + "logits/chosen": -2.9116899967193604, + "logits/rejected": -1.4066309928894043, + "logps/chosen": -302.95050048828125, + "logps/rejected": -270.7319641113281, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.717720985412598, + "rewards/margins": 9.136064529418945, + "rewards/rejected": -14.853784561157227, + "step": 12907 + }, + { + "epoch": 2.01, + "learning_rate": 4.68008402925463e-06, + "logits/chosen": -2.9421303272247314, + "logits/rejected": -1.7471370697021484, + "logps/chosen": -473.1295166015625, + "logps/rejected": -299.8127746582031, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.344624042510986, + "rewards/margins": 5.871139049530029, + "rewards/rejected": -11.215763092041016, + "step": 12908 + }, + { + "epoch": 2.01, + "learning_rate": 4.679350588723482e-06, + "logits/chosen": -3.0348594188690186, + "logits/rejected": -2.349780321121216, + "logps/chosen": -653.9808349609375, + "logps/rejected": -386.4110107421875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1148834228515625, + "rewards/margins": 6.924673557281494, + "rewards/rejected": -12.039556503295898, + "step": 12909 + }, + { + "epoch": 2.01, + "learning_rate": 4.678617148192334e-06, + "logits/chosen": -3.049891233444214, + "logits/rejected": -2.4385557174682617, + "logps/chosen": -373.11712646484375, + "logps/rejected": -485.82000732421875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.636643409729004, + "rewards/margins": 7.0534138679504395, + "rewards/rejected": -11.690057754516602, + "step": 12910 + }, + { + "epoch": 2.01, + "learning_rate": 4.677883707661186e-06, + "logits/chosen": -1.5958861112594604, + "logits/rejected": -2.778217315673828, + "logps/chosen": -184.39175415039062, + "logps/rejected": -276.2459411621094, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.739097595214844, + "rewards/margins": 6.944571018218994, + "rewards/rejected": -12.68366813659668, + "step": 12911 + }, + { + "epoch": 2.01, + "learning_rate": 4.677150267130038e-06, + "logits/chosen": -2.419370174407959, + "logits/rejected": -2.8851232528686523, + "logps/chosen": -279.12298583984375, + "logps/rejected": -499.6924743652344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.618783950805664, + "rewards/margins": 11.811655044555664, + "rewards/rejected": -18.430438995361328, + "step": 12912 + }, + { + "epoch": 2.01, + "learning_rate": 4.67641682659889e-06, + "logits/chosen": -2.251107931137085, + "logits/rejected": -3.040271043777466, + "logps/chosen": -175.3004150390625, + "logps/rejected": -381.4005126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7559280395507812, + "rewards/margins": 11.769157409667969, + "rewards/rejected": -13.52508544921875, + "step": 12913 + }, + { + "epoch": 2.01, + "learning_rate": 4.675683386067742e-06, + "logits/chosen": -1.564033031463623, + "logits/rejected": -3.0278689861297607, + "logps/chosen": -213.53582763671875, + "logps/rejected": -577.5159301757812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7938332557678223, + "rewards/margins": 11.454248428344727, + "rewards/rejected": -14.248082160949707, + "step": 12914 + }, + { + "epoch": 2.01, + "learning_rate": 4.674949945536594e-06, + "logits/chosen": -2.3415443897247314, + "logits/rejected": -3.055753231048584, + "logps/chosen": -100.19439697265625, + "logps/rejected": -245.47085571289062, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.033202648162842, + "rewards/margins": 7.879948616027832, + "rewards/rejected": -11.913150787353516, + "step": 12915 + }, + { + "epoch": 2.01, + "learning_rate": 4.6742165050054465e-06, + "logits/chosen": -2.6602838039398193, + "logits/rejected": -2.7822775840759277, + "logps/chosen": -540.8544311523438, + "logps/rejected": -499.1075744628906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.889817714691162, + "rewards/margins": 9.744521141052246, + "rewards/rejected": -13.63433837890625, + "step": 12916 + }, + { + "epoch": 2.01, + "learning_rate": 4.673483064474298e-06, + "logits/chosen": -3.100072145462036, + "logits/rejected": -1.82310152053833, + "logps/chosen": -383.9096374511719, + "logps/rejected": -330.43072509765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3812665939331055, + "rewards/margins": 9.524444580078125, + "rewards/rejected": -13.905710220336914, + "step": 12917 + }, + { + "epoch": 2.01, + "learning_rate": 4.67274962394315e-06, + "logits/chosen": -2.2945473194122314, + "logits/rejected": -3.1644535064697266, + "logps/chosen": -128.6693878173828, + "logps/rejected": -353.0941467285156, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.895517349243164, + "rewards/margins": 7.20274543762207, + "rewards/rejected": -12.098262786865234, + "step": 12918 + }, + { + "epoch": 2.01, + "learning_rate": 4.672016183412002e-06, + "logits/chosen": -2.0126357078552246, + "logits/rejected": -2.931595802307129, + "logps/chosen": -217.0888214111328, + "logps/rejected": -464.8431396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.479654312133789, + "rewards/margins": 8.894976615905762, + "rewards/rejected": -13.374629974365234, + "step": 12919 + }, + { + "epoch": 2.01, + "learning_rate": 4.671282742880855e-06, + "logits/chosen": -0.7022554278373718, + "logits/rejected": -2.1540002822875977, + "logps/chosen": -239.375244140625, + "logps/rejected": -484.63702392578125, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.06838846206665, + "rewards/margins": 7.593167304992676, + "rewards/rejected": -13.661555290222168, + "step": 12920 + }, + { + "epoch": 2.01, + "learning_rate": 4.670549302349707e-06, + "logits/chosen": -1.5554563999176025, + "logits/rejected": -2.8453855514526367, + "logps/chosen": -179.184326171875, + "logps/rejected": -540.4664916992188, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.375934600830078, + "rewards/margins": 8.036373138427734, + "rewards/rejected": -12.412307739257812, + "step": 12921 + }, + { + "epoch": 2.01, + "learning_rate": 4.6698158618185595e-06, + "logits/chosen": -2.7385787963867188, + "logits/rejected": -2.7891125679016113, + "logps/chosen": -139.46302795410156, + "logps/rejected": -330.0191345214844, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.288543701171875, + "rewards/margins": 8.21743392944336, + "rewards/rejected": -15.505977630615234, + "step": 12922 + }, + { + "epoch": 2.01, + "learning_rate": 4.669082421287411e-06, + "logits/chosen": -2.981276273727417, + "logits/rejected": -1.272230625152588, + "logps/chosen": -868.2293090820312, + "logps/rejected": -558.9542846679688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8666443824768066, + "rewards/margins": 12.042211532592773, + "rewards/rejected": -13.908856391906738, + "step": 12923 + }, + { + "epoch": 2.01, + "learning_rate": 4.668348980756263e-06, + "logits/chosen": -2.9282617568969727, + "logits/rejected": -2.312006711959839, + "logps/chosen": -151.3167724609375, + "logps/rejected": -180.84713745117188, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2344402074813843, + "rewards/margins": 9.304449081420898, + "rewards/rejected": -10.538888931274414, + "step": 12924 + }, + { + "epoch": 2.01, + "learning_rate": 4.667615540225116e-06, + "logits/chosen": -2.3541347980499268, + "logits/rejected": -3.0534300804138184, + "logps/chosen": -181.41639709472656, + "logps/rejected": -316.8377990722656, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3149681091308594, + "rewards/margins": 7.145870208740234, + "rewards/rejected": -9.460838317871094, + "step": 12925 + }, + { + "epoch": 2.01, + "learning_rate": 4.666882099693968e-06, + "logits/chosen": -2.984361410140991, + "logits/rejected": -3.085653066635132, + "logps/chosen": -162.09051513671875, + "logps/rejected": -209.35598754882812, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.61441707611084, + "rewards/margins": 7.482039928436279, + "rewards/rejected": -11.096456527709961, + "step": 12926 + }, + { + "epoch": 2.01, + "learning_rate": 4.66614865916282e-06, + "logits/chosen": -1.8802233934402466, + "logits/rejected": -2.989478826522827, + "logps/chosen": -260.772216796875, + "logps/rejected": -453.4984130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.644042491912842, + "rewards/margins": 12.724428176879883, + "rewards/rejected": -16.368471145629883, + "step": 12927 + }, + { + "epoch": 2.01, + "learning_rate": 4.665415218631672e-06, + "logits/chosen": -1.4485938549041748, + "logits/rejected": -2.5709781646728516, + "logps/chosen": -163.25704956054688, + "logps/rejected": -397.4688720703125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.057578086853027, + "rewards/margins": 7.613517761230469, + "rewards/rejected": -14.671095848083496, + "step": 12928 + }, + { + "epoch": 2.01, + "learning_rate": 4.664681778100524e-06, + "logits/chosen": -2.163818836212158, + "logits/rejected": -2.8658182621002197, + "logps/chosen": -133.63796997070312, + "logps/rejected": -323.19580078125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.317049026489258, + "rewards/margins": 6.874020576477051, + "rewards/rejected": -13.191069602966309, + "step": 12929 + }, + { + "epoch": 2.01, + "learning_rate": 4.663948337569376e-06, + "logits/chosen": -2.38838791847229, + "logits/rejected": -3.16749906539917, + "logps/chosen": -66.28890228271484, + "logps/rejected": -254.19895935058594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.450893402099609, + "rewards/margins": 8.905557632446289, + "rewards/rejected": -14.356451034545898, + "step": 12930 + }, + { + "epoch": 2.01, + "learning_rate": 4.663214897038228e-06, + "logits/chosen": -3.1703221797943115, + "logits/rejected": -2.8959052562713623, + "logps/chosen": -138.16275024414062, + "logps/rejected": -171.13241577148438, + "loss": 2.1207, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.321889877319336, + "rewards/margins": 1.728447437286377, + "rewards/rejected": -7.050336837768555, + "step": 12931 + }, + { + "epoch": 2.01, + "learning_rate": 4.66248145650708e-06, + "logits/chosen": -1.4153975248336792, + "logits/rejected": -2.216324806213379, + "logps/chosen": -249.7071533203125, + "logps/rejected": -390.97552490234375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0898866653442383, + "rewards/margins": 7.102697372436523, + "rewards/rejected": -10.192584037780762, + "step": 12932 + }, + { + "epoch": 2.01, + "learning_rate": 4.661748015975932e-06, + "logits/chosen": -2.836390972137451, + "logits/rejected": -1.8483226299285889, + "logps/chosen": -505.2266845703125, + "logps/rejected": -456.46368408203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.72231912612915, + "rewards/margins": 9.435216903686523, + "rewards/rejected": -16.157535552978516, + "step": 12933 + }, + { + "epoch": 2.01, + "learning_rate": 4.661014575444785e-06, + "logits/chosen": -2.8815839290618896, + "logits/rejected": -2.003314256668091, + "logps/chosen": -255.376708984375, + "logps/rejected": -231.73228454589844, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.53139591217041, + "rewards/margins": 4.792298316955566, + "rewards/rejected": -11.323694229125977, + "step": 12934 + }, + { + "epoch": 2.01, + "learning_rate": 4.6602811349136365e-06, + "logits/chosen": -2.0989208221435547, + "logits/rejected": -3.0378756523132324, + "logps/chosen": -177.1191864013672, + "logps/rejected": -336.2371826171875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.200413227081299, + "rewards/margins": 7.621522903442383, + "rewards/rejected": -12.821935653686523, + "step": 12935 + }, + { + "epoch": 2.01, + "learning_rate": 4.659547694382488e-06, + "logits/chosen": -2.62699294090271, + "logits/rejected": -2.967092514038086, + "logps/chosen": -139.69873046875, + "logps/rejected": -148.71353149414062, + "loss": 0.9598, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.639270782470703, + "rewards/margins": 2.2176194190979004, + "rewards/rejected": -10.856889724731445, + "step": 12936 + }, + { + "epoch": 2.01, + "learning_rate": 4.65881425385134e-06, + "logits/chosen": -2.2791450023651123, + "logits/rejected": -2.037471294403076, + "logps/chosen": -319.71075439453125, + "logps/rejected": -438.86932373046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4154677391052246, + "rewards/margins": 11.537388801574707, + "rewards/rejected": -14.952856063842773, + "step": 12937 + }, + { + "epoch": 2.01, + "learning_rate": 4.658080813320193e-06, + "logits/chosen": -2.6329939365386963, + "logits/rejected": -3.013892889022827, + "logps/chosen": -209.87266540527344, + "logps/rejected": -453.8238525390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.600915908813477, + "rewards/margins": 10.804546356201172, + "rewards/rejected": -15.405462265014648, + "step": 12938 + }, + { + "epoch": 2.01, + "learning_rate": 4.657347372789046e-06, + "logits/chosen": -3.0631415843963623, + "logits/rejected": -3.099993944168091, + "logps/chosen": -98.13114166259766, + "logps/rejected": -311.017333984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.866628646850586, + "rewards/margins": 8.818631172180176, + "rewards/rejected": -12.685258865356445, + "step": 12939 + }, + { + "epoch": 2.01, + "learning_rate": 4.6566139322578976e-06, + "logits/chosen": -1.6960828304290771, + "logits/rejected": -0.7966791391372681, + "logps/chosen": -680.6314697265625, + "logps/rejected": -567.6589965820312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.672906875610352, + "rewards/margins": 9.067636489868164, + "rewards/rejected": -14.740543365478516, + "step": 12940 + }, + { + "epoch": 2.01, + "learning_rate": 4.6558804917267494e-06, + "logits/chosen": -1.300969123840332, + "logits/rejected": -2.7532405853271484, + "logps/chosen": -135.41372680664062, + "logps/rejected": -415.47271728515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.160604000091553, + "rewards/margins": 9.067553520202637, + "rewards/rejected": -16.22815704345703, + "step": 12941 + }, + { + "epoch": 2.01, + "learning_rate": 4.655147051195601e-06, + "logits/chosen": -1.565839409828186, + "logits/rejected": -2.7827658653259277, + "logps/chosen": -121.69296264648438, + "logps/rejected": -435.40948486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.048765659332275, + "rewards/margins": 11.727910995483398, + "rewards/rejected": -15.776676177978516, + "step": 12942 + }, + { + "epoch": 2.01, + "learning_rate": 4.654413610664454e-06, + "logits/chosen": -1.7585819959640503, + "logits/rejected": -2.7793242931365967, + "logps/chosen": -197.83651733398438, + "logps/rejected": -418.5977783203125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.047179698944092, + "rewards/margins": 6.849442481994629, + "rewards/rejected": -10.896621704101562, + "step": 12943 + }, + { + "epoch": 2.01, + "learning_rate": 4.653680170133306e-06, + "logits/chosen": -3.0621585845947266, + "logits/rejected": -2.5838377475738525, + "logps/chosen": -552.2102661132812, + "logps/rejected": -511.77447509765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.904423713684082, + "rewards/margins": 8.955484390258789, + "rewards/rejected": -12.859907150268555, + "step": 12944 + }, + { + "epoch": 2.01, + "learning_rate": 4.652946729602158e-06, + "logits/chosen": -2.900712490081787, + "logits/rejected": -3.1687371730804443, + "logps/chosen": -252.71226501464844, + "logps/rejected": -410.31890869140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.663130283355713, + "rewards/margins": 9.844482421875, + "rewards/rejected": -15.507612228393555, + "step": 12945 + }, + { + "epoch": 2.01, + "learning_rate": 4.65221328907101e-06, + "logits/chosen": -2.845390558242798, + "logits/rejected": -2.267667770385742, + "logps/chosen": -329.9250793457031, + "logps/rejected": -402.19488525390625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7599687576293945, + "rewards/margins": 8.238149642944336, + "rewards/rejected": -14.998117446899414, + "step": 12946 + }, + { + "epoch": 2.01, + "learning_rate": 4.651479848539862e-06, + "logits/chosen": -2.8716893196105957, + "logits/rejected": -1.781551480293274, + "logps/chosen": -587.1728515625, + "logps/rejected": -392.8251647949219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0791068077087402, + "rewards/margins": 10.364339828491211, + "rewards/rejected": -13.44344711303711, + "step": 12947 + }, + { + "epoch": 2.01, + "learning_rate": 4.650746408008714e-06, + "logits/chosen": -2.2465338706970215, + "logits/rejected": -3.063063621520996, + "logps/chosen": -95.23301696777344, + "logps/rejected": -380.04901123046875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.383305549621582, + "rewards/margins": 7.828908920288086, + "rewards/rejected": -12.212215423583984, + "step": 12948 + }, + { + "epoch": 2.01, + "learning_rate": 4.650012967477566e-06, + "logits/chosen": -3.0412662029266357, + "logits/rejected": -2.7427423000335693, + "logps/chosen": -364.0717468261719, + "logps/rejected": -312.28094482421875, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.011682510375977, + "rewards/margins": 4.973428726196289, + "rewards/rejected": -10.985111236572266, + "step": 12949 + }, + { + "epoch": 2.01, + "learning_rate": 4.649279526946418e-06, + "logits/chosen": -2.6462996006011963, + "logits/rejected": -3.0626955032348633, + "logps/chosen": -681.078857421875, + "logps/rejected": -539.0877685546875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.233765602111816, + "rewards/margins": 6.583367824554443, + "rewards/rejected": -10.817132949829102, + "step": 12950 + }, + { + "epoch": 2.01, + "learning_rate": 4.648546086415271e-06, + "logits/chosen": -3.1899468898773193, + "logits/rejected": -3.0308475494384766, + "logps/chosen": -86.65404510498047, + "logps/rejected": -197.0345458984375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.851059913635254, + "rewards/margins": 8.258753776550293, + "rewards/rejected": -10.109813690185547, + "step": 12951 + }, + { + "epoch": 2.01, + "learning_rate": 4.647812645884123e-06, + "logits/chosen": -2.4560296535491943, + "logits/rejected": -2.900799512863159, + "logps/chosen": -99.52291870117188, + "logps/rejected": -407.4852600097656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7242817878723145, + "rewards/margins": 7.715740203857422, + "rewards/rejected": -12.440022468566895, + "step": 12952 + }, + { + "epoch": 2.01, + "learning_rate": 4.6470792053529745e-06, + "logits/chosen": -1.9423112869262695, + "logits/rejected": -2.923884630203247, + "logps/chosen": -202.6116180419922, + "logps/rejected": -355.4558410644531, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.038327217102051, + "rewards/margins": 5.717748641967773, + "rewards/rejected": -10.75607681274414, + "step": 12953 + }, + { + "epoch": 2.01, + "learning_rate": 4.646345764821826e-06, + "logits/chosen": -3.0032618045806885, + "logits/rejected": -1.9333465099334717, + "logps/chosen": -291.5350036621094, + "logps/rejected": -247.54852294921875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2803916931152344, + "rewards/margins": 7.71101188659668, + "rewards/rejected": -10.991403579711914, + "step": 12954 + }, + { + "epoch": 2.01, + "learning_rate": 4.645612324290679e-06, + "logits/chosen": -2.84851336479187, + "logits/rejected": -2.1826066970825195, + "logps/chosen": -137.86280822753906, + "logps/rejected": -245.69625854492188, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.422902822494507, + "rewards/margins": 8.683035850524902, + "rewards/rejected": -11.105938911437988, + "step": 12955 + }, + { + "epoch": 2.01, + "learning_rate": 4.644878883759532e-06, + "logits/chosen": -2.1273951530456543, + "logits/rejected": -3.1582818031311035, + "logps/chosen": -135.74745178222656, + "logps/rejected": -441.93243408203125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7127366065979004, + "rewards/margins": 10.187767028808594, + "rewards/rejected": -12.900503158569336, + "step": 12956 + }, + { + "epoch": 2.02, + "learning_rate": 4.644145443228384e-06, + "logits/chosen": -3.0572879314422607, + "logits/rejected": -2.2035579681396484, + "logps/chosen": -213.8712158203125, + "logps/rejected": -246.7244873046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.579179286956787, + "rewards/margins": 9.24029541015625, + "rewards/rejected": -14.819475173950195, + "step": 12957 + }, + { + "epoch": 2.02, + "learning_rate": 4.643412002697236e-06, + "logits/chosen": -2.9413416385650635, + "logits/rejected": -1.780903697013855, + "logps/chosen": -543.6129760742188, + "logps/rejected": -372.90850830078125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.828593730926514, + "rewards/margins": 6.168026447296143, + "rewards/rejected": -12.996620178222656, + "step": 12958 + }, + { + "epoch": 2.02, + "learning_rate": 4.6426785621660875e-06, + "logits/chosen": -1.5747636556625366, + "logits/rejected": -2.938645124435425, + "logps/chosen": -103.0882797241211, + "logps/rejected": -475.3443603515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9826810359954834, + "rewards/margins": 14.298166275024414, + "rewards/rejected": -18.280847549438477, + "step": 12959 + }, + { + "epoch": 2.02, + "learning_rate": 4.64194512163494e-06, + "logits/chosen": -3.0630502700805664, + "logits/rejected": -2.5199029445648193, + "logps/chosen": -172.96084594726562, + "logps/rejected": -118.01307678222656, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9583139419555664, + "rewards/margins": 5.962253093719482, + "rewards/rejected": -9.92056655883789, + "step": 12960 + }, + { + "epoch": 2.02, + "learning_rate": 4.641211681103792e-06, + "logits/chosen": -3.1307852268218994, + "logits/rejected": -2.771315574645996, + "logps/chosen": -134.62783813476562, + "logps/rejected": -211.57962036132812, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7800030708312988, + "rewards/margins": 7.550650596618652, + "rewards/rejected": -9.33065414428711, + "step": 12961 + }, + { + "epoch": 2.02, + "learning_rate": 4.640478240572644e-06, + "logits/chosen": -2.9274375438690186, + "logits/rejected": -3.030404806137085, + "logps/chosen": -149.44345092773438, + "logps/rejected": -256.7190246582031, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.832343339920044, + "rewards/margins": 8.208542823791504, + "rewards/rejected": -11.040885925292969, + "step": 12962 + }, + { + "epoch": 2.02, + "learning_rate": 4.639744800041496e-06, + "logits/chosen": -2.7661848068237305, + "logits/rejected": -2.949348211288452, + "logps/chosen": -113.00614929199219, + "logps/rejected": -235.01795959472656, + "loss": 0.1406, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.146693229675293, + "rewards/margins": 3.1278629302978516, + "rewards/rejected": -9.274556159973145, + "step": 12963 + }, + { + "epoch": 2.02, + "learning_rate": 4.639011359510348e-06, + "logits/chosen": -2.277782917022705, + "logits/rejected": -3.1105844974517822, + "logps/chosen": -79.90335845947266, + "logps/rejected": -288.770751953125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.275677680969238, + "rewards/margins": 7.028508186340332, + "rewards/rejected": -11.30418586730957, + "step": 12964 + }, + { + "epoch": 2.02, + "learning_rate": 4.6382779189792004e-06, + "logits/chosen": -3.092836618423462, + "logits/rejected": -3.0343809127807617, + "logps/chosen": -171.64566040039062, + "logps/rejected": -192.58778381347656, + "loss": 0.3811, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.288852691650391, + "rewards/margins": 3.878410577774048, + "rewards/rejected": -10.16726303100586, + "step": 12965 + }, + { + "epoch": 2.02, + "learning_rate": 4.637544478448052e-06, + "logits/chosen": -2.4421842098236084, + "logits/rejected": -2.9802255630493164, + "logps/chosen": -75.81681823730469, + "logps/rejected": -198.81149291992188, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.515098571777344, + "rewards/margins": 3.932507276535034, + "rewards/rejected": -10.447606086730957, + "step": 12966 + }, + { + "epoch": 2.02, + "learning_rate": 4.636811037916904e-06, + "logits/chosen": -2.221456289291382, + "logits/rejected": -2.9162890911102295, + "logps/chosen": -306.5247802734375, + "logps/rejected": -788.9298706054688, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.417818069458008, + "rewards/margins": 7.97130012512207, + "rewards/rejected": -15.389118194580078, + "step": 12967 + }, + { + "epoch": 2.02, + "learning_rate": 4.636077597385756e-06, + "logits/chosen": -2.1354422569274902, + "logits/rejected": -2.8978519439697266, + "logps/chosen": -352.25567626953125, + "logps/rejected": -489.110595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1634018421173096, + "rewards/margins": 10.758737564086914, + "rewards/rejected": -13.922139167785645, + "step": 12968 + }, + { + "epoch": 2.02, + "learning_rate": 4.635344156854609e-06, + "logits/chosen": -2.0751595497131348, + "logits/rejected": -2.8648271560668945, + "logps/chosen": -208.60719299316406, + "logps/rejected": -416.02178955078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.321844577789307, + "rewards/margins": 9.250545501708984, + "rewards/rejected": -14.572389602661133, + "step": 12969 + }, + { + "epoch": 2.02, + "learning_rate": 4.634610716323461e-06, + "logits/chosen": -2.570082664489746, + "logits/rejected": -3.183807373046875, + "logps/chosen": -527.70166015625, + "logps/rejected": -654.031005859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9496200680732727, + "rewards/margins": 11.49635124206543, + "rewards/rejected": -12.44597053527832, + "step": 12970 + }, + { + "epoch": 2.02, + "learning_rate": 4.6338772757923126e-06, + "logits/chosen": -3.037442684173584, + "logits/rejected": -1.8213661909103394, + "logps/chosen": -153.76519775390625, + "logps/rejected": -159.53372192382812, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.404596328735352, + "rewards/margins": 7.6196818351745605, + "rewards/rejected": -12.02427864074707, + "step": 12971 + }, + { + "epoch": 2.02, + "learning_rate": 4.633143835261165e-06, + "logits/chosen": -2.4468555450439453, + "logits/rejected": -2.6147406101226807, + "logps/chosen": -119.99261474609375, + "logps/rejected": -260.94390869140625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.12465238571167, + "rewards/margins": 7.918166160583496, + "rewards/rejected": -13.042818069458008, + "step": 12972 + }, + { + "epoch": 2.02, + "learning_rate": 4.632410394730017e-06, + "logits/chosen": -2.9978790283203125, + "logits/rejected": -3.043729066848755, + "logps/chosen": -159.05223083496094, + "logps/rejected": -235.26043701171875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.090760707855225, + "rewards/margins": 7.107237339019775, + "rewards/rejected": -11.197998046875, + "step": 12973 + }, + { + "epoch": 2.02, + "learning_rate": 4.63167695419887e-06, + "logits/chosen": -2.2891831398010254, + "logits/rejected": -2.9887301921844482, + "logps/chosen": -117.45954895019531, + "logps/rejected": -240.45640563964844, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2928690910339355, + "rewards/margins": 4.302093029022217, + "rewards/rejected": -10.594962120056152, + "step": 12974 + }, + { + "epoch": 2.02, + "learning_rate": 4.630943513667722e-06, + "logits/chosen": -2.9071829319000244, + "logits/rejected": -1.802993893623352, + "logps/chosen": -471.9866943359375, + "logps/rejected": -487.63824462890625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.935674667358398, + "rewards/margins": 8.055230140686035, + "rewards/rejected": -14.990904808044434, + "step": 12975 + }, + { + "epoch": 2.02, + "learning_rate": 4.630210073136574e-06, + "logits/chosen": -1.5601472854614258, + "logits/rejected": -2.2740423679351807, + "logps/chosen": -187.59835815429688, + "logps/rejected": -354.4986572265625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.771695137023926, + "rewards/margins": 6.003465175628662, + "rewards/rejected": -11.77515983581543, + "step": 12976 + }, + { + "epoch": 2.02, + "learning_rate": 4.6294766326054255e-06, + "logits/chosen": -3.032224178314209, + "logits/rejected": -2.5447018146514893, + "logps/chosen": -411.7748718261719, + "logps/rejected": -358.140380859375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4644060134887695, + "rewards/margins": 6.736289024353027, + "rewards/rejected": -10.200695037841797, + "step": 12977 + }, + { + "epoch": 2.02, + "learning_rate": 4.628743192074278e-06, + "logits/chosen": -1.39028799533844, + "logits/rejected": -2.6612601280212402, + "logps/chosen": -199.1947021484375, + "logps/rejected": -489.3699035644531, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.405031204223633, + "rewards/margins": 7.819528579711914, + "rewards/rejected": -15.224559783935547, + "step": 12978 + }, + { + "epoch": 2.02, + "learning_rate": 4.62800975154313e-06, + "logits/chosen": -1.9111976623535156, + "logits/rejected": -2.8376922607421875, + "logps/chosen": -136.89315795898438, + "logps/rejected": -357.5449523925781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.011446952819824, + "rewards/margins": 9.234983444213867, + "rewards/rejected": -14.246430397033691, + "step": 12979 + }, + { + "epoch": 2.02, + "learning_rate": 4.627276311011982e-06, + "logits/chosen": -1.5139341354370117, + "logits/rejected": -2.3206615447998047, + "logps/chosen": -362.6107177734375, + "logps/rejected": -698.908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5344085693359375, + "rewards/margins": 20.512306213378906, + "rewards/rejected": -26.046714782714844, + "step": 12980 + }, + { + "epoch": 2.02, + "learning_rate": 4.626542870480834e-06, + "logits/chosen": -1.2195733785629272, + "logits/rejected": -2.9360477924346924, + "logps/chosen": -66.32789611816406, + "logps/rejected": -755.8965454101562, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.842400074005127, + "rewards/margins": 8.77721118927002, + "rewards/rejected": -13.619611740112305, + "step": 12981 + }, + { + "epoch": 2.02, + "learning_rate": 4.625809429949686e-06, + "logits/chosen": -2.8582184314727783, + "logits/rejected": -1.896838903427124, + "logps/chosen": -303.252197265625, + "logps/rejected": -319.6705322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.750272274017334, + "rewards/margins": 11.058748245239258, + "rewards/rejected": -13.80902099609375, + "step": 12982 + }, + { + "epoch": 2.02, + "learning_rate": 4.6250759894185385e-06, + "logits/chosen": -2.7282118797302246, + "logits/rejected": -3.0619776248931885, + "logps/chosen": -120.67041015625, + "logps/rejected": -366.4234619140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7956390380859375, + "rewards/margins": 9.565065383911133, + "rewards/rejected": -15.36070442199707, + "step": 12983 + }, + { + "epoch": 2.02, + "learning_rate": 4.62434254888739e-06, + "logits/chosen": -2.705573797225952, + "logits/rejected": -2.8557324409484863, + "logps/chosen": -469.0256652832031, + "logps/rejected": -490.68206787109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.960678577423096, + "rewards/margins": 10.018318176269531, + "rewards/rejected": -14.978997230529785, + "step": 12984 + }, + { + "epoch": 2.02, + "learning_rate": 4.623609108356242e-06, + "logits/chosen": -3.111671209335327, + "logits/rejected": -3.0131025314331055, + "logps/chosen": -109.79875946044922, + "logps/rejected": -161.7761688232422, + "loss": 0.2679, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.688770771026611, + "rewards/margins": 4.065672874450684, + "rewards/rejected": -9.754444122314453, + "step": 12985 + }, + { + "epoch": 2.02, + "learning_rate": 4.622875667825094e-06, + "logits/chosen": -2.460359573364258, + "logits/rejected": -2.9012978076934814, + "logps/chosen": -419.1855163574219, + "logps/rejected": -339.34686279296875, + "loss": 0.3228, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.75483512878418, + "rewards/margins": 3.7223997116088867, + "rewards/rejected": -10.477234840393066, + "step": 12986 + }, + { + "epoch": 2.02, + "learning_rate": 4.622142227293947e-06, + "logits/chosen": -3.0647099018096924, + "logits/rejected": -2.9528727531433105, + "logps/chosen": -261.523681640625, + "logps/rejected": -231.0728759765625, + "loss": 0.8089, + "rewards/accuracies": 0.5, + "rewards/chosen": -4.026011943817139, + "rewards/margins": 4.064573764801025, + "rewards/rejected": -8.090585708618164, + "step": 12987 + }, + { + "epoch": 2.02, + "learning_rate": 4.621408786762799e-06, + "logits/chosen": -2.938190221786499, + "logits/rejected": -1.9334272146224976, + "logps/chosen": -508.9644775390625, + "logps/rejected": -332.05987548828125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6660261154174805, + "rewards/margins": 6.780896186828613, + "rewards/rejected": -13.446922302246094, + "step": 12988 + }, + { + "epoch": 2.02, + "learning_rate": 4.6206753462316514e-06, + "logits/chosen": -2.8477911949157715, + "logits/rejected": -2.752631902694702, + "logps/chosen": -472.6363525390625, + "logps/rejected": -586.4073486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.629438877105713, + "rewards/margins": 13.346321105957031, + "rewards/rejected": -18.97576141357422, + "step": 12989 + }, + { + "epoch": 2.02, + "learning_rate": 4.619941905700503e-06, + "logits/chosen": -2.336744546890259, + "logits/rejected": -3.0497355461120605, + "logps/chosen": -447.3428039550781, + "logps/rejected": -731.6578369140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.640972137451172, + "rewards/margins": 10.109317779541016, + "rewards/rejected": -15.750289916992188, + "step": 12990 + }, + { + "epoch": 2.02, + "learning_rate": 4.619208465169355e-06, + "logits/chosen": -1.9312458038330078, + "logits/rejected": -2.697821855545044, + "logps/chosen": -137.76544189453125, + "logps/rejected": -351.21978759765625, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.857614517211914, + "rewards/margins": 9.700654983520508, + "rewards/rejected": -14.558269500732422, + "step": 12991 + }, + { + "epoch": 2.02, + "learning_rate": 4.618475024638208e-06, + "logits/chosen": -1.8317408561706543, + "logits/rejected": -2.6755239963531494, + "logps/chosen": -183.3106689453125, + "logps/rejected": -361.1424865722656, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.474847793579102, + "rewards/margins": 7.371349811553955, + "rewards/rejected": -18.84619903564453, + "step": 12992 + }, + { + "epoch": 2.02, + "learning_rate": 4.61774158410706e-06, + "logits/chosen": -2.2377243041992188, + "logits/rejected": -2.754049301147461, + "logps/chosen": -459.6607971191406, + "logps/rejected": -473.3765869140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.795260429382324, + "rewards/margins": 9.743453979492188, + "rewards/rejected": -16.538715362548828, + "step": 12993 + }, + { + "epoch": 2.02, + "learning_rate": 4.617008143575912e-06, + "logits/chosen": -2.321132183074951, + "logits/rejected": -2.0860116481781006, + "logps/chosen": -325.64556884765625, + "logps/rejected": -462.47039794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.277412414550781, + "rewards/margins": 9.185514450073242, + "rewards/rejected": -14.462926864624023, + "step": 12994 + }, + { + "epoch": 2.02, + "learning_rate": 4.6162747030447636e-06, + "logits/chosen": -1.6811610460281372, + "logits/rejected": -2.518890857696533, + "logps/chosen": -182.23825073242188, + "logps/rejected": -355.97344970703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.96386194229126, + "rewards/margins": 8.557082176208496, + "rewards/rejected": -13.520944595336914, + "step": 12995 + }, + { + "epoch": 2.02, + "learning_rate": 4.615541262513616e-06, + "logits/chosen": -2.7238330841064453, + "logits/rejected": -2.0635106563568115, + "logps/chosen": -282.37677001953125, + "logps/rejected": -190.58627319335938, + "loss": 1.0755, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.867459297180176, + "rewards/margins": 4.146369934082031, + "rewards/rejected": -12.013829231262207, + "step": 12996 + }, + { + "epoch": 2.02, + "learning_rate": 4.614807821982468e-06, + "logits/chosen": -2.1565122604370117, + "logits/rejected": -3.0085082054138184, + "logps/chosen": -332.33746337890625, + "logps/rejected": -417.4005126953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.180427551269531, + "rewards/margins": 10.673460960388184, + "rewards/rejected": -16.8538875579834, + "step": 12997 + }, + { + "epoch": 2.02, + "learning_rate": 4.61407438145132e-06, + "logits/chosen": -2.6326730251312256, + "logits/rejected": -2.9214813709259033, + "logps/chosen": -380.46893310546875, + "logps/rejected": -575.0167846679688, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.838486433029175, + "rewards/margins": 6.060044288635254, + "rewards/rejected": -9.898530960083008, + "step": 12998 + }, + { + "epoch": 2.02, + "learning_rate": 4.613340940920172e-06, + "logits/chosen": -2.029466152191162, + "logits/rejected": -2.1719491481781006, + "logps/chosen": -296.65313720703125, + "logps/rejected": -516.7951049804688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.162980079650879, + "rewards/margins": 10.48824405670166, + "rewards/rejected": -16.65122413635254, + "step": 12999 + }, + { + "epoch": 2.02, + "learning_rate": 4.612607500389025e-06, + "logits/chosen": -2.6630518436431885, + "logits/rejected": -2.3638482093811035, + "logps/chosen": -238.39927673339844, + "logps/rejected": -179.82623291015625, + "loss": 0.2801, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.395674228668213, + "rewards/margins": 2.4199252128601074, + "rewards/rejected": -8.81559944152832, + "step": 13000 + }, + { + "epoch": 2.02, + "learning_rate": 4.6118740598578765e-06, + "logits/chosen": -1.9667242765426636, + "logits/rejected": -2.91265869140625, + "logps/chosen": -311.31939697265625, + "logps/rejected": -664.066650390625, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.756866455078125, + "rewards/margins": 9.371282577514648, + "rewards/rejected": -18.128149032592773, + "step": 13001 + }, + { + "epoch": 2.02, + "learning_rate": 4.611140619326728e-06, + "logits/chosen": -1.4102526903152466, + "logits/rejected": -2.4664087295532227, + "logps/chosen": -196.37789916992188, + "logps/rejected": -436.8974914550781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.367828369140625, + "rewards/margins": 11.551385879516602, + "rewards/rejected": -16.919214248657227, + "step": 13002 + }, + { + "epoch": 2.02, + "learning_rate": 4.61040717879558e-06, + "logits/chosen": -1.673500895500183, + "logits/rejected": -2.742438793182373, + "logps/chosen": -115.34782409667969, + "logps/rejected": -217.15785217285156, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.745648384094238, + "rewards/margins": 4.884683609008789, + "rewards/rejected": -10.630331039428711, + "step": 13003 + }, + { + "epoch": 2.02, + "learning_rate": 4.609673738264432e-06, + "logits/chosen": -2.330043077468872, + "logits/rejected": -2.3017754554748535, + "logps/chosen": -301.8548583984375, + "logps/rejected": -397.0773620605469, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.766781806945801, + "rewards/margins": 9.669443130493164, + "rewards/rejected": -17.43622589111328, + "step": 13004 + }, + { + "epoch": 2.02, + "learning_rate": 4.608940297733285e-06, + "logits/chosen": -2.9051661491394043, + "logits/rejected": -2.9120707511901855, + "logps/chosen": -299.4833984375, + "logps/rejected": -297.9483337402344, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.719752311706543, + "rewards/margins": 7.208822727203369, + "rewards/rejected": -12.92857551574707, + "step": 13005 + }, + { + "epoch": 2.02, + "learning_rate": 4.608206857202138e-06, + "logits/chosen": -2.8128864765167236, + "logits/rejected": -3.0826470851898193, + "logps/chosen": -67.19135284423828, + "logps/rejected": -222.38421630859375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7693123817443848, + "rewards/margins": 7.056110382080078, + "rewards/rejected": -9.825422286987305, + "step": 13006 + }, + { + "epoch": 2.02, + "learning_rate": 4.6074734166709895e-06, + "logits/chosen": -2.3165717124938965, + "logits/rejected": -3.0353810787200928, + "logps/chosen": -211.43295288085938, + "logps/rejected": -1075.9244384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3014068603515625, + "rewards/margins": 10.95806884765625, + "rewards/rejected": -18.259475708007812, + "step": 13007 + }, + { + "epoch": 2.02, + "learning_rate": 4.606739976139841e-06, + "logits/chosen": -2.386389970779419, + "logits/rejected": -2.9795379638671875, + "logps/chosen": -408.81329345703125, + "logps/rejected": -621.6656494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.728867530822754, + "rewards/margins": 12.001181602478027, + "rewards/rejected": -19.73004913330078, + "step": 13008 + }, + { + "epoch": 2.02, + "learning_rate": 4.606006535608694e-06, + "logits/chosen": -2.3858797550201416, + "logits/rejected": -3.102065086364746, + "logps/chosen": -206.29119873046875, + "logps/rejected": -466.3821105957031, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.981122970581055, + "rewards/margins": 9.811814308166504, + "rewards/rejected": -15.792937278747559, + "step": 13009 + }, + { + "epoch": 2.02, + "learning_rate": 4.605273095077546e-06, + "logits/chosen": -1.8947656154632568, + "logits/rejected": -3.0467023849487305, + "logps/chosen": -141.397216796875, + "logps/rejected": -307.20050048828125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.440918922424316, + "rewards/margins": 8.248229026794434, + "rewards/rejected": -13.68914794921875, + "step": 13010 + }, + { + "epoch": 2.02, + "learning_rate": 4.604539654546398e-06, + "logits/chosen": -2.8932876586914062, + "logits/rejected": -3.039199113845825, + "logps/chosen": -281.7767333984375, + "logps/rejected": -372.19000244140625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.447568893432617, + "rewards/margins": 6.461812973022461, + "rewards/rejected": -10.909381866455078, + "step": 13011 + }, + { + "epoch": 2.02, + "learning_rate": 4.60380621401525e-06, + "logits/chosen": -2.7299811840057373, + "logits/rejected": -1.6962883472442627, + "logps/chosen": -499.49462890625, + "logps/rejected": -462.8265380859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.802812576293945, + "rewards/margins": 9.40600299835205, + "rewards/rejected": -17.20881462097168, + "step": 13012 + }, + { + "epoch": 2.02, + "learning_rate": 4.603072773484102e-06, + "logits/chosen": -1.917320966720581, + "logits/rejected": -2.7940514087677, + "logps/chosen": -141.3319091796875, + "logps/rejected": -215.31182861328125, + "loss": 1.013, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.118885040283203, + "rewards/margins": 2.2208027839660645, + "rewards/rejected": -14.339688301086426, + "step": 13013 + }, + { + "epoch": 2.02, + "learning_rate": 4.602339332952954e-06, + "logits/chosen": -2.9776625633239746, + "logits/rejected": -3.166062831878662, + "logps/chosen": -168.74319458007812, + "logps/rejected": -276.08489990234375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.389317750930786, + "rewards/margins": 7.41778039932251, + "rewards/rejected": -10.807098388671875, + "step": 13014 + }, + { + "epoch": 2.02, + "learning_rate": 4.601605892421806e-06, + "logits/chosen": -2.6961123943328857, + "logits/rejected": -2.9827592372894287, + "logps/chosen": -143.34078979492188, + "logps/rejected": -226.91256713867188, + "loss": 0.0626, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.166403770446777, + "rewards/margins": 5.641005039215088, + "rewards/rejected": -9.807409286499023, + "step": 13015 + }, + { + "epoch": 2.02, + "learning_rate": 4.600872451890658e-06, + "logits/chosen": -2.207211494445801, + "logits/rejected": -2.836430072784424, + "logps/chosen": -324.4593811035156, + "logps/rejected": -473.1617736816406, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.265765190124512, + "rewards/margins": 7.5637736320495605, + "rewards/rejected": -13.82953929901123, + "step": 13016 + }, + { + "epoch": 2.02, + "learning_rate": 4.60013901135951e-06, + "logits/chosen": -2.8963217735290527, + "logits/rejected": -2.3329977989196777, + "logps/chosen": -919.1860961914062, + "logps/rejected": -438.8687438964844, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.590542793273926, + "rewards/margins": 7.7021331787109375, + "rewards/rejected": -12.292675971984863, + "step": 13017 + }, + { + "epoch": 2.02, + "learning_rate": 4.599405570828363e-06, + "logits/chosen": -2.6064271926879883, + "logits/rejected": -3.000943422317505, + "logps/chosen": -483.6351013183594, + "logps/rejected": -515.8857421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.509362697601318, + "rewards/margins": 9.243446350097656, + "rewards/rejected": -13.752808570861816, + "step": 13018 + }, + { + "epoch": 2.02, + "learning_rate": 4.5986721302972146e-06, + "logits/chosen": -3.0102667808532715, + "logits/rejected": -2.7420361042022705, + "logps/chosen": -430.411865234375, + "logps/rejected": -428.59130859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.428589344024658, + "rewards/margins": 8.14809799194336, + "rewards/rejected": -13.57668685913086, + "step": 13019 + }, + { + "epoch": 2.02, + "learning_rate": 4.5979386897660665e-06, + "logits/chosen": -0.856883704662323, + "logits/rejected": -1.9432673454284668, + "logps/chosen": -203.0352783203125, + "logps/rejected": -549.1317749023438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.958294868469238, + "rewards/margins": 14.43680477142334, + "rewards/rejected": -19.395099639892578, + "step": 13020 + }, + { + "epoch": 2.03, + "learning_rate": 4.597205249234918e-06, + "logits/chosen": -1.8175345659255981, + "logits/rejected": -2.5879902839660645, + "logps/chosen": -455.676025390625, + "logps/rejected": -441.8185729980469, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.873223304748535, + "rewards/margins": 9.318422317504883, + "rewards/rejected": -16.191646575927734, + "step": 13021 + }, + { + "epoch": 2.03, + "learning_rate": 4.596471808703771e-06, + "logits/chosen": -3.1611015796661377, + "logits/rejected": -3.2035512924194336, + "logps/chosen": -558.6195678710938, + "logps/rejected": -819.819580078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.615912675857544, + "rewards/margins": 12.192322731018066, + "rewards/rejected": -15.808235168457031, + "step": 13022 + }, + { + "epoch": 2.03, + "learning_rate": 4.595738368172624e-06, + "logits/chosen": -1.0984333753585815, + "logits/rejected": -1.3604248762130737, + "logps/chosen": -448.5365905761719, + "logps/rejected": -594.5281982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.876590251922607, + "rewards/margins": 11.769720077514648, + "rewards/rejected": -16.646310806274414, + "step": 13023 + }, + { + "epoch": 2.03, + "learning_rate": 4.595004927641476e-06, + "logits/chosen": -2.2527637481689453, + "logits/rejected": -2.694669246673584, + "logps/chosen": -294.8131408691406, + "logps/rejected": -458.1117248535156, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.876044273376465, + "rewards/margins": 7.404874324798584, + "rewards/rejected": -16.28091812133789, + "step": 13024 + }, + { + "epoch": 2.03, + "learning_rate": 4.5942714871103275e-06, + "logits/chosen": -3.0177626609802246, + "logits/rejected": -3.259925603866577, + "logps/chosen": -80.85723114013672, + "logps/rejected": -253.5155029296875, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.717187881469727, + "rewards/margins": 6.552441596984863, + "rewards/rejected": -12.269630432128906, + "step": 13025 + }, + { + "epoch": 2.03, + "learning_rate": 4.593538046579179e-06, + "logits/chosen": -0.7518876194953918, + "logits/rejected": -3.1053695678710938, + "logps/chosen": -329.28851318359375, + "logps/rejected": -514.074951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.392914772033691, + "rewards/margins": 15.194293022155762, + "rewards/rejected": -19.587207794189453, + "step": 13026 + }, + { + "epoch": 2.03, + "learning_rate": 4.592804606048032e-06, + "logits/chosen": -3.1230132579803467, + "logits/rejected": -3.048394203186035, + "logps/chosen": -226.16461181640625, + "logps/rejected": -289.2850341796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.478219032287598, + "rewards/margins": 8.02748966217041, + "rewards/rejected": -12.505708694458008, + "step": 13027 + }, + { + "epoch": 2.03, + "learning_rate": 4.592071165516884e-06, + "logits/chosen": -1.8732478618621826, + "logits/rejected": -2.760380744934082, + "logps/chosen": -319.79730224609375, + "logps/rejected": -514.412353515625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.01995325088501, + "rewards/margins": 7.051515102386475, + "rewards/rejected": -14.071468353271484, + "step": 13028 + }, + { + "epoch": 2.03, + "learning_rate": 4.591337724985736e-06, + "logits/chosen": -2.551295757293701, + "logits/rejected": -3.0431857109069824, + "logps/chosen": -297.88568115234375, + "logps/rejected": -463.7105407714844, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.692972183227539, + "rewards/margins": 8.47024917602539, + "rewards/rejected": -15.163220405578613, + "step": 13029 + }, + { + "epoch": 2.03, + "learning_rate": 4.590604284454588e-06, + "logits/chosen": -2.945603132247925, + "logits/rejected": -2.966947555541992, + "logps/chosen": -424.49981689453125, + "logps/rejected": -411.25543212890625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.083277702331543, + "rewards/margins": 7.567232131958008, + "rewards/rejected": -15.65050983428955, + "step": 13030 + }, + { + "epoch": 2.03, + "learning_rate": 4.58987084392344e-06, + "logits/chosen": -2.8487730026245117, + "logits/rejected": -3.0114998817443848, + "logps/chosen": -80.25894165039062, + "logps/rejected": -186.22268676757812, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.621297359466553, + "rewards/margins": 5.9844970703125, + "rewards/rejected": -11.605794906616211, + "step": 13031 + }, + { + "epoch": 2.03, + "learning_rate": 4.589137403392292e-06, + "logits/chosen": -1.2585595846176147, + "logits/rejected": -1.5430443286895752, + "logps/chosen": -246.9744110107422, + "logps/rejected": -368.80804443359375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.142617702484131, + "rewards/margins": 9.014466285705566, + "rewards/rejected": -13.157084465026855, + "step": 13032 + }, + { + "epoch": 2.03, + "learning_rate": 4.588403962861144e-06, + "logits/chosen": -0.6630220413208008, + "logits/rejected": -1.7440669536590576, + "logps/chosen": -291.84228515625, + "logps/rejected": -710.103515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.401520729064941, + "rewards/margins": 12.980932235717773, + "rewards/rejected": -21.3824520111084, + "step": 13033 + }, + { + "epoch": 2.03, + "learning_rate": 4.587670522329996e-06, + "logits/chosen": -2.0719947814941406, + "logits/rejected": -2.9955198764801025, + "logps/chosen": -160.12814331054688, + "logps/rejected": -200.71487426757812, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.002377986907959, + "rewards/margins": 6.627346038818359, + "rewards/rejected": -12.629724502563477, + "step": 13034 + }, + { + "epoch": 2.03, + "learning_rate": 4.586937081798848e-06, + "logits/chosen": -1.431458830833435, + "logits/rejected": -2.4539670944213867, + "logps/chosen": -129.36810302734375, + "logps/rejected": -430.09716796875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.604982376098633, + "rewards/margins": 9.038188934326172, + "rewards/rejected": -18.643171310424805, + "step": 13035 + }, + { + "epoch": 2.03, + "learning_rate": 4.586203641267701e-06, + "logits/chosen": -2.945573091506958, + "logits/rejected": -2.7683870792388916, + "logps/chosen": -488.00848388671875, + "logps/rejected": -526.2066040039062, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.047510623931885, + "rewards/margins": 9.174165725708008, + "rewards/rejected": -16.221675872802734, + "step": 13036 + }, + { + "epoch": 2.03, + "learning_rate": 4.585470200736553e-06, + "logits/chosen": -2.4838898181915283, + "logits/rejected": -2.9342241287231445, + "logps/chosen": -142.657470703125, + "logps/rejected": -347.0157470703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.676967144012451, + "rewards/margins": 9.574684143066406, + "rewards/rejected": -15.251651763916016, + "step": 13037 + }, + { + "epoch": 2.03, + "learning_rate": 4.5847367602054045e-06, + "logits/chosen": -2.9295125007629395, + "logits/rejected": -2.9014174938201904, + "logps/chosen": -447.8946838378906, + "logps/rejected": -615.94287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5717720985412598, + "rewards/margins": 12.752859115600586, + "rewards/rejected": -16.324630737304688, + "step": 13038 + }, + { + "epoch": 2.03, + "learning_rate": 4.584003319674257e-06, + "logits/chosen": -2.9224133491516113, + "logits/rejected": -1.8152611255645752, + "logps/chosen": -175.775146484375, + "logps/rejected": -291.71600341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4337239265441895, + "rewards/margins": 10.242158889770508, + "rewards/rejected": -12.675882339477539, + "step": 13039 + }, + { + "epoch": 2.03, + "learning_rate": 4.583269879143109e-06, + "logits/chosen": -2.850857734680176, + "logits/rejected": -2.81127667427063, + "logps/chosen": -184.66510009765625, + "logps/rejected": -314.0168151855469, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.004343032836914, + "rewards/margins": 7.681765556335449, + "rewards/rejected": -13.686108589172363, + "step": 13040 + }, + { + "epoch": 2.03, + "learning_rate": 4.582536438611962e-06, + "logits/chosen": -2.568089723587036, + "logits/rejected": -2.1418991088867188, + "logps/chosen": -184.95477294921875, + "logps/rejected": -335.4396057128906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.469257354736328, + "rewards/margins": 10.701836585998535, + "rewards/rejected": -14.171093940734863, + "step": 13041 + }, + { + "epoch": 2.03, + "learning_rate": 4.581802998080814e-06, + "logits/chosen": -2.6469154357910156, + "logits/rejected": -2.1431007385253906, + "logps/chosen": -261.33929443359375, + "logps/rejected": -196.97634887695312, + "loss": 0.1425, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.893096923828125, + "rewards/margins": 4.3486199378967285, + "rewards/rejected": -13.241716384887695, + "step": 13042 + }, + { + "epoch": 2.03, + "learning_rate": 4.581069557549666e-06, + "logits/chosen": -2.722388505935669, + "logits/rejected": -1.8726401329040527, + "logps/chosen": -659.41064453125, + "logps/rejected": -543.5729370117188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.350576400756836, + "rewards/margins": 9.031414031982422, + "rewards/rejected": -14.381990432739258, + "step": 13043 + }, + { + "epoch": 2.03, + "learning_rate": 4.5803361170185175e-06, + "logits/chosen": -1.9653151035308838, + "logits/rejected": -2.7526702880859375, + "logps/chosen": -145.62168884277344, + "logps/rejected": -280.6314392089844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.654186248779297, + "rewards/margins": 8.577463150024414, + "rewards/rejected": -13.231648445129395, + "step": 13044 + }, + { + "epoch": 2.03, + "learning_rate": 4.57960267648737e-06, + "logits/chosen": -2.067375898361206, + "logits/rejected": -2.918107509613037, + "logps/chosen": -334.7535400390625, + "logps/rejected": -462.733154296875, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.135234355926514, + "rewards/margins": 5.538388252258301, + "rewards/rejected": -9.673622131347656, + "step": 13045 + }, + { + "epoch": 2.03, + "learning_rate": 4.578869235956222e-06, + "logits/chosen": -2.2320892810821533, + "logits/rejected": -2.9329848289489746, + "logps/chosen": -238.86085510253906, + "logps/rejected": -292.9620666503906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.87823486328125, + "rewards/margins": 10.9826078414917, + "rewards/rejected": -13.86084270477295, + "step": 13046 + }, + { + "epoch": 2.03, + "learning_rate": 4.578135795425074e-06, + "logits/chosen": -1.9779213666915894, + "logits/rejected": -2.442007303237915, + "logps/chosen": -459.58612060546875, + "logps/rejected": -533.109619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.614082336425781, + "rewards/margins": 10.270853042602539, + "rewards/rejected": -14.88493537902832, + "step": 13047 + }, + { + "epoch": 2.03, + "learning_rate": 4.577402354893926e-06, + "logits/chosen": -1.0926960706710815, + "logits/rejected": -2.319772481918335, + "logps/chosen": -196.0904998779297, + "logps/rejected": -481.09912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4677886962890625, + "rewards/margins": 14.359094619750977, + "rewards/rejected": -19.82688331604004, + "step": 13048 + }, + { + "epoch": 2.03, + "learning_rate": 4.5766689143627785e-06, + "logits/chosen": -3.048189640045166, + "logits/rejected": -2.773395538330078, + "logps/chosen": -493.04632568359375, + "logps/rejected": -451.95379638671875, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.059565544128418, + "rewards/margins": 6.513258934020996, + "rewards/rejected": -13.572824478149414, + "step": 13049 + }, + { + "epoch": 2.03, + "learning_rate": 4.57593547383163e-06, + "logits/chosen": -2.7086877822875977, + "logits/rejected": -2.8362743854522705, + "logps/chosen": -292.08966064453125, + "logps/rejected": -379.2342224121094, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.231108665466309, + "rewards/margins": 6.573032379150391, + "rewards/rejected": -11.804140090942383, + "step": 13050 + }, + { + "epoch": 2.03, + "learning_rate": 4.575202033300482e-06, + "logits/chosen": -1.8520824909210205, + "logits/rejected": -2.9307713508605957, + "logps/chosen": -250.4036102294922, + "logps/rejected": -612.94091796875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.560103416442871, + "rewards/margins": 7.676390647888184, + "rewards/rejected": -16.236494064331055, + "step": 13051 + }, + { + "epoch": 2.03, + "learning_rate": 4.574468592769334e-06, + "logits/chosen": -2.271690845489502, + "logits/rejected": -2.7071499824523926, + "logps/chosen": -234.75177001953125, + "logps/rejected": -415.1218566894531, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.118177890777588, + "rewards/margins": 6.852802276611328, + "rewards/rejected": -13.970979690551758, + "step": 13052 + }, + { + "epoch": 2.03, + "learning_rate": 4.573735152238186e-06, + "logits/chosen": -2.1548120975494385, + "logits/rejected": -2.288499593734741, + "logps/chosen": -240.87872314453125, + "logps/rejected": -619.5908203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.218560218811035, + "rewards/margins": 10.94709587097168, + "rewards/rejected": -18.16565704345703, + "step": 13053 + }, + { + "epoch": 2.03, + "learning_rate": 4.573001711707039e-06, + "logits/chosen": -2.376682996749878, + "logits/rejected": -3.0226712226867676, + "logps/chosen": -118.62127685546875, + "logps/rejected": -188.5491485595703, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.389830112457275, + "rewards/margins": 5.048861503601074, + "rewards/rejected": -10.438691139221191, + "step": 13054 + }, + { + "epoch": 2.03, + "learning_rate": 4.572268271175891e-06, + "logits/chosen": -2.901258945465088, + "logits/rejected": -2.236419439315796, + "logps/chosen": -136.6719512939453, + "logps/rejected": -328.8750305175781, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8144481182098389, + "rewards/margins": 9.350272178649902, + "rewards/rejected": -11.16472053527832, + "step": 13055 + }, + { + "epoch": 2.03, + "learning_rate": 4.5715348306447425e-06, + "logits/chosen": -2.624757766723633, + "logits/rejected": -2.875279188156128, + "logps/chosen": -168.091552734375, + "logps/rejected": -373.16900634765625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.703361988067627, + "rewards/margins": 6.145273208618164, + "rewards/rejected": -12.848634719848633, + "step": 13056 + }, + { + "epoch": 2.03, + "learning_rate": 4.570801390113595e-06, + "logits/chosen": -2.8540475368499756, + "logits/rejected": -2.773092031478882, + "logps/chosen": -669.0413818359375, + "logps/rejected": -628.2303466796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.96804141998291, + "rewards/margins": 8.727286338806152, + "rewards/rejected": -14.695327758789062, + "step": 13057 + }, + { + "epoch": 2.03, + "learning_rate": 4.570067949582448e-06, + "logits/chosen": -2.675560235977173, + "logits/rejected": -3.061145782470703, + "logps/chosen": -183.81442260742188, + "logps/rejected": -301.7612609863281, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.919763565063477, + "rewards/margins": 7.543900489807129, + "rewards/rejected": -14.463664054870605, + "step": 13058 + }, + { + "epoch": 2.03, + "learning_rate": 4.5693345090513e-06, + "logits/chosen": -3.1135566234588623, + "logits/rejected": -2.3727598190307617, + "logps/chosen": -322.689697265625, + "logps/rejected": -223.84268188476562, + "loss": 0.0406, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.313572883605957, + "rewards/margins": 5.171918869018555, + "rewards/rejected": -12.485490798950195, + "step": 13059 + }, + { + "epoch": 2.03, + "learning_rate": 4.568601068520152e-06, + "logits/chosen": -2.894444227218628, + "logits/rejected": -2.0689408779144287, + "logps/chosen": -733.682861328125, + "logps/rejected": -403.66558837890625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.605725288391113, + "rewards/margins": 7.523747444152832, + "rewards/rejected": -14.129472732543945, + "step": 13060 + }, + { + "epoch": 2.03, + "learning_rate": 4.567867627989004e-06, + "logits/chosen": -2.60245680809021, + "logits/rejected": -2.8029708862304688, + "logps/chosen": -186.58633422851562, + "logps/rejected": -377.38812255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2889556884765625, + "rewards/margins": 13.642780303955078, + "rewards/rejected": -18.93173599243164, + "step": 13061 + }, + { + "epoch": 2.03, + "learning_rate": 4.5671341874578555e-06, + "logits/chosen": -2.9997990131378174, + "logits/rejected": -2.8071579933166504, + "logps/chosen": -170.0933380126953, + "logps/rejected": -133.95199584960938, + "loss": 0.7148, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.028759002685547, + "rewards/margins": 2.219153642654419, + "rewards/rejected": -9.247912406921387, + "step": 13062 + }, + { + "epoch": 2.03, + "learning_rate": 4.566400746926708e-06, + "logits/chosen": -2.961012125015259, + "logits/rejected": -1.693879246711731, + "logps/chosen": -861.62939453125, + "logps/rejected": -592.4595947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1861724853515625, + "rewards/margins": 11.871091842651367, + "rewards/rejected": -15.05726432800293, + "step": 13063 + }, + { + "epoch": 2.03, + "learning_rate": 4.56566730639556e-06, + "logits/chosen": -2.2101666927337646, + "logits/rejected": -2.9678211212158203, + "logps/chosen": -82.99970245361328, + "logps/rejected": -324.23291015625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.861607074737549, + "rewards/margins": 9.614130020141602, + "rewards/rejected": -15.475736618041992, + "step": 13064 + }, + { + "epoch": 2.03, + "learning_rate": 4.564933865864412e-06, + "logits/chosen": -2.859881639480591, + "logits/rejected": -3.0909314155578613, + "logps/chosen": -115.42083740234375, + "logps/rejected": -130.9459228515625, + "loss": 0.0484, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.273601531982422, + "rewards/margins": 4.110418319702148, + "rewards/rejected": -10.38401985168457, + "step": 13065 + }, + { + "epoch": 2.03, + "learning_rate": 4.564200425333264e-06, + "logits/chosen": -2.641944646835327, + "logits/rejected": -2.0595669746398926, + "logps/chosen": -216.13172912597656, + "logps/rejected": -187.06285095214844, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7151085138320923, + "rewards/margins": 7.462166786193848, + "rewards/rejected": -8.177275657653809, + "step": 13066 + }, + { + "epoch": 2.03, + "learning_rate": 4.563466984802117e-06, + "logits/chosen": -1.515846610069275, + "logits/rejected": -2.715860366821289, + "logps/chosen": -183.0885467529297, + "logps/rejected": -549.2149658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.314692497253418, + "rewards/margins": 14.609804153442383, + "rewards/rejected": -21.924495697021484, + "step": 13067 + }, + { + "epoch": 2.03, + "learning_rate": 4.5627335442709685e-06, + "logits/chosen": -2.2010302543640137, + "logits/rejected": -2.7187438011169434, + "logps/chosen": -215.0831298828125, + "logps/rejected": -301.8479919433594, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5773515701293945, + "rewards/margins": 5.677074432373047, + "rewards/rejected": -11.254426002502441, + "step": 13068 + }, + { + "epoch": 2.03, + "learning_rate": 4.56200010373982e-06, + "logits/chosen": -3.123530387878418, + "logits/rejected": -2.861435890197754, + "logps/chosen": -686.9890747070312, + "logps/rejected": -532.3876342773438, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.493260383605957, + "rewards/margins": 5.939221382141113, + "rewards/rejected": -12.43248176574707, + "step": 13069 + }, + { + "epoch": 2.03, + "learning_rate": 4.561266663208672e-06, + "logits/chosen": -2.877370834350586, + "logits/rejected": -2.0141119956970215, + "logps/chosen": -823.4362182617188, + "logps/rejected": -491.5543212890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9213124513626099, + "rewards/margins": 10.74249267578125, + "rewards/rejected": -12.66380500793457, + "step": 13070 + }, + { + "epoch": 2.03, + "learning_rate": 4.560533222677524e-06, + "logits/chosen": -2.40848708152771, + "logits/rejected": -2.1061832904815674, + "logps/chosen": -273.74566650390625, + "logps/rejected": -493.8111877441406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.178955078125, + "rewards/margins": 12.62378215789795, + "rewards/rejected": -16.802738189697266, + "step": 13071 + }, + { + "epoch": 2.03, + "learning_rate": 4.559799782146377e-06, + "logits/chosen": -0.8252015709877014, + "logits/rejected": -2.198389768600464, + "logps/chosen": -200.63357543945312, + "logps/rejected": -637.4166259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.718592643737793, + "rewards/margins": 11.511919021606445, + "rewards/rejected": -18.230510711669922, + "step": 13072 + }, + { + "epoch": 2.03, + "learning_rate": 4.559066341615229e-06, + "logits/chosen": -1.737296223640442, + "logits/rejected": -2.827300548553467, + "logps/chosen": -230.06724548339844, + "logps/rejected": -517.7484741210938, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2539472579956055, + "rewards/margins": 8.699240684509277, + "rewards/rejected": -14.953187942504883, + "step": 13073 + }, + { + "epoch": 2.03, + "learning_rate": 4.5583329010840814e-06, + "logits/chosen": -2.4146196842193604, + "logits/rejected": -2.7358548641204834, + "logps/chosen": -161.4582061767578, + "logps/rejected": -387.6843566894531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.050233840942383, + "rewards/margins": 8.875865936279297, + "rewards/rejected": -13.92609977722168, + "step": 13074 + }, + { + "epoch": 2.03, + "learning_rate": 4.557599460552933e-06, + "logits/chosen": -2.6745495796203613, + "logits/rejected": -3.1541197299957275, + "logps/chosen": -186.21417236328125, + "logps/rejected": -311.5659484863281, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6551718711853027, + "rewards/margins": 7.14423942565918, + "rewards/rejected": -10.79941177368164, + "step": 13075 + }, + { + "epoch": 2.03, + "learning_rate": 4.556866020021786e-06, + "logits/chosen": -2.7369532585144043, + "logits/rejected": -2.965250253677368, + "logps/chosen": -84.34268188476562, + "logps/rejected": -370.520751953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.487856864929199, + "rewards/margins": 8.363871574401855, + "rewards/rejected": -14.851728439331055, + "step": 13076 + }, + { + "epoch": 2.03, + "learning_rate": 4.556132579490638e-06, + "logits/chosen": -2.612523317337036, + "logits/rejected": -2.7065892219543457, + "logps/chosen": -144.94837951660156, + "logps/rejected": -135.85406494140625, + "loss": 0.1542, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2975921630859375, + "rewards/margins": 2.8250010013580322, + "rewards/rejected": -8.12259292602539, + "step": 13077 + }, + { + "epoch": 2.03, + "learning_rate": 4.55539913895949e-06, + "logits/chosen": -1.7968072891235352, + "logits/rejected": -2.9923017024993896, + "logps/chosen": -239.3243408203125, + "logps/rejected": -680.607177734375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.967259883880615, + "rewards/margins": 10.352143287658691, + "rewards/rejected": -15.319402694702148, + "step": 13078 + }, + { + "epoch": 2.03, + "learning_rate": 4.554665698428342e-06, + "logits/chosen": -2.91084361076355, + "logits/rejected": -2.884024143218994, + "logps/chosen": -610.7310791015625, + "logps/rejected": -663.7569580078125, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.699767112731934, + "rewards/margins": 7.345746994018555, + "rewards/rejected": -14.045514106750488, + "step": 13079 + }, + { + "epoch": 2.03, + "learning_rate": 4.5539322578971935e-06, + "logits/chosen": -2.3431589603424072, + "logits/rejected": -2.6111702919006348, + "logps/chosen": -170.53713989257812, + "logps/rejected": -414.627197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.91227388381958, + "rewards/margins": 11.55527400970459, + "rewards/rejected": -16.467548370361328, + "step": 13080 + }, + { + "epoch": 2.03, + "learning_rate": 4.553198817366046e-06, + "logits/chosen": -2.3679239749908447, + "logits/rejected": -2.383354425430298, + "logps/chosen": -603.918212890625, + "logps/rejected": -472.30499267578125, + "loss": 0.9412, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.8853230476379395, + "rewards/margins": 6.4018449783325195, + "rewards/rejected": -12.287168502807617, + "step": 13081 + }, + { + "epoch": 2.03, + "learning_rate": 4.552465376834898e-06, + "logits/chosen": -3.1824252605438232, + "logits/rejected": -2.8696229457855225, + "logps/chosen": -214.44805908203125, + "logps/rejected": -475.55987548828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2546000480651855, + "rewards/margins": 8.464109420776367, + "rewards/rejected": -13.718708992004395, + "step": 13082 + }, + { + "epoch": 2.03, + "learning_rate": 4.55173193630375e-06, + "logits/chosen": -2.927401304244995, + "logits/rejected": -2.9489681720733643, + "logps/chosen": -100.78994750976562, + "logps/rejected": -347.103271484375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.311583518981934, + "rewards/margins": 6.242060661315918, + "rewards/rejected": -10.553644180297852, + "step": 13083 + }, + { + "epoch": 2.03, + "learning_rate": 4.550998495772602e-06, + "logits/chosen": -2.8414361476898193, + "logits/rejected": -2.134918689727783, + "logps/chosen": -504.88787841796875, + "logps/rejected": -439.0546569824219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0694668292999268, + "rewards/margins": 9.235607147216797, + "rewards/rejected": -11.305073738098145, + "step": 13084 + }, + { + "epoch": 2.03, + "learning_rate": 4.550265055241455e-06, + "logits/chosen": -2.2832508087158203, + "logits/rejected": -2.7930665016174316, + "logps/chosen": -140.47320556640625, + "logps/rejected": -499.659912109375, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.01785135269165, + "rewards/margins": 7.914673328399658, + "rewards/rejected": -14.932524681091309, + "step": 13085 + }, + { + "epoch": 2.04, + "learning_rate": 4.5495316147103065e-06, + "logits/chosen": -2.5919110774993896, + "logits/rejected": -2.913275718688965, + "logps/chosen": -144.84164428710938, + "logps/rejected": -397.8881530761719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.397087097167969, + "rewards/margins": 12.645845413208008, + "rewards/rejected": -17.04293441772461, + "step": 13086 + }, + { + "epoch": 2.04, + "learning_rate": 4.548798174179158e-06, + "logits/chosen": -2.861060857772827, + "logits/rejected": -2.9196219444274902, + "logps/chosen": -634.4700927734375, + "logps/rejected": -667.9364013671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0872650146484375, + "rewards/margins": 9.366683006286621, + "rewards/rejected": -14.453947067260742, + "step": 13087 + }, + { + "epoch": 2.04, + "learning_rate": 4.54806473364801e-06, + "logits/chosen": -2.7472832202911377, + "logits/rejected": -1.6660505533218384, + "logps/chosen": -388.00701904296875, + "logps/rejected": -293.7922058105469, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.673451900482178, + "rewards/margins": 8.216395378112793, + "rewards/rejected": -13.889846801757812, + "step": 13088 + }, + { + "epoch": 2.04, + "learning_rate": 4.547331293116863e-06, + "logits/chosen": -2.473320960998535, + "logits/rejected": -2.3323588371276855, + "logps/chosen": -379.7593994140625, + "logps/rejected": -292.4556884765625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0367202758789062, + "rewards/margins": 9.623867988586426, + "rewards/rejected": -12.660588264465332, + "step": 13089 + }, + { + "epoch": 2.04, + "learning_rate": 4.546597852585715e-06, + "logits/chosen": -2.1618309020996094, + "logits/rejected": -3.0543339252471924, + "logps/chosen": -230.99362182617188, + "logps/rejected": -602.4344482421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.376134872436523, + "rewards/margins": 9.457010269165039, + "rewards/rejected": -14.833145141601562, + "step": 13090 + }, + { + "epoch": 2.04, + "learning_rate": 4.545864412054568e-06, + "logits/chosen": -2.998892307281494, + "logits/rejected": -2.9588780403137207, + "logps/chosen": -379.6168518066406, + "logps/rejected": -421.2537841796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.036940813064575, + "rewards/margins": 9.479061126708984, + "rewards/rejected": -12.51600170135498, + "step": 13091 + }, + { + "epoch": 2.04, + "learning_rate": 4.5451309715234195e-06, + "logits/chosen": -2.86364483833313, + "logits/rejected": -2.9182043075561523, + "logps/chosen": -215.47616577148438, + "logps/rejected": -243.3955535888672, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4131920337677, + "rewards/margins": 8.893616676330566, + "rewards/rejected": -11.306808471679688, + "step": 13092 + }, + { + "epoch": 2.04, + "learning_rate": 4.544397530992271e-06, + "logits/chosen": -2.5372157096862793, + "logits/rejected": -2.5942838191986084, + "logps/chosen": -288.0765686035156, + "logps/rejected": -521.3321533203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.105381011962891, + "rewards/margins": 15.926553726196289, + "rewards/rejected": -22.03193473815918, + "step": 13093 + }, + { + "epoch": 2.04, + "learning_rate": 4.543664090461124e-06, + "logits/chosen": -2.8287203311920166, + "logits/rejected": -1.6282165050506592, + "logps/chosen": -308.5952453613281, + "logps/rejected": -228.54415893554688, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.925473213195801, + "rewards/margins": 7.778232574462891, + "rewards/rejected": -11.703705787658691, + "step": 13094 + }, + { + "epoch": 2.04, + "learning_rate": 4.542930649929976e-06, + "logits/chosen": -3.067934036254883, + "logits/rejected": -2.619011640548706, + "logps/chosen": -316.48583984375, + "logps/rejected": -313.67913818359375, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8046655654907227, + "rewards/margins": 6.020609378814697, + "rewards/rejected": -9.825275421142578, + "step": 13095 + }, + { + "epoch": 2.04, + "learning_rate": 4.542197209398828e-06, + "logits/chosen": -2.5325305461883545, + "logits/rejected": -2.889169931411743, + "logps/chosen": -168.25604248046875, + "logps/rejected": -315.648681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1331071853637695, + "rewards/margins": 9.044942855834961, + "rewards/rejected": -13.17805004119873, + "step": 13096 + }, + { + "epoch": 2.04, + "learning_rate": 4.54146376886768e-06, + "logits/chosen": -2.5053679943084717, + "logits/rejected": -2.9424800872802734, + "logps/chosen": -252.14468383789062, + "logps/rejected": -290.02862548828125, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1132941246032715, + "rewards/margins": 4.976603031158447, + "rewards/rejected": -12.089897155761719, + "step": 13097 + }, + { + "epoch": 2.04, + "learning_rate": 4.5407303283365324e-06, + "logits/chosen": -2.044930934906006, + "logits/rejected": -3.01619553565979, + "logps/chosen": -67.49089050292969, + "logps/rejected": -311.4790954589844, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.510451316833496, + "rewards/margins": 7.238180160522461, + "rewards/rejected": -12.748631477355957, + "step": 13098 + }, + { + "epoch": 2.04, + "learning_rate": 4.539996887805384e-06, + "logits/chosen": -2.671647310256958, + "logits/rejected": -3.147191286087036, + "logps/chosen": -142.21511840820312, + "logps/rejected": -304.07891845703125, + "loss": 0.0403, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.377675533294678, + "rewards/margins": 4.609188079833984, + "rewards/rejected": -10.98686408996582, + "step": 13099 + }, + { + "epoch": 2.04, + "learning_rate": 4.539263447274236e-06, + "logits/chosen": -0.6340542435646057, + "logits/rejected": -1.8084020614624023, + "logps/chosen": -67.19644165039062, + "logps/rejected": -389.55963134765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.329782962799072, + "rewards/margins": 8.856274604797363, + "rewards/rejected": -14.186057090759277, + "step": 13100 + }, + { + "epoch": 2.04, + "learning_rate": 4.538530006743088e-06, + "logits/chosen": -1.0091420412063599, + "logits/rejected": -2.772318124771118, + "logps/chosen": -167.623291015625, + "logps/rejected": -604.9501342773438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.961027145385742, + "rewards/margins": 11.944847106933594, + "rewards/rejected": -17.905874252319336, + "step": 13101 + }, + { + "epoch": 2.04, + "learning_rate": 4.53779656621194e-06, + "logits/chosen": -2.1443209648132324, + "logits/rejected": -2.8510563373565674, + "logps/chosen": -357.0760803222656, + "logps/rejected": -436.2088623046875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.753050804138184, + "rewards/margins": 7.8214497566223145, + "rewards/rejected": -15.574501037597656, + "step": 13102 + }, + { + "epoch": 2.04, + "learning_rate": 4.537063125680793e-06, + "logits/chosen": -1.903102993965149, + "logits/rejected": -2.1588053703308105, + "logps/chosen": -420.5218200683594, + "logps/rejected": -518.7416381835938, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.854280471801758, + "rewards/margins": 9.710432052612305, + "rewards/rejected": -17.564712524414062, + "step": 13103 + }, + { + "epoch": 2.04, + "learning_rate": 4.5363296851496446e-06, + "logits/chosen": -1.6198745965957642, + "logits/rejected": -1.877131462097168, + "logps/chosen": -244.9152374267578, + "logps/rejected": -332.5672607421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5820794105529785, + "rewards/margins": 10.147912979125977, + "rewards/rejected": -14.729992866516113, + "step": 13104 + }, + { + "epoch": 2.04, + "learning_rate": 4.5355962446184964e-06, + "logits/chosen": -2.9917843341827393, + "logits/rejected": -2.352151870727539, + "logps/chosen": -279.8146667480469, + "logps/rejected": -399.99627685546875, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.040548801422119, + "rewards/margins": 9.990057945251465, + "rewards/rejected": -16.030607223510742, + "step": 13105 + }, + { + "epoch": 2.04, + "learning_rate": 4.534862804087348e-06, + "logits/chosen": -2.981820821762085, + "logits/rejected": -2.9977943897247314, + "logps/chosen": -83.00355529785156, + "logps/rejected": -434.47113037109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7994866371154785, + "rewards/margins": 8.26432991027832, + "rewards/rejected": -14.063817977905273, + "step": 13106 + }, + { + "epoch": 2.04, + "learning_rate": 4.534129363556201e-06, + "logits/chosen": -2.5570242404937744, + "logits/rejected": -2.951352596282959, + "logps/chosen": -245.18020629882812, + "logps/rejected": -308.4649658203125, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.010138034820557, + "rewards/margins": 5.862155437469482, + "rewards/rejected": -9.872293472290039, + "step": 13107 + }, + { + "epoch": 2.04, + "learning_rate": 4.533395923025054e-06, + "logits/chosen": -2.75396990776062, + "logits/rejected": -2.6451728343963623, + "logps/chosen": -161.84849548339844, + "logps/rejected": -279.9002685546875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.24962043762207, + "rewards/margins": 7.533588409423828, + "rewards/rejected": -14.783208847045898, + "step": 13108 + }, + { + "epoch": 2.04, + "learning_rate": 4.532662482493906e-06, + "logits/chosen": -2.77892804145813, + "logits/rejected": -2.5481784343719482, + "logps/chosen": -479.97906494140625, + "logps/rejected": -354.6148986816406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9937775135040283, + "rewards/margins": 8.866576194763184, + "rewards/rejected": -11.860353469848633, + "step": 13109 + }, + { + "epoch": 2.04, + "learning_rate": 4.5319290419627575e-06, + "logits/chosen": -1.3496376276016235, + "logits/rejected": -2.73475980758667, + "logps/chosen": -201.97080993652344, + "logps/rejected": -636.3318481445312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5868024826049805, + "rewards/margins": 13.340961456298828, + "rewards/rejected": -20.927764892578125, + "step": 13110 + }, + { + "epoch": 2.04, + "learning_rate": 4.531195601431609e-06, + "logits/chosen": -2.1233086585998535, + "logits/rejected": -2.46920108795166, + "logps/chosen": -253.00027465820312, + "logps/rejected": -311.1902160644531, + "loss": 0.0516, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.672458648681641, + "rewards/margins": 8.816655158996582, + "rewards/rejected": -15.489114761352539, + "step": 13111 + }, + { + "epoch": 2.04, + "learning_rate": 4.530462160900462e-06, + "logits/chosen": -2.573296308517456, + "logits/rejected": -2.8663151264190674, + "logps/chosen": -442.18804931640625, + "logps/rejected": -557.3065185546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6823012828826904, + "rewards/margins": 7.858914375305176, + "rewards/rejected": -11.541215896606445, + "step": 13112 + }, + { + "epoch": 2.04, + "learning_rate": 4.529728720369314e-06, + "logits/chosen": -2.5956950187683105, + "logits/rejected": -2.931533098220825, + "logps/chosen": -113.33738708496094, + "logps/rejected": -338.03582763671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3735055923461914, + "rewards/margins": 10.557273864746094, + "rewards/rejected": -12.930778503417969, + "step": 13113 + }, + { + "epoch": 2.04, + "learning_rate": 4.528995279838166e-06, + "logits/chosen": -2.970569133758545, + "logits/rejected": -3.048435688018799, + "logps/chosen": -145.4866943359375, + "logps/rejected": -245.0099639892578, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0116190910339355, + "rewards/margins": 7.247030735015869, + "rewards/rejected": -11.258649826049805, + "step": 13114 + }, + { + "epoch": 2.04, + "learning_rate": 4.528261839307018e-06, + "logits/chosen": -1.9001083374023438, + "logits/rejected": -2.8224685192108154, + "logps/chosen": -201.6490936279297, + "logps/rejected": -347.5665588378906, + "loss": 0.11, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.514387130737305, + "rewards/margins": 5.708969593048096, + "rewards/rejected": -13.223356246948242, + "step": 13115 + }, + { + "epoch": 2.04, + "learning_rate": 4.5275283987758705e-06, + "logits/chosen": -1.4659093618392944, + "logits/rejected": -2.5789198875427246, + "logps/chosen": -173.82952880859375, + "logps/rejected": -366.6632080078125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.704826831817627, + "rewards/margins": 9.24328327178955, + "rewards/rejected": -14.948110580444336, + "step": 13116 + }, + { + "epoch": 2.04, + "learning_rate": 4.526794958244722e-06, + "logits/chosen": -2.7657718658447266, + "logits/rejected": -2.9403305053710938, + "logps/chosen": -242.3533935546875, + "logps/rejected": -312.28961181640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9809370040893555, + "rewards/margins": 8.64079475402832, + "rewards/rejected": -13.621731758117676, + "step": 13117 + }, + { + "epoch": 2.04, + "learning_rate": 4.526061517713574e-06, + "logits/chosen": -2.4664297103881836, + "logits/rejected": -3.1132102012634277, + "logps/chosen": -167.71400451660156, + "logps/rejected": -401.93408203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6998798847198486, + "rewards/margins": 10.000106811523438, + "rewards/rejected": -13.699986457824707, + "step": 13118 + }, + { + "epoch": 2.04, + "learning_rate": 4.525328077182426e-06, + "logits/chosen": -2.900183916091919, + "logits/rejected": -2.498748302459717, + "logps/chosen": -395.66107177734375, + "logps/rejected": -668.9785766601562, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.282820224761963, + "rewards/margins": 8.333015441894531, + "rewards/rejected": -13.615835189819336, + "step": 13119 + }, + { + "epoch": 2.04, + "learning_rate": 4.524594636651278e-06, + "logits/chosen": -3.150564432144165, + "logits/rejected": -2.8758652210235596, + "logps/chosen": -262.48876953125, + "logps/rejected": -333.29742431640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5736727714538574, + "rewards/margins": 9.179244041442871, + "rewards/rejected": -12.75291633605957, + "step": 13120 + }, + { + "epoch": 2.04, + "learning_rate": 4.523861196120131e-06, + "logits/chosen": -3.0318944454193115, + "logits/rejected": -2.3182108402252197, + "logps/chosen": -362.1396484375, + "logps/rejected": -225.34283447265625, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7642364501953125, + "rewards/margins": 7.309482574462891, + "rewards/rejected": -9.073719024658203, + "step": 13121 + }, + { + "epoch": 2.04, + "learning_rate": 4.523127755588983e-06, + "logits/chosen": -2.0804975032806396, + "logits/rejected": -2.872013807296753, + "logps/chosen": -484.9996337890625, + "logps/rejected": -572.420654296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.387618541717529, + "rewards/margins": 9.898172378540039, + "rewards/rejected": -16.285791397094727, + "step": 13122 + }, + { + "epoch": 2.04, + "learning_rate": 4.5223943150578345e-06, + "logits/chosen": -2.2192909717559814, + "logits/rejected": -2.941424608230591, + "logps/chosen": -251.49203491210938, + "logps/rejected": -376.62030029296875, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.875375270843506, + "rewards/margins": 7.4503936767578125, + "rewards/rejected": -13.325769424438477, + "step": 13123 + }, + { + "epoch": 2.04, + "learning_rate": 4.521660874526687e-06, + "logits/chosen": -2.010772705078125, + "logits/rejected": -2.810272216796875, + "logps/chosen": -315.2579040527344, + "logps/rejected": -503.0292053222656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.177729606628418, + "rewards/margins": 7.723841667175293, + "rewards/rejected": -13.901571273803711, + "step": 13124 + }, + { + "epoch": 2.04, + "learning_rate": 4.52092743399554e-06, + "logits/chosen": -2.5436863899230957, + "logits/rejected": -2.0672852993011475, + "logps/chosen": -531.86376953125, + "logps/rejected": -545.2107543945312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.300318241119385, + "rewards/margins": 9.78077507019043, + "rewards/rejected": -14.081092834472656, + "step": 13125 + }, + { + "epoch": 2.04, + "learning_rate": 4.520193993464392e-06, + "logits/chosen": -2.9038102626800537, + "logits/rejected": -2.1332786083221436, + "logps/chosen": -192.3255615234375, + "logps/rejected": -318.75054931640625, + "loss": 0.2301, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.230797290802002, + "rewards/margins": 8.785917282104492, + "rewards/rejected": -14.016714096069336, + "step": 13126 + }, + { + "epoch": 2.04, + "learning_rate": 4.519460552933244e-06, + "logits/chosen": -2.2496347427368164, + "logits/rejected": -3.138427257537842, + "logps/chosen": -296.0642395019531, + "logps/rejected": -497.1207580566406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0564866065979004, + "rewards/margins": 12.639554977416992, + "rewards/rejected": -15.69604206085205, + "step": 13127 + }, + { + "epoch": 2.04, + "learning_rate": 4.5187271124020956e-06, + "logits/chosen": -2.9570600986480713, + "logits/rejected": -2.930901527404785, + "logps/chosen": -59.70110321044922, + "logps/rejected": -154.25625610351562, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.851858615875244, + "rewards/margins": 7.672025680541992, + "rewards/rejected": -11.523884773254395, + "step": 13128 + }, + { + "epoch": 2.04, + "learning_rate": 4.5179936718709474e-06, + "logits/chosen": -2.54668927192688, + "logits/rejected": -2.9571845531463623, + "logps/chosen": -241.0763702392578, + "logps/rejected": -320.18212890625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.199549674987793, + "rewards/margins": 7.111050605773926, + "rewards/rejected": -12.310600280761719, + "step": 13129 + }, + { + "epoch": 2.04, + "learning_rate": 4.5172602313398e-06, + "logits/chosen": -2.91219425201416, + "logits/rejected": -2.119086742401123, + "logps/chosen": -715.5592651367188, + "logps/rejected": -453.996337890625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.097537040710449, + "rewards/margins": 10.142548561096191, + "rewards/rejected": -15.24008560180664, + "step": 13130 + }, + { + "epoch": 2.04, + "learning_rate": 4.516526790808652e-06, + "logits/chosen": -2.7561304569244385, + "logits/rejected": -2.402163028717041, + "logps/chosen": -148.25485229492188, + "logps/rejected": -322.24566650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9638543128967285, + "rewards/margins": 12.339499473571777, + "rewards/rejected": -14.303354263305664, + "step": 13131 + }, + { + "epoch": 2.04, + "learning_rate": 4.515793350277504e-06, + "logits/chosen": -2.8194961547851562, + "logits/rejected": -2.976076364517212, + "logps/chosen": -274.6082763671875, + "logps/rejected": -359.5144958496094, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.939133644104004, + "rewards/margins": 8.603431701660156, + "rewards/rejected": -12.542566299438477, + "step": 13132 + }, + { + "epoch": 2.04, + "learning_rate": 4.515059909746356e-06, + "logits/chosen": -1.8639428615570068, + "logits/rejected": -2.3513646125793457, + "logps/chosen": -159.517578125, + "logps/rejected": -267.3828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.345977306365967, + "rewards/margins": 9.907249450683594, + "rewards/rejected": -14.253227233886719, + "step": 13133 + }, + { + "epoch": 2.04, + "learning_rate": 4.5143264692152085e-06, + "logits/chosen": -1.8924179077148438, + "logits/rejected": -2.6101529598236084, + "logps/chosen": -117.76576232910156, + "logps/rejected": -340.721923828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.072257995605469, + "rewards/margins": 8.48151969909668, + "rewards/rejected": -14.553777694702148, + "step": 13134 + }, + { + "epoch": 2.04, + "learning_rate": 4.51359302868406e-06, + "logits/chosen": -1.4250249862670898, + "logits/rejected": -2.5480926036834717, + "logps/chosen": -198.69161987304688, + "logps/rejected": -465.8287353515625, + "loss": 0.0633, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.74091911315918, + "rewards/margins": 8.779470443725586, + "rewards/rejected": -20.520389556884766, + "step": 13135 + }, + { + "epoch": 2.04, + "learning_rate": 4.512859588152912e-06, + "logits/chosen": -2.7621896266937256, + "logits/rejected": -2.3554298877716064, + "logps/chosen": -745.7828369140625, + "logps/rejected": -655.6337890625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.642633438110352, + "rewards/margins": 8.295391082763672, + "rewards/rejected": -14.938024520874023, + "step": 13136 + }, + { + "epoch": 2.04, + "learning_rate": 4.512126147621764e-06, + "logits/chosen": -3.0632495880126953, + "logits/rejected": -2.171048164367676, + "logps/chosen": -400.047119140625, + "logps/rejected": -199.6865234375, + "loss": 0.1518, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.29240083694458, + "rewards/margins": 3.862429141998291, + "rewards/rejected": -10.154829978942871, + "step": 13137 + }, + { + "epoch": 2.04, + "learning_rate": 4.511392707090617e-06, + "logits/chosen": -0.9890900254249573, + "logits/rejected": -2.3040249347686768, + "logps/chosen": -127.5577392578125, + "logps/rejected": -245.52272033691406, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.648530006408691, + "rewards/margins": 6.41110897064209, + "rewards/rejected": -12.059638977050781, + "step": 13138 + }, + { + "epoch": 2.04, + "learning_rate": 4.510659266559469e-06, + "logits/chosen": -2.9641494750976562, + "logits/rejected": -2.837705373764038, + "logps/chosen": -217.93069458007812, + "logps/rejected": -279.253173828125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.706467628479004, + "rewards/margins": 6.9936418533325195, + "rewards/rejected": -13.700109481811523, + "step": 13139 + }, + { + "epoch": 2.04, + "learning_rate": 4.509925826028321e-06, + "logits/chosen": -3.0322868824005127, + "logits/rejected": -2.5535943508148193, + "logps/chosen": -436.1917724609375, + "logps/rejected": -201.2763671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0279135704040527, + "rewards/margins": 9.917826652526855, + "rewards/rejected": -12.94573974609375, + "step": 13140 + }, + { + "epoch": 2.04, + "learning_rate": 4.509192385497173e-06, + "logits/chosen": -3.0415239334106445, + "logits/rejected": -2.744882106781006, + "logps/chosen": -517.2579956054688, + "logps/rejected": -599.385009765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8354668617248535, + "rewards/margins": 8.867563247680664, + "rewards/rejected": -13.70302963256836, + "step": 13141 + }, + { + "epoch": 2.04, + "learning_rate": 4.508458944966025e-06, + "logits/chosen": -2.630842447280884, + "logits/rejected": -2.486442804336548, + "logps/chosen": -283.7251281738281, + "logps/rejected": -324.03057861328125, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.35425329208374, + "rewards/margins": 7.498228073120117, + "rewards/rejected": -12.852481842041016, + "step": 13142 + }, + { + "epoch": 2.04, + "learning_rate": 4.507725504434878e-06, + "logits/chosen": -1.8314882516860962, + "logits/rejected": -3.0542995929718018, + "logps/chosen": -169.77102661132812, + "logps/rejected": -460.6046142578125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.081167697906494, + "rewards/margins": 9.973343849182129, + "rewards/rejected": -16.05451202392578, + "step": 13143 + }, + { + "epoch": 2.04, + "learning_rate": 4.50699206390373e-06, + "logits/chosen": -3.117955446243286, + "logits/rejected": -2.890611410140991, + "logps/chosen": -146.97901916503906, + "logps/rejected": -366.7362365722656, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.916587829589844, + "rewards/margins": 6.542362213134766, + "rewards/rejected": -12.45895004272461, + "step": 13144 + }, + { + "epoch": 2.04, + "learning_rate": 4.506258623372582e-06, + "logits/chosen": -2.4382052421569824, + "logits/rejected": -2.9189016819000244, + "logps/chosen": -78.07086181640625, + "logps/rejected": -259.08984375, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7833075523376465, + "rewards/margins": 7.142178535461426, + "rewards/rejected": -10.925485610961914, + "step": 13145 + }, + { + "epoch": 2.04, + "learning_rate": 4.505525182841434e-06, + "logits/chosen": -0.8617352247238159, + "logits/rejected": -2.596501350402832, + "logps/chosen": -122.44132995605469, + "logps/rejected": -568.6951904296875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.953604221343994, + "rewards/margins": 15.697633743286133, + "rewards/rejected": -20.65123748779297, + "step": 13146 + }, + { + "epoch": 2.04, + "learning_rate": 4.504791742310286e-06, + "logits/chosen": -1.2696254253387451, + "logits/rejected": -2.6243560314178467, + "logps/chosen": -134.44129943847656, + "logps/rejected": -501.91522216796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.762899398803711, + "rewards/margins": 10.1922607421875, + "rewards/rejected": -19.95516014099121, + "step": 13147 + }, + { + "epoch": 2.04, + "learning_rate": 4.504058301779138e-06, + "logits/chosen": -2.2391412258148193, + "logits/rejected": -2.9470341205596924, + "logps/chosen": -128.76385498046875, + "logps/rejected": -299.9691162109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.230996131896973, + "rewards/margins": 10.248165130615234, + "rewards/rejected": -14.479161262512207, + "step": 13148 + }, + { + "epoch": 2.04, + "learning_rate": 4.50332486124799e-06, + "logits/chosen": -1.5295729637145996, + "logits/rejected": -2.3924992084503174, + "logps/chosen": -229.43362426757812, + "logps/rejected": -424.3972473144531, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.337713241577148, + "rewards/margins": 9.616769790649414, + "rewards/rejected": -13.954483032226562, + "step": 13149 + }, + { + "epoch": 2.05, + "learning_rate": 4.502591420716842e-06, + "logits/chosen": -1.945690393447876, + "logits/rejected": -2.665658712387085, + "logps/chosen": -216.52935791015625, + "logps/rejected": -446.3358154296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.59816837310791, + "rewards/margins": 8.453788757324219, + "rewards/rejected": -15.051958084106445, + "step": 13150 + }, + { + "epoch": 2.05, + "learning_rate": 4.501857980185694e-06, + "logits/chosen": -2.8623108863830566, + "logits/rejected": -2.564661979675293, + "logps/chosen": -356.2086181640625, + "logps/rejected": -493.8866882324219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.092434406280518, + "rewards/margins": 12.987190246582031, + "rewards/rejected": -19.07962417602539, + "step": 13151 + }, + { + "epoch": 2.05, + "learning_rate": 4.5011245396545466e-06, + "logits/chosen": -1.1113804578781128, + "logits/rejected": -2.7783567905426025, + "logps/chosen": -108.3961410522461, + "logps/rejected": -405.5738525390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.701937675476074, + "rewards/margins": 11.210762023925781, + "rewards/rejected": -17.912700653076172, + "step": 13152 + }, + { + "epoch": 2.05, + "learning_rate": 4.5003910991233984e-06, + "logits/chosen": -2.924586057662964, + "logits/rejected": -2.9542675018310547, + "logps/chosen": -621.5677490234375, + "logps/rejected": -691.2734985351562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.94821834564209, + "rewards/margins": 14.874536514282227, + "rewards/rejected": -19.82275390625, + "step": 13153 + }, + { + "epoch": 2.05, + "learning_rate": 4.49965765859225e-06, + "logits/chosen": -1.7879678010940552, + "logits/rejected": -2.7313127517700195, + "logps/chosen": -205.05810546875, + "logps/rejected": -536.6522216796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8124799728393555, + "rewards/margins": 10.509973526000977, + "rewards/rejected": -18.32245445251465, + "step": 13154 + }, + { + "epoch": 2.05, + "learning_rate": 4.498924218061102e-06, + "logits/chosen": -2.82124400138855, + "logits/rejected": -2.6503708362579346, + "logps/chosen": -234.1468048095703, + "logps/rejected": -410.35601806640625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6052260398864746, + "rewards/margins": 10.907740592956543, + "rewards/rejected": -14.51296615600586, + "step": 13155 + }, + { + "epoch": 2.05, + "learning_rate": 4.498190777529955e-06, + "logits/chosen": -2.5946552753448486, + "logits/rejected": -3.1167938709259033, + "logps/chosen": -166.34849548339844, + "logps/rejected": -426.2044677734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.872246742248535, + "rewards/margins": 10.220070838928223, + "rewards/rejected": -16.092317581176758, + "step": 13156 + }, + { + "epoch": 2.05, + "learning_rate": 4.497457336998807e-06, + "logits/chosen": -3.00012469291687, + "logits/rejected": -3.0196640491485596, + "logps/chosen": -634.746826171875, + "logps/rejected": -578.8798217773438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.069666862487793, + "rewards/margins": 11.59290599822998, + "rewards/rejected": -15.662572860717773, + "step": 13157 + }, + { + "epoch": 2.05, + "learning_rate": 4.4967238964676595e-06, + "logits/chosen": -2.6958072185516357, + "logits/rejected": -3.0524587631225586, + "logps/chosen": -159.72406005859375, + "logps/rejected": -203.742431640625, + "loss": 0.1857, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.531644821166992, + "rewards/margins": 5.269567012786865, + "rewards/rejected": -9.8012113571167, + "step": 13158 + }, + { + "epoch": 2.05, + "learning_rate": 4.495990455936511e-06, + "logits/chosen": -2.7370355129241943, + "logits/rejected": -2.911837100982666, + "logps/chosen": -120.12604522705078, + "logps/rejected": -232.42010498046875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.419040679931641, + "rewards/margins": 7.01613712310791, + "rewards/rejected": -11.435176849365234, + "step": 13159 + }, + { + "epoch": 2.05, + "learning_rate": 4.495257015405363e-06, + "logits/chosen": -2.7514572143554688, + "logits/rejected": -1.703405499458313, + "logps/chosen": -511.9601745605469, + "logps/rejected": -518.2784423828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.470752239227295, + "rewards/margins": 11.139148712158203, + "rewards/rejected": -18.609901428222656, + "step": 13160 + }, + { + "epoch": 2.05, + "learning_rate": 4.494523574874216e-06, + "logits/chosen": -2.440659761428833, + "logits/rejected": -2.9777283668518066, + "logps/chosen": -212.79547119140625, + "logps/rejected": -407.958740234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.575413703918457, + "rewards/margins": 8.98243522644043, + "rewards/rejected": -13.55784797668457, + "step": 13161 + }, + { + "epoch": 2.05, + "learning_rate": 4.493790134343068e-06, + "logits/chosen": -2.089881658554077, + "logits/rejected": -2.8542139530181885, + "logps/chosen": -523.0274047851562, + "logps/rejected": -539.1767578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.832925796508789, + "rewards/margins": 8.147315979003906, + "rewards/rejected": -14.980241775512695, + "step": 13162 + }, + { + "epoch": 2.05, + "learning_rate": 4.49305669381192e-06, + "logits/chosen": -1.8376504182815552, + "logits/rejected": -2.63569974899292, + "logps/chosen": -181.5895233154297, + "logps/rejected": -452.9087219238281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.552148818969727, + "rewards/margins": 13.058576583862305, + "rewards/rejected": -21.61072540283203, + "step": 13163 + }, + { + "epoch": 2.05, + "learning_rate": 4.492323253280772e-06, + "logits/chosen": -2.840012550354004, + "logits/rejected": -2.3543336391448975, + "logps/chosen": -230.1878662109375, + "logps/rejected": -372.12823486328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.441035270690918, + "rewards/margins": 10.579949378967285, + "rewards/rejected": -17.020984649658203, + "step": 13164 + }, + { + "epoch": 2.05, + "learning_rate": 4.491589812749624e-06, + "logits/chosen": -2.5893654823303223, + "logits/rejected": -2.9162709712982178, + "logps/chosen": -168.56195068359375, + "logps/rejected": -303.6101989746094, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.845333099365234, + "rewards/margins": 4.944936275482178, + "rewards/rejected": -10.79026985168457, + "step": 13165 + }, + { + "epoch": 2.05, + "learning_rate": 4.490856372218476e-06, + "logits/chosen": -2.8254029750823975, + "logits/rejected": -1.4079300165176392, + "logps/chosen": -416.5167541503906, + "logps/rejected": -373.0960998535156, + "loss": 0.0248, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.344240188598633, + "rewards/margins": 6.690000534057617, + "rewards/rejected": -11.034241676330566, + "step": 13166 + }, + { + "epoch": 2.05, + "learning_rate": 4.490122931687328e-06, + "logits/chosen": -2.546010732650757, + "logits/rejected": -2.886859655380249, + "logps/chosen": -357.90753173828125, + "logps/rejected": -441.9853515625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.644168853759766, + "rewards/margins": 7.859633922576904, + "rewards/rejected": -12.503803253173828, + "step": 13167 + }, + { + "epoch": 2.05, + "learning_rate": 4.48938949115618e-06, + "logits/chosen": -1.8603625297546387, + "logits/rejected": -2.864971399307251, + "logps/chosen": -163.8193359375, + "logps/rejected": -479.65380859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.010221481323242, + "rewards/margins": 8.569343566894531, + "rewards/rejected": -16.57956314086914, + "step": 13168 + }, + { + "epoch": 2.05, + "learning_rate": 4.488656050625032e-06, + "logits/chosen": -1.0573375225067139, + "logits/rejected": -2.403630495071411, + "logps/chosen": -679.3635864257812, + "logps/rejected": -428.4996643066406, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.759383201599121, + "rewards/margins": 8.17092514038086, + "rewards/rejected": -15.93030834197998, + "step": 13169 + }, + { + "epoch": 2.05, + "learning_rate": 4.487922610093885e-06, + "logits/chosen": -1.9910236597061157, + "logits/rejected": -3.059904098510742, + "logps/chosen": -84.21243286132812, + "logps/rejected": -350.677001953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5353193283081055, + "rewards/margins": 9.305925369262695, + "rewards/rejected": -15.841243743896484, + "step": 13170 + }, + { + "epoch": 2.05, + "learning_rate": 4.4871891695627365e-06, + "logits/chosen": -2.887056827545166, + "logits/rejected": -3.1192431449890137, + "logps/chosen": -78.9262466430664, + "logps/rejected": -220.01321411132812, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3863725662231445, + "rewards/margins": 4.3356170654296875, + "rewards/rejected": -9.721989631652832, + "step": 13171 + }, + { + "epoch": 2.05, + "learning_rate": 4.486455729031588e-06, + "logits/chosen": -2.701319932937622, + "logits/rejected": -2.5889432430267334, + "logps/chosen": -92.64065551757812, + "logps/rejected": -298.40185546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.123507976531982, + "rewards/margins": 9.584434509277344, + "rewards/rejected": -15.707942962646484, + "step": 13172 + }, + { + "epoch": 2.05, + "learning_rate": 4.48572228850044e-06, + "logits/chosen": -1.508893370628357, + "logits/rejected": -2.7420191764831543, + "logps/chosen": -176.8785400390625, + "logps/rejected": -347.17254638671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.519436836242676, + "rewards/margins": 9.720982551574707, + "rewards/rejected": -17.240419387817383, + "step": 13173 + }, + { + "epoch": 2.05, + "learning_rate": 4.484988847969293e-06, + "logits/chosen": -1.108748197555542, + "logits/rejected": -2.4553818702697754, + "logps/chosen": -240.88442993164062, + "logps/rejected": -626.8876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.076862335205078, + "rewards/margins": 16.094148635864258, + "rewards/rejected": -22.171010971069336, + "step": 13174 + }, + { + "epoch": 2.05, + "learning_rate": 4.484255407438146e-06, + "logits/chosen": -2.8140766620635986, + "logits/rejected": -2.904297113418579, + "logps/chosen": -142.16741943359375, + "logps/rejected": -394.46051025390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.392086029052734, + "rewards/margins": 10.492033958435059, + "rewards/rejected": -17.88412094116211, + "step": 13175 + }, + { + "epoch": 2.05, + "learning_rate": 4.483521966906998e-06, + "logits/chosen": -2.89471173286438, + "logits/rejected": -2.871721029281616, + "logps/chosen": -202.7105712890625, + "logps/rejected": -279.4579772949219, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.605301856994629, + "rewards/margins": 8.18641471862793, + "rewards/rejected": -12.791717529296875, + "step": 13176 + }, + { + "epoch": 2.05, + "learning_rate": 4.4827885263758495e-06, + "logits/chosen": -2.4442384243011475, + "logits/rejected": -2.612800121307373, + "logps/chosen": -265.64984130859375, + "logps/rejected": -461.57781982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.708520412445068, + "rewards/margins": 11.286486625671387, + "rewards/rejected": -17.995006561279297, + "step": 13177 + }, + { + "epoch": 2.05, + "learning_rate": 4.482055085844701e-06, + "logits/chosen": -1.901450753211975, + "logits/rejected": -2.6130776405334473, + "logps/chosen": -132.30259704589844, + "logps/rejected": -371.0523681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.362034797668457, + "rewards/margins": 12.302271842956543, + "rewards/rejected": -16.664306640625, + "step": 13178 + }, + { + "epoch": 2.05, + "learning_rate": 4.481321645313554e-06, + "logits/chosen": -2.754257917404175, + "logits/rejected": -2.7697031497955322, + "logps/chosen": -392.9434814453125, + "logps/rejected": -505.1275634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.165084838867188, + "rewards/margins": 10.31094741821289, + "rewards/rejected": -18.476032257080078, + "step": 13179 + }, + { + "epoch": 2.05, + "learning_rate": 4.480588204782406e-06, + "logits/chosen": -2.627012252807617, + "logits/rejected": -3.0369420051574707, + "logps/chosen": -290.7316589355469, + "logps/rejected": -239.30703735351562, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.850702285766602, + "rewards/margins": 7.133732795715332, + "rewards/rejected": -13.984435081481934, + "step": 13180 + }, + { + "epoch": 2.05, + "learning_rate": 4.479854764251258e-06, + "logits/chosen": -2.368241310119629, + "logits/rejected": -2.631247043609619, + "logps/chosen": -215.815673828125, + "logps/rejected": -395.5388488769531, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.716986656188965, + "rewards/margins": 8.0826416015625, + "rewards/rejected": -15.799628257751465, + "step": 13181 + }, + { + "epoch": 2.05, + "learning_rate": 4.47912132372011e-06, + "logits/chosen": -1.4707362651824951, + "logits/rejected": -2.4233033657073975, + "logps/chosen": -589.3219604492188, + "logps/rejected": -937.236328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.830605983734131, + "rewards/margins": 14.058176040649414, + "rewards/rejected": -19.888782501220703, + "step": 13182 + }, + { + "epoch": 2.05, + "learning_rate": 4.478387883188962e-06, + "logits/chosen": -1.8818793296813965, + "logits/rejected": -2.820244073867798, + "logps/chosen": -256.0559997558594, + "logps/rejected": -288.6725769042969, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.31884241104126, + "rewards/margins": 4.418585777282715, + "rewards/rejected": -10.737428665161133, + "step": 13183 + }, + { + "epoch": 2.05, + "learning_rate": 4.477654442657814e-06, + "logits/chosen": -2.6348679065704346, + "logits/rejected": -2.8006935119628906, + "logps/chosen": -285.99267578125, + "logps/rejected": -508.80523681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3139801025390625, + "rewards/margins": 9.235666275024414, + "rewards/rejected": -15.549646377563477, + "step": 13184 + }, + { + "epoch": 2.05, + "learning_rate": 4.476921002126666e-06, + "logits/chosen": -2.2719051837921143, + "logits/rejected": -2.433868646621704, + "logps/chosen": -127.0285873413086, + "logps/rejected": -242.56712341308594, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.066742897033691, + "rewards/margins": 8.42596435546875, + "rewards/rejected": -14.492707252502441, + "step": 13185 + }, + { + "epoch": 2.05, + "learning_rate": 4.476187561595518e-06, + "logits/chosen": -2.933987855911255, + "logits/rejected": -2.6940410137176514, + "logps/chosen": -532.1676025390625, + "logps/rejected": -430.510498046875, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.861145973205566, + "rewards/margins": 5.774548530578613, + "rewards/rejected": -13.63569450378418, + "step": 13186 + }, + { + "epoch": 2.05, + "learning_rate": 4.475454121064371e-06, + "logits/chosen": -2.1526100635528564, + "logits/rejected": -3.0344057083129883, + "logps/chosen": -369.05804443359375, + "logps/rejected": -678.396240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.866239547729492, + "rewards/margins": 12.255064010620117, + "rewards/rejected": -24.12130355834961, + "step": 13187 + }, + { + "epoch": 2.05, + "learning_rate": 4.474720680533223e-06, + "logits/chosen": -0.4023685157299042, + "logits/rejected": -2.072610855102539, + "logps/chosen": -201.91477966308594, + "logps/rejected": -403.6903381347656, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.4816312789917, + "rewards/margins": 15.338661193847656, + "rewards/rejected": -23.82029151916504, + "step": 13188 + }, + { + "epoch": 2.05, + "learning_rate": 4.4739872400020745e-06, + "logits/chosen": -0.7825681567192078, + "logits/rejected": -2.939467191696167, + "logps/chosen": -151.83966064453125, + "logps/rejected": -492.8150939941406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.511720657348633, + "rewards/margins": 9.386022567749023, + "rewards/rejected": -16.897743225097656, + "step": 13189 + }, + { + "epoch": 2.05, + "learning_rate": 4.473253799470926e-06, + "logits/chosen": -2.596290111541748, + "logits/rejected": -3.0148136615753174, + "logps/chosen": -103.8636703491211, + "logps/rejected": -272.10791015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7883429527282715, + "rewards/margins": 7.997365474700928, + "rewards/rejected": -15.7857084274292, + "step": 13190 + }, + { + "epoch": 2.05, + "learning_rate": 4.472520358939779e-06, + "logits/chosen": -2.6734471321105957, + "logits/rejected": -2.3201749324798584, + "logps/chosen": -210.84976196289062, + "logps/rejected": -251.00567626953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.401930332183838, + "rewards/margins": 8.994181632995605, + "rewards/rejected": -15.396112442016602, + "step": 13191 + }, + { + "epoch": 2.05, + "learning_rate": 4.471786918408632e-06, + "logits/chosen": -2.9867584705352783, + "logits/rejected": -2.1384313106536865, + "logps/chosen": -369.9429626464844, + "logps/rejected": -422.6866455078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.755853652954102, + "rewards/margins": 9.349899291992188, + "rewards/rejected": -15.105752944946289, + "step": 13192 + }, + { + "epoch": 2.05, + "learning_rate": 4.471053477877484e-06, + "logits/chosen": -2.382887601852417, + "logits/rejected": -2.9125101566314697, + "logps/chosen": -481.5254211425781, + "logps/rejected": -625.32958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.126594543457031, + "rewards/margins": 9.773355484008789, + "rewards/rejected": -17.89995002746582, + "step": 13193 + }, + { + "epoch": 2.05, + "learning_rate": 4.470320037346336e-06, + "logits/chosen": -2.2136290073394775, + "logits/rejected": -2.627086877822876, + "logps/chosen": -93.17832946777344, + "logps/rejected": -501.62225341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.901487350463867, + "rewards/margins": 15.410223007202148, + "rewards/rejected": -21.311710357666016, + "step": 13194 + }, + { + "epoch": 2.05, + "learning_rate": 4.4695865968151875e-06, + "logits/chosen": -1.4218758344650269, + "logits/rejected": -2.6852195262908936, + "logps/chosen": -201.15185546875, + "logps/rejected": -374.8229064941406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.581830024719238, + "rewards/margins": 9.10521125793457, + "rewards/rejected": -17.687042236328125, + "step": 13195 + }, + { + "epoch": 2.05, + "learning_rate": 4.46885315628404e-06, + "logits/chosen": -2.1560120582580566, + "logits/rejected": -2.903156280517578, + "logps/chosen": -138.29640197753906, + "logps/rejected": -287.2068176269531, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.930546760559082, + "rewards/margins": 8.217877388000488, + "rewards/rejected": -14.14842414855957, + "step": 13196 + }, + { + "epoch": 2.05, + "learning_rate": 4.468119715752892e-06, + "logits/chosen": -2.9355711936950684, + "logits/rejected": -2.3532633781433105, + "logps/chosen": -432.5909423828125, + "logps/rejected": -255.1396484375, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.855583190917969, + "rewards/margins": 5.404474258422852, + "rewards/rejected": -13.26005744934082, + "step": 13197 + }, + { + "epoch": 2.05, + "learning_rate": 4.467386275221744e-06, + "logits/chosen": -2.6244332790374756, + "logits/rejected": -2.989060878753662, + "logps/chosen": -213.30804443359375, + "logps/rejected": -395.16656494140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.240423202514648, + "rewards/margins": 8.56800365447998, + "rewards/rejected": -16.808427810668945, + "step": 13198 + }, + { + "epoch": 2.05, + "learning_rate": 4.466652834690596e-06, + "logits/chosen": -2.8983964920043945, + "logits/rejected": -2.7406671047210693, + "logps/chosen": -351.80670166015625, + "logps/rejected": -371.1631774902344, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.161626815795898, + "rewards/margins": 6.863334655761719, + "rewards/rejected": -14.024961471557617, + "step": 13199 + }, + { + "epoch": 2.05, + "learning_rate": 4.465919394159448e-06, + "logits/chosen": -2.948392629623413, + "logits/rejected": -2.261159896850586, + "logps/chosen": -970.0265502929688, + "logps/rejected": -713.7696533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.783963680267334, + "rewards/margins": 11.558764457702637, + "rewards/rejected": -17.342727661132812, + "step": 13200 + }, + { + "epoch": 2.05, + "learning_rate": 4.4651859536283005e-06, + "logits/chosen": -2.872809410095215, + "logits/rejected": -2.526907205581665, + "logps/chosen": -322.4405822753906, + "logps/rejected": -367.33441162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.067235946655273, + "rewards/margins": 10.982955932617188, + "rewards/rejected": -17.05019187927246, + "step": 13201 + }, + { + "epoch": 2.05, + "learning_rate": 4.464452513097152e-06, + "logits/chosen": -1.9665924310684204, + "logits/rejected": -2.7520480155944824, + "logps/chosen": -167.64077758789062, + "logps/rejected": -291.5009460449219, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.216599464416504, + "rewards/margins": 9.600715637207031, + "rewards/rejected": -15.817316055297852, + "step": 13202 + }, + { + "epoch": 2.05, + "learning_rate": 4.463719072566004e-06, + "logits/chosen": -2.6724014282226562, + "logits/rejected": -3.1404519081115723, + "logps/chosen": -42.778472900390625, + "logps/rejected": -203.6339874267578, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.146400213241577, + "rewards/margins": 8.004863739013672, + "rewards/rejected": -11.151264190673828, + "step": 13203 + }, + { + "epoch": 2.05, + "learning_rate": 4.462985632034856e-06, + "logits/chosen": -2.672112464904785, + "logits/rejected": -3.0027549266815186, + "logps/chosen": -324.0562438964844, + "logps/rejected": -478.25225830078125, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.277172088623047, + "rewards/margins": 5.473633766174316, + "rewards/rejected": -13.750805854797363, + "step": 13204 + }, + { + "epoch": 2.05, + "learning_rate": 4.462252191503709e-06, + "logits/chosen": -2.819695234298706, + "logits/rejected": -2.9125261306762695, + "logps/chosen": -65.69198608398438, + "logps/rejected": -277.0252685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.023341655731201, + "rewards/margins": 12.733221054077148, + "rewards/rejected": -16.756563186645508, + "step": 13205 + }, + { + "epoch": 2.05, + "learning_rate": 4.461518750972561e-06, + "logits/chosen": -2.395514488220215, + "logits/rejected": -2.538865327835083, + "logps/chosen": -200.00743103027344, + "logps/rejected": -282.83563232421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.319771766662598, + "rewards/margins": 10.288251876831055, + "rewards/rejected": -17.608022689819336, + "step": 13206 + }, + { + "epoch": 2.05, + "learning_rate": 4.460785310441413e-06, + "logits/chosen": -2.9390945434570312, + "logits/rejected": -1.4269840717315674, + "logps/chosen": -467.01654052734375, + "logps/rejected": -435.9761962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.204469203948975, + "rewards/margins": 12.418437957763672, + "rewards/rejected": -16.622905731201172, + "step": 13207 + }, + { + "epoch": 2.05, + "learning_rate": 4.460051869910265e-06, + "logits/chosen": -2.759228229522705, + "logits/rejected": -2.922473430633545, + "logps/chosen": -158.49639892578125, + "logps/rejected": -233.7111358642578, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.063960075378418, + "rewards/margins": 7.63054084777832, + "rewards/rejected": -15.694500923156738, + "step": 13208 + }, + { + "epoch": 2.05, + "learning_rate": 4.459318429379117e-06, + "logits/chosen": -2.652348518371582, + "logits/rejected": -3.0204343795776367, + "logps/chosen": -584.4478149414062, + "logps/rejected": -703.9468994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.181188106536865, + "rewards/margins": 15.087766647338867, + "rewards/rejected": -19.26895523071289, + "step": 13209 + }, + { + "epoch": 2.05, + "learning_rate": 4.45858498884797e-06, + "logits/chosen": -1.6571917533874512, + "logits/rejected": -2.7484750747680664, + "logps/chosen": -160.01263427734375, + "logps/rejected": -435.82330322265625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.119717597961426, + "rewards/margins": 9.398921966552734, + "rewards/rejected": -16.518638610839844, + "step": 13210 + }, + { + "epoch": 2.05, + "learning_rate": 4.457851548316822e-06, + "logits/chosen": -2.1125686168670654, + "logits/rejected": -2.779010772705078, + "logps/chosen": -358.35589599609375, + "logps/rejected": -410.35345458984375, + "loss": 0.0828, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.471853733062744, + "rewards/margins": 5.1957106590271, + "rewards/rejected": -11.667564392089844, + "step": 13211 + }, + { + "epoch": 2.05, + "learning_rate": 4.457118107785674e-06, + "logits/chosen": -1.8201285600662231, + "logits/rejected": -2.9588372707366943, + "logps/chosen": -197.07113647460938, + "logps/rejected": -452.59307861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6042675971984863, + "rewards/margins": 13.671308517456055, + "rewards/rejected": -16.275577545166016, + "step": 13212 + }, + { + "epoch": 2.05, + "learning_rate": 4.4563846672545255e-06, + "logits/chosen": -1.3713972568511963, + "logits/rejected": -2.165961503982544, + "logps/chosen": -135.4967041015625, + "logps/rejected": -354.416259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.192507743835449, + "rewards/margins": 11.63121223449707, + "rewards/rejected": -17.823719024658203, + "step": 13213 + }, + { + "epoch": 2.06, + "learning_rate": 4.455651226723378e-06, + "logits/chosen": -3.0980591773986816, + "logits/rejected": -2.464078426361084, + "logps/chosen": -165.42230224609375, + "logps/rejected": -191.81033325195312, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.045244216918945, + "rewards/margins": 7.256524085998535, + "rewards/rejected": -12.30176830291748, + "step": 13214 + }, + { + "epoch": 2.06, + "learning_rate": 4.45491778619223e-06, + "logits/chosen": -2.8361105918884277, + "logits/rejected": -2.2175800800323486, + "logps/chosen": -630.6146240234375, + "logps/rejected": -605.1289672851562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.819912910461426, + "rewards/margins": 11.130926132202148, + "rewards/rejected": -18.950838088989258, + "step": 13215 + }, + { + "epoch": 2.06, + "learning_rate": 4.454184345661082e-06, + "logits/chosen": -2.912536859512329, + "logits/rejected": -2.023885488510132, + "logps/chosen": -273.4971618652344, + "logps/rejected": -447.12164306640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.157700538635254, + "rewards/margins": 11.699287414550781, + "rewards/rejected": -20.85698699951172, + "step": 13216 + }, + { + "epoch": 2.06, + "learning_rate": 4.453450905129934e-06, + "logits/chosen": -2.4669928550720215, + "logits/rejected": -2.979504108428955, + "logps/chosen": -52.30531311035156, + "logps/rejected": -222.80311584472656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7416818141937256, + "rewards/margins": 10.47362232208252, + "rewards/rejected": -14.215304374694824, + "step": 13217 + }, + { + "epoch": 2.06, + "learning_rate": 4.452717464598786e-06, + "logits/chosen": -1.1850452423095703, + "logits/rejected": -2.3920164108276367, + "logps/chosen": -168.00711059570312, + "logps/rejected": -364.05035400390625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.760025024414062, + "rewards/margins": 8.934231758117676, + "rewards/rejected": -19.694255828857422, + "step": 13218 + }, + { + "epoch": 2.06, + "learning_rate": 4.4519840240676385e-06, + "logits/chosen": -1.8580117225646973, + "logits/rejected": -2.464043378829956, + "logps/chosen": -204.63510131835938, + "logps/rejected": -421.84027099609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.384981155395508, + "rewards/margins": 10.859272003173828, + "rewards/rejected": -19.24425506591797, + "step": 13219 + }, + { + "epoch": 2.06, + "learning_rate": 4.45125058353649e-06, + "logits/chosen": -2.32338809967041, + "logits/rejected": -1.6432688236236572, + "logps/chosen": -399.5999755859375, + "logps/rejected": -418.16021728515625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.006886959075928, + "rewards/margins": 12.572068214416504, + "rewards/rejected": -18.578954696655273, + "step": 13220 + }, + { + "epoch": 2.06, + "learning_rate": 4.450517143005342e-06, + "logits/chosen": -2.036896228790283, + "logits/rejected": -2.5430235862731934, + "logps/chosen": -421.4324951171875, + "logps/rejected": -469.5003356933594, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.756207466125488, + "rewards/margins": 7.606930255889893, + "rewards/rejected": -16.36313819885254, + "step": 13221 + }, + { + "epoch": 2.06, + "learning_rate": 4.449783702474194e-06, + "logits/chosen": -2.603193998336792, + "logits/rejected": -1.7734209299087524, + "logps/chosen": -273.1958923339844, + "logps/rejected": -421.09295654296875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.404682159423828, + "rewards/margins": 10.227685928344727, + "rewards/rejected": -14.632368087768555, + "step": 13222 + }, + { + "epoch": 2.06, + "learning_rate": 4.449050261943047e-06, + "logits/chosen": -3.1073594093322754, + "logits/rejected": -3.0442800521850586, + "logps/chosen": -197.22032165527344, + "logps/rejected": -396.56146240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.123171329498291, + "rewards/margins": 11.59830093383789, + "rewards/rejected": -15.721471786499023, + "step": 13223 + }, + { + "epoch": 2.06, + "learning_rate": 4.448316821411899e-06, + "logits/chosen": -2.98689603805542, + "logits/rejected": -2.3449103832244873, + "logps/chosen": -208.06703186035156, + "logps/rejected": -260.5098876953125, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.890944480895996, + "rewards/margins": 8.586585998535156, + "rewards/rejected": -14.477530479431152, + "step": 13224 + }, + { + "epoch": 2.06, + "learning_rate": 4.4475833808807515e-06, + "logits/chosen": -2.7189929485321045, + "logits/rejected": -2.8993592262268066, + "logps/chosen": -172.09085083007812, + "logps/rejected": -406.19610595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.071300506591797, + "rewards/margins": 12.951091766357422, + "rewards/rejected": -22.02239227294922, + "step": 13225 + }, + { + "epoch": 2.06, + "learning_rate": 4.446849940349603e-06, + "logits/chosen": -2.2265632152557373, + "logits/rejected": -2.9012441635131836, + "logps/chosen": -398.6390075683594, + "logps/rejected": -560.075927734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.08160400390625, + "rewards/margins": 9.474494934082031, + "rewards/rejected": -13.556098937988281, + "step": 13226 + }, + { + "epoch": 2.06, + "learning_rate": 4.446116499818455e-06, + "logits/chosen": -2.677182912826538, + "logits/rejected": -2.9158711433410645, + "logps/chosen": -710.848388671875, + "logps/rejected": -700.35107421875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.695854187011719, + "rewards/margins": 7.702748775482178, + "rewards/rejected": -18.398601531982422, + "step": 13227 + }, + { + "epoch": 2.06, + "learning_rate": 4.445383059287308e-06, + "logits/chosen": -2.5774717330932617, + "logits/rejected": -0.9289470911026001, + "logps/chosen": -393.8439025878906, + "logps/rejected": -374.7891845703125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.990948677062988, + "rewards/margins": 10.0593843460083, + "rewards/rejected": -18.05033302307129, + "step": 13228 + }, + { + "epoch": 2.06, + "learning_rate": 4.44464961875616e-06, + "logits/chosen": -2.6276121139526367, + "logits/rejected": -2.9859862327575684, + "logps/chosen": -806.022216796875, + "logps/rejected": -1105.672119140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.1381196975708, + "rewards/margins": 8.739557266235352, + "rewards/rejected": -18.877676010131836, + "step": 13229 + }, + { + "epoch": 2.06, + "learning_rate": 4.443916178225012e-06, + "logits/chosen": -2.7802374362945557, + "logits/rejected": -2.560361385345459, + "logps/chosen": -359.0121765136719, + "logps/rejected": -412.24609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.28507137298584, + "rewards/margins": 10.397396087646484, + "rewards/rejected": -18.68246841430664, + "step": 13230 + }, + { + "epoch": 2.06, + "learning_rate": 4.443182737693864e-06, + "logits/chosen": -2.0472586154937744, + "logits/rejected": -2.539550542831421, + "logps/chosen": -398.8958740234375, + "logps/rejected": -479.421142578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.690818786621094, + "rewards/margins": 9.158292770385742, + "rewards/rejected": -17.84911346435547, + "step": 13231 + }, + { + "epoch": 2.06, + "learning_rate": 4.442449297162716e-06, + "logits/chosen": -2.5106918811798096, + "logits/rejected": -2.7689170837402344, + "logps/chosen": -374.4241027832031, + "logps/rejected": -588.6831665039062, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.2117280960083, + "rewards/margins": 10.505453109741211, + "rewards/rejected": -18.717182159423828, + "step": 13232 + }, + { + "epoch": 2.06, + "learning_rate": 4.441715856631568e-06, + "logits/chosen": -2.593289375305176, + "logits/rejected": -2.6373181343078613, + "logps/chosen": -255.49099731445312, + "logps/rejected": -463.8641662597656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.059261322021484, + "rewards/margins": 13.222305297851562, + "rewards/rejected": -20.281566619873047, + "step": 13233 + }, + { + "epoch": 2.06, + "learning_rate": 4.44098241610042e-06, + "logits/chosen": -2.809156894683838, + "logits/rejected": -1.665808081626892, + "logps/chosen": -514.6685180664062, + "logps/rejected": -384.31060791015625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.014320373535156, + "rewards/margins": 7.415163993835449, + "rewards/rejected": -15.429484367370605, + "step": 13234 + }, + { + "epoch": 2.06, + "learning_rate": 4.440248975569272e-06, + "logits/chosen": -0.5943976044654846, + "logits/rejected": -2.418581962585449, + "logps/chosen": -126.53015899658203, + "logps/rejected": -534.7399291992188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.80978775024414, + "rewards/margins": 13.627986907958984, + "rewards/rejected": -22.437774658203125, + "step": 13235 + }, + { + "epoch": 2.06, + "learning_rate": 4.439515535038125e-06, + "logits/chosen": -2.6318092346191406, + "logits/rejected": -1.981998324394226, + "logps/chosen": -210.56491088867188, + "logps/rejected": -391.2319641113281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.142403602600098, + "rewards/margins": 11.279317855834961, + "rewards/rejected": -19.421720504760742, + "step": 13236 + }, + { + "epoch": 2.06, + "learning_rate": 4.4387820945069766e-06, + "logits/chosen": -2.2483949661254883, + "logits/rejected": -3.036750555038452, + "logps/chosen": -553.0524291992188, + "logps/rejected": -621.8195190429688, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.820374488830566, + "rewards/margins": 7.023542404174805, + "rewards/rejected": -13.843915939331055, + "step": 13237 + }, + { + "epoch": 2.06, + "learning_rate": 4.4380486539758284e-06, + "logits/chosen": -2.976088285446167, + "logits/rejected": -3.0546557903289795, + "logps/chosen": -72.79217529296875, + "logps/rejected": -233.125244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3974103927612305, + "rewards/margins": 11.544450759887695, + "rewards/rejected": -16.94186019897461, + "step": 13238 + }, + { + "epoch": 2.06, + "learning_rate": 4.43731521344468e-06, + "logits/chosen": -2.9214468002319336, + "logits/rejected": -2.0583112239837646, + "logps/chosen": -147.640869140625, + "logps/rejected": -199.9715576171875, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.012617588043213, + "rewards/margins": 7.1449666023254395, + "rewards/rejected": -12.157584190368652, + "step": 13239 + }, + { + "epoch": 2.06, + "learning_rate": 4.436581772913532e-06, + "logits/chosen": -2.7251040935516357, + "logits/rejected": -1.648093342781067, + "logps/chosen": -263.78228759765625, + "logps/rejected": -166.30274963378906, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.691572189331055, + "rewards/margins": 5.96710205078125, + "rewards/rejected": -12.658674240112305, + "step": 13240 + }, + { + "epoch": 2.06, + "learning_rate": 4.435848332382385e-06, + "logits/chosen": -2.6894431114196777, + "logits/rejected": -1.981892704963684, + "logps/chosen": -452.14697265625, + "logps/rejected": -392.4492492675781, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.44550895690918, + "rewards/margins": 9.365983963012695, + "rewards/rejected": -18.811492919921875, + "step": 13241 + }, + { + "epoch": 2.06, + "learning_rate": 4.435114891851238e-06, + "logits/chosen": -2.7176711559295654, + "logits/rejected": -2.85024094581604, + "logps/chosen": -129.43508911132812, + "logps/rejected": -285.7042236328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.140923500061035, + "rewards/margins": 10.208791732788086, + "rewards/rejected": -16.349716186523438, + "step": 13242 + }, + { + "epoch": 2.06, + "learning_rate": 4.4343814513200895e-06, + "logits/chosen": -2.9275801181793213, + "logits/rejected": -2.824657678604126, + "logps/chosen": -355.0650329589844, + "logps/rejected": -202.15692138671875, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.302323818206787, + "rewards/margins": 5.940716743469238, + "rewards/rejected": -11.243040084838867, + "step": 13243 + }, + { + "epoch": 2.06, + "learning_rate": 4.433648010788941e-06, + "logits/chosen": -1.1972547769546509, + "logits/rejected": -2.7892510890960693, + "logps/chosen": -307.1550598144531, + "logps/rejected": -625.305908203125, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.727582931518555, + "rewards/margins": 7.688427448272705, + "rewards/rejected": -18.416011810302734, + "step": 13244 + }, + { + "epoch": 2.06, + "learning_rate": 4.432914570257794e-06, + "logits/chosen": -2.320964813232422, + "logits/rejected": -2.4457995891571045, + "logps/chosen": -210.83535766601562, + "logps/rejected": -327.2546691894531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.316032409667969, + "rewards/margins": 9.25944995880127, + "rewards/rejected": -18.575481414794922, + "step": 13245 + }, + { + "epoch": 2.06, + "learning_rate": 4.432181129726646e-06, + "logits/chosen": -1.547196865081787, + "logits/rejected": -2.753716468811035, + "logps/chosen": -266.0657043457031, + "logps/rejected": -634.1495361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.769094944000244, + "rewards/margins": 15.312677383422852, + "rewards/rejected": -21.081771850585938, + "step": 13246 + }, + { + "epoch": 2.06, + "learning_rate": 4.431447689195498e-06, + "logits/chosen": -2.403643846511841, + "logits/rejected": -2.657681465148926, + "logps/chosen": -214.9189453125, + "logps/rejected": -359.0776672363281, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.330048561096191, + "rewards/margins": 7.878618240356445, + "rewards/rejected": -19.208667755126953, + "step": 13247 + }, + { + "epoch": 2.06, + "learning_rate": 4.43071424866435e-06, + "logits/chosen": -2.6914408206939697, + "logits/rejected": -2.7967746257781982, + "logps/chosen": -276.86297607421875, + "logps/rejected": -338.17803955078125, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.40966796875, + "rewards/margins": 6.780832767486572, + "rewards/rejected": -18.190500259399414, + "step": 13248 + }, + { + "epoch": 2.06, + "learning_rate": 4.429980808133202e-06, + "logits/chosen": -2.7101993560791016, + "logits/rejected": -2.9655637741088867, + "logps/chosen": -109.43418884277344, + "logps/rejected": -414.45147705078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.097020626068115, + "rewards/margins": 11.562976837158203, + "rewards/rejected": -18.659997940063477, + "step": 13249 + }, + { + "epoch": 2.06, + "learning_rate": 4.429247367602054e-06, + "logits/chosen": -2.860226631164551, + "logits/rejected": -2.5141494274139404, + "logps/chosen": -297.32000732421875, + "logps/rejected": -317.86181640625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.394100666046143, + "rewards/margins": 6.929870128631592, + "rewards/rejected": -14.323970794677734, + "step": 13250 + }, + { + "epoch": 2.06, + "learning_rate": 4.428513927070906e-06, + "logits/chosen": -1.7291277647018433, + "logits/rejected": -1.9081615209579468, + "logps/chosen": -519.9600219726562, + "logps/rejected": -596.8382568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.777193069458008, + "rewards/margins": 14.667142868041992, + "rewards/rejected": -21.4443359375, + "step": 13251 + }, + { + "epoch": 2.06, + "learning_rate": 4.427780486539758e-06, + "logits/chosen": -2.3305108547210693, + "logits/rejected": -2.834010124206543, + "logps/chosen": -92.78713989257812, + "logps/rejected": -273.2758483886719, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.573204040527344, + "rewards/margins": 7.675400733947754, + "rewards/rejected": -13.248604774475098, + "step": 13252 + }, + { + "epoch": 2.06, + "learning_rate": 4.42704704600861e-06, + "logits/chosen": -3.146909475326538, + "logits/rejected": -2.8714098930358887, + "logps/chosen": -162.7414093017578, + "logps/rejected": -377.9385986328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.670529842376709, + "rewards/margins": 10.728368759155273, + "rewards/rejected": -15.39889907836914, + "step": 13253 + }, + { + "epoch": 2.06, + "learning_rate": 4.426313605477463e-06, + "logits/chosen": -2.269469738006592, + "logits/rejected": -2.8636322021484375, + "logps/chosen": -202.27223205566406, + "logps/rejected": -376.490234375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.846315860748291, + "rewards/margins": 8.05482292175293, + "rewards/rejected": -14.901138305664062, + "step": 13254 + }, + { + "epoch": 2.06, + "learning_rate": 4.425580164946315e-06, + "logits/chosen": -0.9037368297576904, + "logits/rejected": -2.3772361278533936, + "logps/chosen": -152.89987182617188, + "logps/rejected": -361.96759033203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.749425888061523, + "rewards/margins": 9.818109512329102, + "rewards/rejected": -19.567535400390625, + "step": 13255 + }, + { + "epoch": 2.06, + "learning_rate": 4.4248467244151665e-06, + "logits/chosen": -1.3189733028411865, + "logits/rejected": -2.799450159072876, + "logps/chosen": -134.65191650390625, + "logps/rejected": -521.0511474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.477581977844238, + "rewards/margins": 13.083745956420898, + "rewards/rejected": -19.561328887939453, + "step": 13256 + }, + { + "epoch": 2.06, + "learning_rate": 4.424113283884018e-06, + "logits/chosen": -1.19830322265625, + "logits/rejected": -2.643538475036621, + "logps/chosen": -170.90467834472656, + "logps/rejected": -410.5428466796875, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.916291236877441, + "rewards/margins": 5.858498573303223, + "rewards/rejected": -13.774789810180664, + "step": 13257 + }, + { + "epoch": 2.06, + "learning_rate": 4.423379843352871e-06, + "logits/chosen": -1.3576881885528564, + "logits/rejected": -2.8605451583862305, + "logps/chosen": -147.23550415039062, + "logps/rejected": -445.4417419433594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.80420446395874, + "rewards/margins": 8.914633750915527, + "rewards/rejected": -16.71883773803711, + "step": 13258 + }, + { + "epoch": 2.06, + "learning_rate": 4.422646402821724e-06, + "logits/chosen": -2.8531689643859863, + "logits/rejected": -2.280944585800171, + "logps/chosen": -500.39892578125, + "logps/rejected": -441.5128173828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.450448036193848, + "rewards/margins": 10.48582649230957, + "rewards/rejected": -16.936275482177734, + "step": 13259 + }, + { + "epoch": 2.06, + "learning_rate": 4.421912962290576e-06, + "logits/chosen": -2.0731823444366455, + "logits/rejected": -3.0287253856658936, + "logps/chosen": -202.75335693359375, + "logps/rejected": -549.0490112304688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.687919616699219, + "rewards/margins": 12.380125045776367, + "rewards/rejected": -19.068044662475586, + "step": 13260 + }, + { + "epoch": 2.06, + "learning_rate": 4.4211795217594276e-06, + "logits/chosen": -2.736323356628418, + "logits/rejected": -2.682551145553589, + "logps/chosen": -227.5103759765625, + "logps/rejected": -387.22900390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.360243797302246, + "rewards/margins": 11.916156768798828, + "rewards/rejected": -19.27640151977539, + "step": 13261 + }, + { + "epoch": 2.06, + "learning_rate": 4.4204460812282794e-06, + "logits/chosen": -2.705723285675049, + "logits/rejected": -2.1494696140289307, + "logps/chosen": -280.1769104003906, + "logps/rejected": -332.7408142089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.96419620513916, + "rewards/margins": 10.101364135742188, + "rewards/rejected": -18.06555938720703, + "step": 13262 + }, + { + "epoch": 2.06, + "learning_rate": 4.419712640697132e-06, + "logits/chosen": -3.0208303928375244, + "logits/rejected": -3.1624460220336914, + "logps/chosen": -132.9884033203125, + "logps/rejected": -181.98983764648438, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.882518768310547, + "rewards/margins": 7.322901248931885, + "rewards/rejected": -14.205419540405273, + "step": 13263 + }, + { + "epoch": 2.06, + "learning_rate": 4.418979200165984e-06, + "logits/chosen": -2.895279884338379, + "logits/rejected": -2.2250730991363525, + "logps/chosen": -383.0914306640625, + "logps/rejected": -280.55584716796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.034587860107422, + "rewards/margins": 8.24100112915039, + "rewards/rejected": -16.275588989257812, + "step": 13264 + }, + { + "epoch": 2.06, + "learning_rate": 4.418245759634836e-06, + "logits/chosen": -0.5650091171264648, + "logits/rejected": -3.0494792461395264, + "logps/chosen": -153.66226196289062, + "logps/rejected": -597.821533203125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.727080345153809, + "rewards/margins": 8.151872634887695, + "rewards/rejected": -14.87895393371582, + "step": 13265 + }, + { + "epoch": 2.06, + "learning_rate": 4.417512319103688e-06, + "logits/chosen": -2.011253595352173, + "logits/rejected": -2.927885055541992, + "logps/chosen": -113.25941467285156, + "logps/rejected": -355.24298095703125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.602651119232178, + "rewards/margins": 9.51028060913086, + "rewards/rejected": -15.112932205200195, + "step": 13266 + }, + { + "epoch": 2.06, + "learning_rate": 4.41677887857254e-06, + "logits/chosen": -2.4040470123291016, + "logits/rejected": -2.5250234603881836, + "logps/chosen": -464.40362548828125, + "logps/rejected": -548.744873046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.136236667633057, + "rewards/margins": 9.55645751953125, + "rewards/rejected": -15.692694664001465, + "step": 13267 + }, + { + "epoch": 2.06, + "learning_rate": 4.416045438041392e-06, + "logits/chosen": -0.8163347244262695, + "logits/rejected": -2.4534127712249756, + "logps/chosen": -163.59483337402344, + "logps/rejected": -430.28009033203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.383355140686035, + "rewards/margins": 8.513924598693848, + "rewards/rejected": -14.897279739379883, + "step": 13268 + }, + { + "epoch": 2.06, + "learning_rate": 4.415311997510244e-06, + "logits/chosen": -2.6194746494293213, + "logits/rejected": -2.9183871746063232, + "logps/chosen": -98.1261215209961, + "logps/rejected": -284.032958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.332089424133301, + "rewards/margins": 11.373080253601074, + "rewards/rejected": -17.705169677734375, + "step": 13269 + }, + { + "epoch": 2.06, + "learning_rate": 4.414578556979096e-06, + "logits/chosen": -2.932084083557129, + "logits/rejected": -2.951566696166992, + "logps/chosen": -310.77093505859375, + "logps/rejected": -490.2401123046875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.005823135375977, + "rewards/margins": 5.556957721710205, + "rewards/rejected": -14.562780380249023, + "step": 13270 + }, + { + "epoch": 2.06, + "learning_rate": 4.413845116447948e-06, + "logits/chosen": -2.7659783363342285, + "logits/rejected": -2.474977731704712, + "logps/chosen": -380.1290283203125, + "logps/rejected": -454.35833740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.018764495849609, + "rewards/margins": 11.16287899017334, + "rewards/rejected": -17.181644439697266, + "step": 13271 + }, + { + "epoch": 2.06, + "learning_rate": 4.413111675916801e-06, + "logits/chosen": -2.429135799407959, + "logits/rejected": -2.8264987468719482, + "logps/chosen": -278.9154052734375, + "logps/rejected": -304.9134521484375, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.452792167663574, + "rewards/margins": 6.755158424377441, + "rewards/rejected": -15.207950592041016, + "step": 13272 + }, + { + "epoch": 2.06, + "learning_rate": 4.412378235385653e-06, + "logits/chosen": -2.653768539428711, + "logits/rejected": -1.9156845808029175, + "logps/chosen": -245.36932373046875, + "logps/rejected": -224.48388671875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.946489334106445, + "rewards/margins": 7.238365173339844, + "rewards/rejected": -13.184854507446289, + "step": 13273 + }, + { + "epoch": 2.06, + "learning_rate": 4.4116447948545045e-06, + "logits/chosen": -2.809922218322754, + "logits/rejected": -3.0538644790649414, + "logps/chosen": -230.19740295410156, + "logps/rejected": -393.1495056152344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.776943206787109, + "rewards/margins": 9.192605972290039, + "rewards/rejected": -15.969549179077148, + "step": 13274 + }, + { + "epoch": 2.06, + "learning_rate": 4.410911354323357e-06, + "logits/chosen": -2.6257028579711914, + "logits/rejected": -2.7951653003692627, + "logps/chosen": -452.54119873046875, + "logps/rejected": -691.357177734375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.12950325012207, + "rewards/margins": 7.452888011932373, + "rewards/rejected": -17.5823917388916, + "step": 13275 + }, + { + "epoch": 2.06, + "learning_rate": 4.410177913792209e-06, + "logits/chosen": -2.5821876525878906, + "logits/rejected": -2.843903064727783, + "logps/chosen": -240.34376525878906, + "logps/rejected": -673.551025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.211640357971191, + "rewards/margins": 13.496699333190918, + "rewards/rejected": -21.70833969116211, + "step": 13276 + }, + { + "epoch": 2.06, + "learning_rate": 4.409444473261062e-06, + "logits/chosen": -1.8949846029281616, + "logits/rejected": -2.7605667114257812, + "logps/chosen": -254.16796875, + "logps/rejected": -441.5482177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.43224573135376, + "rewards/margins": 10.086174964904785, + "rewards/rejected": -16.518421173095703, + "step": 13277 + }, + { + "epoch": 2.07, + "learning_rate": 4.408711032729914e-06, + "logits/chosen": -2.8872146606445312, + "logits/rejected": -2.6445791721343994, + "logps/chosen": -530.78271484375, + "logps/rejected": -467.85906982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.867303848266602, + "rewards/margins": 14.654436111450195, + "rewards/rejected": -22.521739959716797, + "step": 13278 + }, + { + "epoch": 2.07, + "learning_rate": 4.407977592198766e-06, + "logits/chosen": -3.120847225189209, + "logits/rejected": -1.756866216659546, + "logps/chosen": -588.6231079101562, + "logps/rejected": -371.9465026855469, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.815313816070557, + "rewards/margins": 7.149460792541504, + "rewards/rejected": -12.964775085449219, + "step": 13279 + }, + { + "epoch": 2.07, + "learning_rate": 4.4072441516676175e-06, + "logits/chosen": -2.8334577083587646, + "logits/rejected": -1.084824562072754, + "logps/chosen": -373.8663330078125, + "logps/rejected": -289.8565979003906, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.066047668457031, + "rewards/margins": 10.083699226379395, + "rewards/rejected": -16.14974594116211, + "step": 13280 + }, + { + "epoch": 2.07, + "learning_rate": 4.40651071113647e-06, + "logits/chosen": -2.7529456615448, + "logits/rejected": -3.011561870574951, + "logps/chosen": -223.07864379882812, + "logps/rejected": -488.699462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.639184474945068, + "rewards/margins": 10.362796783447266, + "rewards/rejected": -18.001981735229492, + "step": 13281 + }, + { + "epoch": 2.07, + "learning_rate": 4.405777270605322e-06, + "logits/chosen": -2.8376576900482178, + "logits/rejected": -2.9017674922943115, + "logps/chosen": -746.963134765625, + "logps/rejected": -622.98681640625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.795351982116699, + "rewards/margins": 7.26971435546875, + "rewards/rejected": -13.06506633758545, + "step": 13282 + }, + { + "epoch": 2.07, + "learning_rate": 4.405043830074174e-06, + "logits/chosen": -1.5885833501815796, + "logits/rejected": -2.502042055130005, + "logps/chosen": -157.7954864501953, + "logps/rejected": -561.7577514648438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.425624847412109, + "rewards/margins": 18.146102905273438, + "rewards/rejected": -25.571727752685547, + "step": 13283 + }, + { + "epoch": 2.07, + "learning_rate": 4.404310389543026e-06, + "logits/chosen": -0.9322367310523987, + "logits/rejected": -1.8443174362182617, + "logps/chosen": -139.00396728515625, + "logps/rejected": -328.8180847167969, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.002349853515625, + "rewards/margins": 11.08975601196289, + "rewards/rejected": -18.092105865478516, + "step": 13284 + }, + { + "epoch": 2.07, + "learning_rate": 4.4035769490118786e-06, + "logits/chosen": -2.647143602371216, + "logits/rejected": -2.3800907135009766, + "logps/chosen": -437.1676025390625, + "logps/rejected": -361.19677734375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.70919132232666, + "rewards/margins": 7.605445861816406, + "rewards/rejected": -13.314637184143066, + "step": 13285 + }, + { + "epoch": 2.07, + "learning_rate": 4.4028435084807304e-06, + "logits/chosen": -2.824282646179199, + "logits/rejected": -3.0814976692199707, + "logps/chosen": -118.54993438720703, + "logps/rejected": -446.91278076171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6416015625, + "rewards/margins": 10.253772735595703, + "rewards/rejected": -15.895374298095703, + "step": 13286 + }, + { + "epoch": 2.07, + "learning_rate": 4.402110067949582e-06, + "logits/chosen": -2.7587268352508545, + "logits/rejected": -2.470282554626465, + "logps/chosen": -221.15243530273438, + "logps/rejected": -266.9535217285156, + "loss": 0.1154, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.006052017211914, + "rewards/margins": 6.519955635070801, + "rewards/rejected": -16.5260066986084, + "step": 13287 + }, + { + "epoch": 2.07, + "learning_rate": 4.401376627418434e-06, + "logits/chosen": -1.6202552318572998, + "logits/rejected": -2.776754379272461, + "logps/chosen": -234.25448608398438, + "logps/rejected": -283.4120178222656, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.009951591491699, + "rewards/margins": 9.29206657409668, + "rewards/rejected": -16.302017211914062, + "step": 13288 + }, + { + "epoch": 2.07, + "learning_rate": 4.400643186887286e-06, + "logits/chosen": -3.030216693878174, + "logits/rejected": -2.9855189323425293, + "logps/chosen": -156.454345703125, + "logps/rejected": -241.0403289794922, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6384735107421875, + "rewards/margins": 9.26844596862793, + "rewards/rejected": -15.906919479370117, + "step": 13289 + }, + { + "epoch": 2.07, + "learning_rate": 4.399909746356139e-06, + "logits/chosen": -2.3623178005218506, + "logits/rejected": -2.5811355113983154, + "logps/chosen": -379.6943664550781, + "logps/rejected": -389.62176513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.867398262023926, + "rewards/margins": 10.35532283782959, + "rewards/rejected": -15.222721099853516, + "step": 13290 + }, + { + "epoch": 2.07, + "learning_rate": 4.399176305824991e-06, + "logits/chosen": -2.253300666809082, + "logits/rejected": -3.094909906387329, + "logps/chosen": -220.47988891601562, + "logps/rejected": -449.19232177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3710455894470215, + "rewards/margins": 10.40703296661377, + "rewards/rejected": -14.77807903289795, + "step": 13291 + }, + { + "epoch": 2.07, + "learning_rate": 4.398442865293843e-06, + "logits/chosen": -1.664597749710083, + "logits/rejected": -2.4970149993896484, + "logps/chosen": -290.961181640625, + "logps/rejected": -417.62542724609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.83379602432251, + "rewards/margins": 9.160758972167969, + "rewards/rejected": -14.99455451965332, + "step": 13292 + }, + { + "epoch": 2.07, + "learning_rate": 4.397709424762695e-06, + "logits/chosen": -2.7099385261535645, + "logits/rejected": -2.2446377277374268, + "logps/chosen": -334.20361328125, + "logps/rejected": -488.208251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.765193939208984, + "rewards/margins": 11.736442565917969, + "rewards/rejected": -20.501636505126953, + "step": 13293 + }, + { + "epoch": 2.07, + "learning_rate": 4.396975984231548e-06, + "logits/chosen": -2.7964272499084473, + "logits/rejected": -2.253931760787964, + "logps/chosen": -272.5685729980469, + "logps/rejected": -272.0119323730469, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.620120525360107, + "rewards/margins": 7.305095672607422, + "rewards/rejected": -12.925216674804688, + "step": 13294 + }, + { + "epoch": 2.07, + "learning_rate": 4.3962425437004e-06, + "logits/chosen": -2.6110270023345947, + "logits/rejected": -1.9008269309997559, + "logps/chosen": -174.46678161621094, + "logps/rejected": -316.5269775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.284024238586426, + "rewards/margins": 11.022539138793945, + "rewards/rejected": -17.306562423706055, + "step": 13295 + }, + { + "epoch": 2.07, + "learning_rate": 4.395509103169252e-06, + "logits/chosen": -2.7531895637512207, + "logits/rejected": -1.0304194688796997, + "logps/chosen": -284.3916931152344, + "logps/rejected": -418.88775634765625, + "loss": 0.2726, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.421836853027344, + "rewards/margins": 9.617263793945312, + "rewards/rejected": -16.039100646972656, + "step": 13296 + }, + { + "epoch": 2.07, + "learning_rate": 4.394775662638104e-06, + "logits/chosen": -2.677902936935425, + "logits/rejected": -3.09187912940979, + "logps/chosen": -128.9518585205078, + "logps/rejected": -209.86407470703125, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.255220413208008, + "rewards/margins": 7.4560933113098145, + "rewards/rejected": -14.711313247680664, + "step": 13297 + }, + { + "epoch": 2.07, + "learning_rate": 4.3940422221069555e-06, + "logits/chosen": -2.7545511722564697, + "logits/rejected": -1.9717952013015747, + "logps/chosen": -276.4192810058594, + "logps/rejected": -278.42901611328125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.4055814743042, + "rewards/margins": 7.287607192993164, + "rewards/rejected": -15.693187713623047, + "step": 13298 + }, + { + "epoch": 2.07, + "learning_rate": 4.393308781575808e-06, + "logits/chosen": -3.1187822818756104, + "logits/rejected": -2.7129740715026855, + "logps/chosen": -141.40968322753906, + "logps/rejected": -244.12445068359375, + "loss": 0.0365, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.864616870880127, + "rewards/margins": 7.383746147155762, + "rewards/rejected": -12.24836254119873, + "step": 13299 + }, + { + "epoch": 2.07, + "learning_rate": 4.39257534104466e-06, + "logits/chosen": -1.1129082441329956, + "logits/rejected": -2.769714593887329, + "logps/chosen": -95.94815063476562, + "logps/rejected": -398.17401123046875, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.504952430725098, + "rewards/margins": 10.421524047851562, + "rewards/rejected": -17.926475524902344, + "step": 13300 + }, + { + "epoch": 2.07, + "learning_rate": 4.391841900513512e-06, + "logits/chosen": -2.681699514389038, + "logits/rejected": -2.1283328533172607, + "logps/chosen": -220.59451293945312, + "logps/rejected": -329.33050537109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.249686241149902, + "rewards/margins": 8.738639831542969, + "rewards/rejected": -16.988325119018555, + "step": 13301 + }, + { + "epoch": 2.07, + "learning_rate": 4.391108459982364e-06, + "logits/chosen": -2.936514139175415, + "logits/rejected": -2.562678098678589, + "logps/chosen": -347.2693786621094, + "logps/rejected": -405.607421875, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.179679870605469, + "rewards/margins": 8.323052406311035, + "rewards/rejected": -17.502731323242188, + "step": 13302 + }, + { + "epoch": 2.07, + "learning_rate": 4.390375019451217e-06, + "logits/chosen": -2.9814183712005615, + "logits/rejected": -2.7025303840637207, + "logps/chosen": -232.13229370117188, + "logps/rejected": -294.74810791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.284241676330566, + "rewards/margins": 9.952598571777344, + "rewards/rejected": -15.23684024810791, + "step": 13303 + }, + { + "epoch": 2.07, + "learning_rate": 4.3896415789200685e-06, + "logits/chosen": -2.3978540897369385, + "logits/rejected": -3.0481412410736084, + "logps/chosen": -109.75206756591797, + "logps/rejected": -254.93325805664062, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.28765344619751, + "rewards/margins": 6.826210975646973, + "rewards/rejected": -14.11386489868164, + "step": 13304 + }, + { + "epoch": 2.07, + "learning_rate": 4.38890813838892e-06, + "logits/chosen": -2.744513988494873, + "logits/rejected": -3.074979782104492, + "logps/chosen": -147.38525390625, + "logps/rejected": -315.68133544921875, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.198570251464844, + "rewards/margins": 6.333375930786133, + "rewards/rejected": -12.531946182250977, + "step": 13305 + }, + { + "epoch": 2.07, + "learning_rate": 4.388174697857772e-06, + "logits/chosen": -2.8626511096954346, + "logits/rejected": -3.09480619430542, + "logps/chosen": -96.37814331054688, + "logps/rejected": -212.545654296875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.011313438415527, + "rewards/margins": 8.97065544128418, + "rewards/rejected": -14.981968879699707, + "step": 13306 + }, + { + "epoch": 2.07, + "learning_rate": 4.387441257326624e-06, + "logits/chosen": -1.6855919361114502, + "logits/rejected": -2.676271915435791, + "logps/chosen": -123.89350891113281, + "logps/rejected": -384.718505859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8563361167907715, + "rewards/margins": 9.626401901245117, + "rewards/rejected": -14.48273754119873, + "step": 13307 + }, + { + "epoch": 2.07, + "learning_rate": 4.386707816795477e-06, + "logits/chosen": -1.591120958328247, + "logits/rejected": -2.692187786102295, + "logps/chosen": -144.18142700195312, + "logps/rejected": -441.214599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.280665397644043, + "rewards/margins": 11.818901062011719, + "rewards/rejected": -19.099567413330078, + "step": 13308 + }, + { + "epoch": 2.07, + "learning_rate": 4.38597437626433e-06, + "logits/chosen": -1.8107894659042358, + "logits/rejected": -2.647001266479492, + "logps/chosen": -211.84609985351562, + "logps/rejected": -351.9047546386719, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.74232292175293, + "rewards/margins": 8.384151458740234, + "rewards/rejected": -18.12647247314453, + "step": 13309 + }, + { + "epoch": 2.07, + "learning_rate": 4.3852409357331815e-06, + "logits/chosen": -2.611175775527954, + "logits/rejected": -2.922865867614746, + "logps/chosen": -227.2735137939453, + "logps/rejected": -461.0092468261719, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.343316078186035, + "rewards/margins": 10.639904975891113, + "rewards/rejected": -16.98322105407715, + "step": 13310 + }, + { + "epoch": 2.07, + "learning_rate": 4.384507495202033e-06, + "logits/chosen": -2.621366262435913, + "logits/rejected": -2.9832184314727783, + "logps/chosen": -231.0000457763672, + "logps/rejected": -375.88800048828125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.468389511108398, + "rewards/margins": 7.3953962326049805, + "rewards/rejected": -13.863785743713379, + "step": 13311 + }, + { + "epoch": 2.07, + "learning_rate": 4.383774054670886e-06, + "logits/chosen": -2.4661989212036133, + "logits/rejected": -3.020082712173462, + "logps/chosen": -96.32300567626953, + "logps/rejected": -228.67831420898438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.015894412994385, + "rewards/margins": 9.449023246765137, + "rewards/rejected": -14.46491813659668, + "step": 13312 + }, + { + "epoch": 2.07, + "learning_rate": 4.383040614139738e-06, + "logits/chosen": -2.697816848754883, + "logits/rejected": -2.82120418548584, + "logps/chosen": -599.2272338867188, + "logps/rejected": -762.5574951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.791499614715576, + "rewards/margins": 10.858848571777344, + "rewards/rejected": -18.650348663330078, + "step": 13313 + }, + { + "epoch": 2.07, + "learning_rate": 4.38230717360859e-06, + "logits/chosen": -2.3241803646087646, + "logits/rejected": -2.001783847808838, + "logps/chosen": -308.1281433105469, + "logps/rejected": -348.3650817871094, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.330414772033691, + "rewards/margins": 8.023542404174805, + "rewards/rejected": -14.353957176208496, + "step": 13314 + }, + { + "epoch": 2.07, + "learning_rate": 4.381573733077442e-06, + "logits/chosen": -2.8351361751556396, + "logits/rejected": -2.562539577484131, + "logps/chosen": -243.8260040283203, + "logps/rejected": -296.20770263671875, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.47872543334961, + "rewards/margins": 5.175530433654785, + "rewards/rejected": -14.654254913330078, + "step": 13315 + }, + { + "epoch": 2.07, + "learning_rate": 4.3808402925462936e-06, + "logits/chosen": -2.8430607318878174, + "logits/rejected": -2.0557796955108643, + "logps/chosen": -500.95941162109375, + "logps/rejected": -565.7322387695312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.156588077545166, + "rewards/margins": 13.107211112976074, + "rewards/rejected": -17.2637996673584, + "step": 13316 + }, + { + "epoch": 2.07, + "learning_rate": 4.380106852015146e-06, + "logits/chosen": -1.9100403785705566, + "logits/rejected": -3.0427544116973877, + "logps/chosen": -158.02639770507812, + "logps/rejected": -201.67306518554688, + "loss": 1.6986, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.769285202026367, + "rewards/margins": 2.8977231979370117, + "rewards/rejected": -9.667008399963379, + "step": 13317 + }, + { + "epoch": 2.07, + "learning_rate": 4.379373411483998e-06, + "logits/chosen": -2.910430669784546, + "logits/rejected": -2.475661516189575, + "logps/chosen": -380.8197021484375, + "logps/rejected": -653.0469970703125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.734096527099609, + "rewards/margins": 9.016733169555664, + "rewards/rejected": -14.750829696655273, + "step": 13318 + }, + { + "epoch": 2.07, + "learning_rate": 4.37863997095285e-06, + "logits/chosen": -2.7018163204193115, + "logits/rejected": -2.8085992336273193, + "logps/chosen": -312.12603759765625, + "logps/rejected": -347.48443603515625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.779250144958496, + "rewards/margins": 7.7216877937316895, + "rewards/rejected": -16.500938415527344, + "step": 13319 + }, + { + "epoch": 2.07, + "learning_rate": 4.377906530421702e-06, + "logits/chosen": -2.689283609390259, + "logits/rejected": -2.8344240188598633, + "logps/chosen": -121.18285369873047, + "logps/rejected": -414.35711669921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.684703826904297, + "rewards/margins": 13.8098783493042, + "rewards/rejected": -19.494583129882812, + "step": 13320 + }, + { + "epoch": 2.07, + "learning_rate": 4.377173089890555e-06, + "logits/chosen": -0.7273998260498047, + "logits/rejected": -2.767814874649048, + "logps/chosen": -122.87920379638672, + "logps/rejected": -406.19189453125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.060174942016602, + "rewards/margins": 8.218637466430664, + "rewards/rejected": -18.278812408447266, + "step": 13321 + }, + { + "epoch": 2.07, + "learning_rate": 4.3764396493594065e-06, + "logits/chosen": -1.111415147781372, + "logits/rejected": -2.2888400554656982, + "logps/chosen": -85.06596374511719, + "logps/rejected": -317.67095947265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.352099418640137, + "rewards/margins": 8.66042709350586, + "rewards/rejected": -15.012527465820312, + "step": 13322 + }, + { + "epoch": 2.07, + "learning_rate": 4.375706208828258e-06, + "logits/chosen": -2.636610746383667, + "logits/rejected": -2.6629140377044678, + "logps/chosen": -151.28839111328125, + "logps/rejected": -230.24374389648438, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.111084938049316, + "rewards/margins": 9.377653121948242, + "rewards/rejected": -13.488737106323242, + "step": 13323 + }, + { + "epoch": 2.07, + "learning_rate": 4.37497276829711e-06, + "logits/chosen": -1.824368953704834, + "logits/rejected": -2.6500039100646973, + "logps/chosen": -241.00894165039062, + "logps/rejected": -533.862548828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.408116340637207, + "rewards/margins": 8.475459098815918, + "rewards/rejected": -15.883575439453125, + "step": 13324 + }, + { + "epoch": 2.07, + "learning_rate": 4.374239327765963e-06, + "logits/chosen": -2.485978364944458, + "logits/rejected": -1.4098955392837524, + "logps/chosen": -269.89471435546875, + "logps/rejected": -274.03948974609375, + "loss": 0.0339, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.793871879577637, + "rewards/margins": 4.868446350097656, + "rewards/rejected": -16.66231918334961, + "step": 13325 + }, + { + "epoch": 2.07, + "learning_rate": 4.373505887234816e-06, + "logits/chosen": -2.096646547317505, + "logits/rejected": -2.6337146759033203, + "logps/chosen": -448.545654296875, + "logps/rejected": -540.9613647460938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.432819366455078, + "rewards/margins": 14.295751571655273, + "rewards/rejected": -22.72857093811035, + "step": 13326 + }, + { + "epoch": 2.07, + "learning_rate": 4.372772446703668e-06, + "logits/chosen": -2.454883575439453, + "logits/rejected": -2.987117290496826, + "logps/chosen": -271.40045166015625, + "logps/rejected": -427.3162841796875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.009363174438477, + "rewards/margins": 10.400810241699219, + "rewards/rejected": -19.410173416137695, + "step": 13327 + }, + { + "epoch": 2.07, + "learning_rate": 4.3720390061725195e-06, + "logits/chosen": -1.317867398262024, + "logits/rejected": -2.280531406402588, + "logps/chosen": -285.8365783691406, + "logps/rejected": -394.2150573730469, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.538620948791504, + "rewards/margins": 5.901838779449463, + "rewards/rejected": -15.440460205078125, + "step": 13328 + }, + { + "epoch": 2.07, + "learning_rate": 4.371305565641371e-06, + "logits/chosen": -2.0952954292297363, + "logits/rejected": -2.4238297939300537, + "logps/chosen": -284.5858459472656, + "logps/rejected": -414.69940185546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.93815803527832, + "rewards/margins": 10.694765090942383, + "rewards/rejected": -19.632923126220703, + "step": 13329 + }, + { + "epoch": 2.07, + "learning_rate": 4.370572125110224e-06, + "logits/chosen": -2.0270469188690186, + "logits/rejected": -2.60388445854187, + "logps/chosen": -285.85888671875, + "logps/rejected": -602.23779296875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.220399856567383, + "rewards/margins": 7.569468021392822, + "rewards/rejected": -17.789867401123047, + "step": 13330 + }, + { + "epoch": 2.07, + "learning_rate": 4.369838684579076e-06, + "logits/chosen": -1.0905249118804932, + "logits/rejected": -2.4610648155212402, + "logps/chosen": -227.89840698242188, + "logps/rejected": -591.9818725585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.550990104675293, + "rewards/margins": 14.084125518798828, + "rewards/rejected": -21.635116577148438, + "step": 13331 + }, + { + "epoch": 2.07, + "learning_rate": 4.369105244047928e-06, + "logits/chosen": -3.078885078430176, + "logits/rejected": -2.990908622741699, + "logps/chosen": -94.36027526855469, + "logps/rejected": -155.12359619140625, + "loss": 0.2636, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.213874816894531, + "rewards/margins": 5.386995792388916, + "rewards/rejected": -11.600871086120605, + "step": 13332 + }, + { + "epoch": 2.07, + "learning_rate": 4.36837180351678e-06, + "logits/chosen": -1.79884672164917, + "logits/rejected": -2.9353528022766113, + "logps/chosen": -169.79074096679688, + "logps/rejected": -445.1089782714844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.301955223083496, + "rewards/margins": 8.954736709594727, + "rewards/rejected": -13.256691932678223, + "step": 13333 + }, + { + "epoch": 2.07, + "learning_rate": 4.3676383629856325e-06, + "logits/chosen": -1.241710901260376, + "logits/rejected": -2.579411745071411, + "logps/chosen": -130.5194091796875, + "logps/rejected": -371.85186767578125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.596779823303223, + "rewards/margins": 9.348062515258789, + "rewards/rejected": -16.944843292236328, + "step": 13334 + }, + { + "epoch": 2.07, + "learning_rate": 4.366904922454484e-06, + "logits/chosen": -2.9109270572662354, + "logits/rejected": -2.838128089904785, + "logps/chosen": -310.03466796875, + "logps/rejected": -311.12420654296875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.252047538757324, + "rewards/margins": 10.111429214477539, + "rewards/rejected": -18.36347770690918, + "step": 13335 + }, + { + "epoch": 2.07, + "learning_rate": 4.366171481923336e-06, + "logits/chosen": -2.858797073364258, + "logits/rejected": -3.0345029830932617, + "logps/chosen": -74.68624114990234, + "logps/rejected": -225.48858642578125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.362278461456299, + "rewards/margins": 8.058460235595703, + "rewards/rejected": -12.42073917388916, + "step": 13336 + }, + { + "epoch": 2.07, + "learning_rate": 4.365438041392188e-06, + "logits/chosen": -1.642185926437378, + "logits/rejected": -2.87003231048584, + "logps/chosen": -186.29452514648438, + "logps/rejected": -411.92095947265625, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.60056734085083, + "rewards/margins": 10.709939956665039, + "rewards/rejected": -17.310508728027344, + "step": 13337 + }, + { + "epoch": 2.07, + "learning_rate": 4.36470460086104e-06, + "logits/chosen": -2.492933511734009, + "logits/rejected": -3.1083462238311768, + "logps/chosen": -61.76092529296875, + "logps/rejected": -262.0010986328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.653968811035156, + "rewards/margins": 8.326079368591309, + "rewards/rejected": -12.980047225952148, + "step": 13338 + }, + { + "epoch": 2.07, + "learning_rate": 4.363971160329893e-06, + "logits/chosen": -2.8709323406219482, + "logits/rejected": -2.897381067276001, + "logps/chosen": -467.863525390625, + "logps/rejected": -630.470458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.46873664855957, + "rewards/margins": 12.453885078430176, + "rewards/rejected": -17.922622680664062, + "step": 13339 + }, + { + "epoch": 2.07, + "learning_rate": 4.363237719798745e-06, + "logits/chosen": -1.5745923519134521, + "logits/rejected": -2.97184157371521, + "logps/chosen": -371.5064697265625, + "logps/rejected": -694.2572631835938, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.943475723266602, + "rewards/margins": 9.397915840148926, + "rewards/rejected": -18.341392517089844, + "step": 13340 + }, + { + "epoch": 2.07, + "learning_rate": 4.3625042792675965e-06, + "logits/chosen": -2.7111356258392334, + "logits/rejected": -2.3294880390167236, + "logps/chosen": -694.163818359375, + "logps/rejected": -775.4908447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.117291450500488, + "rewards/margins": 14.306289672851562, + "rewards/rejected": -20.423580169677734, + "step": 13341 + }, + { + "epoch": 2.07, + "learning_rate": 4.361770838736449e-06, + "logits/chosen": -2.957113265991211, + "logits/rejected": -2.7650856971740723, + "logps/chosen": -172.27801513671875, + "logps/rejected": -291.4189758300781, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6667327880859375, + "rewards/margins": 8.163393020629883, + "rewards/rejected": -14.83012580871582, + "step": 13342 + }, + { + "epoch": 2.08, + "learning_rate": 4.361037398205302e-06, + "logits/chosen": -2.8554985523223877, + "logits/rejected": -2.370243787765503, + "logps/chosen": -339.0433044433594, + "logps/rejected": -239.54946899414062, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.063760757446289, + "rewards/margins": 5.605703353881836, + "rewards/rejected": -12.669464111328125, + "step": 13343 + }, + { + "epoch": 2.08, + "learning_rate": 4.360303957674154e-06, + "logits/chosen": -2.819964647293091, + "logits/rejected": -2.9372079372406006, + "logps/chosen": -79.26591491699219, + "logps/rejected": -357.8228759765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.963603973388672, + "rewards/margins": 9.513638496398926, + "rewards/rejected": -13.477242469787598, + "step": 13344 + }, + { + "epoch": 2.08, + "learning_rate": 4.359570517143006e-06, + "logits/chosen": -1.4265004396438599, + "logits/rejected": -2.679621458053589, + "logps/chosen": -168.746826171875, + "logps/rejected": -402.4361572265625, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.664979934692383, + "rewards/margins": 7.189462184906006, + "rewards/rejected": -16.854442596435547, + "step": 13345 + }, + { + "epoch": 2.08, + "learning_rate": 4.3588370766118575e-06, + "logits/chosen": -2.667410135269165, + "logits/rejected": -3.0143706798553467, + "logps/chosen": -76.23970794677734, + "logps/rejected": -220.79364013671875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.603407859802246, + "rewards/margins": 7.461207389831543, + "rewards/rejected": -13.064615249633789, + "step": 13346 + }, + { + "epoch": 2.08, + "learning_rate": 4.3581036360807094e-06, + "logits/chosen": -2.57012939453125, + "logits/rejected": -2.736100196838379, + "logps/chosen": -240.0362548828125, + "logps/rejected": -261.4466552734375, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.173815727233887, + "rewards/margins": 6.6225666999816895, + "rewards/rejected": -12.796382904052734, + "step": 13347 + }, + { + "epoch": 2.08, + "learning_rate": 4.357370195549562e-06, + "logits/chosen": -3.116478204727173, + "logits/rejected": -2.8836028575897217, + "logps/chosen": -562.5147705078125, + "logps/rejected": -379.3452453613281, + "loss": 0.821, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.076188087463379, + "rewards/margins": 3.77974796295166, + "rewards/rejected": -10.855936050415039, + "step": 13348 + }, + { + "epoch": 2.08, + "learning_rate": 4.356636755018414e-06, + "logits/chosen": -2.7225019931793213, + "logits/rejected": -2.837496280670166, + "logps/chosen": -196.12298583984375, + "logps/rejected": -319.39849853515625, + "loss": 0.0331, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.717275619506836, + "rewards/margins": 5.974471092224121, + "rewards/rejected": -11.691746711730957, + "step": 13349 + }, + { + "epoch": 2.08, + "learning_rate": 4.355903314487266e-06, + "logits/chosen": -2.1390740871429443, + "logits/rejected": -3.0436925888061523, + "logps/chosen": -126.82302856445312, + "logps/rejected": -480.25518798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.622409820556641, + "rewards/margins": 12.298768997192383, + "rewards/rejected": -18.921180725097656, + "step": 13350 + }, + { + "epoch": 2.08, + "learning_rate": 4.355169873956118e-06, + "logits/chosen": -2.618479013442993, + "logits/rejected": -2.9426817893981934, + "logps/chosen": -106.74305725097656, + "logps/rejected": -362.59136962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.264770030975342, + "rewards/margins": 12.869852066040039, + "rewards/rejected": -18.13462257385254, + "step": 13351 + }, + { + "epoch": 2.08, + "learning_rate": 4.3544364334249705e-06, + "logits/chosen": -1.2696533203125, + "logits/rejected": -2.408686876296997, + "logps/chosen": -265.54791259765625, + "logps/rejected": -590.1171875, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.941515922546387, + "rewards/margins": 12.591588020324707, + "rewards/rejected": -18.533103942871094, + "step": 13352 + }, + { + "epoch": 2.08, + "learning_rate": 4.353702992893822e-06, + "logits/chosen": -2.6909165382385254, + "logits/rejected": -3.1502392292022705, + "logps/chosen": -82.43498992919922, + "logps/rejected": -300.5897216796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.669478416442871, + "rewards/margins": 10.740364074707031, + "rewards/rejected": -17.40984344482422, + "step": 13353 + }, + { + "epoch": 2.08, + "learning_rate": 4.352969552362674e-06, + "logits/chosen": -2.9249155521392822, + "logits/rejected": -2.9380455017089844, + "logps/chosen": -210.48484802246094, + "logps/rejected": -388.13311767578125, + "loss": 0.1389, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.669695854187012, + "rewards/margins": 4.311134338378906, + "rewards/rejected": -15.980830192565918, + "step": 13354 + }, + { + "epoch": 2.08, + "learning_rate": 4.352236111831526e-06, + "logits/chosen": -1.807458758354187, + "logits/rejected": -3.0060033798217773, + "logps/chosen": -348.37603759765625, + "logps/rejected": -766.7525634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.041245937347412, + "rewards/margins": 10.370658874511719, + "rewards/rejected": -15.411905288696289, + "step": 13355 + }, + { + "epoch": 2.08, + "learning_rate": 4.351502671300378e-06, + "logits/chosen": -2.1831791400909424, + "logits/rejected": -2.8300228118896484, + "logps/chosen": -231.62353515625, + "logps/rejected": -364.866455078125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.811861038208008, + "rewards/margins": 9.560596466064453, + "rewards/rejected": -14.372456550598145, + "step": 13356 + }, + { + "epoch": 2.08, + "learning_rate": 4.350769230769231e-06, + "logits/chosen": -2.242976665496826, + "logits/rejected": -3.033856153488159, + "logps/chosen": -102.23116302490234, + "logps/rejected": -396.392822265625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.919404983520508, + "rewards/margins": 7.792524337768555, + "rewards/rejected": -14.711929321289062, + "step": 13357 + }, + { + "epoch": 2.08, + "learning_rate": 4.350035790238083e-06, + "logits/chosen": -1.343071699142456, + "logits/rejected": -2.7539315223693848, + "logps/chosen": -151.0454864501953, + "logps/rejected": -513.8746337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.174454212188721, + "rewards/margins": 11.173223495483398, + "rewards/rejected": -15.347677230834961, + "step": 13358 + }, + { + "epoch": 2.08, + "learning_rate": 4.349302349706935e-06, + "logits/chosen": -2.124141216278076, + "logits/rejected": -2.903437852859497, + "logps/chosen": -249.78237915039062, + "logps/rejected": -621.821533203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.512800216674805, + "rewards/margins": 11.662871360778809, + "rewards/rejected": -17.17567253112793, + "step": 13359 + }, + { + "epoch": 2.08, + "learning_rate": 4.348568909175787e-06, + "logits/chosen": -1.3873569965362549, + "logits/rejected": -2.3818087577819824, + "logps/chosen": -285.5372314453125, + "logps/rejected": -491.3835754394531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.206768989562988, + "rewards/margins": 12.799784660339355, + "rewards/rejected": -17.006553649902344, + "step": 13360 + }, + { + "epoch": 2.08, + "learning_rate": 4.34783546864464e-06, + "logits/chosen": -2.08927845954895, + "logits/rejected": -2.6292858123779297, + "logps/chosen": -219.68313598632812, + "logps/rejected": -273.081787109375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2177300453186035, + "rewards/margins": 6.327569007873535, + "rewards/rejected": -11.545299530029297, + "step": 13361 + }, + { + "epoch": 2.08, + "learning_rate": 4.347102028113492e-06, + "logits/chosen": -2.036630868911743, + "logits/rejected": -2.854682445526123, + "logps/chosen": -259.4361572265625, + "logps/rejected": -495.1177062988281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.662513732910156, + "rewards/margins": 12.931049346923828, + "rewards/rejected": -21.593563079833984, + "step": 13362 + }, + { + "epoch": 2.08, + "learning_rate": 4.346368587582344e-06, + "logits/chosen": -2.8171653747558594, + "logits/rejected": -2.2635250091552734, + "logps/chosen": -801.84228515625, + "logps/rejected": -456.47216796875, + "loss": 0.0529, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.840479850769043, + "rewards/margins": 5.226353645324707, + "rewards/rejected": -12.06683349609375, + "step": 13363 + }, + { + "epoch": 2.08, + "learning_rate": 4.345635147051196e-06, + "logits/chosen": -2.1105988025665283, + "logits/rejected": -2.966743230819702, + "logps/chosen": -96.20912170410156, + "logps/rejected": -497.74346923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.099144458770752, + "rewards/margins": 15.030349731445312, + "rewards/rejected": -20.129493713378906, + "step": 13364 + }, + { + "epoch": 2.08, + "learning_rate": 4.3449017065200475e-06, + "logits/chosen": -2.3719170093536377, + "logits/rejected": -1.7216907739639282, + "logps/chosen": -202.8960723876953, + "logps/rejected": -216.74566650390625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.608598709106445, + "rewards/margins": 5.921689510345459, + "rewards/rejected": -14.530288696289062, + "step": 13365 + }, + { + "epoch": 2.08, + "learning_rate": 4.3441682659889e-06, + "logits/chosen": -1.504286527633667, + "logits/rejected": -2.043780565261841, + "logps/chosen": -723.7197265625, + "logps/rejected": -708.64599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.380924224853516, + "rewards/margins": 13.547727584838867, + "rewards/rejected": -22.92864990234375, + "step": 13366 + }, + { + "epoch": 2.08, + "learning_rate": 4.343434825457752e-06, + "logits/chosen": -2.952840566635132, + "logits/rejected": -2.96940016746521, + "logps/chosen": -646.8374633789062, + "logps/rejected": -607.0338134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.987364768981934, + "rewards/margins": 10.426759719848633, + "rewards/rejected": -17.41412353515625, + "step": 13367 + }, + { + "epoch": 2.08, + "learning_rate": 4.342701384926604e-06, + "logits/chosen": -2.813889741897583, + "logits/rejected": -2.934795618057251, + "logps/chosen": -322.17889404296875, + "logps/rejected": -453.192626953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.396109104156494, + "rewards/margins": 10.800579071044922, + "rewards/rejected": -16.196687698364258, + "step": 13368 + }, + { + "epoch": 2.08, + "learning_rate": 4.341967944395456e-06, + "logits/chosen": -2.7312405109405518, + "logits/rejected": -2.645817518234253, + "logps/chosen": -272.1014404296875, + "logps/rejected": -324.03656005859375, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.13183307647705, + "rewards/margins": 5.035746097564697, + "rewards/rejected": -14.167579650878906, + "step": 13369 + }, + { + "epoch": 2.08, + "learning_rate": 4.3412345038643086e-06, + "logits/chosen": -2.7305550575256348, + "logits/rejected": -2.767399311065674, + "logps/chosen": -514.8518676757812, + "logps/rejected": -558.7642211914062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.583698272705078, + "rewards/margins": 11.660789489746094, + "rewards/rejected": -17.244487762451172, + "step": 13370 + }, + { + "epoch": 2.08, + "learning_rate": 4.3405010633331604e-06, + "logits/chosen": -2.9567675590515137, + "logits/rejected": -2.324810743331909, + "logps/chosen": -354.0298156738281, + "logps/rejected": -504.96185302734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.926541805267334, + "rewards/margins": 9.419635772705078, + "rewards/rejected": -17.34617805480957, + "step": 13371 + }, + { + "epoch": 2.08, + "learning_rate": 4.339767622802012e-06, + "logits/chosen": -2.6961770057678223, + "logits/rejected": -2.668440580368042, + "logps/chosen": -228.00543212890625, + "logps/rejected": -302.5185241699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.469892501831055, + "rewards/margins": 10.817554473876953, + "rewards/rejected": -18.287446975708008, + "step": 13372 + }, + { + "epoch": 2.08, + "learning_rate": 4.339034182270864e-06, + "logits/chosen": -2.6527843475341797, + "logits/rejected": -2.8460640907287598, + "logps/chosen": -444.92437744140625, + "logps/rejected": -306.43798828125, + "loss": 0.9868, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.5770263671875, + "rewards/margins": 1.2376694679260254, + "rewards/rejected": -11.814695358276367, + "step": 13373 + }, + { + "epoch": 2.08, + "learning_rate": 4.338300741739717e-06, + "logits/chosen": -1.0440127849578857, + "logits/rejected": -2.385341167449951, + "logps/chosen": -351.66925048828125, + "logps/rejected": -495.42083740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.059240818023682, + "rewards/margins": 12.462642669677734, + "rewards/rejected": -17.521883010864258, + "step": 13374 + }, + { + "epoch": 2.08, + "learning_rate": 4.337567301208569e-06, + "logits/chosen": -1.1055550575256348, + "logits/rejected": -2.905921220779419, + "logps/chosen": -148.61549377441406, + "logps/rejected": -589.2874145507812, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.608928680419922, + "rewards/margins": 12.011907577514648, + "rewards/rejected": -20.620834350585938, + "step": 13375 + }, + { + "epoch": 2.08, + "learning_rate": 4.3368338606774215e-06, + "logits/chosen": -2.7514231204986572, + "logits/rejected": -2.817031145095825, + "logps/chosen": -195.6550750732422, + "logps/rejected": -320.4369201660156, + "loss": 0.9801, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.076040267944336, + "rewards/margins": 4.37223482131958, + "rewards/rejected": -15.448274612426758, + "step": 13376 + }, + { + "epoch": 2.08, + "learning_rate": 4.336100420146273e-06, + "logits/chosen": -2.961876630783081, + "logits/rejected": -2.799898862838745, + "logps/chosen": -491.7441101074219, + "logps/rejected": -505.00518798828125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.543148994445801, + "rewards/margins": 9.553171157836914, + "rewards/rejected": -17.09632110595703, + "step": 13377 + }, + { + "epoch": 2.08, + "learning_rate": 4.335366979615125e-06, + "logits/chosen": -2.448143243789673, + "logits/rejected": -2.879183292388916, + "logps/chosen": -598.8444213867188, + "logps/rejected": -554.9715576171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.830936431884766, + "rewards/margins": 8.743440628051758, + "rewards/rejected": -13.574377059936523, + "step": 13378 + }, + { + "epoch": 2.08, + "learning_rate": 4.334633539083978e-06, + "logits/chosen": -2.64792799949646, + "logits/rejected": -2.841952085494995, + "logps/chosen": -462.6572570800781, + "logps/rejected": -589.6532592773438, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0438008308410645, + "rewards/margins": 15.314233779907227, + "rewards/rejected": -18.358036041259766, + "step": 13379 + }, + { + "epoch": 2.08, + "learning_rate": 4.33390009855283e-06, + "logits/chosen": -1.9169639348983765, + "logits/rejected": -2.66074275970459, + "logps/chosen": -621.181640625, + "logps/rejected": -577.7939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.301541328430176, + "rewards/margins": 16.05872344970703, + "rewards/rejected": -20.36026382446289, + "step": 13380 + }, + { + "epoch": 2.08, + "learning_rate": 4.333166658021682e-06, + "logits/chosen": -2.83646559715271, + "logits/rejected": -2.380911350250244, + "logps/chosen": -204.9022979736328, + "logps/rejected": -313.1878662109375, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.707677841186523, + "rewards/margins": 5.866142749786377, + "rewards/rejected": -14.573820114135742, + "step": 13381 + }, + { + "epoch": 2.08, + "learning_rate": 4.332433217490534e-06, + "logits/chosen": -1.8603817224502563, + "logits/rejected": -2.465972900390625, + "logps/chosen": -181.79885864257812, + "logps/rejected": -349.06201171875, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.164488792419434, + "rewards/margins": 7.761971473693848, + "rewards/rejected": -14.926460266113281, + "step": 13382 + }, + { + "epoch": 2.08, + "learning_rate": 4.331699776959386e-06, + "logits/chosen": -1.287865161895752, + "logits/rejected": -2.67836594581604, + "logps/chosen": -113.88511657714844, + "logps/rejected": -562.3839721679688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.987818241119385, + "rewards/margins": 12.028203010559082, + "rewards/rejected": -17.016021728515625, + "step": 13383 + }, + { + "epoch": 2.08, + "learning_rate": 4.330966336428238e-06, + "logits/chosen": -2.757655382156372, + "logits/rejected": -2.8639578819274902, + "logps/chosen": -475.0366516113281, + "logps/rejected": -737.1351928710938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.630379676818848, + "rewards/margins": 9.872377395629883, + "rewards/rejected": -15.502756118774414, + "step": 13384 + }, + { + "epoch": 2.08, + "learning_rate": 4.33023289589709e-06, + "logits/chosen": -1.682751178741455, + "logits/rejected": -2.96101975440979, + "logps/chosen": -324.3782958984375, + "logps/rejected": -506.13043212890625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.686320304870605, + "rewards/margins": 11.18912124633789, + "rewards/rejected": -20.87544059753418, + "step": 13385 + }, + { + "epoch": 2.08, + "learning_rate": 4.329499455365942e-06, + "logits/chosen": -1.815834403038025, + "logits/rejected": -2.56718373298645, + "logps/chosen": -196.12046813964844, + "logps/rejected": -469.3437805175781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.264553070068359, + "rewards/margins": 10.175651550292969, + "rewards/rejected": -17.440204620361328, + "step": 13386 + }, + { + "epoch": 2.08, + "learning_rate": 4.328766014834794e-06, + "logits/chosen": -2.807722568511963, + "logits/rejected": -1.633986473083496, + "logps/chosen": -385.58233642578125, + "logps/rejected": -439.661376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.408321380615234, + "rewards/margins": 12.036874771118164, + "rewards/rejected": -18.4451961517334, + "step": 13387 + }, + { + "epoch": 2.08, + "learning_rate": 4.328032574303647e-06, + "logits/chosen": -2.7173774242401123, + "logits/rejected": -2.176100492477417, + "logps/chosen": -187.94525146484375, + "logps/rejected": -337.369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.340490818023682, + "rewards/margins": 11.493658065795898, + "rewards/rejected": -16.834148406982422, + "step": 13388 + }, + { + "epoch": 2.08, + "learning_rate": 4.3272991337724985e-06, + "logits/chosen": -2.7012574672698975, + "logits/rejected": -2.878275156021118, + "logps/chosen": -273.6743469238281, + "logps/rejected": -242.78042602539062, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.043789863586426, + "rewards/margins": 5.19108772277832, + "rewards/rejected": -11.23487663269043, + "step": 13389 + }, + { + "epoch": 2.08, + "learning_rate": 4.32656569324135e-06, + "logits/chosen": -2.196950912475586, + "logits/rejected": -2.98720121383667, + "logps/chosen": -145.59347534179688, + "logps/rejected": -264.2466125488281, + "loss": 4.3987, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.465314865112305, + "rewards/margins": 1.8022289276123047, + "rewards/rejected": -13.26754379272461, + "step": 13390 + }, + { + "epoch": 2.08, + "learning_rate": 4.325832252710202e-06, + "logits/chosen": -1.9643577337265015, + "logits/rejected": -3.0065770149230957, + "logps/chosen": -308.79364013671875, + "logps/rejected": -539.6109619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.075053215026855, + "rewards/margins": 10.353344917297363, + "rewards/rejected": -18.42839813232422, + "step": 13391 + }, + { + "epoch": 2.08, + "learning_rate": 4.325098812179055e-06, + "logits/chosen": -2.8552722930908203, + "logits/rejected": -2.722165107727051, + "logps/chosen": -159.18820190429688, + "logps/rejected": -173.88580322265625, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3628129959106445, + "rewards/margins": 8.047492027282715, + "rewards/rejected": -13.41030502319336, + "step": 13392 + }, + { + "epoch": 2.08, + "learning_rate": 4.324365371647908e-06, + "logits/chosen": -2.4757585525512695, + "logits/rejected": -2.696600914001465, + "logps/chosen": -508.7822570800781, + "logps/rejected": -458.83642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.754138946533203, + "rewards/margins": 14.972652435302734, + "rewards/rejected": -18.726791381835938, + "step": 13393 + }, + { + "epoch": 2.08, + "learning_rate": 4.3236319311167596e-06, + "logits/chosen": -2.384814977645874, + "logits/rejected": -2.5234532356262207, + "logps/chosen": -128.0338592529297, + "logps/rejected": -279.98919677734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.840396404266357, + "rewards/margins": 8.491059303283691, + "rewards/rejected": -15.33145523071289, + "step": 13394 + }, + { + "epoch": 2.08, + "learning_rate": 4.3228984905856114e-06, + "logits/chosen": -2.062865734100342, + "logits/rejected": -2.8772034645080566, + "logps/chosen": -434.4507751464844, + "logps/rejected": -591.1641845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.138166427612305, + "rewards/margins": 10.575166702270508, + "rewards/rejected": -20.713333129882812, + "step": 13395 + }, + { + "epoch": 2.08, + "learning_rate": 4.322165050054463e-06, + "logits/chosen": -1.90641188621521, + "logits/rejected": -2.432710886001587, + "logps/chosen": -475.60028076171875, + "logps/rejected": -777.734130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.09211540222168, + "rewards/margins": 14.057064056396484, + "rewards/rejected": -20.149179458618164, + "step": 13396 + }, + { + "epoch": 2.08, + "learning_rate": 4.321431609523316e-06, + "logits/chosen": -2.7428290843963623, + "logits/rejected": -2.865614652633667, + "logps/chosen": -129.15887451171875, + "logps/rejected": -274.6448974609375, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.26077938079834, + "rewards/margins": 9.064068794250488, + "rewards/rejected": -15.324848175048828, + "step": 13397 + }, + { + "epoch": 2.08, + "learning_rate": 4.320698168992168e-06, + "logits/chosen": -2.2396762371063232, + "logits/rejected": -2.9567763805389404, + "logps/chosen": -337.0778503417969, + "logps/rejected": -324.3722839355469, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.597546577453613, + "rewards/margins": 9.691822052001953, + "rewards/rejected": -16.28936767578125, + "step": 13398 + }, + { + "epoch": 2.08, + "learning_rate": 4.31996472846102e-06, + "logits/chosen": -1.6349700689315796, + "logits/rejected": -2.6542320251464844, + "logps/chosen": -139.6002960205078, + "logps/rejected": -315.0732421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.605448722839355, + "rewards/margins": 9.911462783813477, + "rewards/rejected": -18.51691246032715, + "step": 13399 + }, + { + "epoch": 2.08, + "learning_rate": 4.319231287929872e-06, + "logits/chosen": -2.2717363834381104, + "logits/rejected": -2.957725763320923, + "logps/chosen": -198.61798095703125, + "logps/rejected": -336.9559326171875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.210925102233887, + "rewards/margins": 7.247569561004639, + "rewards/rejected": -12.458494186401367, + "step": 13400 + }, + { + "epoch": 2.08, + "learning_rate": 4.318497847398724e-06, + "logits/chosen": -2.732008695602417, + "logits/rejected": -2.183532476425171, + "logps/chosen": -310.322021484375, + "logps/rejected": -317.69134521484375, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.834259033203125, + "rewards/margins": 3.9907634258270264, + "rewards/rejected": -13.825021743774414, + "step": 13401 + }, + { + "epoch": 2.08, + "learning_rate": 4.317764406867576e-06, + "logits/chosen": -2.76833176612854, + "logits/rejected": -2.202852725982666, + "logps/chosen": -408.61090087890625, + "logps/rejected": -517.9073486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1210737228393555, + "rewards/margins": 11.321075439453125, + "rewards/rejected": -16.442150115966797, + "step": 13402 + }, + { + "epoch": 2.08, + "learning_rate": 4.317030966336428e-06, + "logits/chosen": -0.9936853051185608, + "logits/rejected": -2.5447781085968018, + "logps/chosen": -192.55209350585938, + "logps/rejected": -299.74041748046875, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.067374229431152, + "rewards/margins": 5.5339860916137695, + "rewards/rejected": -15.601360321044922, + "step": 13403 + }, + { + "epoch": 2.08, + "learning_rate": 4.31629752580528e-06, + "logits/chosen": -1.9670788049697876, + "logits/rejected": -2.6804893016815186, + "logps/chosen": -166.25650024414062, + "logps/rejected": -572.8884887695312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.02793025970459, + "rewards/margins": 18.1314697265625, + "rewards/rejected": -26.159399032592773, + "step": 13404 + }, + { + "epoch": 2.08, + "learning_rate": 4.315564085274132e-06, + "logits/chosen": -1.8140202760696411, + "logits/rejected": -2.3397631645202637, + "logps/chosen": -223.39892578125, + "logps/rejected": -591.031982421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.568690299987793, + "rewards/margins": 12.13758659362793, + "rewards/rejected": -20.70627784729004, + "step": 13405 + }, + { + "epoch": 2.08, + "learning_rate": 4.314830644742985e-06, + "logits/chosen": -2.1284892559051514, + "logits/rejected": -2.898449420928955, + "logps/chosen": -181.19171142578125, + "logps/rejected": -296.68756103515625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.673709869384766, + "rewards/margins": 6.63437557220459, + "rewards/rejected": -15.308084487915039, + "step": 13406 + }, + { + "epoch": 2.09, + "learning_rate": 4.3140972042118365e-06, + "logits/chosen": -3.023927927017212, + "logits/rejected": -2.7406978607177734, + "logps/chosen": -428.4992370605469, + "logps/rejected": -684.659423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.941001892089844, + "rewards/margins": 15.935935974121094, + "rewards/rejected": -23.876937866210938, + "step": 13407 + }, + { + "epoch": 2.09, + "learning_rate": 4.313363763680688e-06, + "logits/chosen": -2.007779121398926, + "logits/rejected": -2.585475206375122, + "logps/chosen": -221.90065002441406, + "logps/rejected": -330.1009521484375, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.736895561218262, + "rewards/margins": 8.949309349060059, + "rewards/rejected": -16.68620491027832, + "step": 13408 + }, + { + "epoch": 2.09, + "learning_rate": 4.312630323149541e-06, + "logits/chosen": -2.6436421871185303, + "logits/rejected": -2.6408984661102295, + "logps/chosen": -353.6260070800781, + "logps/rejected": -292.06268310546875, + "loss": 1.7972, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.548224449157715, + "rewards/margins": 1.9489772319793701, + "rewards/rejected": -12.497201919555664, + "step": 13409 + }, + { + "epoch": 2.09, + "learning_rate": 4.311896882618394e-06, + "logits/chosen": -1.3987481594085693, + "logits/rejected": -2.8604464530944824, + "logps/chosen": -160.68209838867188, + "logps/rejected": -355.46051025390625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.617258071899414, + "rewards/margins": 7.113146781921387, + "rewards/rejected": -14.7304048538208, + "step": 13410 + }, + { + "epoch": 2.09, + "learning_rate": 4.311163442087246e-06, + "logits/chosen": -0.871584951877594, + "logits/rejected": -2.178986072540283, + "logps/chosen": -338.93963623046875, + "logps/rejected": -862.1405639648438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.598160743713379, + "rewards/margins": 19.56436538696289, + "rewards/rejected": -27.162525177001953, + "step": 13411 + }, + { + "epoch": 2.09, + "learning_rate": 4.310430001556098e-06, + "logits/chosen": -2.5221097469329834, + "logits/rejected": -2.849558115005493, + "logps/chosen": -261.4359130859375, + "logps/rejected": -364.0829772949219, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.50402307510376, + "rewards/margins": 8.345258712768555, + "rewards/rejected": -15.849281311035156, + "step": 13412 + }, + { + "epoch": 2.09, + "learning_rate": 4.3096965610249495e-06, + "logits/chosen": -1.7459362745285034, + "logits/rejected": -2.7538065910339355, + "logps/chosen": -219.36849975585938, + "logps/rejected": -307.1746826171875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.645702362060547, + "rewards/margins": 6.6591315269470215, + "rewards/rejected": -15.30483341217041, + "step": 13413 + }, + { + "epoch": 2.09, + "learning_rate": 4.308963120493801e-06, + "logits/chosen": -2.757634401321411, + "logits/rejected": -2.5868279933929443, + "logps/chosen": -532.6576538085938, + "logps/rejected": -549.0947875976562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.631680488586426, + "rewards/margins": 11.925324440002441, + "rewards/rejected": -19.557004928588867, + "step": 13414 + }, + { + "epoch": 2.09, + "learning_rate": 4.308229679962654e-06, + "logits/chosen": -1.8718171119689941, + "logits/rejected": -2.513439416885376, + "logps/chosen": -342.92840576171875, + "logps/rejected": -583.0550537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.617107391357422, + "rewards/margins": 11.382457733154297, + "rewards/rejected": -20.99956512451172, + "step": 13415 + }, + { + "epoch": 2.09, + "learning_rate": 4.307496239431506e-06, + "logits/chosen": -2.5919759273529053, + "logits/rejected": -3.0634307861328125, + "logps/chosen": -129.09893798828125, + "logps/rejected": -376.6485595703125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7046403884887695, + "rewards/margins": 7.369293689727783, + "rewards/rejected": -13.073934555053711, + "step": 13416 + }, + { + "epoch": 2.09, + "learning_rate": 4.306762798900358e-06, + "logits/chosen": -1.8960508108139038, + "logits/rejected": -2.4965357780456543, + "logps/chosen": -163.411865234375, + "logps/rejected": -357.0752868652344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2626776695251465, + "rewards/margins": 11.35175895690918, + "rewards/rejected": -15.614436149597168, + "step": 13417 + }, + { + "epoch": 2.09, + "learning_rate": 4.30602935836921e-06, + "logits/chosen": -2.903188467025757, + "logits/rejected": -2.3051881790161133, + "logps/chosen": -213.12229919433594, + "logps/rejected": -182.11544799804688, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.485388994216919, + "rewards/margins": 7.609161376953125, + "rewards/rejected": -10.094550132751465, + "step": 13418 + }, + { + "epoch": 2.09, + "learning_rate": 4.3052959178380624e-06, + "logits/chosen": -2.9327142238616943, + "logits/rejected": -2.6536107063293457, + "logps/chosen": -680.95166015625, + "logps/rejected": -700.3619384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.527951240539551, + "rewards/margins": 11.373781204223633, + "rewards/rejected": -16.9017333984375, + "step": 13419 + }, + { + "epoch": 2.09, + "learning_rate": 4.304562477306914e-06, + "logits/chosen": -2.971829414367676, + "logits/rejected": -3.076650619506836, + "logps/chosen": -618.6531982421875, + "logps/rejected": -711.7720336914062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8582539558410645, + "rewards/margins": 11.841874122619629, + "rewards/rejected": -17.70012855529785, + "step": 13420 + }, + { + "epoch": 2.09, + "learning_rate": 4.303829036775766e-06, + "logits/chosen": -3.0046374797821045, + "logits/rejected": -2.547976016998291, + "logps/chosen": -693.1268310546875, + "logps/rejected": -426.1961975097656, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.806459426879883, + "rewards/margins": 6.663993835449219, + "rewards/rejected": -12.470453262329102, + "step": 13421 + }, + { + "epoch": 2.09, + "learning_rate": 4.303095596244618e-06, + "logits/chosen": -2.515597343444824, + "logits/rejected": -2.983112096786499, + "logps/chosen": -81.28874206542969, + "logps/rejected": -256.8405456542969, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.247917175292969, + "rewards/margins": 7.735695838928223, + "rewards/rejected": -13.983613014221191, + "step": 13422 + }, + { + "epoch": 2.09, + "learning_rate": 4.302362155713471e-06, + "logits/chosen": -1.077451229095459, + "logits/rejected": -2.4221463203430176, + "logps/chosen": -149.8076171875, + "logps/rejected": -729.7182006835938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6976318359375, + "rewards/margins": 17.619495391845703, + "rewards/rejected": -25.317127227783203, + "step": 13423 + }, + { + "epoch": 2.09, + "learning_rate": 4.301628715182323e-06, + "logits/chosen": -2.918700695037842, + "logits/rejected": -2.692831039428711, + "logps/chosen": -577.5554809570312, + "logps/rejected": -511.3525390625, + "loss": 0.1211, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.729761123657227, + "rewards/margins": 7.439964771270752, + "rewards/rejected": -15.16972541809082, + "step": 13424 + }, + { + "epoch": 2.09, + "learning_rate": 4.3008952746511746e-06, + "logits/chosen": -2.0574951171875, + "logits/rejected": -2.561520576477051, + "logps/chosen": -159.326904296875, + "logps/rejected": -320.12957763671875, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.527016639709473, + "rewards/margins": 7.594343185424805, + "rewards/rejected": -15.121359825134277, + "step": 13425 + }, + { + "epoch": 2.09, + "learning_rate": 4.300161834120027e-06, + "logits/chosen": -1.9172046184539795, + "logits/rejected": -2.421388626098633, + "logps/chosen": -180.1327362060547, + "logps/rejected": -328.38531494140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.60125732421875, + "rewards/margins": 8.67802906036377, + "rewards/rejected": -13.27928638458252, + "step": 13426 + }, + { + "epoch": 2.09, + "learning_rate": 4.299428393588879e-06, + "logits/chosen": -3.0022358894348145, + "logits/rejected": -3.1181421279907227, + "logps/chosen": -74.55022430419922, + "logps/rejected": -228.7325897216797, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.021119117736816, + "rewards/margins": 8.697911262512207, + "rewards/rejected": -13.719030380249023, + "step": 13427 + }, + { + "epoch": 2.09, + "learning_rate": 4.298694953057732e-06, + "logits/chosen": -1.2462705373764038, + "logits/rejected": -2.5903830528259277, + "logps/chosen": -142.92514038085938, + "logps/rejected": -312.39532470703125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6693615913391113, + "rewards/margins": 7.839643478393555, + "rewards/rejected": -11.509005546569824, + "step": 13428 + }, + { + "epoch": 2.09, + "learning_rate": 4.297961512526584e-06, + "logits/chosen": -2.2545390129089355, + "logits/rejected": -2.6622579097747803, + "logps/chosen": -253.6956024169922, + "logps/rejected": -452.40118408203125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.895216941833496, + "rewards/margins": 7.894267559051514, + "rewards/rejected": -16.78948402404785, + "step": 13429 + }, + { + "epoch": 2.09, + "learning_rate": 4.297228071995436e-06, + "logits/chosen": -2.730475425720215, + "logits/rejected": -2.9693830013275146, + "logps/chosen": -101.03324890136719, + "logps/rejected": -266.0185852050781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2005720138549805, + "rewards/margins": 9.930244445800781, + "rewards/rejected": -17.130817413330078, + "step": 13430 + }, + { + "epoch": 2.09, + "learning_rate": 4.2964946314642875e-06, + "logits/chosen": -2.570146083831787, + "logits/rejected": -2.997715711593628, + "logps/chosen": -500.888916015625, + "logps/rejected": -667.054931640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.329845428466797, + "rewards/margins": 9.688146591186523, + "rewards/rejected": -19.01799201965332, + "step": 13431 + }, + { + "epoch": 2.09, + "learning_rate": 4.29576119093314e-06, + "logits/chosen": -0.8585126399993896, + "logits/rejected": -2.356680154800415, + "logps/chosen": -179.78004455566406, + "logps/rejected": -568.556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.290637016296387, + "rewards/margins": 15.00356388092041, + "rewards/rejected": -22.294200897216797, + "step": 13432 + }, + { + "epoch": 2.09, + "learning_rate": 4.295027750401992e-06, + "logits/chosen": -2.575120687484741, + "logits/rejected": -1.7923834323883057, + "logps/chosen": -224.61160278320312, + "logps/rejected": -304.3992919921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.818594932556152, + "rewards/margins": 9.296076774597168, + "rewards/rejected": -14.11467170715332, + "step": 13433 + }, + { + "epoch": 2.09, + "learning_rate": 4.294294309870844e-06, + "logits/chosen": -1.775477409362793, + "logits/rejected": -2.703071117401123, + "logps/chosen": -138.2137451171875, + "logps/rejected": -487.5228271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.138566017150879, + "rewards/margins": 12.288031578063965, + "rewards/rejected": -18.426597595214844, + "step": 13434 + }, + { + "epoch": 2.09, + "learning_rate": 4.293560869339696e-06, + "logits/chosen": -1.325575590133667, + "logits/rejected": -2.5446503162384033, + "logps/chosen": -145.11830139160156, + "logps/rejected": -384.1561279296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.215876579284668, + "rewards/margins": 8.906768798828125, + "rewards/rejected": -15.122645378112793, + "step": 13435 + }, + { + "epoch": 2.09, + "learning_rate": 4.292827428808548e-06, + "logits/chosen": -2.1450204849243164, + "logits/rejected": -2.4641778469085693, + "logps/chosen": -163.75839233398438, + "logps/rejected": -329.9837341308594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.348570346832275, + "rewards/margins": 8.770980834960938, + "rewards/rejected": -14.119550704956055, + "step": 13436 + }, + { + "epoch": 2.09, + "learning_rate": 4.2920939882774005e-06, + "logits/chosen": -2.7732930183410645, + "logits/rejected": -2.9001717567443848, + "logps/chosen": -177.81716918945312, + "logps/rejected": -337.85723876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0906596183776855, + "rewards/margins": 12.254470825195312, + "rewards/rejected": -17.345130920410156, + "step": 13437 + }, + { + "epoch": 2.09, + "learning_rate": 4.291360547746252e-06, + "logits/chosen": -2.7134172916412354, + "logits/rejected": -2.6712048053741455, + "logps/chosen": -398.2100830078125, + "logps/rejected": -504.79656982421875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.42228889465332, + "rewards/margins": 8.609485626220703, + "rewards/rejected": -16.031774520874023, + "step": 13438 + }, + { + "epoch": 2.09, + "learning_rate": 4.290627107215104e-06, + "logits/chosen": -2.075328826904297, + "logits/rejected": -2.903226852416992, + "logps/chosen": -163.7682342529297, + "logps/rejected": -220.4927978515625, + "loss": 0.3132, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.938592910766602, + "rewards/margins": 3.288438320159912, + "rewards/rejected": -11.227031707763672, + "step": 13439 + }, + { + "epoch": 2.09, + "learning_rate": 4.289893666683956e-06, + "logits/chosen": -2.2453441619873047, + "logits/rejected": -2.4338231086730957, + "logps/chosen": -249.89007568359375, + "logps/rejected": -357.7474670410156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.569313049316406, + "rewards/margins": 8.61827278137207, + "rewards/rejected": -15.187585830688477, + "step": 13440 + }, + { + "epoch": 2.09, + "learning_rate": 4.289160226152809e-06, + "logits/chosen": -2.940990686416626, + "logits/rejected": -2.672553539276123, + "logps/chosen": -262.3633117675781, + "logps/rejected": -322.8768615722656, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8188982009887695, + "rewards/margins": 9.892581939697266, + "rewards/rejected": -13.711480140686035, + "step": 13441 + }, + { + "epoch": 2.09, + "learning_rate": 4.288426785621661e-06, + "logits/chosen": -2.1398510932922363, + "logits/rejected": -2.666771650314331, + "logps/chosen": -198.69088745117188, + "logps/rejected": -397.114013671875, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3637919425964355, + "rewards/margins": 6.380916595458984, + "rewards/rejected": -11.744709014892578, + "step": 13442 + }, + { + "epoch": 2.09, + "learning_rate": 4.287693345090513e-06, + "logits/chosen": -2.9087724685668945, + "logits/rejected": -2.824556350708008, + "logps/chosen": -239.35617065429688, + "logps/rejected": -236.67269897460938, + "loss": 0.1806, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.027095317840576, + "rewards/margins": 5.548335552215576, + "rewards/rejected": -12.575430870056152, + "step": 13443 + }, + { + "epoch": 2.09, + "learning_rate": 4.286959904559365e-06, + "logits/chosen": -2.815920114517212, + "logits/rejected": -2.048114776611328, + "logps/chosen": -257.4830017089844, + "logps/rejected": -297.4729309082031, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.575655460357666, + "rewards/margins": 7.18740177154541, + "rewards/rejected": -12.763057708740234, + "step": 13444 + }, + { + "epoch": 2.09, + "learning_rate": 4.286226464028217e-06, + "logits/chosen": -2.7930095195770264, + "logits/rejected": -1.9351415634155273, + "logps/chosen": -743.485107421875, + "logps/rejected": -592.7059326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.274703025817871, + "rewards/margins": 13.055633544921875, + "rewards/rejected": -19.330337524414062, + "step": 13445 + }, + { + "epoch": 2.09, + "learning_rate": 4.28549302349707e-06, + "logits/chosen": -2.847203254699707, + "logits/rejected": -2.899240255355835, + "logps/chosen": -115.77954864501953, + "logps/rejected": -327.718505859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.090096473693848, + "rewards/margins": 12.318967819213867, + "rewards/rejected": -19.4090633392334, + "step": 13446 + }, + { + "epoch": 2.09, + "learning_rate": 4.284759582965922e-06, + "logits/chosen": -0.5856642127037048, + "logits/rejected": -2.6793365478515625, + "logps/chosen": -160.63619995117188, + "logps/rejected": -408.76751708984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7596611976623535, + "rewards/margins": 18.014467239379883, + "rewards/rejected": -22.774127960205078, + "step": 13447 + }, + { + "epoch": 2.09, + "learning_rate": 4.284026142434774e-06, + "logits/chosen": -2.1464121341705322, + "logits/rejected": -1.0583105087280273, + "logps/chosen": -348.60394287109375, + "logps/rejected": -202.59603881835938, + "loss": 0.0748, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.636598110198975, + "rewards/margins": 8.504545211791992, + "rewards/rejected": -14.141143798828125, + "step": 13448 + }, + { + "epoch": 2.09, + "learning_rate": 4.2832927019036256e-06, + "logits/chosen": -1.8391823768615723, + "logits/rejected": -2.9542782306671143, + "logps/chosen": -95.45709228515625, + "logps/rejected": -363.5644836425781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1956281661987305, + "rewards/margins": 9.873157501220703, + "rewards/rejected": -16.06878662109375, + "step": 13449 + }, + { + "epoch": 2.09, + "learning_rate": 4.282559261372478e-06, + "logits/chosen": -1.5949798822402954, + "logits/rejected": -2.5024993419647217, + "logps/chosen": -175.80050659179688, + "logps/rejected": -404.9654846191406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.240316390991211, + "rewards/margins": 12.132205963134766, + "rewards/rejected": -20.372522354125977, + "step": 13450 + }, + { + "epoch": 2.09, + "learning_rate": 4.28182582084133e-06, + "logits/chosen": -2.29695463180542, + "logits/rejected": -2.9083430767059326, + "logps/chosen": -94.85692596435547, + "logps/rejected": -436.2586669921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.182833194732666, + "rewards/margins": 10.367610931396484, + "rewards/rejected": -16.550443649291992, + "step": 13451 + }, + { + "epoch": 2.09, + "learning_rate": 4.281092380310182e-06, + "logits/chosen": -2.212886095046997, + "logits/rejected": -2.9141998291015625, + "logps/chosen": -446.72235107421875, + "logps/rejected": -484.19134521484375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.861386299133301, + "rewards/margins": 7.7407755851745605, + "rewards/rejected": -15.602161407470703, + "step": 13452 + }, + { + "epoch": 2.09, + "learning_rate": 4.280358939779034e-06, + "logits/chosen": -2.914638042449951, + "logits/rejected": -2.7411415576934814, + "logps/chosen": -157.60105895996094, + "logps/rejected": -300.1629333496094, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1896493434906006, + "rewards/margins": 9.488398551940918, + "rewards/rejected": -12.678048133850098, + "step": 13453 + }, + { + "epoch": 2.09, + "learning_rate": 4.279625499247886e-06, + "logits/chosen": -1.1633832454681396, + "logits/rejected": -2.7853920459747314, + "logps/chosen": -95.92139434814453, + "logps/rejected": -311.00384521484375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.086057186126709, + "rewards/margins": 6.9397873878479, + "rewards/rejected": -12.02584457397461, + "step": 13454 + }, + { + "epoch": 2.09, + "learning_rate": 4.2788920587167385e-06, + "logits/chosen": -2.093930721282959, + "logits/rejected": -2.431473970413208, + "logps/chosen": -174.68377685546875, + "logps/rejected": -463.9576721191406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.08822250366211, + "rewards/margins": 15.350780487060547, + "rewards/rejected": -25.439002990722656, + "step": 13455 + }, + { + "epoch": 2.09, + "learning_rate": 4.27815861818559e-06, + "logits/chosen": -2.3164427280426025, + "logits/rejected": -2.462946891784668, + "logps/chosen": -182.47378540039062, + "logps/rejected": -360.6669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1427412033081055, + "rewards/margins": 12.247264862060547, + "rewards/rejected": -15.390007019042969, + "step": 13456 + }, + { + "epoch": 2.09, + "learning_rate": 4.277425177654442e-06, + "logits/chosen": -2.6969282627105713, + "logits/rejected": -2.4879579544067383, + "logps/chosen": -322.444091796875, + "logps/rejected": -577.4749755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.46529483795166, + "rewards/margins": 12.027570724487305, + "rewards/rejected": -18.49286651611328, + "step": 13457 + }, + { + "epoch": 2.09, + "learning_rate": 4.276691737123294e-06, + "logits/chosen": -2.6646721363067627, + "logits/rejected": -2.854353427886963, + "logps/chosen": -135.3768768310547, + "logps/rejected": -377.8284912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.876333713531494, + "rewards/margins": 10.975517272949219, + "rewards/rejected": -16.851852416992188, + "step": 13458 + }, + { + "epoch": 2.09, + "learning_rate": 4.275958296592147e-06, + "logits/chosen": -2.2968475818634033, + "logits/rejected": -2.916231632232666, + "logps/chosen": -323.5375671386719, + "logps/rejected": -273.6335144042969, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.443753242492676, + "rewards/margins": 5.183773517608643, + "rewards/rejected": -12.627527236938477, + "step": 13459 + }, + { + "epoch": 2.09, + "learning_rate": 4.275224856060999e-06, + "logits/chosen": -2.629784345626831, + "logits/rejected": -2.0912559032440186, + "logps/chosen": -563.6259765625, + "logps/rejected": -618.5377197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.986626625061035, + "rewards/margins": 13.370853424072266, + "rewards/rejected": -21.357479095458984, + "step": 13460 + }, + { + "epoch": 2.09, + "learning_rate": 4.2744914155298515e-06, + "logits/chosen": -2.1769826412200928, + "logits/rejected": -2.6707043647766113, + "logps/chosen": -171.41937255859375, + "logps/rejected": -308.99810791015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.913144111633301, + "rewards/margins": 8.946517944335938, + "rewards/rejected": -13.859662055969238, + "step": 13461 + }, + { + "epoch": 2.09, + "learning_rate": 4.273757974998703e-06, + "logits/chosen": -1.9293575286865234, + "logits/rejected": -2.7308716773986816, + "logps/chosen": -318.7545166015625, + "logps/rejected": -450.1728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.878978729248047, + "rewards/margins": 11.104472160339355, + "rewards/rejected": -15.983450889587402, + "step": 13462 + }, + { + "epoch": 2.09, + "learning_rate": 4.273024534467555e-06, + "logits/chosen": -2.1933083534240723, + "logits/rejected": -2.394876003265381, + "logps/chosen": -193.20736694335938, + "logps/rejected": -378.6940612792969, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.72496223449707, + "rewards/margins": 9.44272232055664, + "rewards/rejected": -18.167682647705078, + "step": 13463 + }, + { + "epoch": 2.09, + "learning_rate": 4.272291093936408e-06, + "logits/chosen": -2.6325571537017822, + "logits/rejected": -2.6305840015411377, + "logps/chosen": -448.6545104980469, + "logps/rejected": -292.862060546875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.677995681762695, + "rewards/margins": 8.493557929992676, + "rewards/rejected": -15.171552658081055, + "step": 13464 + }, + { + "epoch": 2.09, + "learning_rate": 4.27155765340526e-06, + "logits/chosen": -2.8387269973754883, + "logits/rejected": -2.887314796447754, + "logps/chosen": -139.72344970703125, + "logps/rejected": -301.65667724609375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.790487289428711, + "rewards/margins": 6.828248023986816, + "rewards/rejected": -14.618736267089844, + "step": 13465 + }, + { + "epoch": 2.09, + "learning_rate": 4.270824212874112e-06, + "logits/chosen": -1.2185226678848267, + "logits/rejected": -2.4591777324676514, + "logps/chosen": -223.99539184570312, + "logps/rejected": -535.6390991210938, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.779298782348633, + "rewards/margins": 11.189491271972656, + "rewards/rejected": -18.96879005432129, + "step": 13466 + }, + { + "epoch": 2.09, + "learning_rate": 4.270090772342964e-06, + "logits/chosen": -1.846471905708313, + "logits/rejected": -2.6300415992736816, + "logps/chosen": -169.01414489746094, + "logps/rejected": -342.7850646972656, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.932459831237793, + "rewards/margins": 7.922929763793945, + "rewards/rejected": -12.855389595031738, + "step": 13467 + }, + { + "epoch": 2.09, + "learning_rate": 4.269357331811816e-06, + "logits/chosen": -2.704310894012451, + "logits/rejected": -2.878803253173828, + "logps/chosen": -50.305660247802734, + "logps/rejected": -192.67823791503906, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.139508247375488, + "rewards/margins": 6.9939422607421875, + "rewards/rejected": -11.133450508117676, + "step": 13468 + }, + { + "epoch": 2.09, + "learning_rate": 4.268623891280668e-06, + "logits/chosen": -2.4994184970855713, + "logits/rejected": -2.93792724609375, + "logps/chosen": -130.22308349609375, + "logps/rejected": -235.26551818847656, + "loss": 0.0447, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.557122230529785, + "rewards/margins": 5.84462833404541, + "rewards/rejected": -13.401750564575195, + "step": 13469 + }, + { + "epoch": 2.09, + "learning_rate": 4.26789045074952e-06, + "logits/chosen": -1.6076860427856445, + "logits/rejected": -2.5163159370422363, + "logps/chosen": -162.6129150390625, + "logps/rejected": -424.7105712890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6511759757995605, + "rewards/margins": 13.967147827148438, + "rewards/rejected": -19.618324279785156, + "step": 13470 + }, + { + "epoch": 2.1, + "learning_rate": 4.267157010218372e-06, + "logits/chosen": -1.5733506679534912, + "logits/rejected": -2.5436625480651855, + "logps/chosen": -109.54914855957031, + "logps/rejected": -415.0382385253906, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.621554851531982, + "rewards/margins": 7.213348865509033, + "rewards/rejected": -13.834903717041016, + "step": 13471 + }, + { + "epoch": 2.1, + "learning_rate": 4.266423569687225e-06, + "logits/chosen": -2.732897996902466, + "logits/rejected": -2.1530230045318604, + "logps/chosen": -140.34535217285156, + "logps/rejected": -195.89373779296875, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.78712272644043, + "rewards/margins": 7.561100959777832, + "rewards/rejected": -12.348222732543945, + "step": 13472 + }, + { + "epoch": 2.1, + "learning_rate": 4.265690129156077e-06, + "logits/chosen": -1.7115613222122192, + "logits/rejected": -2.579622983932495, + "logps/chosen": -222.44418334960938, + "logps/rejected": -408.0472717285156, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.881846904754639, + "rewards/margins": 8.031667709350586, + "rewards/rejected": -15.913515090942383, + "step": 13473 + }, + { + "epoch": 2.1, + "learning_rate": 4.2649566886249285e-06, + "logits/chosen": -2.059887409210205, + "logits/rejected": -2.832521915435791, + "logps/chosen": -139.79232788085938, + "logps/rejected": -548.970947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.381911277770996, + "rewards/margins": 10.381534576416016, + "rewards/rejected": -16.763446807861328, + "step": 13474 + }, + { + "epoch": 2.1, + "learning_rate": 4.26422324809378e-06, + "logits/chosen": -2.7327728271484375, + "logits/rejected": -2.2771995067596436, + "logps/chosen": -301.82000732421875, + "logps/rejected": -304.92083740234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.771462440490723, + "rewards/margins": 8.163116455078125, + "rewards/rejected": -13.934577941894531, + "step": 13475 + }, + { + "epoch": 2.1, + "learning_rate": 4.263489807562632e-06, + "logits/chosen": -2.283597946166992, + "logits/rejected": -2.971672773361206, + "logps/chosen": -102.15353393554688, + "logps/rejected": -290.354736328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.425817012786865, + "rewards/margins": 9.598567008972168, + "rewards/rejected": -14.024383544921875, + "step": 13476 + }, + { + "epoch": 2.1, + "learning_rate": 4.262756367031485e-06, + "logits/chosen": -2.1079158782958984, + "logits/rejected": -2.70306396484375, + "logps/chosen": -215.93125915527344, + "logps/rejected": -421.30035400390625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.742210388183594, + "rewards/margins": 7.759756088256836, + "rewards/rejected": -15.50196647644043, + "step": 13477 + }, + { + "epoch": 2.1, + "learning_rate": 4.262022926500338e-06, + "logits/chosen": -2.9056780338287354, + "logits/rejected": -3.0730199813842773, + "logps/chosen": -79.06101989746094, + "logps/rejected": -165.2008056640625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.954397678375244, + "rewards/margins": 6.53297233581543, + "rewards/rejected": -12.487369537353516, + "step": 13478 + }, + { + "epoch": 2.1, + "learning_rate": 4.2612894859691895e-06, + "logits/chosen": -2.529528856277466, + "logits/rejected": -2.3939123153686523, + "logps/chosen": -282.9322814941406, + "logps/rejected": -514.177490234375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.987035751342773, + "rewards/margins": 10.765732765197754, + "rewards/rejected": -19.752769470214844, + "step": 13479 + }, + { + "epoch": 2.1, + "learning_rate": 4.2605560454380414e-06, + "logits/chosen": -1.7284330129623413, + "logits/rejected": -2.558490514755249, + "logps/chosen": -117.75299835205078, + "logps/rejected": -332.9437255859375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.506989479064941, + "rewards/margins": 9.258901596069336, + "rewards/rejected": -17.765892028808594, + "step": 13480 + }, + { + "epoch": 2.1, + "learning_rate": 4.259822604906894e-06, + "logits/chosen": -1.1578876972198486, + "logits/rejected": -2.9376590251922607, + "logps/chosen": -85.66757202148438, + "logps/rejected": -472.512939453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.131126403808594, + "rewards/margins": 9.306459426879883, + "rewards/rejected": -16.437585830688477, + "step": 13481 + }, + { + "epoch": 2.1, + "learning_rate": 4.259089164375746e-06, + "logits/chosen": -1.9473536014556885, + "logits/rejected": -2.930107355117798, + "logps/chosen": -157.08078002929688, + "logps/rejected": -248.7949981689453, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5225963592529297, + "rewards/margins": 8.866314888000488, + "rewards/rejected": -11.388911247253418, + "step": 13482 + }, + { + "epoch": 2.1, + "learning_rate": 4.258355723844598e-06, + "logits/chosen": -2.3702330589294434, + "logits/rejected": -2.8373377323150635, + "logps/chosen": -72.57150268554688, + "logps/rejected": -281.4371643066406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.816411972045898, + "rewards/margins": 11.462751388549805, + "rewards/rejected": -16.279163360595703, + "step": 13483 + }, + { + "epoch": 2.1, + "learning_rate": 4.25762228331345e-06, + "logits/chosen": -2.693089246749878, + "logits/rejected": -2.1811835765838623, + "logps/chosen": -507.19512939453125, + "logps/rejected": -486.5851745605469, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.05569839477539, + "rewards/margins": 8.866880416870117, + "rewards/rejected": -19.922576904296875, + "step": 13484 + }, + { + "epoch": 2.1, + "learning_rate": 4.256888842782302e-06, + "logits/chosen": -1.5216138362884521, + "logits/rejected": -2.645664691925049, + "logps/chosen": -123.47830200195312, + "logps/rejected": -411.1069641113281, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2052998542785645, + "rewards/margins": 8.249239921569824, + "rewards/rejected": -14.454540252685547, + "step": 13485 + }, + { + "epoch": 2.1, + "learning_rate": 4.256155402251154e-06, + "logits/chosen": -2.538254737854004, + "logits/rejected": -2.2243592739105225, + "logps/chosen": -199.68338012695312, + "logps/rejected": -282.9781799316406, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.830953598022461, + "rewards/margins": 7.577998638153076, + "rewards/rejected": -17.408952713012695, + "step": 13486 + }, + { + "epoch": 2.1, + "learning_rate": 4.255421961720006e-06, + "logits/chosen": -0.7944599390029907, + "logits/rejected": -2.835481643676758, + "logps/chosen": -165.68113708496094, + "logps/rejected": -772.338134765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.63888168334961, + "rewards/margins": 9.540876388549805, + "rewards/rejected": -19.179758071899414, + "step": 13487 + }, + { + "epoch": 2.1, + "learning_rate": 4.254688521188858e-06, + "logits/chosen": -2.5268406867980957, + "logits/rejected": -2.6000094413757324, + "logps/chosen": -364.3269958496094, + "logps/rejected": -627.6332397460938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.405457496643066, + "rewards/margins": 10.697702407836914, + "rewards/rejected": -19.103158950805664, + "step": 13488 + }, + { + "epoch": 2.1, + "learning_rate": 4.25395508065771e-06, + "logits/chosen": -2.14190936088562, + "logits/rejected": -2.371133804321289, + "logps/chosen": -427.3135070800781, + "logps/rejected": -536.7131958007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.120756149291992, + "rewards/margins": 11.849319458007812, + "rewards/rejected": -20.970073699951172, + "step": 13489 + }, + { + "epoch": 2.1, + "learning_rate": 4.253221640126563e-06, + "logits/chosen": -2.208911657333374, + "logits/rejected": -2.9687867164611816, + "logps/chosen": -176.64906311035156, + "logps/rejected": -614.4469604492188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.960610389709473, + "rewards/margins": 14.112918853759766, + "rewards/rejected": -19.073530197143555, + "step": 13490 + }, + { + "epoch": 2.1, + "learning_rate": 4.252488199595415e-06, + "logits/chosen": -2.007352113723755, + "logits/rejected": -3.007206439971924, + "logps/chosen": -197.056396484375, + "logps/rejected": -516.6320190429688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.142147064208984, + "rewards/margins": 13.363061904907227, + "rewards/rejected": -17.50520896911621, + "step": 13491 + }, + { + "epoch": 2.1, + "learning_rate": 4.2517547590642665e-06, + "logits/chosen": -2.7618961334228516, + "logits/rejected": -1.4742351770401, + "logps/chosen": -230.42237854003906, + "logps/rejected": -256.8930969238281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0878677368164062, + "rewards/margins": 14.699370384216309, + "rewards/rejected": -16.78723907470703, + "step": 13492 + }, + { + "epoch": 2.1, + "learning_rate": 4.251021318533118e-06, + "logits/chosen": -1.240683913230896, + "logits/rejected": -2.0348198413848877, + "logps/chosen": -314.9245910644531, + "logps/rejected": -829.7848510742188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.848949432373047, + "rewards/margins": 17.462799072265625, + "rewards/rejected": -23.311748504638672, + "step": 13493 + }, + { + "epoch": 2.1, + "learning_rate": 4.250287878001971e-06, + "logits/chosen": -2.3291895389556885, + "logits/rejected": -2.936387300491333, + "logps/chosen": -282.3967590332031, + "logps/rejected": -272.50067138671875, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.21187162399292, + "rewards/margins": 5.528995990753174, + "rewards/rejected": -11.740867614746094, + "step": 13494 + }, + { + "epoch": 2.1, + "learning_rate": 4.249554437470824e-06, + "logits/chosen": -2.120277166366577, + "logits/rejected": -2.744734287261963, + "logps/chosen": -246.1215362548828, + "logps/rejected": -455.5069580078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.021377086639404, + "rewards/margins": 9.828810691833496, + "rewards/rejected": -16.850187301635742, + "step": 13495 + }, + { + "epoch": 2.1, + "learning_rate": 4.248820996939676e-06, + "logits/chosen": -1.3039722442626953, + "logits/rejected": -2.680109977722168, + "logps/chosen": -200.78158569335938, + "logps/rejected": -455.3681640625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.410752296447754, + "rewards/margins": 10.444665908813477, + "rewards/rejected": -17.855417251586914, + "step": 13496 + }, + { + "epoch": 2.1, + "learning_rate": 4.248087556408528e-06, + "logits/chosen": -1.8230750560760498, + "logits/rejected": -2.532625436782837, + "logps/chosen": -129.48342895507812, + "logps/rejected": -362.00958251953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.015259742736816, + "rewards/margins": 9.60456657409668, + "rewards/rejected": -18.619827270507812, + "step": 13497 + }, + { + "epoch": 2.1, + "learning_rate": 4.2473541158773795e-06, + "logits/chosen": -1.939138650894165, + "logits/rejected": -2.89858341217041, + "logps/chosen": -173.17172241210938, + "logps/rejected": -386.051025390625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.428635597229004, + "rewards/margins": 8.337204933166504, + "rewards/rejected": -13.765840530395508, + "step": 13498 + }, + { + "epoch": 2.1, + "learning_rate": 4.246620675346232e-06, + "logits/chosen": -1.8051855564117432, + "logits/rejected": -2.7334957122802734, + "logps/chosen": -132.33425903320312, + "logps/rejected": -518.3321533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.216598987579346, + "rewards/margins": 13.054006576538086, + "rewards/rejected": -20.270606994628906, + "step": 13499 + }, + { + "epoch": 2.1, + "learning_rate": 4.245887234815084e-06, + "logits/chosen": -1.9591281414031982, + "logits/rejected": -2.815920352935791, + "logps/chosen": -152.79400634765625, + "logps/rejected": -437.98089599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.018340587615967, + "rewards/margins": 11.371053695678711, + "rewards/rejected": -17.389394760131836, + "step": 13500 + }, + { + "epoch": 2.1, + "learning_rate": 4.245153794283936e-06, + "logits/chosen": -2.78841233253479, + "logits/rejected": -2.3666727542877197, + "logps/chosen": -185.0857391357422, + "logps/rejected": -290.7005310058594, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.836582660675049, + "rewards/margins": 8.04619312286377, + "rewards/rejected": -12.882776260375977, + "step": 13501 + }, + { + "epoch": 2.1, + "learning_rate": 4.244420353752788e-06, + "logits/chosen": -2.80041241645813, + "logits/rejected": -3.0757954120635986, + "logps/chosen": -79.0230941772461, + "logps/rejected": -372.8549499511719, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.444237232208252, + "rewards/margins": 9.497504234313965, + "rewards/rejected": -14.941741943359375, + "step": 13502 + }, + { + "epoch": 2.1, + "learning_rate": 4.24368691322164e-06, + "logits/chosen": -2.2555484771728516, + "logits/rejected": -3.0207133293151855, + "logps/chosen": -297.6001892089844, + "logps/rejected": -446.563232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5812530517578125, + "rewards/margins": 12.659361839294434, + "rewards/rejected": -15.240614891052246, + "step": 13503 + }, + { + "epoch": 2.1, + "learning_rate": 4.2429534726904924e-06, + "logits/chosen": -2.9105143547058105, + "logits/rejected": -2.9954819679260254, + "logps/chosen": -131.08935546875, + "logps/rejected": -371.5045166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.927022933959961, + "rewards/margins": 10.366945266723633, + "rewards/rejected": -16.293968200683594, + "step": 13504 + }, + { + "epoch": 2.1, + "learning_rate": 4.242220032159344e-06, + "logits/chosen": -2.3980939388275146, + "logits/rejected": -2.931121826171875, + "logps/chosen": -290.1517639160156, + "logps/rejected": -536.4947509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.520816802978516, + "rewards/margins": 10.535184860229492, + "rewards/rejected": -18.056001663208008, + "step": 13505 + }, + { + "epoch": 2.1, + "learning_rate": 4.241486591628196e-06, + "logits/chosen": -2.879155158996582, + "logits/rejected": -2.874046802520752, + "logps/chosen": -244.35386657714844, + "logps/rejected": -474.4322509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.995106220245361, + "rewards/margins": 11.958608627319336, + "rewards/rejected": -17.95371437072754, + "step": 13506 + }, + { + "epoch": 2.1, + "learning_rate": 4.240753151097048e-06, + "logits/chosen": -2.5708887577056885, + "logits/rejected": -2.62485671043396, + "logps/chosen": -264.456787109375, + "logps/rejected": -314.8007507324219, + "loss": 0.9378, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.428450584411621, + "rewards/margins": 3.181384563446045, + "rewards/rejected": -11.609834671020508, + "step": 13507 + }, + { + "epoch": 2.1, + "learning_rate": 4.240019710565901e-06, + "logits/chosen": -2.794691801071167, + "logits/rejected": -2.829989433288574, + "logps/chosen": -192.2563934326172, + "logps/rejected": -298.75146484375, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.714728355407715, + "rewards/margins": 5.832127571105957, + "rewards/rejected": -14.546855926513672, + "step": 13508 + }, + { + "epoch": 2.1, + "learning_rate": 4.239286270034753e-06, + "logits/chosen": -1.9717484712600708, + "logits/rejected": -2.776474714279175, + "logps/chosen": -129.3199005126953, + "logps/rejected": -417.82684326171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.816107749938965, + "rewards/margins": 11.537134170532227, + "rewards/rejected": -17.353240966796875, + "step": 13509 + }, + { + "epoch": 2.1, + "learning_rate": 4.2385528295036045e-06, + "logits/chosen": -2.680208921432495, + "logits/rejected": -3.076026439666748, + "logps/chosen": -208.04302978515625, + "logps/rejected": -190.1087188720703, + "loss": 0.3903, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.252991676330566, + "rewards/margins": 3.0887935161590576, + "rewards/rejected": -9.341785430908203, + "step": 13510 + }, + { + "epoch": 2.1, + "learning_rate": 4.237819388972457e-06, + "logits/chosen": -2.3193161487579346, + "logits/rejected": -2.8813631534576416, + "logps/chosen": -283.4993591308594, + "logps/rejected": -332.2815246582031, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.096137046813965, + "rewards/margins": 8.656156539916992, + "rewards/rejected": -14.752294540405273, + "step": 13511 + }, + { + "epoch": 2.1, + "learning_rate": 4.23708594844131e-06, + "logits/chosen": -2.4255449771881104, + "logits/rejected": -2.7975945472717285, + "logps/chosen": -647.7153930664062, + "logps/rejected": -557.1844482421875, + "loss": 0.49, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.74264907836914, + "rewards/margins": 4.2811808586120605, + "rewards/rejected": -15.023829460144043, + "step": 13512 + }, + { + "epoch": 2.1, + "learning_rate": 4.236352507910162e-06, + "logits/chosen": -1.9200713634490967, + "logits/rejected": -2.7365846633911133, + "logps/chosen": -244.8160400390625, + "logps/rejected": -614.5664672851562, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.9569091796875, + "rewards/margins": 8.536752700805664, + "rewards/rejected": -18.493661880493164, + "step": 13513 + }, + { + "epoch": 2.1, + "learning_rate": 4.235619067379014e-06, + "logits/chosen": -2.47434401512146, + "logits/rejected": -2.535045623779297, + "logps/chosen": -294.9337463378906, + "logps/rejected": -289.97552490234375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.426589012145996, + "rewards/margins": 7.94231653213501, + "rewards/rejected": -17.368906021118164, + "step": 13514 + }, + { + "epoch": 2.1, + "learning_rate": 4.234885626847866e-06, + "logits/chosen": -2.800955057144165, + "logits/rejected": -2.5097389221191406, + "logps/chosen": -524.4195556640625, + "logps/rejected": -411.9100036621094, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.591985702514648, + "rewards/margins": 6.2873992919921875, + "rewards/rejected": -13.879384994506836, + "step": 13515 + }, + { + "epoch": 2.1, + "learning_rate": 4.2341521863167175e-06, + "logits/chosen": -2.6335160732269287, + "logits/rejected": -3.001317262649536, + "logps/chosen": -135.311767578125, + "logps/rejected": -171.7859649658203, + "loss": 1.0197, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.798405647277832, + "rewards/margins": 2.0836410522460938, + "rewards/rejected": -8.882046699523926, + "step": 13516 + }, + { + "epoch": 2.1, + "learning_rate": 4.23341874578557e-06, + "logits/chosen": -2.8444340229034424, + "logits/rejected": -2.4586198329925537, + "logps/chosen": -185.38412475585938, + "logps/rejected": -259.0162353515625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.338239669799805, + "rewards/margins": 6.93402624130249, + "rewards/rejected": -16.272266387939453, + "step": 13517 + }, + { + "epoch": 2.1, + "learning_rate": 4.232685305254422e-06, + "logits/chosen": -2.009526252746582, + "logits/rejected": -2.1940555572509766, + "logps/chosen": -583.9779052734375, + "logps/rejected": -500.9215393066406, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.626944541931152, + "rewards/margins": 7.046026706695557, + "rewards/rejected": -16.672969818115234, + "step": 13518 + }, + { + "epoch": 2.1, + "learning_rate": 4.231951864723274e-06, + "logits/chosen": -2.150862455368042, + "logits/rejected": -2.9263710975646973, + "logps/chosen": -147.0531768798828, + "logps/rejected": -276.6337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.666082382202148, + "rewards/margins": 11.173203468322754, + "rewards/rejected": -16.83928680419922, + "step": 13519 + }, + { + "epoch": 2.1, + "learning_rate": 4.231218424192126e-06, + "logits/chosen": -2.8158483505249023, + "logits/rejected": -1.5461459159851074, + "logps/chosen": -290.61871337890625, + "logps/rejected": -309.3133544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.874778747558594, + "rewards/margins": 10.47221851348877, + "rewards/rejected": -15.34699821472168, + "step": 13520 + }, + { + "epoch": 2.1, + "learning_rate": 4.230484983660979e-06, + "logits/chosen": -2.09891414642334, + "logits/rejected": -3.0137417316436768, + "logps/chosen": -249.18246459960938, + "logps/rejected": -390.3990783691406, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.997151374816895, + "rewards/margins": 8.139001846313477, + "rewards/rejected": -17.136154174804688, + "step": 13521 + }, + { + "epoch": 2.1, + "learning_rate": 4.2297515431298305e-06, + "logits/chosen": -2.286208391189575, + "logits/rejected": -2.684532403945923, + "logps/chosen": -382.2674560546875, + "logps/rejected": -457.6686706542969, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.074565887451172, + "rewards/margins": 7.578146457672119, + "rewards/rejected": -17.652711868286133, + "step": 13522 + }, + { + "epoch": 2.1, + "learning_rate": 4.229018102598682e-06, + "logits/chosen": -1.3729254007339478, + "logits/rejected": -2.4380173683166504, + "logps/chosen": -163.97537231445312, + "logps/rejected": -496.4378662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.296616554260254, + "rewards/margins": 11.730111122131348, + "rewards/rejected": -22.0267276763916, + "step": 13523 + }, + { + "epoch": 2.1, + "learning_rate": 4.228284662067534e-06, + "logits/chosen": -1.5325350761413574, + "logits/rejected": -2.7463223934173584, + "logps/chosen": -196.53060913085938, + "logps/rejected": -469.6166687011719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.419787883758545, + "rewards/margins": 11.864015579223633, + "rewards/rejected": -19.283803939819336, + "step": 13524 + }, + { + "epoch": 2.1, + "learning_rate": 4.227551221536386e-06, + "logits/chosen": -2.105595111846924, + "logits/rejected": -2.79681134223938, + "logps/chosen": -228.2234344482422, + "logps/rejected": -246.1800537109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3621256351470947, + "rewards/margins": 8.457966804504395, + "rewards/rejected": -11.82009220123291, + "step": 13525 + }, + { + "epoch": 2.1, + "learning_rate": 4.226817781005239e-06, + "logits/chosen": -2.4522182941436768, + "logits/rejected": -2.332528591156006, + "logps/chosen": -139.1388702392578, + "logps/rejected": -346.1499328613281, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8062028884887695, + "rewards/margins": 7.172487258911133, + "rewards/rejected": -13.978690147399902, + "step": 13526 + }, + { + "epoch": 2.1, + "learning_rate": 4.226084340474091e-06, + "logits/chosen": -2.2587804794311523, + "logits/rejected": -2.5600552558898926, + "logps/chosen": -140.4552764892578, + "logps/rejected": -415.6177978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5197887420654297, + "rewards/margins": 14.238482475280762, + "rewards/rejected": -16.758270263671875, + "step": 13527 + }, + { + "epoch": 2.1, + "learning_rate": 4.2253508999429434e-06, + "logits/chosen": -2.872483015060425, + "logits/rejected": -2.6899571418762207, + "logps/chosen": -367.7568359375, + "logps/rejected": -392.5687255859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.864351272583008, + "rewards/margins": 9.42267894744873, + "rewards/rejected": -14.287030220031738, + "step": 13528 + }, + { + "epoch": 2.1, + "learning_rate": 4.224617459411795e-06, + "logits/chosen": -2.8647730350494385, + "logits/rejected": -1.8120871782302856, + "logps/chosen": -671.7023315429688, + "logps/rejected": -393.83160400390625, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.479082107543945, + "rewards/margins": 7.302570819854736, + "rewards/rejected": -12.781652450561523, + "step": 13529 + }, + { + "epoch": 2.1, + "learning_rate": 4.223884018880648e-06, + "logits/chosen": -2.5745344161987305, + "logits/rejected": -1.990257978439331, + "logps/chosen": -347.2466125488281, + "logps/rejected": -321.90850830078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.557758331298828, + "rewards/margins": 9.163492202758789, + "rewards/rejected": -13.721250534057617, + "step": 13530 + }, + { + "epoch": 2.1, + "learning_rate": 4.2231505783495e-06, + "logits/chosen": -2.690044641494751, + "logits/rejected": -2.6761953830718994, + "logps/chosen": -196.74014282226562, + "logps/rejected": -387.006103515625, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.386602401733398, + "rewards/margins": 7.654967784881592, + "rewards/rejected": -13.041569709777832, + "step": 13531 + }, + { + "epoch": 2.1, + "learning_rate": 4.222417137818352e-06, + "logits/chosen": -2.165804624557495, + "logits/rejected": -2.7964518070220947, + "logps/chosen": -134.51556396484375, + "logps/rejected": -392.3387451171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3436784744262695, + "rewards/margins": 9.897193908691406, + "rewards/rejected": -15.240872383117676, + "step": 13532 + }, + { + "epoch": 2.1, + "learning_rate": 4.221683697287204e-06, + "logits/chosen": -2.1655900478363037, + "logits/rejected": -2.8257980346679688, + "logps/chosen": -209.01617431640625, + "logps/rejected": -650.4146728515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.60004997253418, + "rewards/margins": 9.761975288391113, + "rewards/rejected": -18.36202621459961, + "step": 13533 + }, + { + "epoch": 2.1, + "learning_rate": 4.2209502567560556e-06, + "logits/chosen": -1.3358992338180542, + "logits/rejected": -2.6486456394195557, + "logps/chosen": -115.18616485595703, + "logps/rejected": -286.3063049316406, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.181133270263672, + "rewards/margins": 3.4797873497009277, + "rewards/rejected": -11.660920143127441, + "step": 13534 + }, + { + "epoch": 2.1, + "learning_rate": 4.220216816224908e-06, + "logits/chosen": -2.600243091583252, + "logits/rejected": -2.358283519744873, + "logps/chosen": -423.39324951171875, + "logps/rejected": -493.6824035644531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.857343673706055, + "rewards/margins": 9.82557487487793, + "rewards/rejected": -15.682918548583984, + "step": 13535 + }, + { + "epoch": 2.11, + "learning_rate": 4.21948337569376e-06, + "logits/chosen": -2.188826560974121, + "logits/rejected": -3.0542218685150146, + "logps/chosen": -153.8648223876953, + "logps/rejected": -410.92877197265625, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.777036666870117, + "rewards/margins": 5.375813007354736, + "rewards/rejected": -15.152849197387695, + "step": 13536 + }, + { + "epoch": 2.11, + "learning_rate": 4.218749935162612e-06, + "logits/chosen": -2.8318493366241455, + "logits/rejected": -2.763201951980591, + "logps/chosen": -258.39404296875, + "logps/rejected": -496.99920654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.806232929229736, + "rewards/margins": 11.350372314453125, + "rewards/rejected": -18.156604766845703, + "step": 13537 + }, + { + "epoch": 2.11, + "learning_rate": 4.218016494631464e-06, + "logits/chosen": -2.0883169174194336, + "logits/rejected": -1.9371654987335205, + "logps/chosen": -506.07965087890625, + "logps/rejected": -654.3834838867188, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.486894607543945, + "rewards/margins": 8.509342193603516, + "rewards/rejected": -17.996238708496094, + "step": 13538 + }, + { + "epoch": 2.11, + "learning_rate": 4.217283054100317e-06, + "logits/chosen": -2.8262476921081543, + "logits/rejected": -2.8756203651428223, + "logps/chosen": -210.02218627929688, + "logps/rejected": -388.47998046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7490739822387695, + "rewards/margins": 7.59163761138916, + "rewards/rejected": -12.34071159362793, + "step": 13539 + }, + { + "epoch": 2.11, + "learning_rate": 4.2165496135691685e-06, + "logits/chosen": -2.8351759910583496, + "logits/rejected": -2.5429391860961914, + "logps/chosen": -230.94097900390625, + "logps/rejected": -263.9896240234375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9310753345489502, + "rewards/margins": 10.460501670837402, + "rewards/rejected": -11.391576766967773, + "step": 13540 + }, + { + "epoch": 2.11, + "learning_rate": 4.21581617303802e-06, + "logits/chosen": -2.8066186904907227, + "logits/rejected": -1.5738698244094849, + "logps/chosen": -529.3665161132812, + "logps/rejected": -312.3527526855469, + "loss": 0.045, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.330798149108887, + "rewards/margins": 3.306272506713867, + "rewards/rejected": -11.637070655822754, + "step": 13541 + }, + { + "epoch": 2.11, + "learning_rate": 4.215082732506872e-06, + "logits/chosen": -2.6184632778167725, + "logits/rejected": -1.8500092029571533, + "logps/chosen": -277.91436767578125, + "logps/rejected": -317.30938720703125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.251307010650635, + "rewards/margins": 10.088334083557129, + "rewards/rejected": -17.339641571044922, + "step": 13542 + }, + { + "epoch": 2.11, + "learning_rate": 4.214349291975724e-06, + "logits/chosen": -2.7920379638671875, + "logits/rejected": -2.7004148960113525, + "logps/chosen": -186.2384033203125, + "logps/rejected": -314.75848388671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.846072673797607, + "rewards/margins": 9.280524253845215, + "rewards/rejected": -15.126596450805664, + "step": 13543 + }, + { + "epoch": 2.11, + "learning_rate": 4.213615851444577e-06, + "logits/chosen": -2.9145925045013428, + "logits/rejected": -3.001713514328003, + "logps/chosen": -130.00328063964844, + "logps/rejected": -267.09881591796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.625937461853027, + "rewards/margins": 8.736721992492676, + "rewards/rejected": -15.362659454345703, + "step": 13544 + }, + { + "epoch": 2.11, + "learning_rate": 4.21288241091343e-06, + "logits/chosen": -2.6625802516937256, + "logits/rejected": -2.968681573867798, + "logps/chosen": -71.42913055419922, + "logps/rejected": -213.62939453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.209112644195557, + "rewards/margins": 8.175475120544434, + "rewards/rejected": -12.384588241577148, + "step": 13545 + }, + { + "epoch": 2.11, + "learning_rate": 4.2121489703822815e-06, + "logits/chosen": -2.8805670738220215, + "logits/rejected": -2.9330906867980957, + "logps/chosen": -271.73944091796875, + "logps/rejected": -363.8150634765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.54315710067749, + "rewards/margins": 8.620524406433105, + "rewards/rejected": -13.163681030273438, + "step": 13546 + }, + { + "epoch": 2.11, + "learning_rate": 4.211415529851133e-06, + "logits/chosen": -2.803330421447754, + "logits/rejected": -2.7372541427612305, + "logps/chosen": -216.24652099609375, + "logps/rejected": -194.6448211669922, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.197105407714844, + "rewards/margins": 5.089481353759766, + "rewards/rejected": -15.28658676147461, + "step": 13547 + }, + { + "epoch": 2.11, + "learning_rate": 4.210682089319986e-06, + "logits/chosen": -0.7764365673065186, + "logits/rejected": -2.2247323989868164, + "logps/chosen": -109.46842193603516, + "logps/rejected": -340.0719299316406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.221117973327637, + "rewards/margins": 11.28155517578125, + "rewards/rejected": -18.502674102783203, + "step": 13548 + }, + { + "epoch": 2.11, + "learning_rate": 4.209948648788838e-06, + "logits/chosen": -1.8286470174789429, + "logits/rejected": -2.6458375453948975, + "logps/chosen": -492.6914367675781, + "logps/rejected": -804.7677001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.358340263366699, + "rewards/margins": 18.084503173828125, + "rewards/rejected": -22.44284439086914, + "step": 13549 + }, + { + "epoch": 2.11, + "learning_rate": 4.20921520825769e-06, + "logits/chosen": -2.8192601203918457, + "logits/rejected": -2.6832523345947266, + "logps/chosen": -469.5, + "logps/rejected": -544.3994750976562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5277605056762695, + "rewards/margins": 11.786033630371094, + "rewards/rejected": -15.313793182373047, + "step": 13550 + }, + { + "epoch": 2.11, + "learning_rate": 4.208481767726542e-06, + "logits/chosen": -2.9230763912200928, + "logits/rejected": -2.2885854244232178, + "logps/chosen": -527.3113403320312, + "logps/rejected": -464.6318054199219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.960570335388184, + "rewards/margins": 9.371424674987793, + "rewards/rejected": -16.331995010375977, + "step": 13551 + }, + { + "epoch": 2.11, + "learning_rate": 4.207748327195394e-06, + "logits/chosen": -2.7257144451141357, + "logits/rejected": -2.893710136413574, + "logps/chosen": -681.5634765625, + "logps/rejected": -649.3533935546875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.964076042175293, + "rewards/margins": 6.950701713562012, + "rewards/rejected": -13.914777755737305, + "step": 13552 + }, + { + "epoch": 2.11, + "learning_rate": 4.207014886664246e-06, + "logits/chosen": -2.7463550567626953, + "logits/rejected": -2.3821420669555664, + "logps/chosen": -282.6119384765625, + "logps/rejected": -285.3836669921875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9645309448242188, + "rewards/margins": 9.372093200683594, + "rewards/rejected": -13.336624145507812, + "step": 13553 + }, + { + "epoch": 2.11, + "learning_rate": 4.206281446133098e-06, + "logits/chosen": -2.7223589420318604, + "logits/rejected": -2.3149003982543945, + "logps/chosen": -513.7140502929688, + "logps/rejected": -477.4650573730469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.60194730758667, + "rewards/margins": 13.332975387573242, + "rewards/rejected": -17.934921264648438, + "step": 13554 + }, + { + "epoch": 2.11, + "learning_rate": 4.20554800560195e-06, + "logits/chosen": -2.2829697132110596, + "logits/rejected": -2.8288090229034424, + "logps/chosen": -157.37387084960938, + "logps/rejected": -329.40185546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.693200588226318, + "rewards/margins": 9.082043647766113, + "rewards/rejected": -14.775243759155273, + "step": 13555 + }, + { + "epoch": 2.11, + "learning_rate": 4.204814565070802e-06, + "logits/chosen": -0.8170729875564575, + "logits/rejected": -2.7182047367095947, + "logps/chosen": -157.2344970703125, + "logps/rejected": -683.3417358398438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.117408752441406, + "rewards/margins": 10.001110076904297, + "rewards/rejected": -19.118518829345703, + "step": 13556 + }, + { + "epoch": 2.11, + "learning_rate": 4.204081124539655e-06, + "logits/chosen": -0.8158561587333679, + "logits/rejected": -2.4576308727264404, + "logps/chosen": -217.68206787109375, + "logps/rejected": -761.926513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.431097030639648, + "rewards/margins": 13.20404052734375, + "rewards/rejected": -22.63513946533203, + "step": 13557 + }, + { + "epoch": 2.11, + "learning_rate": 4.2033476840085066e-06, + "logits/chosen": -2.7517478466033936, + "logits/rejected": -2.6540465354919434, + "logps/chosen": -332.4007568359375, + "logps/rejected": -460.3358154296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.835351943969727, + "rewards/margins": 10.053430557250977, + "rewards/rejected": -15.888782501220703, + "step": 13558 + }, + { + "epoch": 2.11, + "learning_rate": 4.2026142434773584e-06, + "logits/chosen": -2.6834630966186523, + "logits/rejected": -2.822484016418457, + "logps/chosen": -246.45826721191406, + "logps/rejected": -443.7596435546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.449247360229492, + "rewards/margins": 9.026836395263672, + "rewards/rejected": -15.47608470916748, + "step": 13559 + }, + { + "epoch": 2.11, + "learning_rate": 4.20188080294621e-06, + "logits/chosen": -2.820450782775879, + "logits/rejected": -2.920851230621338, + "logps/chosen": -144.7761688232422, + "logps/rejected": -241.3775634765625, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.355005264282227, + "rewards/margins": 8.362813949584961, + "rewards/rejected": -13.717819213867188, + "step": 13560 + }, + { + "epoch": 2.11, + "learning_rate": 4.201147362415063e-06, + "logits/chosen": -2.697108507156372, + "logits/rejected": -3.145160436630249, + "logps/chosen": -61.55113983154297, + "logps/rejected": -195.2601776123047, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.970233917236328, + "rewards/margins": 8.381842613220215, + "rewards/rejected": -13.352075576782227, + "step": 13561 + }, + { + "epoch": 2.11, + "learning_rate": 4.200413921883916e-06, + "logits/chosen": -1.9472461938858032, + "logits/rejected": -2.8468070030212402, + "logps/chosen": -364.1253662109375, + "logps/rejected": -523.9201049804688, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.535549163818359, + "rewards/margins": 9.81953239440918, + "rewards/rejected": -17.35508155822754, + "step": 13562 + }, + { + "epoch": 2.11, + "learning_rate": 4.199680481352768e-06, + "logits/chosen": -1.9209235906600952, + "logits/rejected": -2.8346211910247803, + "logps/chosen": -193.58505249023438, + "logps/rejected": -387.1717529296875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.534053325653076, + "rewards/margins": 6.580467224121094, + "rewards/rejected": -13.114521026611328, + "step": 13563 + }, + { + "epoch": 2.11, + "learning_rate": 4.1989470408216195e-06, + "logits/chosen": -1.698508620262146, + "logits/rejected": -2.722137928009033, + "logps/chosen": -70.03245544433594, + "logps/rejected": -267.2262878417969, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6845521926879883, + "rewards/margins": 10.415447235107422, + "rewards/rejected": -13.09999942779541, + "step": 13564 + }, + { + "epoch": 2.11, + "learning_rate": 4.198213600290471e-06, + "logits/chosen": -1.5106533765792847, + "logits/rejected": -2.617461919784546, + "logps/chosen": -253.14234924316406, + "logps/rejected": -507.61077880859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.381063461303711, + "rewards/margins": 11.398187637329102, + "rewards/rejected": -18.779251098632812, + "step": 13565 + }, + { + "epoch": 2.11, + "learning_rate": 4.197480159759324e-06, + "logits/chosen": -3.0581507682800293, + "logits/rejected": -1.8220962285995483, + "logps/chosen": -290.5048828125, + "logps/rejected": -216.29251098632812, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1696577072143555, + "rewards/margins": 9.084922790527344, + "rewards/rejected": -16.254579544067383, + "step": 13566 + }, + { + "epoch": 2.11, + "learning_rate": 4.196746719228176e-06, + "logits/chosen": -2.8121275901794434, + "logits/rejected": -3.0574638843536377, + "logps/chosen": -142.58627319335938, + "logps/rejected": -141.7471160888672, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.307201385498047, + "rewards/margins": 6.5441484451293945, + "rewards/rejected": -8.851349830627441, + "step": 13567 + }, + { + "epoch": 2.11, + "learning_rate": 4.196013278697028e-06, + "logits/chosen": -2.499133825302124, + "logits/rejected": -2.7562673091888428, + "logps/chosen": -159.85189819335938, + "logps/rejected": -407.29620361328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.618432521820068, + "rewards/margins": 10.594356536865234, + "rewards/rejected": -18.21278953552246, + "step": 13568 + }, + { + "epoch": 2.11, + "learning_rate": 4.19527983816588e-06, + "logits/chosen": -1.7636220455169678, + "logits/rejected": -2.5589897632598877, + "logps/chosen": -182.09640502929688, + "logps/rejected": -474.1473388671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.556583404541016, + "rewards/margins": 10.666037559509277, + "rewards/rejected": -18.22262191772461, + "step": 13569 + }, + { + "epoch": 2.11, + "learning_rate": 4.1945463976347325e-06, + "logits/chosen": -2.0968451499938965, + "logits/rejected": -2.9608230590820312, + "logps/chosen": -89.69271850585938, + "logps/rejected": -224.9409942626953, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.275942802429199, + "rewards/margins": 6.21495246887207, + "rewards/rejected": -13.49089527130127, + "step": 13570 + }, + { + "epoch": 2.11, + "learning_rate": 4.193812957103584e-06, + "logits/chosen": -1.8832911252975464, + "logits/rejected": -2.9124341011047363, + "logps/chosen": -158.84359741210938, + "logps/rejected": -292.860107421875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.354293823242188, + "rewards/margins": 6.095475673675537, + "rewards/rejected": -14.449769973754883, + "step": 13571 + }, + { + "epoch": 2.11, + "learning_rate": 4.193079516572436e-06, + "logits/chosen": -1.281163215637207, + "logits/rejected": -2.3443052768707275, + "logps/chosen": -126.98212432861328, + "logps/rejected": -306.68194580078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.616394996643066, + "rewards/margins": 9.957948684692383, + "rewards/rejected": -17.574342727661133, + "step": 13572 + }, + { + "epoch": 2.11, + "learning_rate": 4.192346076041288e-06, + "logits/chosen": -1.4986549615859985, + "logits/rejected": -2.836942672729492, + "logps/chosen": -335.2001037597656, + "logps/rejected": -591.291748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.814231872558594, + "rewards/margins": 10.398524284362793, + "rewards/rejected": -17.212757110595703, + "step": 13573 + }, + { + "epoch": 2.11, + "learning_rate": 4.19161263551014e-06, + "logits/chosen": -2.77817440032959, + "logits/rejected": -2.8653948307037354, + "logps/chosen": -174.04226684570312, + "logps/rejected": -419.10394287109375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.326800346374512, + "rewards/margins": 8.745539665222168, + "rewards/rejected": -15.07234001159668, + "step": 13574 + }, + { + "epoch": 2.11, + "learning_rate": 4.190879194978993e-06, + "logits/chosen": -2.903555154800415, + "logits/rejected": -2.9326202869415283, + "logps/chosen": -74.50386047363281, + "logps/rejected": -180.56686401367188, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1786065101623535, + "rewards/margins": 7.630561828613281, + "rewards/rejected": -13.809167861938477, + "step": 13575 + }, + { + "epoch": 2.11, + "learning_rate": 4.190145754447845e-06, + "logits/chosen": -2.558478593826294, + "logits/rejected": -1.1869564056396484, + "logps/chosen": -251.2708282470703, + "logps/rejected": -186.16722106933594, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.775084495544434, + "rewards/margins": 6.715699672698975, + "rewards/rejected": -11.490784645080566, + "step": 13576 + }, + { + "epoch": 2.11, + "learning_rate": 4.1894123139166965e-06, + "logits/chosen": -2.6784393787384033, + "logits/rejected": -2.8527042865753174, + "logps/chosen": -81.17193603515625, + "logps/rejected": -232.46173095703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9182305335998535, + "rewards/margins": 8.502154350280762, + "rewards/rejected": -13.420384407043457, + "step": 13577 + }, + { + "epoch": 2.11, + "learning_rate": 4.188678873385549e-06, + "logits/chosen": -1.7782633304595947, + "logits/rejected": -2.709021806716919, + "logps/chosen": -177.23123168945312, + "logps/rejected": -511.8663330078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.607170104980469, + "rewards/margins": 14.003556251525879, + "rewards/rejected": -21.61072540283203, + "step": 13578 + }, + { + "epoch": 2.11, + "learning_rate": 4.187945432854402e-06, + "logits/chosen": -2.9878182411193848, + "logits/rejected": -2.8465323448181152, + "logps/chosen": -276.4990234375, + "logps/rejected": -420.4783630371094, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.262704849243164, + "rewards/margins": 10.448430061340332, + "rewards/rejected": -18.711135864257812, + "step": 13579 + }, + { + "epoch": 2.11, + "learning_rate": 4.187211992323254e-06, + "logits/chosen": -0.2592638432979584, + "logits/rejected": -2.5397253036499023, + "logps/chosen": -209.83534240722656, + "logps/rejected": -1039.41943359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.597211837768555, + "rewards/margins": 17.091585159301758, + "rewards/rejected": -27.688796997070312, + "step": 13580 + }, + { + "epoch": 2.11, + "learning_rate": 4.186478551792106e-06, + "logits/chosen": -2.1663098335266113, + "logits/rejected": -2.4566802978515625, + "logps/chosen": -239.8179168701172, + "logps/rejected": -372.18829345703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.197960376739502, + "rewards/margins": 10.26739501953125, + "rewards/rejected": -17.465354919433594, + "step": 13581 + }, + { + "epoch": 2.11, + "learning_rate": 4.1857451112609576e-06, + "logits/chosen": -1.2488218545913696, + "logits/rejected": -2.759505271911621, + "logps/chosen": -215.86639404296875, + "logps/rejected": -810.4935302734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.880714416503906, + "rewards/margins": 11.264680862426758, + "rewards/rejected": -20.145395278930664, + "step": 13582 + }, + { + "epoch": 2.11, + "learning_rate": 4.1850116707298095e-06, + "logits/chosen": -1.9659295082092285, + "logits/rejected": -2.17122483253479, + "logps/chosen": -196.18585205078125, + "logps/rejected": -290.8072509765625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.995429992675781, + "rewards/margins": 7.874531269073486, + "rewards/rejected": -18.86996078491211, + "step": 13583 + }, + { + "epoch": 2.11, + "learning_rate": 4.184278230198662e-06, + "logits/chosen": -1.6896840333938599, + "logits/rejected": -2.305342435836792, + "logps/chosen": -309.0762939453125, + "logps/rejected": -454.9609069824219, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.222009658813477, + "rewards/margins": 6.786376476287842, + "rewards/rejected": -16.008386611938477, + "step": 13584 + }, + { + "epoch": 2.11, + "learning_rate": 4.183544789667514e-06, + "logits/chosen": -1.6736520528793335, + "logits/rejected": -2.8135833740234375, + "logps/chosen": -186.7255401611328, + "logps/rejected": -401.601318359375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.191710948944092, + "rewards/margins": 7.768217086791992, + "rewards/rejected": -14.959928512573242, + "step": 13585 + }, + { + "epoch": 2.11, + "learning_rate": 4.182811349136366e-06, + "logits/chosen": -2.7108633518218994, + "logits/rejected": -1.6353756189346313, + "logps/chosen": -181.66896057128906, + "logps/rejected": -319.6883239746094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.814110279083252, + "rewards/margins": 12.315288543701172, + "rewards/rejected": -16.129398345947266, + "step": 13586 + }, + { + "epoch": 2.11, + "learning_rate": 4.182077908605218e-06, + "logits/chosen": -1.9313318729400635, + "logits/rejected": -2.520434617996216, + "logps/chosen": -224.83319091796875, + "logps/rejected": -233.30824279785156, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.592585563659668, + "rewards/margins": 5.4486165046691895, + "rewards/rejected": -12.041202545166016, + "step": 13587 + }, + { + "epoch": 2.11, + "learning_rate": 4.1813444680740705e-06, + "logits/chosen": -2.8773601055145264, + "logits/rejected": -2.8862147331237793, + "logps/chosen": -146.57415771484375, + "logps/rejected": -440.6531066894531, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.134025573730469, + "rewards/margins": 9.886220932006836, + "rewards/rejected": -20.020246505737305, + "step": 13588 + }, + { + "epoch": 2.11, + "learning_rate": 4.180611027542922e-06, + "logits/chosen": -2.7406344413757324, + "logits/rejected": -2.8833065032958984, + "logps/chosen": -181.54818725585938, + "logps/rejected": -377.014404296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.576798439025879, + "rewards/margins": 8.44471549987793, + "rewards/rejected": -15.021514892578125, + "step": 13589 + }, + { + "epoch": 2.11, + "learning_rate": 4.179877587011774e-06, + "logits/chosen": -2.7811405658721924, + "logits/rejected": -2.9242289066314697, + "logps/chosen": -99.07756805419922, + "logps/rejected": -574.5817260742188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.330427169799805, + "rewards/margins": 11.548377990722656, + "rewards/rejected": -18.87880516052246, + "step": 13590 + }, + { + "epoch": 2.11, + "learning_rate": 4.179144146480626e-06, + "logits/chosen": -3.014744758605957, + "logits/rejected": -2.6029365062713623, + "logps/chosen": -433.8688049316406, + "logps/rejected": -387.584228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.432134628295898, + "rewards/margins": 10.789491653442383, + "rewards/rejected": -16.22162628173828, + "step": 13591 + }, + { + "epoch": 2.11, + "learning_rate": 4.178410705949478e-06, + "logits/chosen": -2.7380001544952393, + "logits/rejected": -3.107300281524658, + "logps/chosen": -148.55474853515625, + "logps/rejected": -228.4815673828125, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.089195251464844, + "rewards/margins": 5.941980361938477, + "rewards/rejected": -13.03117561340332, + "step": 13592 + }, + { + "epoch": 2.11, + "learning_rate": 4.177677265418331e-06, + "logits/chosen": -2.772592067718506, + "logits/rejected": -1.7887814044952393, + "logps/chosen": -387.7024230957031, + "logps/rejected": -436.61639404296875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.715716361999512, + "rewards/margins": 7.7378129959106445, + "rewards/rejected": -17.453529357910156, + "step": 13593 + }, + { + "epoch": 2.11, + "learning_rate": 4.176943824887183e-06, + "logits/chosen": -2.3259408473968506, + "logits/rejected": -2.881301164627075, + "logps/chosen": -309.8369445800781, + "logps/rejected": -395.5089111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.157522678375244, + "rewards/margins": 13.842669486999512, + "rewards/rejected": -18.000192642211914, + "step": 13594 + }, + { + "epoch": 2.11, + "learning_rate": 4.176210384356035e-06, + "logits/chosen": -2.5260348320007324, + "logits/rejected": -2.8511548042297363, + "logps/chosen": -101.95476531982422, + "logps/rejected": -422.2931823730469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.490447998046875, + "rewards/margins": 12.283029556274414, + "rewards/rejected": -19.77347755432129, + "step": 13595 + }, + { + "epoch": 2.11, + "learning_rate": 4.175476943824887e-06, + "logits/chosen": -2.5779895782470703, + "logits/rejected": -2.404712677001953, + "logps/chosen": -207.29864501953125, + "logps/rejected": -371.3865661621094, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.070510864257812, + "rewards/margins": 8.08137321472168, + "rewards/rejected": -16.151884078979492, + "step": 13596 + }, + { + "epoch": 2.11, + "learning_rate": 4.17474350329374e-06, + "logits/chosen": -1.7003014087677002, + "logits/rejected": -2.8789098262786865, + "logps/chosen": -230.32626342773438, + "logps/rejected": -346.9034118652344, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.138243675231934, + "rewards/margins": 9.389130592346191, + "rewards/rejected": -17.527374267578125, + "step": 13597 + }, + { + "epoch": 2.11, + "learning_rate": 4.174010062762592e-06, + "logits/chosen": -2.311892509460449, + "logits/rejected": -2.453679084777832, + "logps/chosen": -270.526123046875, + "logps/rejected": -466.2470703125, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.595216751098633, + "rewards/margins": 5.526514530181885, + "rewards/rejected": -17.12173080444336, + "step": 13598 + }, + { + "epoch": 2.11, + "learning_rate": 4.173276622231444e-06, + "logits/chosen": -2.73378849029541, + "logits/rejected": -2.9262828826904297, + "logps/chosen": -143.12921142578125, + "logps/rejected": -310.8813781738281, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9327621459960938, + "rewards/margins": 8.290631294250488, + "rewards/rejected": -12.223392486572266, + "step": 13599 + }, + { + "epoch": 2.12, + "learning_rate": 4.172543181700296e-06, + "logits/chosen": -2.4442813396453857, + "logits/rejected": -2.925215482711792, + "logps/chosen": -156.87120056152344, + "logps/rejected": -394.6971435546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.586010932922363, + "rewards/margins": 9.624687194824219, + "rewards/rejected": -15.210698127746582, + "step": 13600 + }, + { + "epoch": 2.12, + "learning_rate": 4.1718097411691475e-06, + "logits/chosen": -2.1570703983306885, + "logits/rejected": -2.7595269680023193, + "logps/chosen": -310.924560546875, + "logps/rejected": -458.110595703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.965766429901123, + "rewards/margins": 9.807039260864258, + "rewards/rejected": -15.772806167602539, + "step": 13601 + }, + { + "epoch": 2.12, + "learning_rate": 4.171076300638e-06, + "logits/chosen": -2.3658316135406494, + "logits/rejected": -2.4524497985839844, + "logps/chosen": -204.83302307128906, + "logps/rejected": -291.78240966796875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.365076065063477, + "rewards/margins": 6.010613918304443, + "rewards/rejected": -17.375690460205078, + "step": 13602 + }, + { + "epoch": 2.12, + "learning_rate": 4.170342860106852e-06, + "logits/chosen": -1.9318920373916626, + "logits/rejected": -2.340806722640991, + "logps/chosen": -142.685791015625, + "logps/rejected": -253.8499298095703, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.527698516845703, + "rewards/margins": 6.158100605010986, + "rewards/rejected": -13.685798645019531, + "step": 13603 + }, + { + "epoch": 2.12, + "learning_rate": 4.169609419575704e-06, + "logits/chosen": -1.799903154373169, + "logits/rejected": -2.550647258758545, + "logps/chosen": -247.34945678710938, + "logps/rejected": -542.9034423828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.48746109008789, + "rewards/margins": 13.348106384277344, + "rewards/rejected": -24.835567474365234, + "step": 13604 + }, + { + "epoch": 2.12, + "learning_rate": 4.168875979044556e-06, + "logits/chosen": -2.0710561275482178, + "logits/rejected": -2.600374221801758, + "logps/chosen": -280.08251953125, + "logps/rejected": -251.2290496826172, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.075993537902832, + "rewards/margins": 5.114362716674805, + "rewards/rejected": -13.190357208251953, + "step": 13605 + }, + { + "epoch": 2.12, + "learning_rate": 4.168142538513409e-06, + "logits/chosen": -2.6898815631866455, + "logits/rejected": -2.4487125873565674, + "logps/chosen": -269.3520812988281, + "logps/rejected": -416.4552001953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.990056037902832, + "rewards/margins": 11.550336837768555, + "rewards/rejected": -24.540393829345703, + "step": 13606 + }, + { + "epoch": 2.12, + "learning_rate": 4.1674090979822605e-06, + "logits/chosen": -2.8233368396759033, + "logits/rejected": -1.2433369159698486, + "logps/chosen": -476.462158203125, + "logps/rejected": -256.469970703125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.491497039794922, + "rewards/margins": 9.35768985748291, + "rewards/rejected": -17.84918785095215, + "step": 13607 + }, + { + "epoch": 2.12, + "learning_rate": 4.166675657451112e-06, + "logits/chosen": -1.9802882671356201, + "logits/rejected": -2.5836377143859863, + "logps/chosen": -236.6497039794922, + "logps/rejected": -389.62139892578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.093457221984863, + "rewards/margins": 9.503487586975098, + "rewards/rejected": -16.59694480895996, + "step": 13608 + }, + { + "epoch": 2.12, + "learning_rate": 4.165942216919964e-06, + "logits/chosen": -1.1803585290908813, + "logits/rejected": -2.323514938354492, + "logps/chosen": -203.02719116210938, + "logps/rejected": -518.5877685546875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.213283538818359, + "rewards/margins": 7.459847450256348, + "rewards/rejected": -13.673130989074707, + "step": 13609 + }, + { + "epoch": 2.12, + "learning_rate": 4.165208776388817e-06, + "logits/chosen": -2.635742425918579, + "logits/rejected": -2.9710276126861572, + "logps/chosen": -144.8472900390625, + "logps/rejected": -220.95236206054688, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.084711074829102, + "rewards/margins": 5.833283424377441, + "rewards/rejected": -12.917994499206543, + "step": 13610 + }, + { + "epoch": 2.12, + "learning_rate": 4.164475335857669e-06, + "logits/chosen": -2.764678955078125, + "logits/rejected": -1.8444585800170898, + "logps/chosen": -527.27294921875, + "logps/rejected": -269.0321044921875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.852761745452881, + "rewards/margins": 7.631595134735107, + "rewards/rejected": -12.484356880187988, + "step": 13611 + }, + { + "epoch": 2.12, + "learning_rate": 4.1637418953265215e-06, + "logits/chosen": -1.9006407260894775, + "logits/rejected": -2.561579465866089, + "logps/chosen": -802.13623046875, + "logps/rejected": -840.1324462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.629486083984375, + "rewards/margins": 13.969491958618164, + "rewards/rejected": -18.59897804260254, + "step": 13612 + }, + { + "epoch": 2.12, + "learning_rate": 4.163008454795373e-06, + "logits/chosen": -2.1768276691436768, + "logits/rejected": -2.783947229385376, + "logps/chosen": -75.72421264648438, + "logps/rejected": -236.48345947265625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.235119819641113, + "rewards/margins": 7.27879524230957, + "rewards/rejected": -12.513916015625, + "step": 13613 + }, + { + "epoch": 2.12, + "learning_rate": 4.162275014264225e-06, + "logits/chosen": -2.6182713508605957, + "logits/rejected": -2.6404669284820557, + "logps/chosen": -538.73583984375, + "logps/rejected": -711.096923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.127946853637695, + "rewards/margins": 11.670515060424805, + "rewards/rejected": -19.7984619140625, + "step": 13614 + }, + { + "epoch": 2.12, + "learning_rate": 4.161541573733078e-06, + "logits/chosen": -2.728468656539917, + "logits/rejected": -3.0194427967071533, + "logps/chosen": -436.72149658203125, + "logps/rejected": -528.7424926757812, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.945930480957031, + "rewards/margins": 8.878413200378418, + "rewards/rejected": -14.824342727661133, + "step": 13615 + }, + { + "epoch": 2.12, + "learning_rate": 4.16080813320193e-06, + "logits/chosen": -2.943924903869629, + "logits/rejected": -2.737272262573242, + "logps/chosen": -166.89422607421875, + "logps/rejected": -198.5966796875, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.58901596069336, + "rewards/margins": 4.939722061157227, + "rewards/rejected": -15.528738021850586, + "step": 13616 + }, + { + "epoch": 2.12, + "learning_rate": 4.160074692670782e-06, + "logits/chosen": -2.6773364543914795, + "logits/rejected": -2.0741240978240967, + "logps/chosen": -588.6783447265625, + "logps/rejected": -347.198486328125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.460824966430664, + "rewards/margins": 7.7761688232421875, + "rewards/rejected": -13.236993789672852, + "step": 13617 + }, + { + "epoch": 2.12, + "learning_rate": 4.159341252139634e-06, + "logits/chosen": -2.004970073699951, + "logits/rejected": -3.087731122970581, + "logps/chosen": -424.79815673828125, + "logps/rejected": -458.5286560058594, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.918393135070801, + "rewards/margins": 9.341318130493164, + "rewards/rejected": -15.259711265563965, + "step": 13618 + }, + { + "epoch": 2.12, + "learning_rate": 4.158607811608486e-06, + "logits/chosen": -0.8293971419334412, + "logits/rejected": -2.028179168701172, + "logps/chosen": -102.62931060791016, + "logps/rejected": -277.1187438964844, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.327390670776367, + "rewards/margins": 7.3636040687561035, + "rewards/rejected": -14.690994262695312, + "step": 13619 + }, + { + "epoch": 2.12, + "learning_rate": 4.157874371077338e-06, + "logits/chosen": -1.279710054397583, + "logits/rejected": -2.6676933765411377, + "logps/chosen": -252.80859375, + "logps/rejected": -517.1098022460938, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.630037307739258, + "rewards/margins": 8.142419815063477, + "rewards/rejected": -16.772457122802734, + "step": 13620 + }, + { + "epoch": 2.12, + "learning_rate": 4.15714093054619e-06, + "logits/chosen": -2.2735891342163086, + "logits/rejected": -2.8774611949920654, + "logps/chosen": -153.01483154296875, + "logps/rejected": -351.883056640625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.71866512298584, + "rewards/margins": 8.523277282714844, + "rewards/rejected": -14.241941452026367, + "step": 13621 + }, + { + "epoch": 2.12, + "learning_rate": 4.156407490015042e-06, + "logits/chosen": -2.593801259994507, + "logits/rejected": -2.5867092609405518, + "logps/chosen": -268.7154541015625, + "logps/rejected": -388.33331298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.472356796264648, + "rewards/margins": 11.389545440673828, + "rewards/rejected": -20.861902236938477, + "step": 13622 + }, + { + "epoch": 2.12, + "learning_rate": 4.155674049483894e-06, + "logits/chosen": -1.676487922668457, + "logits/rejected": -2.5597434043884277, + "logps/chosen": -123.73052215576172, + "logps/rejected": -409.87890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.45983362197876, + "rewards/margins": 11.934263229370117, + "rewards/rejected": -18.39409637451172, + "step": 13623 + }, + { + "epoch": 2.12, + "learning_rate": 4.154940608952747e-06, + "logits/chosen": -2.8853158950805664, + "logits/rejected": -2.6893341541290283, + "logps/chosen": -271.75537109375, + "logps/rejected": -306.6846618652344, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.218867778778076, + "rewards/margins": 7.922005653381348, + "rewards/rejected": -13.140872955322266, + "step": 13624 + }, + { + "epoch": 2.12, + "learning_rate": 4.1542071684215985e-06, + "logits/chosen": -2.283169746398926, + "logits/rejected": -2.5559475421905518, + "logps/chosen": -201.18392944335938, + "logps/rejected": -357.1626281738281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.729547500610352, + "rewards/margins": 10.963966369628906, + "rewards/rejected": -17.693511962890625, + "step": 13625 + }, + { + "epoch": 2.12, + "learning_rate": 4.15347372789045e-06, + "logits/chosen": -2.638359308242798, + "logits/rejected": -2.561783790588379, + "logps/chosen": -381.7685546875, + "logps/rejected": -443.53631591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.383358955383301, + "rewards/margins": 11.483444213867188, + "rewards/rejected": -17.866802215576172, + "step": 13626 + }, + { + "epoch": 2.12, + "learning_rate": 4.152740287359302e-06, + "logits/chosen": -2.658958673477173, + "logits/rejected": -2.3288800716400146, + "logps/chosen": -384.8089599609375, + "logps/rejected": -333.58489990234375, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.27953815460205, + "rewards/margins": 4.9981513023376465, + "rewards/rejected": -14.277688980102539, + "step": 13627 + }, + { + "epoch": 2.12, + "learning_rate": 4.152006846828155e-06, + "logits/chosen": -1.6362507343292236, + "logits/rejected": -2.7334728240966797, + "logps/chosen": -167.7006072998047, + "logps/rejected": -557.064697265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.138176918029785, + "rewards/margins": 11.41935920715332, + "rewards/rejected": -19.557537078857422, + "step": 13628 + }, + { + "epoch": 2.12, + "learning_rate": 4.151273406297008e-06, + "logits/chosen": -3.015648603439331, + "logits/rejected": -2.842562437057495, + "logps/chosen": -209.49757385253906, + "logps/rejected": -321.21722412109375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.503426551818848, + "rewards/margins": 7.3210906982421875, + "rewards/rejected": -14.824517250061035, + "step": 13629 + }, + { + "epoch": 2.12, + "learning_rate": 4.15053996576586e-06, + "logits/chosen": -2.8934273719787598, + "logits/rejected": -2.7980387210845947, + "logps/chosen": -284.3800048828125, + "logps/rejected": -503.9114990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.728799343109131, + "rewards/margins": 15.368666648864746, + "rewards/rejected": -21.09746551513672, + "step": 13630 + }, + { + "epoch": 2.12, + "learning_rate": 4.1498065252347115e-06, + "logits/chosen": -2.5821938514709473, + "logits/rejected": -2.861375570297241, + "logps/chosen": -450.64166259765625, + "logps/rejected": -541.1370849609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.538431167602539, + "rewards/margins": 9.128835678100586, + "rewards/rejected": -18.667266845703125, + "step": 13631 + }, + { + "epoch": 2.12, + "learning_rate": 4.149073084703563e-06, + "logits/chosen": -0.9924890995025635, + "logits/rejected": -2.583570718765259, + "logps/chosen": -225.6488037109375, + "logps/rejected": -478.2080078125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.886595726013184, + "rewards/margins": 8.15404224395752, + "rewards/rejected": -18.040637969970703, + "step": 13632 + }, + { + "epoch": 2.12, + "learning_rate": 4.148339644172416e-06, + "logits/chosen": -2.4820289611816406, + "logits/rejected": -1.458065390586853, + "logps/chosen": -180.9164581298828, + "logps/rejected": -307.062255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.715265274047852, + "rewards/margins": 12.183231353759766, + "rewards/rejected": -17.898496627807617, + "step": 13633 + }, + { + "epoch": 2.12, + "learning_rate": 4.147606203641268e-06, + "logits/chosen": -2.663815498352051, + "logits/rejected": -2.317471981048584, + "logps/chosen": -204.30189514160156, + "logps/rejected": -319.6678466796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.560861587524414, + "rewards/margins": 12.14613151550293, + "rewards/rejected": -19.706993103027344, + "step": 13634 + }, + { + "epoch": 2.12, + "learning_rate": 4.14687276311012e-06, + "logits/chosen": -2.8943819999694824, + "logits/rejected": -2.2642998695373535, + "logps/chosen": -214.68377685546875, + "logps/rejected": -262.65185546875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.305277347564697, + "rewards/margins": 9.311088562011719, + "rewards/rejected": -15.616365432739258, + "step": 13635 + }, + { + "epoch": 2.12, + "learning_rate": 4.146139322578972e-06, + "logits/chosen": -2.945359706878662, + "logits/rejected": -1.97452974319458, + "logps/chosen": -431.1674499511719, + "logps/rejected": -348.5089416503906, + "loss": 0.0312, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.772915840148926, + "rewards/margins": 5.066643714904785, + "rewards/rejected": -11.839559555053711, + "step": 13636 + }, + { + "epoch": 2.12, + "learning_rate": 4.1454058820478244e-06, + "logits/chosen": -1.6448137760162354, + "logits/rejected": -2.6304047107696533, + "logps/chosen": -129.08145141601562, + "logps/rejected": -444.59930419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4116101264953613, + "rewards/margins": 12.763137817382812, + "rewards/rejected": -16.174747467041016, + "step": 13637 + }, + { + "epoch": 2.12, + "learning_rate": 4.144672441516676e-06, + "logits/chosen": -1.801472544670105, + "logits/rejected": -2.7035629749298096, + "logps/chosen": -246.75379943847656, + "logps/rejected": -592.4529418945312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.938920021057129, + "rewards/margins": 10.855310440063477, + "rewards/rejected": -18.794231414794922, + "step": 13638 + }, + { + "epoch": 2.12, + "learning_rate": 4.143939000985528e-06, + "logits/chosen": -2.5978145599365234, + "logits/rejected": -2.303302764892578, + "logps/chosen": -397.4088134765625, + "logps/rejected": -409.8956298828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.528759002685547, + "rewards/margins": 10.657395362854004, + "rewards/rejected": -17.186153411865234, + "step": 13639 + }, + { + "epoch": 2.12, + "learning_rate": 4.14320556045438e-06, + "logits/chosen": -2.362806558609009, + "logits/rejected": -0.7646419405937195, + "logps/chosen": -408.0243835449219, + "logps/rejected": -138.45339965820312, + "loss": 0.067, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5876381397247314, + "rewards/margins": 6.076683044433594, + "rewards/rejected": -9.664320945739746, + "step": 13640 + }, + { + "epoch": 2.12, + "learning_rate": 4.142472119923232e-06, + "logits/chosen": -2.344937562942505, + "logits/rejected": -2.6472792625427246, + "logps/chosen": -130.6221923828125, + "logps/rejected": -372.6140441894531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.708556175231934, + "rewards/margins": 10.24577522277832, + "rewards/rejected": -15.954331398010254, + "step": 13641 + }, + { + "epoch": 2.12, + "learning_rate": 4.141738679392085e-06, + "logits/chosen": -1.995416283607483, + "logits/rejected": -2.8570830821990967, + "logps/chosen": -588.208984375, + "logps/rejected": -752.6165771484375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.043416023254395, + "rewards/margins": 10.623717308044434, + "rewards/rejected": -21.667133331298828, + "step": 13642 + }, + { + "epoch": 2.12, + "learning_rate": 4.1410052388609365e-06, + "logits/chosen": -1.6521965265274048, + "logits/rejected": -2.367133617401123, + "logps/chosen": -149.88116455078125, + "logps/rejected": -324.2689208984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.238189697265625, + "rewards/margins": 8.112374305725098, + "rewards/rejected": -14.350564002990723, + "step": 13643 + }, + { + "epoch": 2.12, + "learning_rate": 4.1402717983297884e-06, + "logits/chosen": -2.819136619567871, + "logits/rejected": -2.8965415954589844, + "logps/chosen": -158.2886505126953, + "logps/rejected": -267.0278015136719, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.333125591278076, + "rewards/margins": 8.813380241394043, + "rewards/rejected": -15.146505355834961, + "step": 13644 + }, + { + "epoch": 2.12, + "learning_rate": 4.139538357798641e-06, + "logits/chosen": -2.2897560596466064, + "logits/rejected": -2.700967788696289, + "logps/chosen": -99.20793914794922, + "logps/rejected": -273.34771728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.728113174438477, + "rewards/margins": 10.739648818969727, + "rewards/rejected": -16.467761993408203, + "step": 13645 + }, + { + "epoch": 2.12, + "learning_rate": 4.138804917267494e-06, + "logits/chosen": -2.50477933883667, + "logits/rejected": -2.849868059158325, + "logps/chosen": -351.62322998046875, + "logps/rejected": -370.7776794433594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8871917724609375, + "rewards/margins": 16.763294219970703, + "rewards/rejected": -24.65048599243164, + "step": 13646 + }, + { + "epoch": 2.12, + "learning_rate": 4.138071476736346e-06, + "logits/chosen": -1.601304054260254, + "logits/rejected": -2.6444079875946045, + "logps/chosen": -180.44638061523438, + "logps/rejected": -532.56494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0882411003112793, + "rewards/margins": 10.570667266845703, + "rewards/rejected": -13.65890884399414, + "step": 13647 + }, + { + "epoch": 2.12, + "learning_rate": 4.137338036205198e-06, + "logits/chosen": -2.702576160430908, + "logits/rejected": -2.4538991451263428, + "logps/chosen": -441.4228210449219, + "logps/rejected": -477.422119140625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.649499893188477, + "rewards/margins": 7.811298370361328, + "rewards/rejected": -16.460798263549805, + "step": 13648 + }, + { + "epoch": 2.12, + "learning_rate": 4.1366045956740495e-06, + "logits/chosen": -2.7731499671936035, + "logits/rejected": -1.8461296558380127, + "logps/chosen": -292.0523681640625, + "logps/rejected": -376.3173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.366168022155762, + "rewards/margins": 11.703718185424805, + "rewards/rejected": -18.06988525390625, + "step": 13649 + }, + { + "epoch": 2.12, + "learning_rate": 4.135871155142901e-06, + "logits/chosen": -2.103297710418701, + "logits/rejected": -2.452115535736084, + "logps/chosen": -161.8491668701172, + "logps/rejected": -244.86996459960938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.125958442687988, + "rewards/margins": 9.721536636352539, + "rewards/rejected": -16.847496032714844, + "step": 13650 + }, + { + "epoch": 2.12, + "learning_rate": 4.135137714611754e-06, + "logits/chosen": -2.7947158813476562, + "logits/rejected": -0.9953083992004395, + "logps/chosen": -254.07937622070312, + "logps/rejected": -181.8064422607422, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.14909553527832, + "rewards/margins": 6.851248264312744, + "rewards/rejected": -15.000343322753906, + "step": 13651 + }, + { + "epoch": 2.12, + "learning_rate": 4.134404274080606e-06, + "logits/chosen": -2.93733549118042, + "logits/rejected": -3.0157408714294434, + "logps/chosen": -149.2989501953125, + "logps/rejected": -326.6357727050781, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.271116256713867, + "rewards/margins": 8.029619216918945, + "rewards/rejected": -17.300735473632812, + "step": 13652 + }, + { + "epoch": 2.12, + "learning_rate": 4.133670833549458e-06, + "logits/chosen": -2.060471296310425, + "logits/rejected": -2.6338322162628174, + "logps/chosen": -332.52386474609375, + "logps/rejected": -452.51031494140625, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.668510437011719, + "rewards/margins": 7.059658527374268, + "rewards/rejected": -15.728168487548828, + "step": 13653 + }, + { + "epoch": 2.12, + "learning_rate": 4.13293739301831e-06, + "logits/chosen": -1.976976990699768, + "logits/rejected": -2.7129523754119873, + "logps/chosen": -718.4739379882812, + "logps/rejected": -733.2359008789062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.853643894195557, + "rewards/margins": 12.456819534301758, + "rewards/rejected": -19.310462951660156, + "step": 13654 + }, + { + "epoch": 2.12, + "learning_rate": 4.1322039524871625e-06, + "logits/chosen": -1.9758909940719604, + "logits/rejected": -2.4714810848236084, + "logps/chosen": -136.91690063476562, + "logps/rejected": -310.969482421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.091160774230957, + "rewards/margins": 8.425146102905273, + "rewards/rejected": -17.516307830810547, + "step": 13655 + }, + { + "epoch": 2.12, + "learning_rate": 4.131470511956014e-06, + "logits/chosen": -1.5361902713775635, + "logits/rejected": -2.0405166149139404, + "logps/chosen": -185.89590454101562, + "logps/rejected": -361.40875244140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.993283271789551, + "rewards/margins": 11.757122039794922, + "rewards/rejected": -15.750405311584473, + "step": 13656 + }, + { + "epoch": 2.12, + "learning_rate": 4.130737071424866e-06, + "logits/chosen": -2.9497203826904297, + "logits/rejected": -2.9937925338745117, + "logps/chosen": -64.6387939453125, + "logps/rejected": -201.6334228515625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.264145851135254, + "rewards/margins": 9.625515937805176, + "rewards/rejected": -14.88966178894043, + "step": 13657 + }, + { + "epoch": 2.12, + "learning_rate": 4.130003630893718e-06, + "logits/chosen": -1.8111488819122314, + "logits/rejected": -2.875399351119995, + "logps/chosen": -181.77108764648438, + "logps/rejected": -429.9288024902344, + "loss": 0.2291, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.254338264465332, + "rewards/margins": 7.051938056945801, + "rewards/rejected": -14.306276321411133, + "step": 13658 + }, + { + "epoch": 2.12, + "learning_rate": 4.129270190362571e-06, + "logits/chosen": -0.6429798007011414, + "logits/rejected": -2.7762374877929688, + "logps/chosen": -215.22015380859375, + "logps/rejected": -648.8057250976562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.184020042419434, + "rewards/margins": 10.167673110961914, + "rewards/rejected": -17.35169219970703, + "step": 13659 + }, + { + "epoch": 2.12, + "learning_rate": 4.128536749831423e-06, + "logits/chosen": -2.497502326965332, + "logits/rejected": -2.018393039703369, + "logps/chosen": -247.7146759033203, + "logps/rejected": -267.41412353515625, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.751816749572754, + "rewards/margins": 4.7683234214782715, + "rewards/rejected": -16.520139694213867, + "step": 13660 + }, + { + "epoch": 2.12, + "learning_rate": 4.127803309300275e-06, + "logits/chosen": -1.8256334066390991, + "logits/rejected": -2.7976369857788086, + "logps/chosen": -162.62405395507812, + "logps/rejected": -379.7911376953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.274578094482422, + "rewards/margins": 9.877573013305664, + "rewards/rejected": -16.152151107788086, + "step": 13661 + }, + { + "epoch": 2.12, + "learning_rate": 4.127069868769127e-06, + "logits/chosen": -2.7729029655456543, + "logits/rejected": -2.453890085220337, + "logps/chosen": -213.00851440429688, + "logps/rejected": -224.20437622070312, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.939557075500488, + "rewards/margins": 6.090945243835449, + "rewards/rejected": -12.030502319335938, + "step": 13662 + }, + { + "epoch": 2.12, + "learning_rate": 4.126336428237979e-06, + "logits/chosen": -2.8859283924102783, + "logits/rejected": -2.288128614425659, + "logps/chosen": -361.7333679199219, + "logps/rejected": -434.5244140625, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.400576114654541, + "rewards/margins": 6.804410934448242, + "rewards/rejected": -14.204986572265625, + "step": 13663 + }, + { + "epoch": 2.13, + "learning_rate": 4.125602987706832e-06, + "logits/chosen": -1.9898449182510376, + "logits/rejected": -2.818004608154297, + "logps/chosen": -190.16720581054688, + "logps/rejected": -417.1486511230469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.420980930328369, + "rewards/margins": 9.994635581970215, + "rewards/rejected": -16.415616989135742, + "step": 13664 + }, + { + "epoch": 2.13, + "learning_rate": 4.124869547175684e-06, + "logits/chosen": -2.5721964836120605, + "logits/rejected": -1.863526701927185, + "logps/chosen": -300.0218505859375, + "logps/rejected": -272.68695068359375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.065869331359863, + "rewards/margins": 7.877863883972168, + "rewards/rejected": -17.94373321533203, + "step": 13665 + }, + { + "epoch": 2.13, + "learning_rate": 4.124136106644536e-06, + "logits/chosen": -2.8759360313415527, + "logits/rejected": -2.560579538345337, + "logps/chosen": -466.53594970703125, + "logps/rejected": -385.3627014160156, + "loss": 1.8347, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.985374450683594, + "rewards/margins": 2.401909589767456, + "rewards/rejected": -12.387283325195312, + "step": 13666 + }, + { + "epoch": 2.13, + "learning_rate": 4.1234026661133876e-06, + "logits/chosen": -1.511104941368103, + "logits/rejected": -1.8725351095199585, + "logps/chosen": -120.96066284179688, + "logps/rejected": -287.52679443359375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.579413414001465, + "rewards/margins": 7.545060634613037, + "rewards/rejected": -17.124473571777344, + "step": 13667 + }, + { + "epoch": 2.13, + "learning_rate": 4.12266922558224e-06, + "logits/chosen": -2.7910733222961426, + "logits/rejected": -2.59183669090271, + "logps/chosen": -460.2285461425781, + "logps/rejected": -424.02740478515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3245601654052734, + "rewards/margins": 9.555465698242188, + "rewards/rejected": -11.880025863647461, + "step": 13668 + }, + { + "epoch": 2.13, + "learning_rate": 4.121935785051092e-06, + "logits/chosen": -2.7542476654052734, + "logits/rejected": -1.8591798543930054, + "logps/chosen": -226.49716186523438, + "logps/rejected": -177.6555938720703, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.31209659576416, + "rewards/margins": 5.522452354431152, + "rewards/rejected": -12.834548950195312, + "step": 13669 + }, + { + "epoch": 2.13, + "learning_rate": 4.121202344519944e-06, + "logits/chosen": -2.5190815925598145, + "logits/rejected": -2.7378339767456055, + "logps/chosen": -564.1737670898438, + "logps/rejected": -704.0740356445312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.59512186050415, + "rewards/margins": 15.309983253479004, + "rewards/rejected": -21.905105590820312, + "step": 13670 + }, + { + "epoch": 2.13, + "learning_rate": 4.120468903988796e-06, + "logits/chosen": -2.912118434906006, + "logits/rejected": -2.5846688747406006, + "logps/chosen": -196.3961181640625, + "logps/rejected": -191.83663940429688, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.066672325134277, + "rewards/margins": 6.736533164978027, + "rewards/rejected": -14.803205490112305, + "step": 13671 + }, + { + "epoch": 2.13, + "learning_rate": 4.119735463457648e-06, + "logits/chosen": -2.5762393474578857, + "logits/rejected": -2.7204174995422363, + "logps/chosen": -268.971923828125, + "logps/rejected": -345.8633117675781, + "loss": 0.175, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.083717346191406, + "rewards/margins": 3.7818422317504883, + "rewards/rejected": -12.865559577941895, + "step": 13672 + }, + { + "epoch": 2.13, + "learning_rate": 4.1190020229265005e-06, + "logits/chosen": -2.1129627227783203, + "logits/rejected": -2.358325242996216, + "logps/chosen": -139.1715087890625, + "logps/rejected": -331.6520690917969, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.896420001983643, + "rewards/margins": 7.362846851348877, + "rewards/rejected": -15.25926685333252, + "step": 13673 + }, + { + "epoch": 2.13, + "learning_rate": 4.118268582395352e-06, + "logits/chosen": -2.5525312423706055, + "logits/rejected": -2.095435380935669, + "logps/chosen": -261.3305358886719, + "logps/rejected": -251.26792907714844, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.984893798828125, + "rewards/margins": 5.329012870788574, + "rewards/rejected": -13.313907623291016, + "step": 13674 + }, + { + "epoch": 2.13, + "learning_rate": 4.117535141864204e-06, + "logits/chosen": -1.101597785949707, + "logits/rejected": -2.3143715858459473, + "logps/chosen": -132.17469787597656, + "logps/rejected": -381.98101806640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.461759567260742, + "rewards/margins": 8.784894943237305, + "rewards/rejected": -19.246654510498047, + "step": 13675 + }, + { + "epoch": 2.13, + "learning_rate": 4.116801701333056e-06, + "logits/chosen": -2.921858787536621, + "logits/rejected": -2.9229047298431396, + "logps/chosen": -200.97142028808594, + "logps/rejected": -230.14707946777344, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.036269187927246, + "rewards/margins": 9.47468376159668, + "rewards/rejected": -14.51095199584961, + "step": 13676 + }, + { + "epoch": 2.13, + "learning_rate": 4.116068260801909e-06, + "logits/chosen": -2.2541706562042236, + "logits/rejected": -2.8972113132476807, + "logps/chosen": -98.80329895019531, + "logps/rejected": -331.6170959472656, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.316046714782715, + "rewards/margins": 6.92952823638916, + "rewards/rejected": -14.245574951171875, + "step": 13677 + }, + { + "epoch": 2.13, + "learning_rate": 4.115334820270761e-06, + "logits/chosen": -2.939356565475464, + "logits/rejected": -2.5467562675476074, + "logps/chosen": -196.0137176513672, + "logps/rejected": -304.6499938964844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.971097707748413, + "rewards/margins": 8.818452835083008, + "rewards/rejected": -11.78955078125, + "step": 13678 + }, + { + "epoch": 2.13, + "learning_rate": 4.1146013797396135e-06, + "logits/chosen": -1.4221885204315186, + "logits/rejected": -2.2608063220977783, + "logps/chosen": -246.5028076171875, + "logps/rejected": -451.3638916015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.885200500488281, + "rewards/margins": 8.411551475524902, + "rewards/rejected": -16.2967529296875, + "step": 13679 + }, + { + "epoch": 2.13, + "learning_rate": 4.113867939208465e-06, + "logits/chosen": -3.0036542415618896, + "logits/rejected": -2.0458664894104004, + "logps/chosen": -320.6561279296875, + "logps/rejected": -311.9659729003906, + "loss": 0.0743, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.428790092468262, + "rewards/margins": 7.290212631225586, + "rewards/rejected": -13.719003677368164, + "step": 13680 + }, + { + "epoch": 2.13, + "learning_rate": 4.113134498677317e-06, + "logits/chosen": -2.9229726791381836, + "logits/rejected": -2.9559378623962402, + "logps/chosen": -131.87454223632812, + "logps/rejected": -268.42144775390625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.246182441711426, + "rewards/margins": 7.669404983520508, + "rewards/rejected": -13.91558837890625, + "step": 13681 + }, + { + "epoch": 2.13, + "learning_rate": 4.11240105814617e-06, + "logits/chosen": -2.1204354763031006, + "logits/rejected": -2.70328688621521, + "logps/chosen": -215.54901123046875, + "logps/rejected": -403.904052734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.509973526000977, + "rewards/margins": 9.678689956665039, + "rewards/rejected": -14.188663482666016, + "step": 13682 + }, + { + "epoch": 2.13, + "learning_rate": 4.111667617615022e-06, + "logits/chosen": -2.225388526916504, + "logits/rejected": -2.7501697540283203, + "logps/chosen": -126.934326171875, + "logps/rejected": -258.8280029296875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.362459182739258, + "rewards/margins": 6.308723449707031, + "rewards/rejected": -16.67118263244629, + "step": 13683 + }, + { + "epoch": 2.13, + "learning_rate": 4.110934177083874e-06, + "logits/chosen": -2.111940383911133, + "logits/rejected": -2.6159417629241943, + "logps/chosen": -503.54229736328125, + "logps/rejected": -618.4796142578125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.54629898071289, + "rewards/margins": 8.085058212280273, + "rewards/rejected": -17.631357192993164, + "step": 13684 + }, + { + "epoch": 2.13, + "learning_rate": 4.110200736552726e-06, + "logits/chosen": -2.1037065982818604, + "logits/rejected": -2.3987722396850586, + "logps/chosen": -237.87452697753906, + "logps/rejected": -294.30181884765625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.300199508666992, + "rewards/margins": 6.945139408111572, + "rewards/rejected": -18.245338439941406, + "step": 13685 + }, + { + "epoch": 2.13, + "learning_rate": 4.109467296021578e-06, + "logits/chosen": -2.193838119506836, + "logits/rejected": -2.7070348262786865, + "logps/chosen": -452.3309020996094, + "logps/rejected": -638.683349609375, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.740965843200684, + "rewards/margins": 5.380817413330078, + "rewards/rejected": -13.121784210205078, + "step": 13686 + }, + { + "epoch": 2.13, + "learning_rate": 4.10873385549043e-06, + "logits/chosen": -2.4681403636932373, + "logits/rejected": -2.748110771179199, + "logps/chosen": -286.1839599609375, + "logps/rejected": -416.51220703125, + "loss": 0.047, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.958520889282227, + "rewards/margins": 5.21652889251709, + "rewards/rejected": -16.175048828125, + "step": 13687 + }, + { + "epoch": 2.13, + "learning_rate": 4.108000414959282e-06, + "logits/chosen": -2.5849013328552246, + "logits/rejected": -3.0308122634887695, + "logps/chosen": -328.2111511230469, + "logps/rejected": -464.99884033203125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.774167060852051, + "rewards/margins": 9.56373405456543, + "rewards/rejected": -15.337902069091797, + "step": 13688 + }, + { + "epoch": 2.13, + "learning_rate": 4.107266974428134e-06, + "logits/chosen": -2.545769453048706, + "logits/rejected": -2.876981496810913, + "logps/chosen": -267.1533203125, + "logps/rejected": -262.26116943359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.602965354919434, + "rewards/margins": 8.674165725708008, + "rewards/rejected": -15.277130126953125, + "step": 13689 + }, + { + "epoch": 2.13, + "learning_rate": 4.106533533896986e-06, + "logits/chosen": -2.6965606212615967, + "logits/rejected": -2.900028705596924, + "logps/chosen": -110.6478271484375, + "logps/rejected": -283.1165771484375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.524644374847412, + "rewards/margins": 9.360413551330566, + "rewards/rejected": -13.885058403015137, + "step": 13690 + }, + { + "epoch": 2.13, + "learning_rate": 4.1058000933658386e-06, + "logits/chosen": -2.648826837539673, + "logits/rejected": -2.488839626312256, + "logps/chosen": -227.3986053466797, + "logps/rejected": -347.49658203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.119889736175537, + "rewards/margins": 9.442237854003906, + "rewards/rejected": -12.562128067016602, + "step": 13691 + }, + { + "epoch": 2.13, + "learning_rate": 4.1050666528346904e-06, + "logits/chosen": -1.7051202058792114, + "logits/rejected": -2.9349958896636963, + "logps/chosen": -136.0365753173828, + "logps/rejected": -558.8560791015625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0869035720825195, + "rewards/margins": 7.835624694824219, + "rewards/rejected": -14.922528266906738, + "step": 13692 + }, + { + "epoch": 2.13, + "learning_rate": 4.104333212303542e-06, + "logits/chosen": -2.203399419784546, + "logits/rejected": -2.958345651626587, + "logps/chosen": -557.885986328125, + "logps/rejected": -862.1101684570312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.55434513092041, + "rewards/margins": 13.691741943359375, + "rewards/rejected": -20.2460880279541, + "step": 13693 + }, + { + "epoch": 2.13, + "learning_rate": 4.103599771772394e-06, + "logits/chosen": -2.9176199436187744, + "logits/rejected": -2.5447986125946045, + "logps/chosen": -625.168212890625, + "logps/rejected": -501.4568176269531, + "loss": 0.1503, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0755510330200195, + "rewards/margins": 9.656318664550781, + "rewards/rejected": -16.731868743896484, + "step": 13694 + }, + { + "epoch": 2.13, + "learning_rate": 4.102866331241247e-06, + "logits/chosen": -2.614917755126953, + "logits/rejected": -2.744925022125244, + "logps/chosen": -122.0923080444336, + "logps/rejected": -360.22265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.756199359893799, + "rewards/margins": 12.379189491271973, + "rewards/rejected": -19.13538932800293, + "step": 13695 + }, + { + "epoch": 2.13, + "learning_rate": 4.1021328907101e-06, + "logits/chosen": -2.3219985961914062, + "logits/rejected": -2.9941928386688232, + "logps/chosen": -65.90446472167969, + "logps/rejected": -249.2673797607422, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0936455726623535, + "rewards/margins": 8.488216400146484, + "rewards/rejected": -13.58186149597168, + "step": 13696 + }, + { + "epoch": 2.13, + "learning_rate": 4.1013994501789515e-06, + "logits/chosen": -1.92941415309906, + "logits/rejected": -2.7820310592651367, + "logps/chosen": -139.15188598632812, + "logps/rejected": -193.6177215576172, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.053698539733887, + "rewards/margins": 6.676279067993164, + "rewards/rejected": -11.72997760772705, + "step": 13697 + }, + { + "epoch": 2.13, + "learning_rate": 4.100666009647803e-06, + "logits/chosen": -1.9749457836151123, + "logits/rejected": -2.6828811168670654, + "logps/chosen": -456.0109558105469, + "logps/rejected": -594.456298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.80943489074707, + "rewards/margins": 10.326519012451172, + "rewards/rejected": -19.135953903198242, + "step": 13698 + }, + { + "epoch": 2.13, + "learning_rate": 4.099932569116656e-06, + "logits/chosen": -2.7506680488586426, + "logits/rejected": -2.5974507331848145, + "logps/chosen": -498.0291748046875, + "logps/rejected": -546.6812744140625, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.695935249328613, + "rewards/margins": 6.507516860961914, + "rewards/rejected": -13.203451156616211, + "step": 13699 + }, + { + "epoch": 2.13, + "learning_rate": 4.099199128585508e-06, + "logits/chosen": -2.5456838607788086, + "logits/rejected": -1.8487226963043213, + "logps/chosen": -236.247802734375, + "logps/rejected": -324.7067565917969, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.269365310668945, + "rewards/margins": 4.89671516418457, + "rewards/rejected": -14.166080474853516, + "step": 13700 + }, + { + "epoch": 2.13, + "learning_rate": 4.09846568805436e-06, + "logits/chosen": -2.908111572265625, + "logits/rejected": -2.6085312366485596, + "logps/chosen": -130.62124633789062, + "logps/rejected": -157.2670440673828, + "loss": 0.1907, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.288440227508545, + "rewards/margins": 4.189255237579346, + "rewards/rejected": -11.47769546508789, + "step": 13701 + }, + { + "epoch": 2.13, + "learning_rate": 4.097732247523212e-06, + "logits/chosen": -1.6369603872299194, + "logits/rejected": -2.5302860736846924, + "logps/chosen": -230.79759216308594, + "logps/rejected": -436.31976318359375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.964646339416504, + "rewards/margins": 10.020125389099121, + "rewards/rejected": -14.984771728515625, + "step": 13702 + }, + { + "epoch": 2.13, + "learning_rate": 4.096998806992064e-06, + "logits/chosen": -2.3629353046417236, + "logits/rejected": -2.7738404273986816, + "logps/chosen": -180.0101776123047, + "logps/rejected": -355.05364990234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.229456901550293, + "rewards/margins": 10.478754043579102, + "rewards/rejected": -19.708209991455078, + "step": 13703 + }, + { + "epoch": 2.13, + "learning_rate": 4.096265366460916e-06, + "logits/chosen": -2.63794207572937, + "logits/rejected": -2.8494720458984375, + "logps/chosen": -103.79313659667969, + "logps/rejected": -291.5049133300781, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.935205459594727, + "rewards/margins": 10.113609313964844, + "rewards/rejected": -16.04881477355957, + "step": 13704 + }, + { + "epoch": 2.13, + "learning_rate": 4.095531925929768e-06, + "logits/chosen": -2.6745588779449463, + "logits/rejected": -2.552182197570801, + "logps/chosen": -185.751708984375, + "logps/rejected": -425.1443786621094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.591434478759766, + "rewards/margins": 10.679153442382812, + "rewards/rejected": -18.270587921142578, + "step": 13705 + }, + { + "epoch": 2.13, + "learning_rate": 4.09479848539862e-06, + "logits/chosen": -2.7073802947998047, + "logits/rejected": -1.7808754444122314, + "logps/chosen": -431.37615966796875, + "logps/rejected": -322.3050537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.138055324554443, + "rewards/margins": 12.367197036743164, + "rewards/rejected": -16.505252838134766, + "step": 13706 + }, + { + "epoch": 2.13, + "learning_rate": 4.094065044867472e-06, + "logits/chosen": -1.693253993988037, + "logits/rejected": -2.470109224319458, + "logps/chosen": -263.31103515625, + "logps/rejected": -605.2376708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.564175605773926, + "rewards/margins": 14.058961868286133, + "rewards/rejected": -19.623138427734375, + "step": 13707 + }, + { + "epoch": 2.13, + "learning_rate": 4.093331604336325e-06, + "logits/chosen": -2.782907247543335, + "logits/rejected": -2.120055913925171, + "logps/chosen": -589.365966796875, + "logps/rejected": -481.79913330078125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.099970817565918, + "rewards/margins": 8.11283016204834, + "rewards/rejected": -16.212800979614258, + "step": 13708 + }, + { + "epoch": 2.13, + "learning_rate": 4.092598163805177e-06, + "logits/chosen": -2.6328330039978027, + "logits/rejected": -2.453263282775879, + "logps/chosen": -598.2943115234375, + "logps/rejected": -568.3070068359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.258615970611572, + "rewards/margins": 9.127681732177734, + "rewards/rejected": -16.38629913330078, + "step": 13709 + }, + { + "epoch": 2.13, + "learning_rate": 4.0918647232740285e-06, + "logits/chosen": -2.6737120151519775, + "logits/rejected": -1.6421164274215698, + "logps/chosen": -582.2916259765625, + "logps/rejected": -438.8941345214844, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.65117073059082, + "rewards/margins": 8.172624588012695, + "rewards/rejected": -15.823795318603516, + "step": 13710 + }, + { + "epoch": 2.13, + "learning_rate": 4.09113128274288e-06, + "logits/chosen": -1.572123408317566, + "logits/rejected": -2.4739654064178467, + "logps/chosen": -271.8681335449219, + "logps/rejected": -481.12213134765625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.634956359863281, + "rewards/margins": 10.807829856872559, + "rewards/rejected": -16.442787170410156, + "step": 13711 + }, + { + "epoch": 2.13, + "learning_rate": 4.090397842211733e-06, + "logits/chosen": -3.1461856365203857, + "logits/rejected": -3.092271089553833, + "logps/chosen": -203.73355102539062, + "logps/rejected": -546.4423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.01504135131836, + "rewards/margins": 15.20008659362793, + "rewards/rejected": -24.21512794494629, + "step": 13712 + }, + { + "epoch": 2.13, + "learning_rate": 4.089664401680586e-06, + "logits/chosen": -2.618326187133789, + "logits/rejected": -2.804279327392578, + "logps/chosen": -274.8930358886719, + "logps/rejected": -498.65863037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8063302040100098, + "rewards/margins": 13.785520553588867, + "rewards/rejected": -17.59185028076172, + "step": 13713 + }, + { + "epoch": 2.13, + "learning_rate": 4.088930961149438e-06, + "logits/chosen": -1.3500455617904663, + "logits/rejected": -2.624063730239868, + "logps/chosen": -199.67611694335938, + "logps/rejected": -461.6680908203125, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.418261528015137, + "rewards/margins": 7.828003406524658, + "rewards/rejected": -14.246265411376953, + "step": 13714 + }, + { + "epoch": 2.13, + "learning_rate": 4.0881975206182896e-06, + "logits/chosen": -2.746901750564575, + "logits/rejected": -2.480955123901367, + "logps/chosen": -153.20950317382812, + "logps/rejected": -273.14666748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7837419509887695, + "rewards/margins": 11.59762954711914, + "rewards/rejected": -18.381372451782227, + "step": 13715 + }, + { + "epoch": 2.13, + "learning_rate": 4.0874640800871415e-06, + "logits/chosen": -1.3583210706710815, + "logits/rejected": -2.6224594116210938, + "logps/chosen": -349.859130859375, + "logps/rejected": -555.9891357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.163885116577148, + "rewards/margins": 10.513164520263672, + "rewards/rejected": -18.67704963684082, + "step": 13716 + }, + { + "epoch": 2.13, + "learning_rate": 4.086730639555994e-06, + "logits/chosen": -2.3452277183532715, + "logits/rejected": -2.684962034225464, + "logps/chosen": -190.17901611328125, + "logps/rejected": -225.9853057861328, + "loss": 0.4596, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.522989273071289, + "rewards/margins": 3.5217039585113525, + "rewards/rejected": -13.044692993164062, + "step": 13717 + }, + { + "epoch": 2.13, + "learning_rate": 4.085997199024846e-06, + "logits/chosen": -2.8456337451934814, + "logits/rejected": -2.1078126430511475, + "logps/chosen": -603.329345703125, + "logps/rejected": -408.5463562011719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.738048076629639, + "rewards/margins": 9.089118957519531, + "rewards/rejected": -15.827167510986328, + "step": 13718 + }, + { + "epoch": 2.13, + "learning_rate": 4.085263758493698e-06, + "logits/chosen": -2.0472631454467773, + "logits/rejected": -2.777222156524658, + "logps/chosen": -224.51739501953125, + "logps/rejected": -594.7135009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.322254180908203, + "rewards/margins": 11.02841567993164, + "rewards/rejected": -21.350669860839844, + "step": 13719 + }, + { + "epoch": 2.13, + "learning_rate": 4.08453031796255e-06, + "logits/chosen": -2.2122035026550293, + "logits/rejected": -2.862410306930542, + "logps/chosen": -332.72479248046875, + "logps/rejected": -658.5198364257812, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.404049873352051, + "rewards/margins": 10.063814163208008, + "rewards/rejected": -17.467864990234375, + "step": 13720 + }, + { + "epoch": 2.13, + "learning_rate": 4.083796877431402e-06, + "logits/chosen": -1.751251459121704, + "logits/rejected": -2.385833501815796, + "logps/chosen": -140.24142456054688, + "logps/rejected": -491.12591552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.585018157958984, + "rewards/margins": 11.458797454833984, + "rewards/rejected": -21.04381561279297, + "step": 13721 + }, + { + "epoch": 2.13, + "learning_rate": 4.083063436900254e-06, + "logits/chosen": -2.3592629432678223, + "logits/rejected": -2.5129079818725586, + "logps/chosen": -207.08065795898438, + "logps/rejected": -431.73956298828125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.97570514678955, + "rewards/margins": 7.7619218826293945, + "rewards/rejected": -16.737627029418945, + "step": 13722 + }, + { + "epoch": 2.13, + "learning_rate": 4.082329996369106e-06, + "logits/chosen": -2.5488975048065186, + "logits/rejected": -2.938424825668335, + "logps/chosen": -163.7126922607422, + "logps/rejected": -401.1201171875, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.343100547790527, + "rewards/margins": 9.252248764038086, + "rewards/rejected": -15.595348358154297, + "step": 13723 + }, + { + "epoch": 2.13, + "learning_rate": 4.081596555837958e-06, + "logits/chosen": -2.7658064365386963, + "logits/rejected": -3.090897560119629, + "logps/chosen": -191.5367431640625, + "logps/rejected": -594.4468383789062, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.13989543914795, + "rewards/margins": 9.925220489501953, + "rewards/rejected": -21.06511688232422, + "step": 13724 + }, + { + "epoch": 2.13, + "learning_rate": 4.08086311530681e-06, + "logits/chosen": -1.862688422203064, + "logits/rejected": -2.7853763103485107, + "logps/chosen": -215.47262573242188, + "logps/rejected": -431.3132019042969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.958996772766113, + "rewards/margins": 11.682415008544922, + "rewards/rejected": -18.64141273498535, + "step": 13725 + }, + { + "epoch": 2.13, + "learning_rate": 4.080129674775663e-06, + "logits/chosen": -2.5139222145080566, + "logits/rejected": -1.273629903793335, + "logps/chosen": -363.8875427246094, + "logps/rejected": -313.6937561035156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.390568733215332, + "rewards/margins": 9.42648983001709, + "rewards/rejected": -12.817058563232422, + "step": 13726 + }, + { + "epoch": 2.13, + "learning_rate": 4.079396234244515e-06, + "logits/chosen": -2.335951805114746, + "logits/rejected": -2.0502138137817383, + "logps/chosen": -711.750732421875, + "logps/rejected": -463.10479736328125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.992377281188965, + "rewards/margins": 8.325782775878906, + "rewards/rejected": -16.318161010742188, + "step": 13727 + }, + { + "epoch": 2.13, + "learning_rate": 4.0786627937133665e-06, + "logits/chosen": -1.7416436672210693, + "logits/rejected": -2.7388572692871094, + "logps/chosen": -326.373291015625, + "logps/rejected": -456.6689147949219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.461734771728516, + "rewards/margins": 9.19373893737793, + "rewards/rejected": -16.655473709106445, + "step": 13728 + }, + { + "epoch": 2.14, + "learning_rate": 4.077929353182219e-06, + "logits/chosen": -2.6866095066070557, + "logits/rejected": -1.9403104782104492, + "logps/chosen": -596.455078125, + "logps/rejected": -497.9473876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.511239051818848, + "rewards/margins": 11.729660987854004, + "rewards/rejected": -17.24090003967285, + "step": 13729 + }, + { + "epoch": 2.14, + "learning_rate": 4.077195912651071e-06, + "logits/chosen": -2.7821929454803467, + "logits/rejected": -2.985180377960205, + "logps/chosen": -93.07292175292969, + "logps/rejected": -182.44955444335938, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8800153732299805, + "rewards/margins": 5.895403861999512, + "rewards/rejected": -12.775419235229492, + "step": 13730 + }, + { + "epoch": 2.14, + "learning_rate": 4.076462472119924e-06, + "logits/chosen": -2.681680917739868, + "logits/rejected": -1.3627439737319946, + "logps/chosen": -262.9796447753906, + "logps/rejected": -397.06329345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.204192161560059, + "rewards/margins": 12.074270248413086, + "rewards/rejected": -18.278461456298828, + "step": 13731 + }, + { + "epoch": 2.14, + "learning_rate": 4.075729031588776e-06, + "logits/chosen": -2.6413371562957764, + "logits/rejected": -2.202521324157715, + "logps/chosen": -568.44677734375, + "logps/rejected": -539.716552734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.244329929351807, + "rewards/margins": 9.7064847946167, + "rewards/rejected": -14.950815200805664, + "step": 13732 + }, + { + "epoch": 2.14, + "learning_rate": 4.074995591057628e-06, + "logits/chosen": -1.4422802925109863, + "logits/rejected": -2.757291078567505, + "logps/chosen": -202.82659912109375, + "logps/rejected": -385.79852294921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6428985595703125, + "rewards/margins": 8.420378684997559, + "rewards/rejected": -13.063278198242188, + "step": 13733 + }, + { + "epoch": 2.14, + "learning_rate": 4.0742621505264795e-06, + "logits/chosen": -3.075091600418091, + "logits/rejected": -3.0336594581604004, + "logps/chosen": -362.1517639160156, + "logps/rejected": -434.88116455078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.900824069976807, + "rewards/margins": 10.249813079833984, + "rewards/rejected": -18.150636672973633, + "step": 13734 + }, + { + "epoch": 2.14, + "learning_rate": 4.073528709995332e-06, + "logits/chosen": -2.433311700820923, + "logits/rejected": -2.782451868057251, + "logps/chosen": -181.59320068359375, + "logps/rejected": -356.7897033691406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.834638595581055, + "rewards/margins": 9.030691146850586, + "rewards/rejected": -16.86532974243164, + "step": 13735 + }, + { + "epoch": 2.14, + "learning_rate": 4.072795269464184e-06, + "logits/chosen": -2.3427700996398926, + "logits/rejected": -2.275057792663574, + "logps/chosen": -144.82989501953125, + "logps/rejected": -247.8271484375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.410489082336426, + "rewards/margins": 5.326615333557129, + "rewards/rejected": -12.737104415893555, + "step": 13736 + }, + { + "epoch": 2.14, + "learning_rate": 4.072061828933036e-06, + "logits/chosen": -2.8035707473754883, + "logits/rejected": -2.916651725769043, + "logps/chosen": -146.91815185546875, + "logps/rejected": -454.9530334472656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7600555419921875, + "rewards/margins": 11.488201141357422, + "rewards/rejected": -15.24825668334961, + "step": 13737 + }, + { + "epoch": 2.14, + "learning_rate": 4.071328388401888e-06, + "logits/chosen": -2.1698617935180664, + "logits/rejected": -1.6219829320907593, + "logps/chosen": -656.5128173828125, + "logps/rejected": -659.7894287109375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.580029487609863, + "rewards/margins": 18.484722137451172, + "rewards/rejected": -24.06475257873535, + "step": 13738 + }, + { + "epoch": 2.14, + "learning_rate": 4.07059494787074e-06, + "logits/chosen": -2.645031213760376, + "logits/rejected": -1.9841179847717285, + "logps/chosen": -437.79449462890625, + "logps/rejected": -451.3038330078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.297553539276123, + "rewards/margins": 9.528716087341309, + "rewards/rejected": -14.826269149780273, + "step": 13739 + }, + { + "epoch": 2.14, + "learning_rate": 4.0698615073395925e-06, + "logits/chosen": -1.907686471939087, + "logits/rejected": -2.9450721740722656, + "logps/chosen": -156.78428649902344, + "logps/rejected": -485.684814453125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.725065231323242, + "rewards/margins": 8.102683067321777, + "rewards/rejected": -17.827749252319336, + "step": 13740 + }, + { + "epoch": 2.14, + "learning_rate": 4.069128066808444e-06, + "logits/chosen": -2.6977953910827637, + "logits/rejected": -1.726214051246643, + "logps/chosen": -396.9714660644531, + "logps/rejected": -455.20159912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.505714416503906, + "rewards/margins": 16.58126449584961, + "rewards/rejected": -25.086978912353516, + "step": 13741 + }, + { + "epoch": 2.14, + "learning_rate": 4.068394626277296e-06, + "logits/chosen": -2.2185802459716797, + "logits/rejected": -1.83419930934906, + "logps/chosen": -233.04852294921875, + "logps/rejected": -256.0957336425781, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.685565948486328, + "rewards/margins": 8.686561584472656, + "rewards/rejected": -17.372127532958984, + "step": 13742 + }, + { + "epoch": 2.14, + "learning_rate": 4.067661185746148e-06, + "logits/chosen": -1.2880417108535767, + "logits/rejected": -2.721068859100342, + "logps/chosen": -221.51414489746094, + "logps/rejected": -583.7079467773438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.554811477661133, + "rewards/margins": 11.500385284423828, + "rewards/rejected": -20.05519676208496, + "step": 13743 + }, + { + "epoch": 2.14, + "learning_rate": 4.066927745215001e-06, + "logits/chosen": -2.9099154472351074, + "logits/rejected": -2.86088228225708, + "logps/chosen": -202.01934814453125, + "logps/rejected": -354.24029541015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0228638648986816, + "rewards/margins": 10.070211410522461, + "rewards/rejected": -13.093074798583984, + "step": 13744 + }, + { + "epoch": 2.14, + "learning_rate": 4.066194304683853e-06, + "logits/chosen": -2.2119085788726807, + "logits/rejected": -2.5711607933044434, + "logps/chosen": -267.4019775390625, + "logps/rejected": -465.91253662109375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.708099365234375, + "rewards/margins": 10.149744033813477, + "rewards/rejected": -18.857845306396484, + "step": 13745 + }, + { + "epoch": 2.14, + "learning_rate": 4.065460864152705e-06, + "logits/chosen": -0.7208375334739685, + "logits/rejected": -2.477377414703369, + "logps/chosen": -333.79486083984375, + "logps/rejected": -798.8418579101562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.370861053466797, + "rewards/margins": 18.03886604309082, + "rewards/rejected": -26.409727096557617, + "step": 13746 + }, + { + "epoch": 2.14, + "learning_rate": 4.064727423621557e-06, + "logits/chosen": -2.7283172607421875, + "logits/rejected": -2.837331771850586, + "logps/chosen": -113.85481262207031, + "logps/rejected": -327.0257873535156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.110288619995117, + "rewards/margins": 9.255950927734375, + "rewards/rejected": -18.366239547729492, + "step": 13747 + }, + { + "epoch": 2.14, + "learning_rate": 4.06399398309041e-06, + "logits/chosen": -2.645310878753662, + "logits/rejected": -1.6306471824645996, + "logps/chosen": -637.566650390625, + "logps/rejected": -520.9014892578125, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.636813163757324, + "rewards/margins": 7.896768569946289, + "rewards/rejected": -19.533580780029297, + "step": 13748 + }, + { + "epoch": 2.14, + "learning_rate": 4.063260542559262e-06, + "logits/chosen": -2.894822359085083, + "logits/rejected": -2.7078211307525635, + "logps/chosen": -143.37017822265625, + "logps/rejected": -333.3780517578125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.164012908935547, + "rewards/margins": 10.697052001953125, + "rewards/rejected": -19.861064910888672, + "step": 13749 + }, + { + "epoch": 2.14, + "learning_rate": 4.062527102028114e-06, + "logits/chosen": -2.521064043045044, + "logits/rejected": -2.4631333351135254, + "logps/chosen": -273.159912109375, + "logps/rejected": -483.2032775878906, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.273895263671875, + "rewards/margins": 7.374297142028809, + "rewards/rejected": -17.648191452026367, + "step": 13750 + }, + { + "epoch": 2.14, + "learning_rate": 4.061793661496966e-06, + "logits/chosen": -2.9117095470428467, + "logits/rejected": -1.8795603513717651, + "logps/chosen": -580.3450927734375, + "logps/rejected": -763.3138427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.567883491516113, + "rewards/margins": 9.575197219848633, + "rewards/rejected": -17.14307975769043, + "step": 13751 + }, + { + "epoch": 2.14, + "learning_rate": 4.0610602209658175e-06, + "logits/chosen": -2.4678664207458496, + "logits/rejected": -1.2996543645858765, + "logps/chosen": -277.80987548828125, + "logps/rejected": -274.61187744140625, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.82114028930664, + "rewards/margins": 7.653822898864746, + "rewards/rejected": -17.47496223449707, + "step": 13752 + }, + { + "epoch": 2.14, + "learning_rate": 4.06032678043467e-06, + "logits/chosen": -2.1691641807556152, + "logits/rejected": -2.4795095920562744, + "logps/chosen": -447.1656494140625, + "logps/rejected": -552.50341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.930978775024414, + "rewards/margins": 10.01044750213623, + "rewards/rejected": -18.941425323486328, + "step": 13753 + }, + { + "epoch": 2.14, + "learning_rate": 4.059593339903522e-06, + "logits/chosen": -1.9431391954421997, + "logits/rejected": -3.1204915046691895, + "logps/chosen": -171.70550537109375, + "logps/rejected": -489.8220520019531, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.532529830932617, + "rewards/margins": 5.380493640899658, + "rewards/rejected": -15.913023948669434, + "step": 13754 + }, + { + "epoch": 2.14, + "learning_rate": 4.058859899372374e-06, + "logits/chosen": -2.8664512634277344, + "logits/rejected": -2.8607187271118164, + "logps/chosen": -444.3997802734375, + "logps/rejected": -241.1803436279297, + "loss": 0.26, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.846362113952637, + "rewards/margins": 5.881929874420166, + "rewards/rejected": -13.728292465209961, + "step": 13755 + }, + { + "epoch": 2.14, + "learning_rate": 4.058126458841226e-06, + "logits/chosen": -2.869126558303833, + "logits/rejected": -0.9701606631278992, + "logps/chosen": -641.842529296875, + "logps/rejected": -317.2295837402344, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.348567008972168, + "rewards/margins": 7.233473777770996, + "rewards/rejected": -14.582040786743164, + "step": 13756 + }, + { + "epoch": 2.14, + "learning_rate": 4.057393018310079e-06, + "logits/chosen": -1.9494549036026, + "logits/rejected": -1.9428625106811523, + "logps/chosen": -223.2803192138672, + "logps/rejected": -415.0147705078125, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.419078826904297, + "rewards/margins": 7.008288860321045, + "rewards/rejected": -21.4273681640625, + "step": 13757 + }, + { + "epoch": 2.14, + "learning_rate": 4.0566595777789305e-06, + "logits/chosen": -2.7761731147766113, + "logits/rejected": -2.794917345046997, + "logps/chosen": -300.964111328125, + "logps/rejected": -307.0435791015625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.653316497802734, + "rewards/margins": 7.628105163574219, + "rewards/rejected": -13.281421661376953, + "step": 13758 + }, + { + "epoch": 2.14, + "learning_rate": 4.055926137247782e-06, + "logits/chosen": -1.992576241493225, + "logits/rejected": -2.482116460800171, + "logps/chosen": -109.38471984863281, + "logps/rejected": -259.21142578125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.769027233123779, + "rewards/margins": 7.814423561096191, + "rewards/rejected": -14.583450317382812, + "step": 13759 + }, + { + "epoch": 2.14, + "learning_rate": 4.055192696716634e-06, + "logits/chosen": -2.824049711227417, + "logits/rejected": -3.0525314807891846, + "logps/chosen": -186.97271728515625, + "logps/rejected": -396.24920654296875, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.109114646911621, + "rewards/margins": 7.990777969360352, + "rewards/rejected": -17.099891662597656, + "step": 13760 + }, + { + "epoch": 2.14, + "learning_rate": 4.054459256185486e-06, + "logits/chosen": -2.46921443939209, + "logits/rejected": -2.7210733890533447, + "logps/chosen": -190.56787109375, + "logps/rejected": -305.7705383300781, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.650398254394531, + "rewards/margins": 7.673578262329102, + "rewards/rejected": -15.323976516723633, + "step": 13761 + }, + { + "epoch": 2.14, + "learning_rate": 4.053725815654339e-06, + "logits/chosen": -1.3711284399032593, + "logits/rejected": -2.1782007217407227, + "logps/chosen": -189.17100524902344, + "logps/rejected": -435.457275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.494451522827148, + "rewards/margins": 14.85247802734375, + "rewards/rejected": -23.3469295501709, + "step": 13762 + }, + { + "epoch": 2.14, + "learning_rate": 4.052992375123192e-06, + "logits/chosen": -2.753068685531616, + "logits/rejected": -2.968454360961914, + "logps/chosen": -123.92192840576172, + "logps/rejected": -248.6356201171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.90146541595459, + "rewards/margins": 9.555170059204102, + "rewards/rejected": -15.456634521484375, + "step": 13763 + }, + { + "epoch": 2.14, + "learning_rate": 4.0522589345920435e-06, + "logits/chosen": -2.2839179039001465, + "logits/rejected": -2.558640956878662, + "logps/chosen": -185.92477416992188, + "logps/rejected": -261.2214050292969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.631679534912109, + "rewards/margins": 8.684263229370117, + "rewards/rejected": -14.315942764282227, + "step": 13764 + }, + { + "epoch": 2.14, + "learning_rate": 4.051525494060895e-06, + "logits/chosen": -1.194598913192749, + "logits/rejected": -2.4355409145355225, + "logps/chosen": -173.35330200195312, + "logps/rejected": -481.36566162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3388895988464355, + "rewards/margins": 10.800897598266602, + "rewards/rejected": -18.139787673950195, + "step": 13765 + }, + { + "epoch": 2.14, + "learning_rate": 4.050792053529748e-06, + "logits/chosen": -2.4640564918518066, + "logits/rejected": -2.9584314823150635, + "logps/chosen": -206.5709991455078, + "logps/rejected": -442.8869323730469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.221941947937012, + "rewards/margins": 9.331196784973145, + "rewards/rejected": -15.553138732910156, + "step": 13766 + }, + { + "epoch": 2.14, + "learning_rate": 4.0500586129986e-06, + "logits/chosen": -1.4749929904937744, + "logits/rejected": -2.7548229694366455, + "logps/chosen": -613.9052124023438, + "logps/rejected": -858.3480224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.37671184539795, + "rewards/margins": 12.922298431396484, + "rewards/rejected": -21.29901123046875, + "step": 13767 + }, + { + "epoch": 2.14, + "learning_rate": 4.049325172467452e-06, + "logits/chosen": -1.8340872526168823, + "logits/rejected": -1.947911024093628, + "logps/chosen": -170.95071411132812, + "logps/rejected": -235.697265625, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.448822021484375, + "rewards/margins": 6.948143482208252, + "rewards/rejected": -17.39696502685547, + "step": 13768 + }, + { + "epoch": 2.14, + "learning_rate": 4.048591731936304e-06, + "logits/chosen": -0.4699970781803131, + "logits/rejected": -2.785456418991089, + "logps/chosen": -133.9349822998047, + "logps/rejected": -568.6036987304688, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.232461929321289, + "rewards/margins": 8.081991195678711, + "rewards/rejected": -18.314453125, + "step": 13769 + }, + { + "epoch": 2.14, + "learning_rate": 4.047858291405156e-06, + "logits/chosen": -2.4309306144714355, + "logits/rejected": -2.22577166557312, + "logps/chosen": -662.426513671875, + "logps/rejected": -658.579833984375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.779918670654297, + "rewards/margins": 14.102714538574219, + "rewards/rejected": -21.882633209228516, + "step": 13770 + }, + { + "epoch": 2.14, + "learning_rate": 4.047124850874008e-06, + "logits/chosen": -2.4818692207336426, + "logits/rejected": -2.94775390625, + "logps/chosen": -189.02810668945312, + "logps/rejected": -406.0096740722656, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.119041442871094, + "rewards/margins": 6.312523365020752, + "rewards/rejected": -17.431564331054688, + "step": 13771 + }, + { + "epoch": 2.14, + "learning_rate": 4.04639141034286e-06, + "logits/chosen": -1.9481898546218872, + "logits/rejected": -2.4340415000915527, + "logps/chosen": -209.24142456054688, + "logps/rejected": -374.146240234375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.009425163269043, + "rewards/margins": 11.038712501525879, + "rewards/rejected": -17.048137664794922, + "step": 13772 + }, + { + "epoch": 2.14, + "learning_rate": 4.045657969811712e-06, + "logits/chosen": -0.1313677430152893, + "logits/rejected": -2.796189546585083, + "logps/chosen": -175.97427368164062, + "logps/rejected": -543.410888671875, + "loss": 0.029, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.71213150024414, + "rewards/margins": 24.749401092529297, + "rewards/rejected": -36.46153259277344, + "step": 13773 + }, + { + "epoch": 2.14, + "learning_rate": 4.044924529280564e-06, + "logits/chosen": -2.2092912197113037, + "logits/rejected": -2.6985671520233154, + "logps/chosen": -316.7899169921875, + "logps/rejected": -524.9075927734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.788293838500977, + "rewards/margins": 8.820024490356445, + "rewards/rejected": -17.608318328857422, + "step": 13774 + }, + { + "epoch": 2.14, + "learning_rate": 4.044191088749417e-06, + "logits/chosen": -2.6963443756103516, + "logits/rejected": -1.971353530883789, + "logps/chosen": -220.72007751464844, + "logps/rejected": -277.7449035644531, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.584060668945312, + "rewards/margins": 7.235439777374268, + "rewards/rejected": -18.819499969482422, + "step": 13775 + }, + { + "epoch": 2.14, + "learning_rate": 4.0434576482182685e-06, + "logits/chosen": -2.8812451362609863, + "logits/rejected": -2.2447926998138428, + "logps/chosen": -201.98834228515625, + "logps/rejected": -264.4288330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.219161510467529, + "rewards/margins": 9.77965259552002, + "rewards/rejected": -16.99881362915039, + "step": 13776 + }, + { + "epoch": 2.14, + "learning_rate": 4.0427242076871204e-06, + "logits/chosen": -1.5282608270645142, + "logits/rejected": -2.9565558433532715, + "logps/chosen": -261.92120361328125, + "logps/rejected": -483.318359375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.541824340820312, + "rewards/margins": 7.61598014831543, + "rewards/rejected": -18.157804489135742, + "step": 13777 + }, + { + "epoch": 2.14, + "learning_rate": 4.041990767155972e-06, + "logits/chosen": -1.5547230243682861, + "logits/rejected": -2.0938572883605957, + "logps/chosen": -158.590576171875, + "logps/rejected": -321.3529052734375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.889400482177734, + "rewards/margins": 8.273859024047852, + "rewards/rejected": -17.16326141357422, + "step": 13778 + }, + { + "epoch": 2.14, + "learning_rate": 4.041257326624825e-06, + "logits/chosen": -2.9864282608032227, + "logits/rejected": -2.643961191177368, + "logps/chosen": -745.10302734375, + "logps/rejected": -667.8483276367188, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0013227462768555, + "rewards/margins": 9.624600410461426, + "rewards/rejected": -15.625923156738281, + "step": 13779 + }, + { + "epoch": 2.14, + "learning_rate": 4.040523886093678e-06, + "logits/chosen": -1.861305594444275, + "logits/rejected": -2.5210742950439453, + "logps/chosen": -191.67575073242188, + "logps/rejected": -661.229736328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.29581069946289, + "rewards/margins": 12.579906463623047, + "rewards/rejected": -24.875717163085938, + "step": 13780 + }, + { + "epoch": 2.14, + "learning_rate": 4.03979044556253e-06, + "logits/chosen": -2.7995188236236572, + "logits/rejected": -2.9251863956451416, + "logps/chosen": -261.2026062011719, + "logps/rejected": -447.2995910644531, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.718353271484375, + "rewards/margins": 7.439984321594238, + "rewards/rejected": -19.158336639404297, + "step": 13781 + }, + { + "epoch": 2.14, + "learning_rate": 4.0390570050313815e-06, + "logits/chosen": -2.6359078884124756, + "logits/rejected": -2.7146995067596436, + "logps/chosen": -108.50743103027344, + "logps/rejected": -180.20701599121094, + "loss": 0.0318, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.362884521484375, + "rewards/margins": 4.999323844909668, + "rewards/rejected": -10.36220932006836, + "step": 13782 + }, + { + "epoch": 2.14, + "learning_rate": 4.038323564500233e-06, + "logits/chosen": -2.5855038166046143, + "logits/rejected": -2.710874557495117, + "logps/chosen": -212.84323120117188, + "logps/rejected": -762.02490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.061925888061523, + "rewards/margins": 17.563138961791992, + "rewards/rejected": -29.625064849853516, + "step": 13783 + }, + { + "epoch": 2.14, + "learning_rate": 4.037590123969086e-06, + "logits/chosen": -2.6807849407196045, + "logits/rejected": -2.2027604579925537, + "logps/chosen": -565.1172485351562, + "logps/rejected": -439.03369140625, + "loss": 0.2624, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.958759307861328, + "rewards/margins": 4.934122562408447, + "rewards/rejected": -15.892881393432617, + "step": 13784 + }, + { + "epoch": 2.14, + "learning_rate": 4.036856683437938e-06, + "logits/chosen": -2.3474183082580566, + "logits/rejected": -2.7913336753845215, + "logps/chosen": -108.74114990234375, + "logps/rejected": -503.21942138671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.144412994384766, + "rewards/margins": 11.92945671081543, + "rewards/rejected": -21.073869705200195, + "step": 13785 + }, + { + "epoch": 2.14, + "learning_rate": 4.03612324290679e-06, + "logits/chosen": -2.6772820949554443, + "logits/rejected": -1.1218851804733276, + "logps/chosen": -525.9816284179688, + "logps/rejected": -569.809326171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.925646781921387, + "rewards/margins": 12.082744598388672, + "rewards/rejected": -18.008390426635742, + "step": 13786 + }, + { + "epoch": 2.14, + "learning_rate": 4.035389802375642e-06, + "logits/chosen": -2.489089012145996, + "logits/rejected": -2.850193738937378, + "logps/chosen": -109.17207336425781, + "logps/rejected": -398.94476318359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.312894821166992, + "rewards/margins": 12.067161560058594, + "rewards/rejected": -19.380056381225586, + "step": 13787 + }, + { + "epoch": 2.14, + "learning_rate": 4.034656361844494e-06, + "logits/chosen": -2.92056941986084, + "logits/rejected": -2.475473642349243, + "logps/chosen": -226.253173828125, + "logps/rejected": -230.683349609375, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.330142021179199, + "rewards/margins": 6.46876859664917, + "rewards/rejected": -11.798911094665527, + "step": 13788 + }, + { + "epoch": 2.14, + "learning_rate": 4.033922921313346e-06, + "logits/chosen": -2.3510870933532715, + "logits/rejected": -2.716066598892212, + "logps/chosen": -375.6072082519531, + "logps/rejected": -406.21380615234375, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.517158508300781, + "rewards/margins": 6.238968849182129, + "rewards/rejected": -12.75612735748291, + "step": 13789 + }, + { + "epoch": 2.14, + "learning_rate": 4.033189480782198e-06, + "logits/chosen": -2.5318377017974854, + "logits/rejected": -2.5076403617858887, + "logps/chosen": -426.73834228515625, + "logps/rejected": -551.216064453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.956067085266113, + "rewards/margins": 12.56660270690918, + "rewards/rejected": -21.522668838500977, + "step": 13790 + }, + { + "epoch": 2.14, + "learning_rate": 4.03245604025105e-06, + "logits/chosen": -2.5197551250457764, + "logits/rejected": -1.0112152099609375, + "logps/chosen": -570.6669921875, + "logps/rejected": -421.009033203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.754356384277344, + "rewards/margins": 9.208650588989258, + "rewards/rejected": -19.9630069732666, + "step": 13791 + }, + { + "epoch": 2.14, + "learning_rate": 4.031722599719902e-06, + "logits/chosen": -1.3092622756958008, + "logits/rejected": -2.6432878971099854, + "logps/chosen": -163.19871520996094, + "logps/rejected": -614.634521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.830638885498047, + "rewards/margins": 14.733844757080078, + "rewards/rejected": -21.564483642578125, + "step": 13792 + }, + { + "epoch": 2.15, + "learning_rate": 4.030989159188755e-06, + "logits/chosen": -2.602588176727295, + "logits/rejected": -2.474320650100708, + "logps/chosen": -523.3424072265625, + "logps/rejected": -511.198486328125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.098888397216797, + "rewards/margins": 7.241735935211182, + "rewards/rejected": -17.34062385559082, + "step": 13793 + }, + { + "epoch": 2.15, + "learning_rate": 4.030255718657607e-06, + "logits/chosen": -1.8189473152160645, + "logits/rejected": -2.8564977645874023, + "logps/chosen": -193.01345825195312, + "logps/rejected": -483.0195617675781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.383638858795166, + "rewards/margins": 13.027998924255371, + "rewards/rejected": -20.411638259887695, + "step": 13794 + }, + { + "epoch": 2.15, + "learning_rate": 4.0295222781264585e-06, + "logits/chosen": -2.087244749069214, + "logits/rejected": -2.5089566707611084, + "logps/chosen": -279.4305725097656, + "logps/rejected": -275.29046630859375, + "loss": 1.1646, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.84903335571289, + "rewards/margins": 5.088432788848877, + "rewards/rejected": -17.93746566772461, + "step": 13795 + }, + { + "epoch": 2.15, + "learning_rate": 4.028788837595311e-06, + "logits/chosen": -2.6655068397521973, + "logits/rejected": -2.392723560333252, + "logps/chosen": -131.66455078125, + "logps/rejected": -313.526123046875, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.73222541809082, + "rewards/margins": 5.220508575439453, + "rewards/rejected": -13.952733993530273, + "step": 13796 + }, + { + "epoch": 2.15, + "learning_rate": 4.028055397064164e-06, + "logits/chosen": -2.462110757827759, + "logits/rejected": -2.7078256607055664, + "logps/chosen": -202.10220336914062, + "logps/rejected": -370.46734619140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.08642578125, + "rewards/margins": 11.47559928894043, + "rewards/rejected": -18.56202507019043, + "step": 13797 + }, + { + "epoch": 2.15, + "learning_rate": 4.027321956533016e-06, + "logits/chosen": -2.294950008392334, + "logits/rejected": -2.0622425079345703, + "logps/chosen": -187.38992309570312, + "logps/rejected": -243.0633544921875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.673604965209961, + "rewards/margins": 7.30967903137207, + "rewards/rejected": -15.983283996582031, + "step": 13798 + }, + { + "epoch": 2.15, + "learning_rate": 4.026588516001868e-06, + "logits/chosen": -2.627108573913574, + "logits/rejected": -2.1592490673065186, + "logps/chosen": -559.1207885742188, + "logps/rejected": -660.5797119140625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.572392463684082, + "rewards/margins": 7.566149711608887, + "rewards/rejected": -17.13854217529297, + "step": 13799 + }, + { + "epoch": 2.15, + "learning_rate": 4.0258550754707196e-06, + "logits/chosen": -2.693176746368408, + "logits/rejected": -2.7645211219787598, + "logps/chosen": -898.8935546875, + "logps/rejected": -661.9290771484375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.908439636230469, + "rewards/margins": 7.288887977600098, + "rewards/rejected": -15.197328567504883, + "step": 13800 + }, + { + "epoch": 2.15, + "learning_rate": 4.0251216349395714e-06, + "logits/chosen": -1.9497288465499878, + "logits/rejected": -3.018958806991577, + "logps/chosen": -265.3185119628906, + "logps/rejected": -419.71185302734375, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.797743797302246, + "rewards/margins": 5.580940246582031, + "rewards/rejected": -14.378684043884277, + "step": 13801 + }, + { + "epoch": 2.15, + "learning_rate": 4.024388194408424e-06, + "logits/chosen": -1.1534658670425415, + "logits/rejected": -2.732952833175659, + "logps/chosen": -171.90737915039062, + "logps/rejected": -297.9543762207031, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.648333549499512, + "rewards/margins": 6.55849552154541, + "rewards/rejected": -14.206829071044922, + "step": 13802 + }, + { + "epoch": 2.15, + "learning_rate": 4.023654753877276e-06, + "logits/chosen": -2.320042371749878, + "logits/rejected": -1.5966755151748657, + "logps/chosen": -280.2642822265625, + "logps/rejected": -337.7814025878906, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.93850040435791, + "rewards/margins": 7.571690082550049, + "rewards/rejected": -18.510190963745117, + "step": 13803 + }, + { + "epoch": 2.15, + "learning_rate": 4.022921313346128e-06, + "logits/chosen": -2.717043399810791, + "logits/rejected": -2.0666868686676025, + "logps/chosen": -329.482421875, + "logps/rejected": -250.6920623779297, + "loss": 0.0384, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.214566230773926, + "rewards/margins": 3.3469929695129395, + "rewards/rejected": -14.561559677124023, + "step": 13804 + }, + { + "epoch": 2.15, + "learning_rate": 4.02218787281498e-06, + "logits/chosen": -2.7611827850341797, + "logits/rejected": -0.9088674783706665, + "logps/chosen": -935.23681640625, + "logps/rejected": -555.7906494140625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.426396369934082, + "rewards/margins": 9.540042877197266, + "rewards/rejected": -19.96643829345703, + "step": 13805 + }, + { + "epoch": 2.15, + "learning_rate": 4.0214544322838325e-06, + "logits/chosen": -2.156435966491699, + "logits/rejected": -2.533113956451416, + "logps/chosen": -195.71395874023438, + "logps/rejected": -450.0126037597656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.261497497558594, + "rewards/margins": 10.162781715393066, + "rewards/rejected": -19.424278259277344, + "step": 13806 + }, + { + "epoch": 2.15, + "learning_rate": 4.020720991752684e-06, + "logits/chosen": -2.017987012863159, + "logits/rejected": -2.5326528549194336, + "logps/chosen": -126.92970275878906, + "logps/rejected": -349.9357604980469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3419108390808105, + "rewards/margins": 9.672316551208496, + "rewards/rejected": -16.01422691345215, + "step": 13807 + }, + { + "epoch": 2.15, + "learning_rate": 4.019987551221536e-06, + "logits/chosen": -2.650074005126953, + "logits/rejected": -1.896036148071289, + "logps/chosen": -567.2225341796875, + "logps/rejected": -468.9596252441406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.976666450500488, + "rewards/margins": 12.354366302490234, + "rewards/rejected": -17.331031799316406, + "step": 13808 + }, + { + "epoch": 2.15, + "learning_rate": 4.019254110690388e-06, + "logits/chosen": -1.4117413759231567, + "logits/rejected": -2.293032169342041, + "logps/chosen": -237.72869873046875, + "logps/rejected": -466.85064697265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.523426055908203, + "rewards/margins": 10.486066818237305, + "rewards/rejected": -22.009492874145508, + "step": 13809 + }, + { + "epoch": 2.15, + "learning_rate": 4.01852067015924e-06, + "logits/chosen": -2.6689443588256836, + "logits/rejected": -2.593203067779541, + "logps/chosen": -402.3226318359375, + "logps/rejected": -588.73779296875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.664566993713379, + "rewards/margins": 8.4185791015625, + "rewards/rejected": -17.083147048950195, + "step": 13810 + }, + { + "epoch": 2.15, + "learning_rate": 4.017787229628093e-06, + "logits/chosen": -2.4174869060516357, + "logits/rejected": -2.715348243713379, + "logps/chosen": -301.66534423828125, + "logps/rejected": -424.91644287109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.304636001586914, + "rewards/margins": 11.513514518737793, + "rewards/rejected": -17.81814956665039, + "step": 13811 + }, + { + "epoch": 2.15, + "learning_rate": 4.017053789096945e-06, + "logits/chosen": -2.465871572494507, + "logits/rejected": -2.714547634124756, + "logps/chosen": -176.18869018554688, + "logps/rejected": -410.1746826171875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.857757568359375, + "rewards/margins": 8.500165939331055, + "rewards/rejected": -18.35792350769043, + "step": 13812 + }, + { + "epoch": 2.15, + "learning_rate": 4.0163203485657965e-06, + "logits/chosen": -2.6228911876678467, + "logits/rejected": -2.4584801197052, + "logps/chosen": -172.7784881591797, + "logps/rejected": -302.6092224121094, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.88526439666748, + "rewards/margins": 6.871432781219482, + "rewards/rejected": -15.756696701049805, + "step": 13813 + }, + { + "epoch": 2.15, + "learning_rate": 4.015586908034649e-06, + "logits/chosen": -2.323622703552246, + "logits/rejected": -2.873178243637085, + "logps/chosen": -115.31741333007812, + "logps/rejected": -393.795654296875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.034420013427734, + "rewards/margins": 8.248199462890625, + "rewards/rejected": -18.28261947631836, + "step": 13814 + }, + { + "epoch": 2.15, + "learning_rate": 4.014853467503502e-06, + "logits/chosen": -2.5011203289031982, + "logits/rejected": -2.118479013442993, + "logps/chosen": -200.17935180664062, + "logps/rejected": -229.97576904296875, + "loss": 0.0245, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.719006538391113, + "rewards/margins": 4.213674545288086, + "rewards/rejected": -9.9326810836792, + "step": 13815 + }, + { + "epoch": 2.15, + "learning_rate": 4.014120026972354e-06, + "logits/chosen": -2.9206995964050293, + "logits/rejected": -1.5480866432189941, + "logps/chosen": -303.1619873046875, + "logps/rejected": -398.31524658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.838451862335205, + "rewards/margins": 10.536933898925781, + "rewards/rejected": -17.375385284423828, + "step": 13816 + }, + { + "epoch": 2.15, + "learning_rate": 4.013386586441206e-06, + "logits/chosen": -2.067788600921631, + "logits/rejected": -2.431351661682129, + "logps/chosen": -136.42669677734375, + "logps/rejected": -282.8839111328125, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.386528968811035, + "rewards/margins": 5.454248428344727, + "rewards/rejected": -16.840778350830078, + "step": 13817 + }, + { + "epoch": 2.15, + "learning_rate": 4.012653145910058e-06, + "logits/chosen": -2.341989517211914, + "logits/rejected": -2.6641759872436523, + "logps/chosen": -132.64410400390625, + "logps/rejected": -413.6485290527344, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.735493659973145, + "rewards/margins": 7.344829559326172, + "rewards/rejected": -16.080324172973633, + "step": 13818 + }, + { + "epoch": 2.15, + "learning_rate": 4.0119197053789095e-06, + "logits/chosen": -2.6995255947113037, + "logits/rejected": -2.9685187339782715, + "logps/chosen": -281.3678283691406, + "logps/rejected": -581.5872192382812, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.24443244934082, + "rewards/margins": 10.671489715576172, + "rewards/rejected": -17.915924072265625, + "step": 13819 + }, + { + "epoch": 2.15, + "learning_rate": 4.011186264847762e-06, + "logits/chosen": -2.509357213973999, + "logits/rejected": -1.1779817342758179, + "logps/chosen": -577.5722045898438, + "logps/rejected": -337.553466796875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5652008056640625, + "rewards/margins": 7.580158233642578, + "rewards/rejected": -13.14535903930664, + "step": 13820 + }, + { + "epoch": 2.15, + "learning_rate": 4.010452824316614e-06, + "logits/chosen": -2.1576502323150635, + "logits/rejected": -2.4876558780670166, + "logps/chosen": -289.584228515625, + "logps/rejected": -353.22015380859375, + "loss": 0.0886, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.391240119934082, + "rewards/margins": 5.814489841461182, + "rewards/rejected": -14.205730438232422, + "step": 13821 + }, + { + "epoch": 2.15, + "learning_rate": 4.009719383785466e-06, + "logits/chosen": -2.733158826828003, + "logits/rejected": -2.106391191482544, + "logps/chosen": -597.3928833007812, + "logps/rejected": -441.399169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.061923027038574, + "rewards/margins": 11.728805541992188, + "rewards/rejected": -16.790729522705078, + "step": 13822 + }, + { + "epoch": 2.15, + "learning_rate": 4.008985943254318e-06, + "logits/chosen": -2.6100564002990723, + "logits/rejected": -2.830676555633545, + "logps/chosen": -281.8465881347656, + "logps/rejected": -441.2236022949219, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.296278953552246, + "rewards/margins": 10.103509902954102, + "rewards/rejected": -17.399789810180664, + "step": 13823 + }, + { + "epoch": 2.15, + "learning_rate": 4.0082525027231706e-06, + "logits/chosen": -2.693168878555298, + "logits/rejected": -2.306321859359741, + "logps/chosen": -349.66534423828125, + "logps/rejected": -315.3351135253906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.047972202301025, + "rewards/margins": 11.00367546081543, + "rewards/rejected": -18.051647186279297, + "step": 13824 + }, + { + "epoch": 2.15, + "learning_rate": 4.0075190621920224e-06, + "logits/chosen": -2.3487889766693115, + "logits/rejected": -2.7510862350463867, + "logps/chosen": -135.96295166015625, + "logps/rejected": -304.1202392578125, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.89484167098999, + "rewards/margins": 6.7658915519714355, + "rewards/rejected": -14.660733222961426, + "step": 13825 + }, + { + "epoch": 2.15, + "learning_rate": 4.006785621660874e-06, + "logits/chosen": -1.470810055732727, + "logits/rejected": -2.4459469318389893, + "logps/chosen": -166.53298950195312, + "logps/rejected": -506.0455627441406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.533498764038086, + "rewards/margins": 13.520097732543945, + "rewards/rejected": -23.05359649658203, + "step": 13826 + }, + { + "epoch": 2.15, + "learning_rate": 4.006052181129726e-06, + "logits/chosen": -2.7018091678619385, + "logits/rejected": -2.361722946166992, + "logps/chosen": -218.16604614257812, + "logps/rejected": -158.59527587890625, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.515433311462402, + "rewards/margins": 7.190947532653809, + "rewards/rejected": -12.706380844116211, + "step": 13827 + }, + { + "epoch": 2.15, + "learning_rate": 4.005318740598578e-06, + "logits/chosen": -2.9501240253448486, + "logits/rejected": -2.6574268341064453, + "logps/chosen": -390.43194580078125, + "logps/rejected": -492.362548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7851502895355225, + "rewards/margins": 11.0615234375, + "rewards/rejected": -13.846673965454102, + "step": 13828 + }, + { + "epoch": 2.15, + "learning_rate": 4.004585300067431e-06, + "logits/chosen": -2.5988364219665527, + "logits/rejected": -2.7507288455963135, + "logps/chosen": -174.1241455078125, + "logps/rejected": -206.4409942626953, + "loss": 0.0781, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.886616706848145, + "rewards/margins": 4.326484203338623, + "rewards/rejected": -13.213101387023926, + "step": 13829 + }, + { + "epoch": 2.15, + "learning_rate": 4.003851859536283e-06, + "logits/chosen": -2.764157295227051, + "logits/rejected": -1.7515414953231812, + "logps/chosen": -423.7660827636719, + "logps/rejected": -414.07293701171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.061947822570801, + "rewards/margins": 9.822175025939941, + "rewards/rejected": -16.884122848510742, + "step": 13830 + }, + { + "epoch": 2.15, + "learning_rate": 4.003118419005135e-06, + "logits/chosen": -1.7323896884918213, + "logits/rejected": -2.60219144821167, + "logps/chosen": -167.4000701904297, + "logps/rejected": -431.2447509765625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.252803325653076, + "rewards/margins": 9.083517074584961, + "rewards/rejected": -16.336320877075195, + "step": 13831 + }, + { + "epoch": 2.15, + "learning_rate": 4.002384978473987e-06, + "logits/chosen": -2.7379183769226074, + "logits/rejected": -2.0655672550201416, + "logps/chosen": -330.34027099609375, + "logps/rejected": -226.4405059814453, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.154268741607666, + "rewards/margins": 12.041193008422852, + "rewards/rejected": -19.19546127319336, + "step": 13832 + }, + { + "epoch": 2.15, + "learning_rate": 4.00165153794284e-06, + "logits/chosen": -2.1046969890594482, + "logits/rejected": -2.5297014713287354, + "logps/chosen": -149.513427734375, + "logps/rejected": -412.8692626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8548994064331055, + "rewards/margins": 11.466585159301758, + "rewards/rejected": -19.32148551940918, + "step": 13833 + }, + { + "epoch": 2.15, + "learning_rate": 4.000918097411692e-06, + "logits/chosen": -2.619508743286133, + "logits/rejected": -2.9778964519500732, + "logps/chosen": -617.68994140625, + "logps/rejected": -633.9536743164062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.01875638961792, + "rewards/margins": 12.549063682556152, + "rewards/rejected": -18.567819595336914, + "step": 13834 + }, + { + "epoch": 2.15, + "learning_rate": 4.000184656880544e-06, + "logits/chosen": -2.6740596294403076, + "logits/rejected": -2.64047908782959, + "logps/chosen": -100.89309692382812, + "logps/rejected": -273.0325927734375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.938681602478027, + "rewards/margins": 8.116962432861328, + "rewards/rejected": -17.055644989013672, + "step": 13835 + }, + { + "epoch": 2.15, + "learning_rate": 3.999451216349396e-06, + "logits/chosen": -2.4976556301116943, + "logits/rejected": -2.9580600261688232, + "logps/chosen": -99.52096557617188, + "logps/rejected": -208.8472442626953, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.216660499572754, + "rewards/margins": 7.566650390625, + "rewards/rejected": -13.783310890197754, + "step": 13836 + }, + { + "epoch": 2.15, + "learning_rate": 3.9987177758182475e-06, + "logits/chosen": -1.5250918865203857, + "logits/rejected": -2.322777032852173, + "logps/chosen": -188.6597900390625, + "logps/rejected": -421.258544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.835973739624023, + "rewards/margins": 11.56036376953125, + "rewards/rejected": -20.396339416503906, + "step": 13837 + }, + { + "epoch": 2.15, + "learning_rate": 3.9979843352871e-06, + "logits/chosen": -2.0387258529663086, + "logits/rejected": -2.5130951404571533, + "logps/chosen": -245.3245086669922, + "logps/rejected": -391.2279968261719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.720662593841553, + "rewards/margins": 10.247415542602539, + "rewards/rejected": -16.96807861328125, + "step": 13838 + }, + { + "epoch": 2.15, + "learning_rate": 3.997250894755952e-06, + "logits/chosen": -2.290912389755249, + "logits/rejected": -2.704653739929199, + "logps/chosen": -417.41802978515625, + "logps/rejected": -613.0272216796875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.45360279083252, + "rewards/margins": 7.976371765136719, + "rewards/rejected": -18.429973602294922, + "step": 13839 + }, + { + "epoch": 2.15, + "learning_rate": 3.996517454224804e-06, + "logits/chosen": -2.748825788497925, + "logits/rejected": -2.001875162124634, + "logps/chosen": -317.8402404785156, + "logps/rejected": -253.1592559814453, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.4858980178833, + "rewards/margins": 6.204935073852539, + "rewards/rejected": -15.690834045410156, + "step": 13840 + }, + { + "epoch": 2.15, + "learning_rate": 3.995784013693656e-06, + "logits/chosen": -2.701892137527466, + "logits/rejected": -1.8241887092590332, + "logps/chosen": -648.1434936523438, + "logps/rejected": -527.8990478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.530353546142578, + "rewards/margins": 13.866046905517578, + "rewards/rejected": -23.396400451660156, + "step": 13841 + }, + { + "epoch": 2.15, + "learning_rate": 3.995050573162509e-06, + "logits/chosen": -2.9112493991851807, + "logits/rejected": -1.7623183727264404, + "logps/chosen": -412.7289733886719, + "logps/rejected": -314.6761474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4253830909729, + "rewards/margins": 10.696985244750977, + "rewards/rejected": -17.12236785888672, + "step": 13842 + }, + { + "epoch": 2.15, + "learning_rate": 3.9943171326313605e-06, + "logits/chosen": -2.805891990661621, + "logits/rejected": -2.8446221351623535, + "logps/chosen": -137.29238891601562, + "logps/rejected": -392.5290222167969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5731048583984375, + "rewards/margins": 10.052967071533203, + "rewards/rejected": -17.62607192993164, + "step": 13843 + }, + { + "epoch": 2.15, + "learning_rate": 3.993583692100212e-06, + "logits/chosen": -2.715721607208252, + "logits/rejected": -2.802762031555176, + "logps/chosen": -281.2728271484375, + "logps/rejected": -453.80914306640625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.93144416809082, + "rewards/margins": 7.270567893981934, + "rewards/rejected": -14.202011108398438, + "step": 13844 + }, + { + "epoch": 2.15, + "learning_rate": 3.992850251569064e-06, + "logits/chosen": -2.3861939907073975, + "logits/rejected": -2.500429391860962, + "logps/chosen": -224.5863037109375, + "logps/rejected": -438.565185546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.32956600189209, + "rewards/margins": 12.544203758239746, + "rewards/rejected": -19.873769760131836, + "step": 13845 + }, + { + "epoch": 2.15, + "learning_rate": 3.992116811037917e-06, + "logits/chosen": -1.8942111730575562, + "logits/rejected": -2.942337989807129, + "logps/chosen": -394.3860168457031, + "logps/rejected": -697.0656127929688, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.19229793548584, + "rewards/margins": 9.742086410522461, + "rewards/rejected": -19.934383392333984, + "step": 13846 + }, + { + "epoch": 2.15, + "learning_rate": 3.991383370506769e-06, + "logits/chosen": -3.0446012020111084, + "logits/rejected": -2.177903890609741, + "logps/chosen": -904.196533203125, + "logps/rejected": -410.7043762207031, + "loss": 0.8959, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.723447799682617, + "rewards/margins": 3.740880012512207, + "rewards/rejected": -12.46432876586914, + "step": 13847 + }, + { + "epoch": 2.15, + "learning_rate": 3.9906499299756216e-06, + "logits/chosen": -2.2681963443756104, + "logits/rejected": -2.6411867141723633, + "logps/chosen": -281.9709167480469, + "logps/rejected": -490.8698425292969, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4357218742370605, + "rewards/margins": 11.390277862548828, + "rewards/rejected": -16.826000213623047, + "step": 13848 + }, + { + "epoch": 2.15, + "learning_rate": 3.9899164894444735e-06, + "logits/chosen": -1.221843957901001, + "logits/rejected": -2.444340944290161, + "logps/chosen": -179.1292724609375, + "logps/rejected": -456.68731689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.417201042175293, + "rewards/margins": 14.588248252868652, + "rewards/rejected": -20.005449295043945, + "step": 13849 + }, + { + "epoch": 2.15, + "learning_rate": 3.989183048913325e-06, + "logits/chosen": -2.681699275970459, + "logits/rejected": -2.6254053115844727, + "logps/chosen": -413.5220947265625, + "logps/rejected": -543.9403076171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.136820316314697, + "rewards/margins": 10.380369186401367, + "rewards/rejected": -17.517189025878906, + "step": 13850 + }, + { + "epoch": 2.15, + "learning_rate": 3.988449608382178e-06, + "logits/chosen": -2.5681536197662354, + "logits/rejected": -2.2175216674804688, + "logps/chosen": -474.5410461425781, + "logps/rejected": -418.76104736328125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.170236587524414, + "rewards/margins": 6.653575420379639, + "rewards/rejected": -14.823812484741211, + "step": 13851 + }, + { + "epoch": 2.15, + "learning_rate": 3.98771616785103e-06, + "logits/chosen": -2.442917585372925, + "logits/rejected": -2.558198928833008, + "logps/chosen": -201.54299926757812, + "logps/rejected": -287.4443359375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.517290115356445, + "rewards/margins": 7.782529354095459, + "rewards/rejected": -17.299819946289062, + "step": 13852 + }, + { + "epoch": 2.15, + "learning_rate": 3.986982727319882e-06, + "logits/chosen": -2.5147457122802734, + "logits/rejected": -1.441702961921692, + "logps/chosen": -223.5019073486328, + "logps/rejected": -297.0209045410156, + "loss": 0.1141, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1339430809021, + "rewards/margins": 6.722592353820801, + "rewards/rejected": -13.856534957885742, + "step": 13853 + }, + { + "epoch": 2.15, + "learning_rate": 3.986249286788734e-06, + "logits/chosen": -2.9185943603515625, + "logits/rejected": -2.684084892272949, + "logps/chosen": -471.1230163574219, + "logps/rejected": -482.207763671875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.037919998168945, + "rewards/margins": 8.68170166015625, + "rewards/rejected": -14.719621658325195, + "step": 13854 + }, + { + "epoch": 2.15, + "learning_rate": 3.985515846257586e-06, + "logits/chosen": -2.775845527648926, + "logits/rejected": -2.1439290046691895, + "logps/chosen": -403.41632080078125, + "logps/rejected": -298.2090148925781, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.213096618652344, + "rewards/margins": 7.853317737579346, + "rewards/rejected": -14.066413879394531, + "step": 13855 + }, + { + "epoch": 2.15, + "learning_rate": 3.984782405726438e-06, + "logits/chosen": -2.2847330570220947, + "logits/rejected": -2.5079503059387207, + "logps/chosen": -310.19073486328125, + "logps/rejected": -445.3059997558594, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.897161960601807, + "rewards/margins": 8.423669815063477, + "rewards/rejected": -16.320831298828125, + "step": 13856 + }, + { + "epoch": 2.16, + "learning_rate": 3.98404896519529e-06, + "logits/chosen": -2.9803295135498047, + "logits/rejected": -2.6997411251068115, + "logps/chosen": -476.360595703125, + "logps/rejected": -427.49822998046875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.201423645019531, + "rewards/margins": 8.122600555419922, + "rewards/rejected": -16.324024200439453, + "step": 13857 + }, + { + "epoch": 2.16, + "learning_rate": 3.983315524664142e-06, + "logits/chosen": -1.1717864274978638, + "logits/rejected": -2.638267755508423, + "logps/chosen": -168.3515167236328, + "logps/rejected": -474.91644287109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.377532005310059, + "rewards/margins": 8.538063049316406, + "rewards/rejected": -16.91559600830078, + "step": 13858 + }, + { + "epoch": 2.16, + "learning_rate": 3.982582084132994e-06, + "logits/chosen": -2.5566446781158447, + "logits/rejected": -1.3868083953857422, + "logps/chosen": -340.0146484375, + "logps/rejected": -248.68572998046875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9261064529418945, + "rewards/margins": 7.779158592224121, + "rewards/rejected": -15.705265045166016, + "step": 13859 + }, + { + "epoch": 2.16, + "learning_rate": 3.981848643601847e-06, + "logits/chosen": -2.333286762237549, + "logits/rejected": -2.9763736724853516, + "logps/chosen": -267.70257568359375, + "logps/rejected": -539.682861328125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8533830642700195, + "rewards/margins": 7.904748916625977, + "rewards/rejected": -15.75813102722168, + "step": 13860 + }, + { + "epoch": 2.16, + "learning_rate": 3.9811152030706985e-06, + "logits/chosen": -2.5993306636810303, + "logits/rejected": -2.8475637435913086, + "logps/chosen": -194.31976318359375, + "logps/rejected": -228.16009521484375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.843124866485596, + "rewards/margins": 7.926813125610352, + "rewards/rejected": -13.769937515258789, + "step": 13861 + }, + { + "epoch": 2.16, + "learning_rate": 3.98038176253955e-06, + "logits/chosen": -1.4668327569961548, + "logits/rejected": -2.456364631652832, + "logps/chosen": -193.85003662109375, + "logps/rejected": -522.3944091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.642024993896484, + "rewards/margins": 11.17939281463623, + "rewards/rejected": -18.82141876220703, + "step": 13862 + }, + { + "epoch": 2.16, + "learning_rate": 3.979648322008402e-06, + "logits/chosen": -2.534454345703125, + "logits/rejected": -1.4718847274780273, + "logps/chosen": -596.0927734375, + "logps/rejected": -349.38568115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.344313621520996, + "rewards/margins": 10.251566886901855, + "rewards/rejected": -18.59588050842285, + "step": 13863 + }, + { + "epoch": 2.16, + "learning_rate": 3.978914881477255e-06, + "logits/chosen": -2.7362711429595947, + "logits/rejected": -1.723495364189148, + "logps/chosen": -433.7249755859375, + "logps/rejected": -324.8547058105469, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.382314682006836, + "rewards/margins": 7.953381061553955, + "rewards/rejected": -15.335695266723633, + "step": 13864 + }, + { + "epoch": 2.16, + "learning_rate": 3.978181440946108e-06, + "logits/chosen": -2.501072406768799, + "logits/rejected": -2.9335644245147705, + "logps/chosen": -310.43963623046875, + "logps/rejected": -502.5942687988281, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.15489387512207, + "rewards/margins": 8.882192611694336, + "rewards/rejected": -19.037086486816406, + "step": 13865 + }, + { + "epoch": 2.16, + "learning_rate": 3.97744800041496e-06, + "logits/chosen": -2.794745445251465, + "logits/rejected": -2.4418141841888428, + "logps/chosen": -218.43014526367188, + "logps/rejected": -273.5885925292969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.054898262023926, + "rewards/margins": 10.58434772491455, + "rewards/rejected": -20.639245986938477, + "step": 13866 + }, + { + "epoch": 2.16, + "learning_rate": 3.9767145598838115e-06, + "logits/chosen": -2.581702947616577, + "logits/rejected": -2.8103668689727783, + "logps/chosen": -671.1141357421875, + "logps/rejected": -460.33099365234375, + "loss": 0.0494, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.448780059814453, + "rewards/margins": 6.905730724334717, + "rewards/rejected": -14.354511260986328, + "step": 13867 + }, + { + "epoch": 2.16, + "learning_rate": 3.975981119352663e-06, + "logits/chosen": -1.9509758949279785, + "logits/rejected": -2.414003849029541, + "logps/chosen": -296.05352783203125, + "logps/rejected": -461.2059631347656, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.27029800415039, + "rewards/margins": 8.554388046264648, + "rewards/rejected": -19.82468605041504, + "step": 13868 + }, + { + "epoch": 2.16, + "learning_rate": 3.975247678821516e-06, + "logits/chosen": -2.608949661254883, + "logits/rejected": -2.73875093460083, + "logps/chosen": -733.607421875, + "logps/rejected": -554.621337890625, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.626005172729492, + "rewards/margins": 7.6853837966918945, + "rewards/rejected": -15.31138801574707, + "step": 13869 + }, + { + "epoch": 2.16, + "learning_rate": 3.974514238290368e-06, + "logits/chosen": -1.2338002920150757, + "logits/rejected": -2.710742473602295, + "logps/chosen": -398.83447265625, + "logps/rejected": -542.9766235351562, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.639771461486816, + "rewards/margins": 9.425607681274414, + "rewards/rejected": -20.065380096435547, + "step": 13870 + }, + { + "epoch": 2.16, + "learning_rate": 3.97378079775922e-06, + "logits/chosen": -2.4188990592956543, + "logits/rejected": -2.8098294734954834, + "logps/chosen": -583.1362915039062, + "logps/rejected": -537.4755859375, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.693521499633789, + "rewards/margins": 6.660067558288574, + "rewards/rejected": -17.353588104248047, + "step": 13871 + }, + { + "epoch": 2.16, + "learning_rate": 3.973047357228072e-06, + "logits/chosen": -2.1052486896514893, + "logits/rejected": -2.5603134632110596, + "logps/chosen": -195.74253845214844, + "logps/rejected": -435.63250732421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.872861862182617, + "rewards/margins": 12.26954174041748, + "rewards/rejected": -18.14240264892578, + "step": 13872 + }, + { + "epoch": 2.16, + "learning_rate": 3.9723139166969245e-06, + "logits/chosen": -0.8902957439422607, + "logits/rejected": -2.4934537410736084, + "logps/chosen": -210.73709106445312, + "logps/rejected": -475.9205627441406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.665060043334961, + "rewards/margins": 8.976837158203125, + "rewards/rejected": -17.64189910888672, + "step": 13873 + }, + { + "epoch": 2.16, + "learning_rate": 3.971580476165776e-06, + "logits/chosen": -1.4959940910339355, + "logits/rejected": -2.4889559745788574, + "logps/chosen": -190.7318115234375, + "logps/rejected": -590.4691162109375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.46379566192627, + "rewards/margins": 7.593426704406738, + "rewards/rejected": -19.057222366333008, + "step": 13874 + }, + { + "epoch": 2.16, + "learning_rate": 3.970847035634628e-06, + "logits/chosen": -2.6945536136627197, + "logits/rejected": -2.791741132736206, + "logps/chosen": -212.10354614257812, + "logps/rejected": -372.6043395996094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.93796443939209, + "rewards/margins": 9.438580513000488, + "rewards/rejected": -16.376544952392578, + "step": 13875 + }, + { + "epoch": 2.16, + "learning_rate": 3.97011359510348e-06, + "logits/chosen": -1.843508243560791, + "logits/rejected": -2.82539963722229, + "logps/chosen": -192.14572143554688, + "logps/rejected": -527.9488525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.813191413879395, + "rewards/margins": 10.3341064453125, + "rewards/rejected": -19.147296905517578, + "step": 13876 + }, + { + "epoch": 2.16, + "learning_rate": 3.969380154572332e-06, + "logits/chosen": -2.83707857131958, + "logits/rejected": -1.5981361865997314, + "logps/chosen": -346.0780944824219, + "logps/rejected": -492.9234313964844, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.865357398986816, + "rewards/margins": 12.016109466552734, + "rewards/rejected": -19.881465911865234, + "step": 13877 + }, + { + "epoch": 2.16, + "learning_rate": 3.968646714041185e-06, + "logits/chosen": -1.9822206497192383, + "logits/rejected": -2.792888879776001, + "logps/chosen": -250.65122985839844, + "logps/rejected": -325.07611083984375, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.059985160827637, + "rewards/margins": 6.789934158325195, + "rewards/rejected": -15.849920272827148, + "step": 13878 + }, + { + "epoch": 2.16, + "learning_rate": 3.9679132735100366e-06, + "logits/chosen": -2.216693878173828, + "logits/rejected": -2.8464560508728027, + "logps/chosen": -1059.0947265625, + "logps/rejected": -1006.820556640625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.224969863891602, + "rewards/margins": 8.459561347961426, + "rewards/rejected": -18.684532165527344, + "step": 13879 + }, + { + "epoch": 2.16, + "learning_rate": 3.9671798329788885e-06, + "logits/chosen": -2.901648998260498, + "logits/rejected": -2.9653494358062744, + "logps/chosen": -288.3669128417969, + "logps/rejected": -301.82666015625, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.491018295288086, + "rewards/margins": 5.655788898468018, + "rewards/rejected": -18.146808624267578, + "step": 13880 + }, + { + "epoch": 2.16, + "learning_rate": 3.966446392447741e-06, + "logits/chosen": -2.8096718788146973, + "logits/rejected": -2.7341325283050537, + "logps/chosen": -281.80950927734375, + "logps/rejected": -437.94073486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.462932586669922, + "rewards/margins": 10.780698776245117, + "rewards/rejected": -21.24363136291504, + "step": 13881 + }, + { + "epoch": 2.16, + "learning_rate": 3.965712951916594e-06, + "logits/chosen": -1.3707456588745117, + "logits/rejected": -2.454887628555298, + "logps/chosen": -186.58169555664062, + "logps/rejected": -407.3787841796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.746743202209473, + "rewards/margins": 8.507465362548828, + "rewards/rejected": -18.254209518432617, + "step": 13882 + }, + { + "epoch": 2.16, + "learning_rate": 3.964979511385446e-06, + "logits/chosen": -2.363917350769043, + "logits/rejected": -2.268427848815918, + "logps/chosen": -307.01129150390625, + "logps/rejected": -397.4619140625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.376449584960938, + "rewards/margins": 7.7681965827941895, + "rewards/rejected": -16.14464569091797, + "step": 13883 + }, + { + "epoch": 2.16, + "learning_rate": 3.964246070854298e-06, + "logits/chosen": -2.0779547691345215, + "logits/rejected": -2.5250773429870605, + "logps/chosen": -300.9573974609375, + "logps/rejected": -456.1175537109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.306092262268066, + "rewards/margins": 6.847895622253418, + "rewards/rejected": -15.153987884521484, + "step": 13884 + }, + { + "epoch": 2.16, + "learning_rate": 3.9635126303231495e-06, + "logits/chosen": -2.6554880142211914, + "logits/rejected": -2.9507157802581787, + "logps/chosen": -765.2415771484375, + "logps/rejected": -757.1392822265625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.712957382202148, + "rewards/margins": 7.191892623901367, + "rewards/rejected": -15.904850006103516, + "step": 13885 + }, + { + "epoch": 2.16, + "learning_rate": 3.962779189792002e-06, + "logits/chosen": -1.1727622747421265, + "logits/rejected": -2.5871291160583496, + "logps/chosen": -196.16542053222656, + "logps/rejected": -476.70233154296875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.973617553710938, + "rewards/margins": 10.548904418945312, + "rewards/rejected": -21.52252197265625, + "step": 13886 + }, + { + "epoch": 2.16, + "learning_rate": 3.962045749260854e-06, + "logits/chosen": -2.8679490089416504, + "logits/rejected": -2.063333511352539, + "logps/chosen": -858.1364135742188, + "logps/rejected": -620.651123046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.757320404052734, + "rewards/margins": 9.173126220703125, + "rewards/rejected": -18.93044662475586, + "step": 13887 + }, + { + "epoch": 2.16, + "learning_rate": 3.961312308729706e-06, + "logits/chosen": -1.9275939464569092, + "logits/rejected": -2.938746213912964, + "logps/chosen": -154.17108154296875, + "logps/rejected": -541.0186157226562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.811576843261719, + "rewards/margins": 14.158875465393066, + "rewards/rejected": -20.97045135498047, + "step": 13888 + }, + { + "epoch": 2.16, + "learning_rate": 3.960578868198558e-06, + "logits/chosen": -2.9109838008880615, + "logits/rejected": -2.8136954307556152, + "logps/chosen": -302.50323486328125, + "logps/rejected": -532.0811767578125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.082947731018066, + "rewards/margins": 10.612635612487793, + "rewards/rejected": -15.69558334350586, + "step": 13889 + }, + { + "epoch": 2.16, + "learning_rate": 3.95984542766741e-06, + "logits/chosen": -2.503527879714966, + "logits/rejected": -2.010770797729492, + "logps/chosen": -374.90826416015625, + "logps/rejected": -376.5748291015625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.774924278259277, + "rewards/margins": 9.088712692260742, + "rewards/rejected": -19.863636016845703, + "step": 13890 + }, + { + "epoch": 2.16, + "learning_rate": 3.9591119871362625e-06, + "logits/chosen": -2.7249815464019775, + "logits/rejected": -2.4206020832061768, + "logps/chosen": -157.21200561523438, + "logps/rejected": -243.52093505859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.684021472930908, + "rewards/margins": 8.773248672485352, + "rewards/rejected": -14.457270622253418, + "step": 13891 + }, + { + "epoch": 2.16, + "learning_rate": 3.958378546605114e-06, + "logits/chosen": -2.402949333190918, + "logits/rejected": -3.080418348312378, + "logps/chosen": -244.88909912109375, + "logps/rejected": -539.3928833007812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.976221084594727, + "rewards/margins": 10.401426315307617, + "rewards/rejected": -18.377647399902344, + "step": 13892 + }, + { + "epoch": 2.16, + "learning_rate": 3.957645106073966e-06, + "logits/chosen": -2.9939188957214355, + "logits/rejected": -2.715341091156006, + "logps/chosen": -146.21771240234375, + "logps/rejected": -234.44097900390625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.588791847229004, + "rewards/margins": 10.106107711791992, + "rewards/rejected": -16.694900512695312, + "step": 13893 + }, + { + "epoch": 2.16, + "learning_rate": 3.956911665542818e-06, + "logits/chosen": -2.3315398693084717, + "logits/rejected": -2.7899258136749268, + "logps/chosen": -94.54896545410156, + "logps/rejected": -302.526123046875, + "loss": 0.0919, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.913342475891113, + "rewards/margins": 7.599465370178223, + "rewards/rejected": -15.512807846069336, + "step": 13894 + }, + { + "epoch": 2.16, + "learning_rate": 3.956178225011671e-06, + "logits/chosen": -2.8496663570404053, + "logits/rejected": -2.010611057281494, + "logps/chosen": -338.01666259765625, + "logps/rejected": -314.72393798828125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.54885482788086, + "rewards/margins": 6.672183036804199, + "rewards/rejected": -15.221036911010742, + "step": 13895 + }, + { + "epoch": 2.16, + "learning_rate": 3.955444784480523e-06, + "logits/chosen": -2.2897117137908936, + "logits/rejected": -2.259291172027588, + "logps/chosen": -195.10662841796875, + "logps/rejected": -348.802734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.728821754455566, + "rewards/margins": 8.977128982543945, + "rewards/rejected": -21.705951690673828, + "step": 13896 + }, + { + "epoch": 2.16, + "learning_rate": 3.954711343949375e-06, + "logits/chosen": -2.6972413063049316, + "logits/rejected": -2.067741632461548, + "logps/chosen": -238.1667938232422, + "logps/rejected": -243.14202880859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.949705600738525, + "rewards/margins": 9.401695251464844, + "rewards/rejected": -16.351402282714844, + "step": 13897 + }, + { + "epoch": 2.16, + "learning_rate": 3.953977903418227e-06, + "logits/chosen": -2.8406217098236084, + "logits/rejected": -2.952991247177124, + "logps/chosen": -48.89295196533203, + "logps/rejected": -206.29299926757812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7990922927856445, + "rewards/margins": 11.184642791748047, + "rewards/rejected": -14.983734130859375, + "step": 13898 + }, + { + "epoch": 2.16, + "learning_rate": 3.953244462887079e-06, + "logits/chosen": -2.666529655456543, + "logits/rejected": -2.1663384437561035, + "logps/chosen": -519.7323608398438, + "logps/rejected": -462.933837890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.917322158813477, + "rewards/margins": 10.084795951843262, + "rewards/rejected": -19.002117156982422, + "step": 13899 + }, + { + "epoch": 2.16, + "learning_rate": 3.952511022355932e-06, + "logits/chosen": -2.1765968799591064, + "logits/rejected": -2.570133924484253, + "logps/chosen": -232.43646240234375, + "logps/rejected": -345.2567443847656, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.062171936035156, + "rewards/margins": 9.585793495178223, + "rewards/rejected": -15.647965431213379, + "step": 13900 + }, + { + "epoch": 2.16, + "learning_rate": 3.951777581824784e-06, + "logits/chosen": -1.196850061416626, + "logits/rejected": -2.7407190799713135, + "logps/chosen": -196.97662353515625, + "logps/rejected": -423.045166015625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.274410247802734, + "rewards/margins": 7.6033034324646, + "rewards/rejected": -18.877714157104492, + "step": 13901 + }, + { + "epoch": 2.16, + "learning_rate": 3.951044141293636e-06, + "logits/chosen": -1.6492186784744263, + "logits/rejected": -2.5671470165252686, + "logps/chosen": -200.24093627929688, + "logps/rejected": -344.4481201171875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.56637191772461, + "rewards/margins": 5.514682769775391, + "rewards/rejected": -17.0810546875, + "step": 13902 + }, + { + "epoch": 2.16, + "learning_rate": 3.950310700762488e-06, + "logits/chosen": -2.82346773147583, + "logits/rejected": -1.646978735923767, + "logps/chosen": -1096.470703125, + "logps/rejected": -469.8403625488281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.119009017944336, + "rewards/margins": 12.468822479248047, + "rewards/rejected": -21.587833404541016, + "step": 13903 + }, + { + "epoch": 2.16, + "learning_rate": 3.94957726023134e-06, + "logits/chosen": -2.7380776405334473, + "logits/rejected": -2.83451509475708, + "logps/chosen": -431.88372802734375, + "logps/rejected": -443.22943115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.477666854858398, + "rewards/margins": 13.340124130249023, + "rewards/rejected": -22.817790985107422, + "step": 13904 + }, + { + "epoch": 2.16, + "learning_rate": 3.948843819700192e-06, + "logits/chosen": -2.3999502658843994, + "logits/rejected": -2.426178216934204, + "logps/chosen": -282.1615295410156, + "logps/rejected": -321.8304443359375, + "loss": 1.8334, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.474132537841797, + "rewards/margins": 1.6842870712280273, + "rewards/rejected": -14.15842056274414, + "step": 13905 + }, + { + "epoch": 2.16, + "learning_rate": 3.948110379169044e-06, + "logits/chosen": -2.582465648651123, + "logits/rejected": -2.730536460876465, + "logps/chosen": -245.44659423828125, + "logps/rejected": -423.2981872558594, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.390094757080078, + "rewards/margins": 5.735734462738037, + "rewards/rejected": -19.12582778930664, + "step": 13906 + }, + { + "epoch": 2.16, + "learning_rate": 3.947376938637896e-06, + "logits/chosen": -2.7326321601867676, + "logits/rejected": -1.6951947212219238, + "logps/chosen": -719.0802001953125, + "logps/rejected": -502.0909423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.874528884887695, + "rewards/margins": 10.769449234008789, + "rewards/rejected": -23.643978118896484, + "step": 13907 + }, + { + "epoch": 2.16, + "learning_rate": 3.946643498106748e-06, + "logits/chosen": -2.0493695735931396, + "logits/rejected": -2.7357077598571777, + "logps/chosen": -224.5284423828125, + "logps/rejected": -389.0841064453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.46379280090332, + "rewards/margins": 8.113668441772461, + "rewards/rejected": -18.57746124267578, + "step": 13908 + }, + { + "epoch": 2.16, + "learning_rate": 3.9459100575756005e-06, + "logits/chosen": -2.439241647720337, + "logits/rejected": -2.5818898677825928, + "logps/chosen": -199.9846954345703, + "logps/rejected": -380.0410461425781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.31566047668457, + "rewards/margins": 11.99258804321289, + "rewards/rejected": -21.308246612548828, + "step": 13909 + }, + { + "epoch": 2.16, + "learning_rate": 3.9451766170444524e-06, + "logits/chosen": -1.435897946357727, + "logits/rejected": -2.278449058532715, + "logps/chosen": -167.58839416503906, + "logps/rejected": -252.34420776367188, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.77151870727539, + "rewards/margins": 7.376048564910889, + "rewards/rejected": -16.147567749023438, + "step": 13910 + }, + { + "epoch": 2.16, + "learning_rate": 3.944443176513304e-06, + "logits/chosen": -2.934959650039673, + "logits/rejected": -1.1511605978012085, + "logps/chosen": -642.76806640625, + "logps/rejected": -557.6694946289062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.016242027282715, + "rewards/margins": 9.281707763671875, + "rewards/rejected": -18.297950744628906, + "step": 13911 + }, + { + "epoch": 2.16, + "learning_rate": 3.943709735982156e-06, + "logits/chosen": -2.3586103916168213, + "logits/rejected": -2.14762544631958, + "logps/chosen": -191.817138671875, + "logps/rejected": -404.39178466796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.865584373474121, + "rewards/margins": 10.340394973754883, + "rewards/rejected": -18.205978393554688, + "step": 13912 + }, + { + "epoch": 2.16, + "learning_rate": 3.942976295451009e-06, + "logits/chosen": -2.6636223793029785, + "logits/rejected": -1.5138964653015137, + "logps/chosen": -326.4317321777344, + "logps/rejected": -455.45599365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.39348030090332, + "rewards/margins": 11.050106048583984, + "rewards/rejected": -22.443586349487305, + "step": 13913 + }, + { + "epoch": 2.16, + "learning_rate": 3.942242854919861e-06, + "logits/chosen": -2.0156078338623047, + "logits/rejected": -2.6428775787353516, + "logps/chosen": -169.79904174804688, + "logps/rejected": -534.9334716796875, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.658232688903809, + "rewards/margins": 7.338259696960449, + "rewards/rejected": -17.996492385864258, + "step": 13914 + }, + { + "epoch": 2.16, + "learning_rate": 3.9415094143887135e-06, + "logits/chosen": -2.0520503520965576, + "logits/rejected": -2.5914306640625, + "logps/chosen": -130.67294311523438, + "logps/rejected": -410.4716796875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.767826080322266, + "rewards/margins": 11.649682998657227, + "rewards/rejected": -23.417509078979492, + "step": 13915 + }, + { + "epoch": 2.16, + "learning_rate": 3.940775973857565e-06, + "logits/chosen": -1.5139552354812622, + "logits/rejected": -2.5494484901428223, + "logps/chosen": -152.54376220703125, + "logps/rejected": -315.0176696777344, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.197003364562988, + "rewards/margins": 7.577150344848633, + "rewards/rejected": -16.774154663085938, + "step": 13916 + }, + { + "epoch": 2.16, + "learning_rate": 3.940042533326417e-06, + "logits/chosen": -1.9156389236450195, + "logits/rejected": -2.6328516006469727, + "logps/chosen": -424.2821350097656, + "logps/rejected": -674.544921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.785733222961426, + "rewards/margins": 8.942464828491211, + "rewards/rejected": -21.728199005126953, + "step": 13917 + }, + { + "epoch": 2.16, + "learning_rate": 3.93930909279527e-06, + "logits/chosen": -2.698235511779785, + "logits/rejected": -2.519451141357422, + "logps/chosen": -353.48095703125, + "logps/rejected": -291.6042175292969, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.099441528320312, + "rewards/margins": 6.312840461730957, + "rewards/rejected": -14.41228199005127, + "step": 13918 + }, + { + "epoch": 2.16, + "learning_rate": 3.938575652264122e-06, + "logits/chosen": -2.2814440727233887, + "logits/rejected": -2.749377727508545, + "logps/chosen": -233.85597229003906, + "logps/rejected": -603.8233642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.970739364624023, + "rewards/margins": 11.537156105041504, + "rewards/rejected": -20.507896423339844, + "step": 13919 + }, + { + "epoch": 2.16, + "learning_rate": 3.937842211732974e-06, + "logits/chosen": -1.494511604309082, + "logits/rejected": -2.58718204498291, + "logps/chosen": -141.32020568847656, + "logps/rejected": -314.21893310546875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.323349952697754, + "rewards/margins": 6.754462242126465, + "rewards/rejected": -19.07781219482422, + "step": 13920 + }, + { + "epoch": 2.17, + "learning_rate": 3.937108771201826e-06, + "logits/chosen": -2.663287401199341, + "logits/rejected": -2.924583673477173, + "logps/chosen": -126.3099365234375, + "logps/rejected": -241.9691162109375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.742908477783203, + "rewards/margins": 7.946457386016846, + "rewards/rejected": -17.68936538696289, + "step": 13921 + }, + { + "epoch": 2.17, + "learning_rate": 3.936375330670678e-06, + "logits/chosen": -2.268481731414795, + "logits/rejected": -2.9858410358428955, + "logps/chosen": -100.33157348632812, + "logps/rejected": -249.0484161376953, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.944309234619141, + "rewards/margins": 7.125096321105957, + "rewards/rejected": -14.069405555725098, + "step": 13922 + }, + { + "epoch": 2.17, + "learning_rate": 3.93564189013953e-06, + "logits/chosen": -2.1865108013153076, + "logits/rejected": -2.69736909866333, + "logps/chosen": -168.6173095703125, + "logps/rejected": -270.82049560546875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.065640449523926, + "rewards/margins": 5.507060527801514, + "rewards/rejected": -15.572700500488281, + "step": 13923 + }, + { + "epoch": 2.17, + "learning_rate": 3.934908449608382e-06, + "logits/chosen": -2.2879927158355713, + "logits/rejected": -2.515228748321533, + "logps/chosen": -132.8240509033203, + "logps/rejected": -396.2539367675781, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.89364242553711, + "rewards/margins": 10.279165267944336, + "rewards/rejected": -19.172807693481445, + "step": 13924 + }, + { + "epoch": 2.17, + "learning_rate": 3.934175009077234e-06, + "logits/chosen": -1.957970380783081, + "logits/rejected": -2.307666778564453, + "logps/chosen": -272.0147399902344, + "logps/rejected": -402.5260314941406, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.716436386108398, + "rewards/margins": 6.891661643981934, + "rewards/rejected": -16.608097076416016, + "step": 13925 + }, + { + "epoch": 2.17, + "learning_rate": 3.933441568546086e-06, + "logits/chosen": -1.2877310514450073, + "logits/rejected": -2.642385721206665, + "logps/chosen": -203.07098388671875, + "logps/rejected": -675.1854858398438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.947836875915527, + "rewards/margins": 16.13661003112793, + "rewards/rejected": -26.08444595336914, + "step": 13926 + }, + { + "epoch": 2.17, + "learning_rate": 3.932708128014939e-06, + "logits/chosen": -2.7695536613464355, + "logits/rejected": -0.961513876914978, + "logps/chosen": -379.7487487792969, + "logps/rejected": -297.09075927734375, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.00605583190918, + "rewards/margins": 5.480506420135498, + "rewards/rejected": -17.486562728881836, + "step": 13927 + }, + { + "epoch": 2.17, + "learning_rate": 3.9319746874837905e-06, + "logits/chosen": -1.2806559801101685, + "logits/rejected": -1.867422103881836, + "logps/chosen": -211.602783203125, + "logps/rejected": -400.67822265625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.536420822143555, + "rewards/margins": 10.610365867614746, + "rewards/rejected": -21.146787643432617, + "step": 13928 + }, + { + "epoch": 2.17, + "learning_rate": 3.931241246952642e-06, + "logits/chosen": -2.6062448024749756, + "logits/rejected": -2.5261778831481934, + "logps/chosen": -142.1864471435547, + "logps/rejected": -359.49566650390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.176870346069336, + "rewards/margins": 10.91163444519043, + "rewards/rejected": -19.088504791259766, + "step": 13929 + }, + { + "epoch": 2.17, + "learning_rate": 3.930507806421494e-06, + "logits/chosen": -2.4935123920440674, + "logits/rejected": -1.6992160081863403, + "logps/chosen": -272.3880310058594, + "logps/rejected": -252.23764038085938, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7116546630859375, + "rewards/margins": 5.571316719055176, + "rewards/rejected": -13.28297233581543, + "step": 13930 + }, + { + "epoch": 2.17, + "learning_rate": 3.929774365890347e-06, + "logits/chosen": -2.7857840061187744, + "logits/rejected": -2.645988702774048, + "logps/chosen": -338.83453369140625, + "logps/rejected": -302.75933837890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.918644905090332, + "rewards/margins": 9.230326652526855, + "rewards/rejected": -19.148971557617188, + "step": 13931 + }, + { + "epoch": 2.17, + "learning_rate": 3.9290409253592e-06, + "logits/chosen": -1.5938910245895386, + "logits/rejected": -2.781965494155884, + "logps/chosen": -215.81610107421875, + "logps/rejected": -482.53887939453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.448391914367676, + "rewards/margins": 8.833415031433105, + "rewards/rejected": -18.28180694580078, + "step": 13932 + }, + { + "epoch": 2.17, + "learning_rate": 3.9283074848280516e-06, + "logits/chosen": -0.8080517649650574, + "logits/rejected": -2.1561191082000732, + "logps/chosen": -132.52651977539062, + "logps/rejected": -321.8145751953125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.223770141601562, + "rewards/margins": 8.06778335571289, + "rewards/rejected": -18.291553497314453, + "step": 13933 + }, + { + "epoch": 2.17, + "learning_rate": 3.9275740442969034e-06, + "logits/chosen": -1.3082201480865479, + "logits/rejected": -2.5907785892486572, + "logps/chosen": -204.2073974609375, + "logps/rejected": -421.57635498046875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.183629035949707, + "rewards/margins": 10.416290283203125, + "rewards/rejected": -17.599918365478516, + "step": 13934 + }, + { + "epoch": 2.17, + "learning_rate": 3.926840603765756e-06, + "logits/chosen": -1.8864048719406128, + "logits/rejected": -2.7473561763763428, + "logps/chosen": -517.2442626953125, + "logps/rejected": -483.2691650390625, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.63300895690918, + "rewards/margins": 6.60989236831665, + "rewards/rejected": -17.242900848388672, + "step": 13935 + }, + { + "epoch": 2.17, + "learning_rate": 3.926107163234608e-06, + "logits/chosen": -1.7135710716247559, + "logits/rejected": -2.9495654106140137, + "logps/chosen": -106.86463928222656, + "logps/rejected": -374.78277587890625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.30776596069336, + "rewards/margins": 6.1479034423828125, + "rewards/rejected": -15.455669403076172, + "step": 13936 + }, + { + "epoch": 2.17, + "learning_rate": 3.92537372270346e-06, + "logits/chosen": -2.9209537506103516, + "logits/rejected": -2.949559450149536, + "logps/chosen": -196.73062133789062, + "logps/rejected": -164.1886444091797, + "loss": 0.1241, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.453592300415039, + "rewards/margins": 4.276167392730713, + "rewards/rejected": -12.729759216308594, + "step": 13937 + }, + { + "epoch": 2.17, + "learning_rate": 3.924640282172312e-06, + "logits/chosen": -2.8063230514526367, + "logits/rejected": -2.950529098510742, + "logps/chosen": -508.94287109375, + "logps/rejected": -711.8958740234375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.322601318359375, + "rewards/margins": 7.556740760803223, + "rewards/rejected": -14.879343032836914, + "step": 13938 + }, + { + "epoch": 2.17, + "learning_rate": 3.923906841641164e-06, + "logits/chosen": -2.373258590698242, + "logits/rejected": -2.4122214317321777, + "logps/chosen": -220.62493896484375, + "logps/rejected": -350.4326171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.215346813201904, + "rewards/margins": 10.066739082336426, + "rewards/rejected": -17.282085418701172, + "step": 13939 + }, + { + "epoch": 2.17, + "learning_rate": 3.923173401110016e-06, + "logits/chosen": -2.7783701419830322, + "logits/rejected": -2.2032103538513184, + "logps/chosen": -285.1866455078125, + "logps/rejected": -245.38836669921875, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.035539627075195, + "rewards/margins": 8.453110694885254, + "rewards/rejected": -17.488651275634766, + "step": 13940 + }, + { + "epoch": 2.17, + "learning_rate": 3.922439960578868e-06, + "logits/chosen": -2.042372703552246, + "logits/rejected": -2.7964770793914795, + "logps/chosen": -435.2577819824219, + "logps/rejected": -518.664306640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.375205993652344, + "rewards/margins": 9.705718994140625, + "rewards/rejected": -21.08092498779297, + "step": 13941 + }, + { + "epoch": 2.17, + "learning_rate": 3.92170652004772e-06, + "logits/chosen": -2.7763590812683105, + "logits/rejected": -2.805436372756958, + "logps/chosen": -162.40870666503906, + "logps/rejected": -317.810791015625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.417301177978516, + "rewards/margins": 8.757580757141113, + "rewards/rejected": -14.174881935119629, + "step": 13942 + }, + { + "epoch": 2.17, + "learning_rate": 3.920973079516572e-06, + "logits/chosen": -1.8124754428863525, + "logits/rejected": -2.48189377784729, + "logps/chosen": -637.096923828125, + "logps/rejected": -543.4711303710938, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.192680358886719, + "rewards/margins": 13.088627815246582, + "rewards/rejected": -25.281307220458984, + "step": 13943 + }, + { + "epoch": 2.17, + "learning_rate": 3.920239638985425e-06, + "logits/chosen": -1.5618215799331665, + "logits/rejected": -2.2553300857543945, + "logps/chosen": -177.32650756835938, + "logps/rejected": -429.3095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.235553741455078, + "rewards/margins": 13.271059036254883, + "rewards/rejected": -21.50661277770996, + "step": 13944 + }, + { + "epoch": 2.17, + "learning_rate": 3.919506198454277e-06, + "logits/chosen": -1.7051072120666504, + "logits/rejected": -2.7450690269470215, + "logps/chosen": -321.66107177734375, + "logps/rejected": -832.158935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.386149883270264, + "rewards/margins": 11.821882247924805, + "rewards/rejected": -19.208032608032227, + "step": 13945 + }, + { + "epoch": 2.17, + "learning_rate": 3.9187727579231285e-06, + "logits/chosen": -2.0625932216644287, + "logits/rejected": -2.465308666229248, + "logps/chosen": -266.70538330078125, + "logps/rejected": -425.05511474609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.502850532531738, + "rewards/margins": 9.439228057861328, + "rewards/rejected": -19.94207763671875, + "step": 13946 + }, + { + "epoch": 2.17, + "learning_rate": 3.91803931739198e-06, + "logits/chosen": -1.224098563194275, + "logits/rejected": -2.1052229404449463, + "logps/chosen": -142.09072875976562, + "logps/rejected": -444.2840270996094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.26570463180542, + "rewards/margins": 13.214363098144531, + "rewards/rejected": -20.48006820678711, + "step": 13947 + }, + { + "epoch": 2.17, + "learning_rate": 3.917305876860833e-06, + "logits/chosen": -1.9335590600967407, + "logits/rejected": -2.5517189502716064, + "logps/chosen": -278.1808166503906, + "logps/rejected": -327.581787109375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.259809494018555, + "rewards/margins": 7.742992401123047, + "rewards/rejected": -18.0028018951416, + "step": 13948 + }, + { + "epoch": 2.17, + "learning_rate": 3.916572436329686e-06, + "logits/chosen": -2.669813871383667, + "logits/rejected": -2.947659492492676, + "logps/chosen": -129.18417358398438, + "logps/rejected": -307.0213928222656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.845301628112793, + "rewards/margins": 9.750982284545898, + "rewards/rejected": -16.596284866333008, + "step": 13949 + }, + { + "epoch": 2.17, + "learning_rate": 3.915838995798538e-06, + "logits/chosen": -2.522430419921875, + "logits/rejected": -2.254586696624756, + "logps/chosen": -328.3335266113281, + "logps/rejected": -484.3771057128906, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.577106475830078, + "rewards/margins": 11.412622451782227, + "rewards/rejected": -21.989727020263672, + "step": 13950 + }, + { + "epoch": 2.17, + "learning_rate": 3.91510555526739e-06, + "logits/chosen": -2.398813009262085, + "logits/rejected": -2.1133527755737305, + "logps/chosen": -589.2487182617188, + "logps/rejected": -443.75665283203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.126263618469238, + "rewards/margins": 8.786338806152344, + "rewards/rejected": -20.912601470947266, + "step": 13951 + }, + { + "epoch": 2.17, + "learning_rate": 3.9143721147362415e-06, + "logits/chosen": -0.8827322721481323, + "logits/rejected": -2.4768199920654297, + "logps/chosen": -184.12330627441406, + "logps/rejected": -296.3053283691406, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.429436683654785, + "rewards/margins": 6.42349100112915, + "rewards/rejected": -16.852928161621094, + "step": 13952 + }, + { + "epoch": 2.17, + "learning_rate": 3.913638674205094e-06, + "logits/chosen": -2.6230430603027344, + "logits/rejected": -2.960315465927124, + "logps/chosen": -84.97068786621094, + "logps/rejected": -278.30316162109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1711812019348145, + "rewards/margins": 8.700150489807129, + "rewards/rejected": -14.871332168579102, + "step": 13953 + }, + { + "epoch": 2.17, + "learning_rate": 3.912905233673946e-06, + "logits/chosen": -2.6240592002868652, + "logits/rejected": -2.2857296466827393, + "logps/chosen": -422.5037536621094, + "logps/rejected": -406.0932922363281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.680506706237793, + "rewards/margins": 8.833398818969727, + "rewards/rejected": -15.513906478881836, + "step": 13954 + }, + { + "epoch": 2.17, + "learning_rate": 3.912171793142798e-06, + "logits/chosen": -2.44224214553833, + "logits/rejected": -2.6616382598876953, + "logps/chosen": -329.8807678222656, + "logps/rejected": -358.8275451660156, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.161717414855957, + "rewards/margins": 6.288854598999023, + "rewards/rejected": -18.450572967529297, + "step": 13955 + }, + { + "epoch": 2.17, + "learning_rate": 3.91143835261165e-06, + "logits/chosen": -1.5372674465179443, + "logits/rejected": -2.219897985458374, + "logps/chosen": -138.38699340820312, + "logps/rejected": -291.7492980957031, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.216806411743164, + "rewards/margins": 5.886355876922607, + "rewards/rejected": -17.103160858154297, + "step": 13956 + }, + { + "epoch": 2.17, + "learning_rate": 3.910704912080502e-06, + "logits/chosen": -2.500288963317871, + "logits/rejected": -2.8339836597442627, + "logps/chosen": -122.33600616455078, + "logps/rejected": -427.6114196777344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.23294734954834, + "rewards/margins": 9.518877029418945, + "rewards/rejected": -16.75182342529297, + "step": 13957 + }, + { + "epoch": 2.17, + "learning_rate": 3.9099714715493544e-06, + "logits/chosen": -2.043947696685791, + "logits/rejected": -2.257988691329956, + "logps/chosen": -249.39773559570312, + "logps/rejected": -352.12841796875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.616781234741211, + "rewards/margins": 7.137346267700195, + "rewards/rejected": -16.754127502441406, + "step": 13958 + }, + { + "epoch": 2.17, + "learning_rate": 3.909238031018206e-06, + "logits/chosen": -0.6884073615074158, + "logits/rejected": -2.0728368759155273, + "logps/chosen": -221.6754913330078, + "logps/rejected": -668.6473999023438, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.941902160644531, + "rewards/margins": 13.944816589355469, + "rewards/rejected": -25.88671875, + "step": 13959 + }, + { + "epoch": 2.17, + "learning_rate": 3.908504590487058e-06, + "logits/chosen": -2.0855391025543213, + "logits/rejected": -2.674835205078125, + "logps/chosen": -282.2892761230469, + "logps/rejected": -514.616455078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.035805702209473, + "rewards/margins": 13.135786056518555, + "rewards/rejected": -18.171592712402344, + "step": 13960 + }, + { + "epoch": 2.17, + "learning_rate": 3.90777114995591e-06, + "logits/chosen": -2.202240228652954, + "logits/rejected": -2.4341390132904053, + "logps/chosen": -202.89422607421875, + "logps/rejected": -323.4342956542969, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.140357971191406, + "rewards/margins": 7.784355163574219, + "rewards/rejected": -16.924713134765625, + "step": 13961 + }, + { + "epoch": 2.17, + "learning_rate": 3.907037709424763e-06, + "logits/chosen": -2.6551642417907715, + "logits/rejected": -2.514631509780884, + "logps/chosen": -417.64801025390625, + "logps/rejected": -483.15386962890625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.23967170715332, + "rewards/margins": 6.983489990234375, + "rewards/rejected": -15.223161697387695, + "step": 13962 + }, + { + "epoch": 2.17, + "learning_rate": 3.906304268893615e-06, + "logits/chosen": -1.7691683769226074, + "logits/rejected": -2.711252450942993, + "logps/chosen": -209.4883575439453, + "logps/rejected": -460.14190673828125, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.220609664916992, + "rewards/margins": 6.261218070983887, + "rewards/rejected": -15.481826782226562, + "step": 13963 + }, + { + "epoch": 2.17, + "learning_rate": 3.9055708283624666e-06, + "logits/chosen": -1.237892985343933, + "logits/rejected": -2.401484966278076, + "logps/chosen": -243.08663940429688, + "logps/rejected": -684.4229125976562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.41843318939209, + "rewards/margins": 19.34164810180664, + "rewards/rejected": -27.760080337524414, + "step": 13964 + }, + { + "epoch": 2.17, + "learning_rate": 3.904837387831319e-06, + "logits/chosen": -2.795130491256714, + "logits/rejected": -2.9133715629577637, + "logps/chosen": -148.49642944335938, + "logps/rejected": -427.83917236328125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.487462043762207, + "rewards/margins": 7.4745192527771, + "rewards/rejected": -18.96198081970215, + "step": 13965 + }, + { + "epoch": 2.17, + "learning_rate": 3.904103947300171e-06, + "logits/chosen": -1.6314555406570435, + "logits/rejected": -2.1980092525482178, + "logps/chosen": -357.596435546875, + "logps/rejected": -605.524169921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.38955307006836, + "rewards/margins": 9.564960479736328, + "rewards/rejected": -22.954513549804688, + "step": 13966 + }, + { + "epoch": 2.17, + "learning_rate": 3.903370506769024e-06, + "logits/chosen": -2.6140294075012207, + "logits/rejected": -1.9741460084915161, + "logps/chosen": -606.56640625, + "logps/rejected": -556.04296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.60703182220459, + "rewards/margins": 11.327503204345703, + "rewards/rejected": -20.93453598022461, + "step": 13967 + }, + { + "epoch": 2.17, + "learning_rate": 3.902637066237876e-06, + "logits/chosen": -2.462998628616333, + "logits/rejected": -2.522200107574463, + "logps/chosen": -210.58221435546875, + "logps/rejected": -407.3198547363281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.174016952514648, + "rewards/margins": 11.811527252197266, + "rewards/rejected": -21.985544204711914, + "step": 13968 + }, + { + "epoch": 2.17, + "learning_rate": 3.901903625706728e-06, + "logits/chosen": -1.715796709060669, + "logits/rejected": -2.5863547325134277, + "logps/chosen": -199.62734985351562, + "logps/rejected": -418.30023193359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.688454627990723, + "rewards/margins": 9.686004638671875, + "rewards/rejected": -18.37445831298828, + "step": 13969 + }, + { + "epoch": 2.17, + "learning_rate": 3.9011701851755795e-06, + "logits/chosen": -2.1592698097229004, + "logits/rejected": -2.419614553451538, + "logps/chosen": -395.4834899902344, + "logps/rejected": -515.3489990234375, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.050582885742188, + "rewards/margins": 6.903433322906494, + "rewards/rejected": -18.954017639160156, + "step": 13970 + }, + { + "epoch": 2.17, + "learning_rate": 3.900436744644432e-06, + "logits/chosen": -1.8396811485290527, + "logits/rejected": -2.5400705337524414, + "logps/chosen": -212.82666015625, + "logps/rejected": -567.1156005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.671439170837402, + "rewards/margins": 13.833955764770508, + "rewards/rejected": -22.505395889282227, + "step": 13971 + }, + { + "epoch": 2.17, + "learning_rate": 3.899703304113284e-06, + "logits/chosen": -2.671860456466675, + "logits/rejected": -1.0667160749435425, + "logps/chosen": -293.0480041503906, + "logps/rejected": -260.7385559082031, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.075998306274414, + "rewards/margins": 6.93526029586792, + "rewards/rejected": -14.011259078979492, + "step": 13972 + }, + { + "epoch": 2.17, + "learning_rate": 3.898969863582136e-06, + "logits/chosen": -1.8963040113449097, + "logits/rejected": -2.6898014545440674, + "logps/chosen": -423.017333984375, + "logps/rejected": -485.9600830078125, + "loss": 0.1721, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.289541244506836, + "rewards/margins": 4.22964334487915, + "rewards/rejected": -17.519184112548828, + "step": 13973 + }, + { + "epoch": 2.17, + "learning_rate": 3.898236423050988e-06, + "logits/chosen": -2.611297130584717, + "logits/rejected": -2.9768173694610596, + "logps/chosen": -271.13568115234375, + "logps/rejected": -370.9166259765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.818009853363037, + "rewards/margins": 9.484804153442383, + "rewards/rejected": -17.302814483642578, + "step": 13974 + }, + { + "epoch": 2.17, + "learning_rate": 3.89750298251984e-06, + "logits/chosen": -2.326141357421875, + "logits/rejected": -2.9298553466796875, + "logps/chosen": -141.5748291015625, + "logps/rejected": -320.48052978515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.480107307434082, + "rewards/margins": 10.918182373046875, + "rewards/rejected": -16.398290634155273, + "step": 13975 + }, + { + "epoch": 2.17, + "learning_rate": 3.8967695419886925e-06, + "logits/chosen": -1.8182181119918823, + "logits/rejected": -2.638439416885376, + "logps/chosen": -257.1607360839844, + "logps/rejected": -555.4109497070312, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.537215232849121, + "rewards/margins": 7.500432014465332, + "rewards/rejected": -16.037647247314453, + "step": 13976 + }, + { + "epoch": 2.17, + "learning_rate": 3.896036101457544e-06, + "logits/chosen": -1.139407753944397, + "logits/rejected": -1.9542262554168701, + "logps/chosen": -195.64749145507812, + "logps/rejected": -401.74346923828125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.496304512023926, + "rewards/margins": 8.108065605163574, + "rewards/rejected": -17.6043701171875, + "step": 13977 + }, + { + "epoch": 2.17, + "learning_rate": 3.895302660926396e-06, + "logits/chosen": -2.582977294921875, + "logits/rejected": -1.9400476217269897, + "logps/chosen": -209.4403839111328, + "logps/rejected": -355.84564208984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.238809108734131, + "rewards/margins": 12.33969783782959, + "rewards/rejected": -17.578506469726562, + "step": 13978 + }, + { + "epoch": 2.17, + "learning_rate": 3.894569220395248e-06, + "logits/chosen": -2.6121208667755127, + "logits/rejected": -2.3144888877868652, + "logps/chosen": -381.1451416015625, + "logps/rejected": -424.38519287109375, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.251773834228516, + "rewards/margins": 8.56797981262207, + "rewards/rejected": -14.819753646850586, + "step": 13979 + }, + { + "epoch": 2.17, + "learning_rate": 3.893835779864101e-06, + "logits/chosen": -2.5579237937927246, + "logits/rejected": -2.4857683181762695, + "logps/chosen": -490.2510681152344, + "logps/rejected": -452.78125, + "loss": 0.0852, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.704551696777344, + "rewards/margins": 4.837881565093994, + "rewards/rejected": -15.54243278503418, + "step": 13980 + }, + { + "epoch": 2.17, + "learning_rate": 3.893102339332953e-06, + "logits/chosen": -2.3709590435028076, + "logits/rejected": -1.925447940826416, + "logps/chosen": -130.68673706054688, + "logps/rejected": -371.493896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6085662841796875, + "rewards/margins": 12.916085243225098, + "rewards/rejected": -20.52465057373047, + "step": 13981 + }, + { + "epoch": 2.17, + "learning_rate": 3.8923688988018054e-06, + "logits/chosen": -1.9500608444213867, + "logits/rejected": -2.7231836318969727, + "logps/chosen": -212.33229064941406, + "logps/rejected": -567.3180541992188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.444361686706543, + "rewards/margins": 11.629637718200684, + "rewards/rejected": -23.073999404907227, + "step": 13982 + }, + { + "epoch": 2.17, + "learning_rate": 3.891635458270657e-06, + "logits/chosen": -2.432922124862671, + "logits/rejected": -2.775864601135254, + "logps/chosen": -296.2293395996094, + "logps/rejected": -358.08599853515625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.644084453582764, + "rewards/margins": 7.916855335235596, + "rewards/rejected": -15.56093978881836, + "step": 13983 + }, + { + "epoch": 2.17, + "learning_rate": 3.89090201773951e-06, + "logits/chosen": -1.701371431350708, + "logits/rejected": -2.6213772296905518, + "logps/chosen": -199.10726928710938, + "logps/rejected": -752.9151611328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.069498062133789, + "rewards/margins": 10.183305740356445, + "rewards/rejected": -20.252803802490234, + "step": 13984 + }, + { + "epoch": 2.17, + "learning_rate": 3.890168577208362e-06, + "logits/chosen": -1.3801958560943604, + "logits/rejected": -2.582826614379883, + "logps/chosen": -156.92616271972656, + "logps/rejected": -474.7132568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.901793479919434, + "rewards/margins": 11.166778564453125, + "rewards/rejected": -20.068572998046875, + "step": 13985 + }, + { + "epoch": 2.18, + "learning_rate": 3.889435136677214e-06, + "logits/chosen": -2.390648365020752, + "logits/rejected": -2.7260963916778564, + "logps/chosen": -418.95416259765625, + "logps/rejected": -659.6305541992188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.577897071838379, + "rewards/margins": 10.012751579284668, + "rewards/rejected": -18.590648651123047, + "step": 13986 + }, + { + "epoch": 2.18, + "learning_rate": 3.888701696146066e-06, + "logits/chosen": -2.5832624435424805, + "logits/rejected": -2.766085386276245, + "logps/chosen": -507.4100341796875, + "logps/rejected": -404.9954833984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.676620483398438, + "rewards/margins": 7.892706871032715, + "rewards/rejected": -19.56932830810547, + "step": 13987 + }, + { + "epoch": 2.18, + "learning_rate": 3.8879682556149176e-06, + "logits/chosen": -2.6274969577789307, + "logits/rejected": -2.1252458095550537, + "logps/chosen": -886.7857666015625, + "logps/rejected": -769.0121459960938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -16.014719009399414, + "rewards/margins": 10.053071022033691, + "rewards/rejected": -26.067790985107422, + "step": 13988 + }, + { + "epoch": 2.18, + "learning_rate": 3.88723481508377e-06, + "logits/chosen": -2.692728281021118, + "logits/rejected": -2.738560199737549, + "logps/chosen": -156.33523559570312, + "logps/rejected": -344.5041198730469, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.643522262573242, + "rewards/margins": 12.940018653869629, + "rewards/rejected": -21.583541870117188, + "step": 13989 + }, + { + "epoch": 2.18, + "learning_rate": 3.886501374552622e-06, + "logits/chosen": -1.9771755933761597, + "logits/rejected": -2.744243860244751, + "logps/chosen": -200.95301818847656, + "logps/rejected": -383.179931640625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.907256126403809, + "rewards/margins": 7.7647552490234375, + "rewards/rejected": -17.672012329101562, + "step": 13990 + }, + { + "epoch": 2.18, + "learning_rate": 3.885767934021474e-06, + "logits/chosen": -1.3101991415023804, + "logits/rejected": -2.367077589035034, + "logps/chosen": -153.50155639648438, + "logps/rejected": -417.7904052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3982648849487305, + "rewards/margins": 13.324016571044922, + "rewards/rejected": -20.72228240966797, + "step": 13991 + }, + { + "epoch": 2.18, + "learning_rate": 3.885034493490326e-06, + "logits/chosen": -2.9094793796539307, + "logits/rejected": -2.2123115062713623, + "logps/chosen": -451.2821044921875, + "logps/rejected": -477.0728759765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.327727317810059, + "rewards/margins": 9.04701042175293, + "rewards/rejected": -15.374738693237305, + "step": 13992 + }, + { + "epoch": 2.18, + "learning_rate": 3.884301052959179e-06, + "logits/chosen": -2.523939847946167, + "logits/rejected": -2.7377054691314697, + "logps/chosen": -160.8818359375, + "logps/rejected": -315.44207763671875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.621866226196289, + "rewards/margins": 9.184060096740723, + "rewards/rejected": -17.805927276611328, + "step": 13993 + }, + { + "epoch": 2.18, + "learning_rate": 3.8835676124280305e-06, + "logits/chosen": -1.9429473876953125, + "logits/rejected": -2.6505017280578613, + "logps/chosen": -228.4876708984375, + "logps/rejected": -373.0045471191406, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.283136367797852, + "rewards/margins": 7.276845932006836, + "rewards/rejected": -17.559982299804688, + "step": 13994 + }, + { + "epoch": 2.18, + "learning_rate": 3.882834171896882e-06, + "logits/chosen": -2.648979663848877, + "logits/rejected": -2.7825419902801514, + "logps/chosen": -468.7371520996094, + "logps/rejected": -787.7611694335938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1101579666137695, + "rewards/margins": 9.524641036987305, + "rewards/rejected": -16.63479995727539, + "step": 13995 + }, + { + "epoch": 2.18, + "learning_rate": 3.882100731365734e-06, + "logits/chosen": -1.7830673456192017, + "logits/rejected": -2.6517279148101807, + "logps/chosen": -282.68792724609375, + "logps/rejected": -306.380859375, + "loss": 0.702, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.657170295715332, + "rewards/margins": 3.0032734870910645, + "rewards/rejected": -14.660444259643555, + "step": 13996 + }, + { + "epoch": 2.18, + "learning_rate": 3.881367290834586e-06, + "logits/chosen": -2.403923511505127, + "logits/rejected": -2.0313708782196045, + "logps/chosen": -185.3640594482422, + "logps/rejected": -254.94461059570312, + "loss": 2.2106, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.941638946533203, + "rewards/margins": 2.388498306274414, + "rewards/rejected": -16.330137252807617, + "step": 13997 + }, + { + "epoch": 2.18, + "learning_rate": 3.880633850303439e-06, + "logits/chosen": -2.1332545280456543, + "logits/rejected": -2.039834976196289, + "logps/chosen": -156.97760009765625, + "logps/rejected": -280.37890625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.813441276550293, + "rewards/margins": 7.361814498901367, + "rewards/rejected": -20.175254821777344, + "step": 13998 + }, + { + "epoch": 2.18, + "learning_rate": 3.879900409772292e-06, + "logits/chosen": -1.2931678295135498, + "logits/rejected": -2.5277748107910156, + "logps/chosen": -271.0860595703125, + "logps/rejected": -457.27496337890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.892481803894043, + "rewards/margins": 8.7386474609375, + "rewards/rejected": -15.631128311157227, + "step": 13999 + }, + { + "epoch": 2.18, + "learning_rate": 3.8791669692411435e-06, + "logits/chosen": -1.9814295768737793, + "logits/rejected": -2.7524750232696533, + "logps/chosen": -90.2125015258789, + "logps/rejected": -215.30682373046875, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.802832126617432, + "rewards/margins": 7.018711566925049, + "rewards/rejected": -14.82154369354248, + "step": 14000 + }, + { + "epoch": 2.18, + "learning_rate": 3.878433528709995e-06, + "logits/chosen": -1.4887176752090454, + "logits/rejected": -2.2462363243103027, + "logps/chosen": -116.77304077148438, + "logps/rejected": -338.0479736328125, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.38318920135498, + "rewards/margins": 7.002613067626953, + "rewards/rejected": -17.38580322265625, + "step": 14001 + }, + { + "epoch": 2.18, + "learning_rate": 3.877700088178848e-06, + "logits/chosen": -2.4878311157226562, + "logits/rejected": -1.8548246622085571, + "logps/chosen": -416.1631774902344, + "logps/rejected": -326.3556823730469, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.772610664367676, + "rewards/margins": 8.121804237365723, + "rewards/rejected": -18.8944149017334, + "step": 14002 + }, + { + "epoch": 2.18, + "learning_rate": 3.8769666476477e-06, + "logits/chosen": -1.3230781555175781, + "logits/rejected": -2.615837335586548, + "logps/chosen": -275.973876953125, + "logps/rejected": -580.345458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.748263359069824, + "rewards/margins": 11.471251487731934, + "rewards/rejected": -18.219514846801758, + "step": 14003 + }, + { + "epoch": 2.18, + "learning_rate": 3.876233207116552e-06, + "logits/chosen": -3.0771589279174805, + "logits/rejected": -3.0894389152526855, + "logps/chosen": -309.643798828125, + "logps/rejected": -400.3111572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.810915946960449, + "rewards/margins": 11.858258247375488, + "rewards/rejected": -18.669174194335938, + "step": 14004 + }, + { + "epoch": 2.18, + "learning_rate": 3.875499766585404e-06, + "logits/chosen": -2.860931634902954, + "logits/rejected": -1.2509301900863647, + "logps/chosen": -403.3025207519531, + "logps/rejected": -444.206298828125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.484758377075195, + "rewards/margins": 7.384455680847168, + "rewards/rejected": -14.869214057922363, + "step": 14005 + }, + { + "epoch": 2.18, + "learning_rate": 3.874766326054256e-06, + "logits/chosen": -1.79109787940979, + "logits/rejected": -1.7459900379180908, + "logps/chosen": -347.1739196777344, + "logps/rejected": -277.4359130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.655327796936035, + "rewards/margins": 10.209428787231445, + "rewards/rejected": -15.864757537841797, + "step": 14006 + }, + { + "epoch": 2.18, + "learning_rate": 3.874032885523108e-06, + "logits/chosen": -2.52461314201355, + "logits/rejected": -2.2851598262786865, + "logps/chosen": -176.71371459960938, + "logps/rejected": -437.92987060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.256417274475098, + "rewards/margins": 12.699398040771484, + "rewards/rejected": -20.9558162689209, + "step": 14007 + }, + { + "epoch": 2.18, + "learning_rate": 3.87329944499196e-06, + "logits/chosen": -2.695223093032837, + "logits/rejected": -2.7813093662261963, + "logps/chosen": -197.9612579345703, + "logps/rejected": -230.853271484375, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.537095069885254, + "rewards/margins": 6.590696811676025, + "rewards/rejected": -13.127791404724121, + "step": 14008 + }, + { + "epoch": 2.18, + "learning_rate": 3.872566004460812e-06, + "logits/chosen": -1.9299532175064087, + "logits/rejected": -2.408407688140869, + "logps/chosen": -147.86207580566406, + "logps/rejected": -333.5753173828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.150593757629395, + "rewards/margins": 9.905797004699707, + "rewards/rejected": -19.0563907623291, + "step": 14009 + }, + { + "epoch": 2.18, + "learning_rate": 3.871832563929664e-06, + "logits/chosen": -1.5961867570877075, + "logits/rejected": -2.3188138008117676, + "logps/chosen": -187.71353149414062, + "logps/rejected": -418.7715148925781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.264250755310059, + "rewards/margins": 13.286964416503906, + "rewards/rejected": -20.55121612548828, + "step": 14010 + }, + { + "epoch": 2.18, + "learning_rate": 3.871099123398517e-06, + "logits/chosen": -2.9769351482391357, + "logits/rejected": -2.316762924194336, + "logps/chosen": -559.0408325195312, + "logps/rejected": -428.892578125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.219989776611328, + "rewards/margins": 9.948358535766602, + "rewards/rejected": -19.168346405029297, + "step": 14011 + }, + { + "epoch": 2.18, + "learning_rate": 3.8703656828673686e-06, + "logits/chosen": -2.345219850540161, + "logits/rejected": -2.114487886428833, + "logps/chosen": -364.1501159667969, + "logps/rejected": -405.5164794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.583832740783691, + "rewards/margins": 12.319436073303223, + "rewards/rejected": -20.903268814086914, + "step": 14012 + }, + { + "epoch": 2.18, + "learning_rate": 3.8696322423362205e-06, + "logits/chosen": -2.873330593109131, + "logits/rejected": -2.720996379852295, + "logps/chosen": -300.83203125, + "logps/rejected": -387.38751220703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.650454998016357, + "rewards/margins": 11.785571098327637, + "rewards/rejected": -18.43602752685547, + "step": 14013 + }, + { + "epoch": 2.18, + "learning_rate": 3.868898801805072e-06, + "logits/chosen": -2.2015185356140137, + "logits/rejected": -1.5859395265579224, + "logps/chosen": -219.11471557617188, + "logps/rejected": -326.74346923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.267374992370605, + "rewards/margins": 12.423662185668945, + "rewards/rejected": -20.691036224365234, + "step": 14014 + }, + { + "epoch": 2.18, + "learning_rate": 3.868165361273925e-06, + "logits/chosen": -2.2927825450897217, + "logits/rejected": -2.429898738861084, + "logps/chosen": -285.54058837890625, + "logps/rejected": -408.83660888671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.96507453918457, + "rewards/margins": 7.857223033905029, + "rewards/rejected": -18.822296142578125, + "step": 14015 + }, + { + "epoch": 2.18, + "learning_rate": 3.867431920742778e-06, + "logits/chosen": -1.1453908681869507, + "logits/rejected": -2.257056474685669, + "logps/chosen": -146.7730712890625, + "logps/rejected": -412.00396728515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.847054481506348, + "rewards/margins": 13.637418746948242, + "rewards/rejected": -22.484474182128906, + "step": 14016 + }, + { + "epoch": 2.18, + "learning_rate": 3.86669848021163e-06, + "logits/chosen": -2.7955121994018555, + "logits/rejected": -2.633114814758301, + "logps/chosen": -561.9107666015625, + "logps/rejected": -560.883544921875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.88684368133545, + "rewards/margins": 6.300177097320557, + "rewards/rejected": -16.187021255493164, + "step": 14017 + }, + { + "epoch": 2.18, + "learning_rate": 3.8659650396804815e-06, + "logits/chosen": -1.6690740585327148, + "logits/rejected": -1.978196144104004, + "logps/chosen": -198.78009033203125, + "logps/rejected": -576.8375244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.931272506713867, + "rewards/margins": 12.429497718811035, + "rewards/rejected": -21.36077117919922, + "step": 14018 + }, + { + "epoch": 2.18, + "learning_rate": 3.865231599149333e-06, + "logits/chosen": -1.8119077682495117, + "logits/rejected": -2.696357011795044, + "logps/chosen": -173.31187438964844, + "logps/rejected": -691.4597778320312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.862554550170898, + "rewards/margins": 9.524637222290039, + "rewards/rejected": -20.387191772460938, + "step": 14019 + }, + { + "epoch": 2.18, + "learning_rate": 3.864498158618186e-06, + "logits/chosen": -2.8587050437927246, + "logits/rejected": -1.9452855587005615, + "logps/chosen": -429.1369323730469, + "logps/rejected": -456.85821533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.006359100341797, + "rewards/margins": 12.838232040405273, + "rewards/rejected": -21.844589233398438, + "step": 14020 + }, + { + "epoch": 2.18, + "learning_rate": 3.863764718087038e-06, + "logits/chosen": -1.810362696647644, + "logits/rejected": -2.8112640380859375, + "logps/chosen": -164.271484375, + "logps/rejected": -528.2332763671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.703582763671875, + "rewards/margins": 11.179953575134277, + "rewards/rejected": -17.88353729248047, + "step": 14021 + }, + { + "epoch": 2.18, + "learning_rate": 3.86303127755589e-06, + "logits/chosen": -2.702094316482544, + "logits/rejected": -2.6820082664489746, + "logps/chosen": -300.8952331542969, + "logps/rejected": -267.76116943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.064647674560547, + "rewards/margins": 11.534175872802734, + "rewards/rejected": -19.59882354736328, + "step": 14022 + }, + { + "epoch": 2.18, + "learning_rate": 3.862297837024742e-06, + "logits/chosen": -2.886737108230591, + "logits/rejected": -2.5855064392089844, + "logps/chosen": -320.2138671875, + "logps/rejected": -276.7833251953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.140412330627441, + "rewards/margins": 7.909451484680176, + "rewards/rejected": -18.049863815307617, + "step": 14023 + }, + { + "epoch": 2.18, + "learning_rate": 3.861564396493594e-06, + "logits/chosen": -2.2988977432250977, + "logits/rejected": -2.5998644828796387, + "logps/chosen": -306.03057861328125, + "logps/rejected": -475.89080810546875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.272663116455078, + "rewards/margins": 9.711132049560547, + "rewards/rejected": -18.983795166015625, + "step": 14024 + }, + { + "epoch": 2.18, + "learning_rate": 3.860830955962446e-06, + "logits/chosen": -1.8777772188186646, + "logits/rejected": -2.8790132999420166, + "logps/chosen": -153.50885009765625, + "logps/rejected": -393.5032958984375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.425759315490723, + "rewards/margins": 7.802316188812256, + "rewards/rejected": -16.22807502746582, + "step": 14025 + }, + { + "epoch": 2.18, + "learning_rate": 3.860097515431298e-06, + "logits/chosen": -1.8085434436798096, + "logits/rejected": -2.5553383827209473, + "logps/chosen": -182.3221435546875, + "logps/rejected": -500.42718505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.760239601135254, + "rewards/margins": 15.127551078796387, + "rewards/rejected": -20.88779067993164, + "step": 14026 + }, + { + "epoch": 2.18, + "learning_rate": 3.85936407490015e-06, + "logits/chosen": -2.85788631439209, + "logits/rejected": -2.643691062927246, + "logps/chosen": -490.10064697265625, + "logps/rejected": -515.6680908203125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.132582187652588, + "rewards/margins": 6.500389575958252, + "rewards/rejected": -13.63297176361084, + "step": 14027 + }, + { + "epoch": 2.18, + "learning_rate": 3.858630634369002e-06, + "logits/chosen": -2.727893590927124, + "logits/rejected": -2.8136472702026367, + "logps/chosen": -266.90771484375, + "logps/rejected": -232.3031463623047, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.982158660888672, + "rewards/margins": 5.575517654418945, + "rewards/rejected": -14.557676315307617, + "step": 14028 + }, + { + "epoch": 2.18, + "learning_rate": 3.857897193837855e-06, + "logits/chosen": -2.7365264892578125, + "logits/rejected": -2.631305694580078, + "logps/chosen": -787.9591064453125, + "logps/rejected": -763.9447631835938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.33981990814209, + "rewards/margins": 12.464152336120605, + "rewards/rejected": -18.803972244262695, + "step": 14029 + }, + { + "epoch": 2.18, + "learning_rate": 3.857163753306707e-06, + "logits/chosen": -2.516977310180664, + "logits/rejected": -2.3975183963775635, + "logps/chosen": -347.52593994140625, + "logps/rejected": -359.4352722167969, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.596940994262695, + "rewards/margins": 6.973570823669434, + "rewards/rejected": -15.570511817932129, + "step": 14030 + }, + { + "epoch": 2.18, + "learning_rate": 3.8564303127755585e-06, + "logits/chosen": -2.6931583881378174, + "logits/rejected": -2.8528881072998047, + "logps/chosen": -412.22625732421875, + "logps/rejected": -610.5390014648438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9347801208496094, + "rewards/margins": 13.181720733642578, + "rewards/rejected": -16.116500854492188, + "step": 14031 + }, + { + "epoch": 2.18, + "learning_rate": 3.855696872244411e-06, + "logits/chosen": -2.7626657485961914, + "logits/rejected": -2.924898624420166, + "logps/chosen": -267.7802429199219, + "logps/rejected": -438.02532958984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.516178131103516, + "rewards/margins": 9.438928604125977, + "rewards/rejected": -19.955106735229492, + "step": 14032 + }, + { + "epoch": 2.18, + "learning_rate": 3.854963431713264e-06, + "logits/chosen": -1.8423007726669312, + "logits/rejected": -2.6896438598632812, + "logps/chosen": -404.044189453125, + "logps/rejected": -563.1243896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.077787399291992, + "rewards/margins": 13.331912994384766, + "rewards/rejected": -22.409700393676758, + "step": 14033 + }, + { + "epoch": 2.18, + "learning_rate": 3.854229991182116e-06, + "logits/chosen": -1.7263200283050537, + "logits/rejected": -2.5743277072906494, + "logps/chosen": -254.9934539794922, + "logps/rejected": -406.3067626953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.890046119689941, + "rewards/margins": 9.642814636230469, + "rewards/rejected": -20.532859802246094, + "step": 14034 + }, + { + "epoch": 2.18, + "learning_rate": 3.853496550650968e-06, + "logits/chosen": -2.8642523288726807, + "logits/rejected": -2.0892796516418457, + "logps/chosen": -195.79934692382812, + "logps/rejected": -455.9693908691406, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.699064254760742, + "rewards/margins": 5.743770599365234, + "rewards/rejected": -14.442834854125977, + "step": 14035 + }, + { + "epoch": 2.18, + "learning_rate": 3.85276311011982e-06, + "logits/chosen": -2.8373305797576904, + "logits/rejected": -1.8145785331726074, + "logps/chosen": -757.922607421875, + "logps/rejected": -487.8680114746094, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.416589736938477, + "rewards/margins": 8.010250091552734, + "rewards/rejected": -14.426839828491211, + "step": 14036 + }, + { + "epoch": 2.18, + "learning_rate": 3.8520296695886715e-06, + "logits/chosen": -1.8000935316085815, + "logits/rejected": -2.8408873081207275, + "logps/chosen": -545.6768798828125, + "logps/rejected": -806.1031494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.510573387145996, + "rewards/margins": 12.986927032470703, + "rewards/rejected": -21.497499465942383, + "step": 14037 + }, + { + "epoch": 2.18, + "learning_rate": 3.851296229057524e-06, + "logits/chosen": -1.3688430786132812, + "logits/rejected": -2.3560726642608643, + "logps/chosen": -216.03823852539062, + "logps/rejected": -409.84759521484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.306925773620605, + "rewards/margins": 10.008255958557129, + "rewards/rejected": -21.315181732177734, + "step": 14038 + }, + { + "epoch": 2.18, + "learning_rate": 3.850562788526376e-06, + "logits/chosen": -1.645899772644043, + "logits/rejected": -2.6346523761749268, + "logps/chosen": -328.3115234375, + "logps/rejected": -567.41796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.76724910736084, + "rewards/margins": 10.381192207336426, + "rewards/rejected": -19.148441314697266, + "step": 14039 + }, + { + "epoch": 2.18, + "learning_rate": 3.849829347995228e-06, + "logits/chosen": -2.0439858436584473, + "logits/rejected": -2.833515167236328, + "logps/chosen": -115.0465087890625, + "logps/rejected": -354.410888671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.712246894836426, + "rewards/margins": 9.764368057250977, + "rewards/rejected": -17.47661590576172, + "step": 14040 + }, + { + "epoch": 2.18, + "learning_rate": 3.84909590746408e-06, + "logits/chosen": -2.357239007949829, + "logits/rejected": -2.8713598251342773, + "logps/chosen": -110.3409194946289, + "logps/rejected": -302.9754943847656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.798519134521484, + "rewards/margins": 10.3876314163208, + "rewards/rejected": -18.18614959716797, + "step": 14041 + }, + { + "epoch": 2.18, + "learning_rate": 3.8483624669329325e-06, + "logits/chosen": -2.303478717803955, + "logits/rejected": -2.456711530685425, + "logps/chosen": -427.6427001953125, + "logps/rejected": -545.5609130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.811931610107422, + "rewards/margins": 12.436773300170898, + "rewards/rejected": -24.24870491027832, + "step": 14042 + }, + { + "epoch": 2.18, + "learning_rate": 3.8476290264017844e-06, + "logits/chosen": -2.761992931365967, + "logits/rejected": -2.3354814052581787, + "logps/chosen": -410.5316162109375, + "logps/rejected": -363.6087951660156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.963926315307617, + "rewards/margins": 12.036069869995117, + "rewards/rejected": -21.999996185302734, + "step": 14043 + }, + { + "epoch": 2.18, + "learning_rate": 3.846895585870636e-06, + "logits/chosen": -2.049682378768921, + "logits/rejected": -2.765432357788086, + "logps/chosen": -260.0216979980469, + "logps/rejected": -308.1925354003906, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.840534210205078, + "rewards/margins": 5.53338098526001, + "rewards/rejected": -16.37391471862793, + "step": 14044 + }, + { + "epoch": 2.18, + "learning_rate": 3.846162145339488e-06, + "logits/chosen": -1.1326731443405151, + "logits/rejected": -2.252441644668579, + "logps/chosen": -213.07730102539062, + "logps/rejected": -537.832763671875, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.523148536682129, + "rewards/margins": 9.210336685180664, + "rewards/rejected": -20.73348617553711, + "step": 14045 + }, + { + "epoch": 2.18, + "learning_rate": 3.84542870480834e-06, + "logits/chosen": -2.8025243282318115, + "logits/rejected": -2.913374185562134, + "logps/chosen": -456.0610046386719, + "logps/rejected": -499.3222961425781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.85127067565918, + "rewards/margins": 9.249794960021973, + "rewards/rejected": -16.10106658935547, + "step": 14046 + }, + { + "epoch": 2.18, + "learning_rate": 3.844695264277193e-06, + "logits/chosen": -2.546708106994629, + "logits/rejected": -2.544818639755249, + "logps/chosen": -204.72372436523438, + "logps/rejected": -399.4677429199219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.14866828918457, + "rewards/margins": 9.512822151184082, + "rewards/rejected": -17.661489486694336, + "step": 14047 + }, + { + "epoch": 2.18, + "learning_rate": 3.843961823746045e-06, + "logits/chosen": -3.0414505004882812, + "logits/rejected": -2.467916250228882, + "logps/chosen": -269.2969970703125, + "logps/rejected": -420.8810729980469, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.036787509918213, + "rewards/margins": 10.072200775146484, + "rewards/rejected": -17.108989715576172, + "step": 14048 + }, + { + "epoch": 2.18, + "learning_rate": 3.843228383214897e-06, + "logits/chosen": -2.8459293842315674, + "logits/rejected": -2.6145284175872803, + "logps/chosen": -320.6317443847656, + "logps/rejected": -392.3450927734375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.901264190673828, + "rewards/margins": 7.246830463409424, + "rewards/rejected": -16.148094177246094, + "step": 14049 + }, + { + "epoch": 2.19, + "learning_rate": 3.842494942683749e-06, + "logits/chosen": -1.7323051691055298, + "logits/rejected": -2.7220091819763184, + "logps/chosen": -202.653564453125, + "logps/rejected": -284.26727294921875, + "loss": 0.0663, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.31977653503418, + "rewards/margins": 2.7672393321990967, + "rewards/rejected": -12.087015151977539, + "step": 14050 + }, + { + "epoch": 2.19, + "learning_rate": 3.841761502152602e-06, + "logits/chosen": -2.834803819656372, + "logits/rejected": -2.128270387649536, + "logps/chosen": -789.126953125, + "logps/rejected": -444.43463134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7938750982284546, + "rewards/margins": 10.821283340454102, + "rewards/rejected": -12.615158081054688, + "step": 14051 + }, + { + "epoch": 2.19, + "learning_rate": 3.841028061621454e-06, + "logits/chosen": -2.544053554534912, + "logits/rejected": -2.438114881515503, + "logps/chosen": -233.61961364746094, + "logps/rejected": -321.19171142578125, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.978977203369141, + "rewards/margins": 7.379166126251221, + "rewards/rejected": -14.35814380645752, + "step": 14052 + }, + { + "epoch": 2.19, + "learning_rate": 3.840294621090306e-06, + "logits/chosen": -1.6288033723831177, + "logits/rejected": -2.4453418254852295, + "logps/chosen": -194.04159545898438, + "logps/rejected": -395.9905090332031, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.246439933776855, + "rewards/margins": 7.451659202575684, + "rewards/rejected": -16.69809913635254, + "step": 14053 + }, + { + "epoch": 2.19, + "learning_rate": 3.839561180559158e-06, + "logits/chosen": -0.7759752869606018, + "logits/rejected": -1.9713433980941772, + "logps/chosen": -229.02357482910156, + "logps/rejected": -637.0462646484375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.667646408081055, + "rewards/margins": 15.887619972229004, + "rewards/rejected": -24.555267333984375, + "step": 14054 + }, + { + "epoch": 2.19, + "learning_rate": 3.8388277400280095e-06, + "logits/chosen": -2.6130151748657227, + "logits/rejected": -2.808676242828369, + "logps/chosen": -314.2929992675781, + "logps/rejected": -326.6330261230469, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.11906909942627, + "rewards/margins": 8.045763969421387, + "rewards/rejected": -17.164833068847656, + "step": 14055 + }, + { + "epoch": 2.19, + "learning_rate": 3.838094299496862e-06, + "logits/chosen": -1.4107261896133423, + "logits/rejected": -2.4479315280914307, + "logps/chosen": -160.99343872070312, + "logps/rejected": -319.3848876953125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.184741020202637, + "rewards/margins": 6.013258934020996, + "rewards/rejected": -17.197999954223633, + "step": 14056 + }, + { + "epoch": 2.19, + "learning_rate": 3.837360858965714e-06, + "logits/chosen": -2.702665090560913, + "logits/rejected": -2.781921625137329, + "logps/chosen": -294.62530517578125, + "logps/rejected": -257.54766845703125, + "loss": 0.1011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.24758243560791, + "rewards/margins": 6.016510963439941, + "rewards/rejected": -13.264093399047852, + "step": 14057 + }, + { + "epoch": 2.19, + "learning_rate": 3.836627418434566e-06, + "logits/chosen": -0.6890186667442322, + "logits/rejected": -2.3946878910064697, + "logps/chosen": -158.65309143066406, + "logps/rejected": -580.2822875976562, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.35517692565918, + "rewards/margins": 12.678433418273926, + "rewards/rejected": -25.033611297607422, + "step": 14058 + }, + { + "epoch": 2.19, + "learning_rate": 3.835893977903418e-06, + "logits/chosen": -2.669402599334717, + "logits/rejected": -2.805725574493408, + "logps/chosen": -206.2479705810547, + "logps/rejected": -256.541259765625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.67121696472168, + "rewards/margins": 7.970627307891846, + "rewards/rejected": -15.641843795776367, + "step": 14059 + }, + { + "epoch": 2.19, + "learning_rate": 3.835160537372271e-06, + "logits/chosen": -2.3313615322113037, + "logits/rejected": -0.9396695494651794, + "logps/chosen": -267.6227722167969, + "logps/rejected": -163.32882690429688, + "loss": 1.3114, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.488574028015137, + "rewards/margins": 1.441784381866455, + "rewards/rejected": -13.93035888671875, + "step": 14060 + }, + { + "epoch": 2.19, + "learning_rate": 3.8344270968411225e-06, + "logits/chosen": -2.199063539505005, + "logits/rejected": -2.344416856765747, + "logps/chosen": -304.791015625, + "logps/rejected": -341.70794677734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.852408409118652, + "rewards/margins": 7.610230445861816, + "rewards/rejected": -16.46263885498047, + "step": 14061 + }, + { + "epoch": 2.19, + "learning_rate": 3.833693656309974e-06, + "logits/chosen": -1.9024947881698608, + "logits/rejected": -2.3628199100494385, + "logps/chosen": -215.23695373535156, + "logps/rejected": -401.2293395996094, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.002996444702148, + "rewards/margins": 6.338725566864014, + "rewards/rejected": -16.341720581054688, + "step": 14062 + }, + { + "epoch": 2.19, + "learning_rate": 3.832960215778826e-06, + "logits/chosen": -0.5279245972633362, + "logits/rejected": -2.5723536014556885, + "logps/chosen": -163.8396453857422, + "logps/rejected": -467.5632629394531, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.658214569091797, + "rewards/margins": 10.284761428833008, + "rewards/rejected": -20.942975997924805, + "step": 14063 + }, + { + "epoch": 2.19, + "learning_rate": 3.832226775247678e-06, + "logits/chosen": -2.5022969245910645, + "logits/rejected": -2.7046868801116943, + "logps/chosen": -360.328125, + "logps/rejected": -366.29693603515625, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.581006050109863, + "rewards/margins": 6.190386772155762, + "rewards/rejected": -16.771392822265625, + "step": 14064 + }, + { + "epoch": 2.19, + "learning_rate": 3.831493334716531e-06, + "logits/chosen": -1.8589766025543213, + "logits/rejected": -2.415001630783081, + "logps/chosen": -357.0386657714844, + "logps/rejected": -597.76904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.160530090332031, + "rewards/margins": 10.815238952636719, + "rewards/rejected": -19.97576904296875, + "step": 14065 + }, + { + "epoch": 2.19, + "learning_rate": 3.8307598941853836e-06, + "logits/chosen": -2.3354873657226562, + "logits/rejected": -1.7167489528656006, + "logps/chosen": -201.29226684570312, + "logps/rejected": -229.17950439453125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.458673477172852, + "rewards/margins": 7.5616984367370605, + "rewards/rejected": -18.02037239074707, + "step": 14066 + }, + { + "epoch": 2.19, + "learning_rate": 3.8300264536542354e-06, + "logits/chosen": -1.6337571144104004, + "logits/rejected": -2.837390422821045, + "logps/chosen": -170.05593872070312, + "logps/rejected": -475.0134582519531, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.044820785522461, + "rewards/margins": 9.433982849121094, + "rewards/rejected": -18.478803634643555, + "step": 14067 + }, + { + "epoch": 2.19, + "learning_rate": 3.829293013123087e-06, + "logits/chosen": -1.661780834197998, + "logits/rejected": -2.4959166049957275, + "logps/chosen": -338.55364990234375, + "logps/rejected": -562.2118530273438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.544614791870117, + "rewards/margins": 9.359424591064453, + "rewards/rejected": -18.90403938293457, + "step": 14068 + }, + { + "epoch": 2.19, + "learning_rate": 3.82855957259194e-06, + "logits/chosen": -3.02526593208313, + "logits/rejected": -2.6460776329040527, + "logps/chosen": -132.9501953125, + "logps/rejected": -198.46920776367188, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.764017105102539, + "rewards/margins": 5.4014763832092285, + "rewards/rejected": -14.16549301147461, + "step": 14069 + }, + { + "epoch": 2.19, + "learning_rate": 3.827826132060792e-06, + "logits/chosen": -2.4832496643066406, + "logits/rejected": -2.5890049934387207, + "logps/chosen": -128.28465270996094, + "logps/rejected": -257.9976501464844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.027647972106934, + "rewards/margins": 8.9674072265625, + "rewards/rejected": -16.99505615234375, + "step": 14070 + }, + { + "epoch": 2.19, + "learning_rate": 3.827092691529644e-06, + "logits/chosen": -1.6795376539230347, + "logits/rejected": -2.629438877105713, + "logps/chosen": -414.15252685546875, + "logps/rejected": -753.5443115234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.68890380859375, + "rewards/margins": 15.338350296020508, + "rewards/rejected": -28.027254104614258, + "step": 14071 + }, + { + "epoch": 2.19, + "learning_rate": 3.826359250998496e-06, + "logits/chosen": -1.8370976448059082, + "logits/rejected": -2.459439516067505, + "logps/chosen": -152.60321044921875, + "logps/rejected": -424.5140380859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.276451110839844, + "rewards/margins": 11.166903495788574, + "rewards/rejected": -18.443355560302734, + "step": 14072 + }, + { + "epoch": 2.19, + "learning_rate": 3.825625810467348e-06, + "logits/chosen": -2.766596794128418, + "logits/rejected": -2.452773332595825, + "logps/chosen": -436.5828857421875, + "logps/rejected": -445.79730224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.824533939361572, + "rewards/margins": 11.320352554321289, + "rewards/rejected": -19.144886016845703, + "step": 14073 + }, + { + "epoch": 2.19, + "learning_rate": 3.8248923699362e-06, + "logits/chosen": -2.921753406524658, + "logits/rejected": -2.8524818420410156, + "logps/chosen": -337.3143615722656, + "logps/rejected": -380.0838928222656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.541364669799805, + "rewards/margins": 10.886604309082031, + "rewards/rejected": -21.427968978881836, + "step": 14074 + }, + { + "epoch": 2.19, + "learning_rate": 3.824158929405052e-06, + "logits/chosen": -2.450011730194092, + "logits/rejected": -2.4741601943969727, + "logps/chosen": -100.45362854003906, + "logps/rejected": -343.10064697265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.079298973083496, + "rewards/margins": 11.014666557312012, + "rewards/rejected": -19.093965530395508, + "step": 14075 + }, + { + "epoch": 2.19, + "learning_rate": 3.823425488873904e-06, + "logits/chosen": -1.5289270877838135, + "logits/rejected": -2.8188014030456543, + "logps/chosen": -201.56784057617188, + "logps/rejected": -415.08087158203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.045612335205078, + "rewards/margins": 9.509435653686523, + "rewards/rejected": -18.5550479888916, + "step": 14076 + }, + { + "epoch": 2.19, + "learning_rate": 3.822692048342756e-06, + "logits/chosen": -1.0250470638275146, + "logits/rejected": -2.708427667617798, + "logps/chosen": -141.68035888671875, + "logps/rejected": -378.77557373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.249794960021973, + "rewards/margins": 11.525227546691895, + "rewards/rejected": -19.775022506713867, + "step": 14077 + }, + { + "epoch": 2.19, + "learning_rate": 3.821958607811609e-06, + "logits/chosen": -2.6606197357177734, + "logits/rejected": -3.027909994125366, + "logps/chosen": -148.47384643554688, + "logps/rejected": -285.23455810546875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.701709270477295, + "rewards/margins": 8.836889266967773, + "rewards/rejected": -15.538599014282227, + "step": 14078 + }, + { + "epoch": 2.19, + "learning_rate": 3.8212251672804605e-06, + "logits/chosen": -2.6340339183807373, + "logits/rejected": -2.2695934772491455, + "logps/chosen": -273.9674987792969, + "logps/rejected": -232.8544921875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.583368301391602, + "rewards/margins": 6.292054653167725, + "rewards/rejected": -16.875423431396484, + "step": 14079 + }, + { + "epoch": 2.19, + "learning_rate": 3.820491726749312e-06, + "logits/chosen": -1.6997746229171753, + "logits/rejected": -2.7015652656555176, + "logps/chosen": -142.17474365234375, + "logps/rejected": -485.6229248046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.725128173828125, + "rewards/margins": 9.808128356933594, + "rewards/rejected": -18.53325653076172, + "step": 14080 + }, + { + "epoch": 2.19, + "learning_rate": 3.819758286218164e-06, + "logits/chosen": -2.867231607437134, + "logits/rejected": -2.9891788959503174, + "logps/chosen": -316.590576171875, + "logps/rejected": -393.4989013671875, + "loss": 0.0301, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.382265090942383, + "rewards/margins": 6.5875020027160645, + "rewards/rejected": -17.969768524169922, + "step": 14081 + }, + { + "epoch": 2.19, + "learning_rate": 3.819024845687017e-06, + "logits/chosen": -2.8725926876068115, + "logits/rejected": -3.011746644973755, + "logps/chosen": -120.53956604003906, + "logps/rejected": -340.3052978515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.501941204071045, + "rewards/margins": 9.414359092712402, + "rewards/rejected": -13.916299819946289, + "step": 14082 + }, + { + "epoch": 2.19, + "learning_rate": 3.81829140515587e-06, + "logits/chosen": -2.6037795543670654, + "logits/rejected": -0.6755268573760986, + "logps/chosen": -360.606201171875, + "logps/rejected": -197.93313598632812, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.984673500061035, + "rewards/margins": 8.050548553466797, + "rewards/rejected": -16.035221099853516, + "step": 14083 + }, + { + "epoch": 2.19, + "learning_rate": 3.817557964624722e-06, + "logits/chosen": -2.181440830230713, + "logits/rejected": -2.827310562133789, + "logps/chosen": -129.51150512695312, + "logps/rejected": -447.6885070800781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.957311153411865, + "rewards/margins": 9.444206237792969, + "rewards/rejected": -15.401517868041992, + "step": 14084 + }, + { + "epoch": 2.19, + "learning_rate": 3.8168245240935735e-06, + "logits/chosen": -2.7842562198638916, + "logits/rejected": -2.7003352642059326, + "logps/chosen": -208.52516174316406, + "logps/rejected": -445.6446838378906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.271528720855713, + "rewards/margins": 14.140588760375977, + "rewards/rejected": -21.41211700439453, + "step": 14085 + }, + { + "epoch": 2.19, + "learning_rate": 3.816091083562425e-06, + "logits/chosen": -2.7465567588806152, + "logits/rejected": -1.4662785530090332, + "logps/chosen": -396.48455810546875, + "logps/rejected": -290.9689025878906, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8737244606018066, + "rewards/margins": 11.88180923461914, + "rewards/rejected": -14.755533218383789, + "step": 14086 + }, + { + "epoch": 2.19, + "learning_rate": 3.815357643031278e-06, + "logits/chosen": -1.9698282480239868, + "logits/rejected": -2.629610061645508, + "logps/chosen": -225.97459411621094, + "logps/rejected": -421.1540832519531, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.844029426574707, + "rewards/margins": 7.956564903259277, + "rewards/rejected": -15.800594329833984, + "step": 14087 + }, + { + "epoch": 2.19, + "learning_rate": 3.81462420250013e-06, + "logits/chosen": -1.7614881992340088, + "logits/rejected": -2.4185791015625, + "logps/chosen": -171.55270385742188, + "logps/rejected": -358.62481689453125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.95117473602295, + "rewards/margins": 9.218884468078613, + "rewards/rejected": -19.170059204101562, + "step": 14088 + }, + { + "epoch": 2.19, + "learning_rate": 3.813890761968982e-06, + "logits/chosen": -1.8024617433547974, + "logits/rejected": -2.570291757583618, + "logps/chosen": -270.64227294921875, + "logps/rejected": -451.30865478515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.649770736694336, + "rewards/margins": 9.68693733215332, + "rewards/rejected": -17.336708068847656, + "step": 14089 + }, + { + "epoch": 2.19, + "learning_rate": 3.8131573214378337e-06, + "logits/chosen": -2.422929286956787, + "logits/rejected": -2.895643711090088, + "logps/chosen": -84.130859375, + "logps/rejected": -282.50592041015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.155750274658203, + "rewards/margins": 8.587676048278809, + "rewards/rejected": -15.743426322937012, + "step": 14090 + }, + { + "epoch": 2.19, + "learning_rate": 3.8124238809066864e-06, + "logits/chosen": -2.1241226196289062, + "logits/rejected": -2.7246456146240234, + "logps/chosen": -333.01226806640625, + "logps/rejected": -555.2474365234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.786849021911621, + "rewards/margins": 10.323522567749023, + "rewards/rejected": -20.110370635986328, + "step": 14091 + }, + { + "epoch": 2.19, + "learning_rate": 3.8116904403755383e-06, + "logits/chosen": -2.96504807472229, + "logits/rejected": -1.9871196746826172, + "logps/chosen": -298.533447265625, + "logps/rejected": -209.7758331298828, + "loss": 0.817, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.580879211425781, + "rewards/margins": 1.4378259181976318, + "rewards/rejected": -10.018705368041992, + "step": 14092 + }, + { + "epoch": 2.19, + "learning_rate": 3.81095699984439e-06, + "logits/chosen": -2.967158555984497, + "logits/rejected": -1.9728389978408813, + "logps/chosen": -317.7107238769531, + "logps/rejected": -220.4792938232422, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.595281600952148, + "rewards/margins": 5.15073823928833, + "rewards/rejected": -14.74601936340332, + "step": 14093 + }, + { + "epoch": 2.19, + "learning_rate": 3.810223559313242e-06, + "logits/chosen": -2.860860586166382, + "logits/rejected": -2.1544339656829834, + "logps/chosen": -338.5856018066406, + "logps/rejected": -287.7047424316406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.434399604797363, + "rewards/margins": 10.226710319519043, + "rewards/rejected": -14.661109924316406, + "step": 14094 + }, + { + "epoch": 2.19, + "learning_rate": 3.8094901187820944e-06, + "logits/chosen": -2.946202516555786, + "logits/rejected": -2.6973891258239746, + "logps/chosen": -233.76455688476562, + "logps/rejected": -402.14935302734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.620338439941406, + "rewards/margins": 8.734968185424805, + "rewards/rejected": -17.35530662536621, + "step": 14095 + }, + { + "epoch": 2.19, + "learning_rate": 3.808756678250947e-06, + "logits/chosen": -1.955119013786316, + "logits/rejected": -2.4675416946411133, + "logps/chosen": -176.4622802734375, + "logps/rejected": -408.38458251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.034029960632324, + "rewards/margins": 13.263435363769531, + "rewards/rejected": -19.297466278076172, + "step": 14096 + }, + { + "epoch": 2.19, + "learning_rate": 3.808023237719799e-06, + "logits/chosen": -1.273012399673462, + "logits/rejected": -2.130385160446167, + "logps/chosen": -183.4232635498047, + "logps/rejected": -334.716552734375, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.352062225341797, + "rewards/margins": 7.979062080383301, + "rewards/rejected": -21.33112335205078, + "step": 14097 + }, + { + "epoch": 2.19, + "learning_rate": 3.807289797188651e-06, + "logits/chosen": -1.1143336296081543, + "logits/rejected": -2.6146979331970215, + "logps/chosen": -307.6934814453125, + "logps/rejected": -468.1651916503906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8745880126953125, + "rewards/margins": 12.830739974975586, + "rewards/rejected": -19.70532989501953, + "step": 14098 + }, + { + "epoch": 2.19, + "learning_rate": 3.8065563566575027e-06, + "logits/chosen": -2.4355123043060303, + "logits/rejected": -2.655242681503296, + "logps/chosen": -209.49581909179688, + "logps/rejected": -262.3058776855469, + "loss": 0.2826, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1933817863464355, + "rewards/margins": 5.403822898864746, + "rewards/rejected": -12.597204208374023, + "step": 14099 + }, + { + "epoch": 2.19, + "learning_rate": 3.8058229161263555e-06, + "logits/chosen": -2.5911459922790527, + "logits/rejected": -1.7394049167633057, + "logps/chosen": -207.8824462890625, + "logps/rejected": -247.49301147460938, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.840936660766602, + "rewards/margins": 7.187250137329102, + "rewards/rejected": -16.028186798095703, + "step": 14100 + }, + { + "epoch": 2.19, + "learning_rate": 3.8050894755952073e-06, + "logits/chosen": -2.7124953269958496, + "logits/rejected": -2.8841092586517334, + "logps/chosen": -459.47930908203125, + "logps/rejected": -590.2177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.113903045654297, + "rewards/margins": 13.125364303588867, + "rewards/rejected": -21.23926544189453, + "step": 14101 + }, + { + "epoch": 2.19, + "learning_rate": 3.8043560350640592e-06, + "logits/chosen": -2.686124324798584, + "logits/rejected": -2.703937530517578, + "logps/chosen": -249.83255004882812, + "logps/rejected": -384.5565185546875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.433683395385742, + "rewards/margins": 6.612335681915283, + "rewards/rejected": -17.0460205078125, + "step": 14102 + }, + { + "epoch": 2.19, + "learning_rate": 3.803622594532911e-06, + "logits/chosen": -1.5676017999649048, + "logits/rejected": -2.464115619659424, + "logps/chosen": -312.24029541015625, + "logps/rejected": -394.8172607421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.306548118591309, + "rewards/margins": 9.461675643920898, + "rewards/rejected": -18.76822280883789, + "step": 14103 + }, + { + "epoch": 2.19, + "learning_rate": 3.8028891540017634e-06, + "logits/chosen": -1.6086667776107788, + "logits/rejected": -2.766770601272583, + "logps/chosen": -293.0765075683594, + "logps/rejected": -454.5083923339844, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.383821487426758, + "rewards/margins": 8.051653861999512, + "rewards/rejected": -17.435476303100586, + "step": 14104 + }, + { + "epoch": 2.19, + "learning_rate": 3.802155713470616e-06, + "logits/chosen": -2.626331090927124, + "logits/rejected": -2.502706527709961, + "logps/chosen": -233.63296508789062, + "logps/rejected": -381.2811584472656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.488127708435059, + "rewards/margins": 10.875917434692383, + "rewards/rejected": -16.364046096801758, + "step": 14105 + }, + { + "epoch": 2.19, + "learning_rate": 3.801422272939468e-06, + "logits/chosen": -2.7507922649383545, + "logits/rejected": -2.873662233352661, + "logps/chosen": -127.90530395507812, + "logps/rejected": -319.45184326171875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.08879566192627, + "rewards/margins": 9.154353141784668, + "rewards/rejected": -17.243148803710938, + "step": 14106 + }, + { + "epoch": 2.19, + "learning_rate": 3.80068883240832e-06, + "logits/chosen": -2.6975862979888916, + "logits/rejected": -2.9162161350250244, + "logps/chosen": -600.0353393554688, + "logps/rejected": -425.3646240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7483742237091064, + "rewards/margins": 9.670654296875, + "rewards/rejected": -13.419028282165527, + "step": 14107 + }, + { + "epoch": 2.19, + "learning_rate": 3.7999553918771718e-06, + "logits/chosen": -1.59955894947052, + "logits/rejected": -2.375627279281616, + "logps/chosen": -179.99435424804688, + "logps/rejected": -399.93768310546875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.81985855102539, + "rewards/margins": 6.540403366088867, + "rewards/rejected": -18.360261917114258, + "step": 14108 + }, + { + "epoch": 2.19, + "learning_rate": 3.7992219513460245e-06, + "logits/chosen": -2.6173274517059326, + "logits/rejected": -1.678288459777832, + "logps/chosen": -266.4051818847656, + "logps/rejected": -275.12548828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.189225673675537, + "rewards/margins": 8.561203956604004, + "rewards/rejected": -14.750429153442383, + "step": 14109 + }, + { + "epoch": 2.19, + "learning_rate": 3.7984885108148764e-06, + "logits/chosen": -2.6736788749694824, + "logits/rejected": -2.623500347137451, + "logps/chosen": -254.9124755859375, + "logps/rejected": -297.9475402832031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.422065734863281, + "rewards/margins": 10.344022750854492, + "rewards/rejected": -16.766088485717773, + "step": 14110 + }, + { + "epoch": 2.19, + "learning_rate": 3.7977550702837282e-06, + "logits/chosen": -1.9936306476593018, + "logits/rejected": -2.6903014183044434, + "logps/chosen": -144.58404541015625, + "logps/rejected": -434.96856689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.455296039581299, + "rewards/margins": 14.731505393981934, + "rewards/rejected": -21.18680191040039, + "step": 14111 + }, + { + "epoch": 2.19, + "learning_rate": 3.7970216297525805e-06, + "logits/chosen": -2.7490618228912354, + "logits/rejected": -2.280951499938965, + "logps/chosen": -240.37747192382812, + "logps/rejected": -342.311279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8383331298828125, + "rewards/margins": 10.994049072265625, + "rewards/rejected": -15.832382202148438, + "step": 14112 + }, + { + "epoch": 2.19, + "learning_rate": 3.7962881892214324e-06, + "logits/chosen": -1.978806495666504, + "logits/rejected": -2.4697601795196533, + "logps/chosen": -148.14183044433594, + "logps/rejected": -435.93292236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.522706985473633, + "rewards/margins": 10.378717422485352, + "rewards/rejected": -16.901424407958984, + "step": 14113 + }, + { + "epoch": 2.2, + "learning_rate": 3.795554748690285e-06, + "logits/chosen": -2.333613872528076, + "logits/rejected": -2.8653478622436523, + "logps/chosen": -193.36024475097656, + "logps/rejected": -284.5203857421875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.831912994384766, + "rewards/margins": 8.039115905761719, + "rewards/rejected": -15.871028900146484, + "step": 14114 + }, + { + "epoch": 2.2, + "learning_rate": 3.794821308159137e-06, + "logits/chosen": -2.2504260540008545, + "logits/rejected": -2.724600315093994, + "logps/chosen": -452.60369873046875, + "logps/rejected": -561.5106201171875, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.272627830505371, + "rewards/margins": 8.156928062438965, + "rewards/rejected": -15.429555892944336, + "step": 14115 + }, + { + "epoch": 2.2, + "learning_rate": 3.794087867627989e-06, + "logits/chosen": -2.4345922470092773, + "logits/rejected": -2.780146598815918, + "logps/chosen": -579.107666015625, + "logps/rejected": -635.5609741210938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0918378829956055, + "rewards/margins": 9.199546813964844, + "rewards/rejected": -15.291383743286133, + "step": 14116 + }, + { + "epoch": 2.2, + "learning_rate": 3.7933544270968408e-06, + "logits/chosen": -1.386410117149353, + "logits/rejected": -2.6167542934417725, + "logps/chosen": -189.44320678710938, + "logps/rejected": -374.0281982421875, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.692420959472656, + "rewards/margins": 7.6872992515563965, + "rewards/rejected": -15.379720687866211, + "step": 14117 + }, + { + "epoch": 2.2, + "learning_rate": 3.7926209865656935e-06, + "logits/chosen": -2.7879393100738525, + "logits/rejected": -2.7714011669158936, + "logps/chosen": -165.70480346679688, + "logps/rejected": -532.6558837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0017008781433105, + "rewards/margins": 14.032930374145508, + "rewards/rejected": -20.034631729125977, + "step": 14118 + }, + { + "epoch": 2.2, + "learning_rate": 3.7918875460345454e-06, + "logits/chosen": -2.412696123123169, + "logits/rejected": -2.815551280975342, + "logps/chosen": -606.0551147460938, + "logps/rejected": -752.7341918945312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.903590202331543, + "rewards/margins": 13.96924877166748, + "rewards/rejected": -23.872838973999023, + "step": 14119 + }, + { + "epoch": 2.2, + "learning_rate": 3.7911541055033973e-06, + "logits/chosen": -2.3143153190612793, + "logits/rejected": -2.602987766265869, + "logps/chosen": -452.1989440917969, + "logps/rejected": -550.4608154296875, + "loss": 0.1059, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.326177597045898, + "rewards/margins": 5.214656352996826, + "rewards/rejected": -14.540833473205566, + "step": 14120 + }, + { + "epoch": 2.2, + "learning_rate": 3.7904206649722496e-06, + "logits/chosen": -2.4821152687072754, + "logits/rejected": -2.929058790206909, + "logps/chosen": -261.31707763671875, + "logps/rejected": -400.4156799316406, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.683297157287598, + "rewards/margins": 10.430288314819336, + "rewards/rejected": -19.11358642578125, + "step": 14121 + }, + { + "epoch": 2.2, + "learning_rate": 3.7896872244411023e-06, + "logits/chosen": -2.9960832595825195, + "logits/rejected": -2.405395030975342, + "logps/chosen": -568.64453125, + "logps/rejected": -316.64154052734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.969850540161133, + "rewards/margins": 11.458517074584961, + "rewards/rejected": -16.428367614746094, + "step": 14122 + }, + { + "epoch": 2.2, + "learning_rate": 3.788953783909954e-06, + "logits/chosen": -2.3322980403900146, + "logits/rejected": -2.8054280281066895, + "logps/chosen": -662.1290893554688, + "logps/rejected": -581.7692260742188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.583913803100586, + "rewards/margins": 11.764527320861816, + "rewards/rejected": -18.34844207763672, + "step": 14123 + }, + { + "epoch": 2.2, + "learning_rate": 3.788220343378806e-06, + "logits/chosen": -1.7992545366287231, + "logits/rejected": -2.587679862976074, + "logps/chosen": -140.9593963623047, + "logps/rejected": -445.191650390625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.662639141082764, + "rewards/margins": 11.941251754760742, + "rewards/rejected": -18.603891372680664, + "step": 14124 + }, + { + "epoch": 2.2, + "learning_rate": 3.787486902847658e-06, + "logits/chosen": -2.928147077560425, + "logits/rejected": -2.6135635375976562, + "logps/chosen": -214.09156799316406, + "logps/rejected": -245.0338592529297, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.648867607116699, + "rewards/margins": 8.626068115234375, + "rewards/rejected": -14.274934768676758, + "step": 14125 + }, + { + "epoch": 2.2, + "learning_rate": 3.78675346231651e-06, + "logits/chosen": -2.235163927078247, + "logits/rejected": -2.6484858989715576, + "logps/chosen": -656.3783569335938, + "logps/rejected": -720.216064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.394766330718994, + "rewards/margins": 13.719970703125, + "rewards/rejected": -18.11473846435547, + "step": 14126 + }, + { + "epoch": 2.2, + "learning_rate": 3.7860200217853625e-06, + "logits/chosen": -2.279839038848877, + "logits/rejected": -2.5345871448516846, + "logps/chosen": -101.19271850585938, + "logps/rejected": -272.6776123046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.773338317871094, + "rewards/margins": 9.224186897277832, + "rewards/rejected": -13.997525215148926, + "step": 14127 + }, + { + "epoch": 2.2, + "learning_rate": 3.7852865812542144e-06, + "logits/chosen": -1.9496996402740479, + "logits/rejected": -2.5373830795288086, + "logps/chosen": -128.46243286132812, + "logps/rejected": -424.5296630859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.83292293548584, + "rewards/margins": 11.815829277038574, + "rewards/rejected": -21.648752212524414, + "step": 14128 + }, + { + "epoch": 2.2, + "learning_rate": 3.7845531407230667e-06, + "logits/chosen": -2.576772451400757, + "logits/rejected": -2.942941188812256, + "logps/chosen": -455.14111328125, + "logps/rejected": -444.8006591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.899050712585449, + "rewards/margins": 9.935319900512695, + "rewards/rejected": -16.83437156677246, + "step": 14129 + }, + { + "epoch": 2.2, + "learning_rate": 3.7838197001919186e-06, + "logits/chosen": -2.4741179943084717, + "logits/rejected": -2.8084821701049805, + "logps/chosen": -207.92990112304688, + "logps/rejected": -382.3506164550781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.16001033782959, + "rewards/margins": 8.501786231994629, + "rewards/rejected": -17.66179656982422, + "step": 14130 + }, + { + "epoch": 2.2, + "learning_rate": 3.7830862596607713e-06, + "logits/chosen": -1.9542161226272583, + "logits/rejected": -2.698744535446167, + "logps/chosen": -608.032470703125, + "logps/rejected": -627.2902221679688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.722949028015137, + "rewards/margins": 10.191316604614258, + "rewards/rejected": -17.91426658630371, + "step": 14131 + }, + { + "epoch": 2.2, + "learning_rate": 3.782352819129623e-06, + "logits/chosen": -1.3411169052124023, + "logits/rejected": -2.289134979248047, + "logps/chosen": -205.90353393554688, + "logps/rejected": -499.1745300292969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.449906349182129, + "rewards/margins": 12.078217506408691, + "rewards/rejected": -23.52812385559082, + "step": 14132 + }, + { + "epoch": 2.2, + "learning_rate": 3.781619378598475e-06, + "logits/chosen": -2.0445611476898193, + "logits/rejected": -2.8349814414978027, + "logps/chosen": -174.77410888671875, + "logps/rejected": -340.405029296875, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.516106605529785, + "rewards/margins": 4.386432647705078, + "rewards/rejected": -14.902539253234863, + "step": 14133 + }, + { + "epoch": 2.2, + "learning_rate": 3.780885938067327e-06, + "logits/chosen": -2.3295352458953857, + "logits/rejected": -2.355996608734131, + "logps/chosen": -224.9532470703125, + "logps/rejected": -298.61431884765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.641375541687012, + "rewards/margins": 8.984922409057617, + "rewards/rejected": -14.626297950744629, + "step": 14134 + }, + { + "epoch": 2.2, + "learning_rate": 3.780152497536179e-06, + "logits/chosen": -2.631873846054077, + "logits/rejected": -2.792548894882202, + "logps/chosen": -842.8281860351562, + "logps/rejected": -467.16748046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.845229625701904, + "rewards/margins": 8.146112442016602, + "rewards/rejected": -15.991342544555664, + "step": 14135 + }, + { + "epoch": 2.2, + "learning_rate": 3.7794190570050316e-06, + "logits/chosen": -2.319246530532837, + "logits/rejected": -2.644322156906128, + "logps/chosen": -680.693603515625, + "logps/rejected": -584.7691040039062, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.049221992492676, + "rewards/margins": 12.172215461730957, + "rewards/rejected": -22.221437454223633, + "step": 14136 + }, + { + "epoch": 2.2, + "learning_rate": 3.7786856164738834e-06, + "logits/chosen": -1.2807579040527344, + "logits/rejected": -2.256887435913086, + "logps/chosen": -126.5940170288086, + "logps/rejected": -304.73516845703125, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.718744277954102, + "rewards/margins": 5.568186283111572, + "rewards/rejected": -16.286930084228516, + "step": 14137 + }, + { + "epoch": 2.2, + "learning_rate": 3.7779521759427357e-06, + "logits/chosen": -1.801247000694275, + "logits/rejected": -2.890245199203491, + "logps/chosen": -221.3479461669922, + "logps/rejected": -475.8497619628906, + "loss": 1.5818, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.39150619506836, + "rewards/margins": 1.6447014808654785, + "rewards/rejected": -14.03620719909668, + "step": 14138 + }, + { + "epoch": 2.2, + "learning_rate": 3.7772187354115876e-06, + "logits/chosen": -2.085829973220825, + "logits/rejected": -2.4640328884124756, + "logps/chosen": -214.31459045410156, + "logps/rejected": -290.6127624511719, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.429117202758789, + "rewards/margins": 5.213091850280762, + "rewards/rejected": -13.642208099365234, + "step": 14139 + }, + { + "epoch": 2.2, + "learning_rate": 3.7764852948804403e-06, + "logits/chosen": -1.7538002729415894, + "logits/rejected": -2.3277933597564697, + "logps/chosen": -191.44924926757812, + "logps/rejected": -383.773193359375, + "loss": 0.3257, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.986653327941895, + "rewards/margins": 4.323818683624268, + "rewards/rejected": -14.31047248840332, + "step": 14140 + }, + { + "epoch": 2.2, + "learning_rate": 3.7757518543492922e-06, + "logits/chosen": -1.5090162754058838, + "logits/rejected": -2.4231882095336914, + "logps/chosen": -161.05355834960938, + "logps/rejected": -409.80517578125, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.597892761230469, + "rewards/margins": 5.966170310974121, + "rewards/rejected": -17.564064025878906, + "step": 14141 + }, + { + "epoch": 2.2, + "learning_rate": 3.775018413818144e-06, + "logits/chosen": -2.16450834274292, + "logits/rejected": -1.8147411346435547, + "logps/chosen": -228.2616424560547, + "logps/rejected": -294.0038146972656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.964278221130371, + "rewards/margins": 10.189059257507324, + "rewards/rejected": -19.153337478637695, + "step": 14142 + }, + { + "epoch": 2.2, + "learning_rate": 3.774284973286996e-06, + "logits/chosen": -2.5788540840148926, + "logits/rejected": -2.3660213947296143, + "logps/chosen": -285.9610290527344, + "logps/rejected": -391.0754699707031, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.806911468505859, + "rewards/margins": 7.490513324737549, + "rewards/rejected": -15.29742431640625, + "step": 14143 + }, + { + "epoch": 2.2, + "learning_rate": 3.773551532755848e-06, + "logits/chosen": -2.1537301540374756, + "logits/rejected": -2.5546224117279053, + "logps/chosen": -340.8384704589844, + "logps/rejected": -452.8780517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.319647312164307, + "rewards/margins": 11.98375129699707, + "rewards/rejected": -18.30339813232422, + "step": 14144 + }, + { + "epoch": 2.2, + "learning_rate": 3.7728180922247006e-06, + "logits/chosen": -2.769984483718872, + "logits/rejected": -2.1140613555908203, + "logps/chosen": -380.50445556640625, + "logps/rejected": -288.6177673339844, + "loss": 0.3585, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.812722206115723, + "rewards/margins": 3.500356435775757, + "rewards/rejected": -14.313077926635742, + "step": 14145 + }, + { + "epoch": 2.2, + "learning_rate": 3.772084651693553e-06, + "logits/chosen": -2.886477470397949, + "logits/rejected": -2.7987098693847656, + "logps/chosen": -187.4070587158203, + "logps/rejected": -312.9951171875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.970020294189453, + "rewards/margins": 6.361545085906982, + "rewards/rejected": -16.331565856933594, + "step": 14146 + }, + { + "epoch": 2.2, + "learning_rate": 3.7713512111624048e-06, + "logits/chosen": -2.5704240798950195, + "logits/rejected": -1.4759551286697388, + "logps/chosen": -369.781982421875, + "logps/rejected": -267.0389404296875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.284036636352539, + "rewards/margins": 7.947469711303711, + "rewards/rejected": -16.23150634765625, + "step": 14147 + }, + { + "epoch": 2.2, + "learning_rate": 3.7706177706312566e-06, + "logits/chosen": -2.4486992359161377, + "logits/rejected": -2.7100772857666016, + "logps/chosen": -345.51214599609375, + "logps/rejected": -417.2501220703125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.530726432800293, + "rewards/margins": 7.9169020652771, + "rewards/rejected": -17.447628021240234, + "step": 14148 + }, + { + "epoch": 2.2, + "learning_rate": 3.7698843301001094e-06, + "logits/chosen": -2.1581504344940186, + "logits/rejected": -2.386542320251465, + "logps/chosen": -237.91250610351562, + "logps/rejected": -377.21209716796875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.168201446533203, + "rewards/margins": 6.827098369598389, + "rewards/rejected": -17.99530029296875, + "step": 14149 + }, + { + "epoch": 2.2, + "learning_rate": 3.7691508895689612e-06, + "logits/chosen": -1.9520975351333618, + "logits/rejected": -2.702521562576294, + "logps/chosen": -117.92106628417969, + "logps/rejected": -343.99407958984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8558030128479, + "rewards/margins": 9.673779487609863, + "rewards/rejected": -17.529582977294922, + "step": 14150 + }, + { + "epoch": 2.2, + "learning_rate": 3.768417449037813e-06, + "logits/chosen": -1.9415837526321411, + "logits/rejected": -2.983013391494751, + "logps/chosen": -210.81390380859375, + "logps/rejected": -476.1318359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.851123809814453, + "rewards/margins": 8.618757247924805, + "rewards/rejected": -18.469881057739258, + "step": 14151 + }, + { + "epoch": 2.2, + "learning_rate": 3.767684008506665e-06, + "logits/chosen": -2.448467969894409, + "logits/rejected": -2.6549999713897705, + "logps/chosen": -544.45654296875, + "logps/rejected": -702.5460205078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.834578514099121, + "rewards/margins": 13.579694747924805, + "rewards/rejected": -19.41427230834961, + "step": 14152 + }, + { + "epoch": 2.2, + "learning_rate": 3.766950567975517e-06, + "logits/chosen": -2.5564522743225098, + "logits/rejected": -2.6961076259613037, + "logps/chosen": -527.1719360351562, + "logps/rejected": -606.2410278320312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.58690357208252, + "rewards/margins": 10.545928955078125, + "rewards/rejected": -21.132831573486328, + "step": 14153 + }, + { + "epoch": 2.2, + "learning_rate": 3.7662171274443696e-06, + "logits/chosen": -2.7646689414978027, + "logits/rejected": -2.7826850414276123, + "logps/chosen": -332.9974365234375, + "logps/rejected": -384.05908203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.708514213562012, + "rewards/margins": 9.71623420715332, + "rewards/rejected": -17.42474937438965, + "step": 14154 + }, + { + "epoch": 2.2, + "learning_rate": 3.765483686913222e-06, + "logits/chosen": -2.658198833465576, + "logits/rejected": -1.2343430519104004, + "logps/chosen": -939.1865234375, + "logps/rejected": -419.99249267578125, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.254785537719727, + "rewards/margins": 5.303823947906494, + "rewards/rejected": -16.558609008789062, + "step": 14155 + }, + { + "epoch": 2.2, + "learning_rate": 3.7647502463820738e-06, + "logits/chosen": -1.2278363704681396, + "logits/rejected": -1.9123477935791016, + "logps/chosen": -467.2514953613281, + "logps/rejected": -427.8239440917969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.061118125915527, + "rewards/margins": 14.36957836151123, + "rewards/rejected": -19.430696487426758, + "step": 14156 + }, + { + "epoch": 2.2, + "learning_rate": 3.7640168058509257e-06, + "logits/chosen": -2.5106492042541504, + "logits/rejected": -2.747699022293091, + "logps/chosen": -425.4287109375, + "logps/rejected": -421.29754638671875, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.116840362548828, + "rewards/margins": 6.744284629821777, + "rewards/rejected": -14.861124038696289, + "step": 14157 + }, + { + "epoch": 2.2, + "learning_rate": 3.7632833653197784e-06, + "logits/chosen": -1.0017173290252686, + "logits/rejected": -2.7248339653015137, + "logps/chosen": -202.10980224609375, + "logps/rejected": -566.0556030273438, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.59201717376709, + "rewards/margins": 6.448338508605957, + "rewards/rejected": -18.040355682373047, + "step": 14158 + }, + { + "epoch": 2.2, + "learning_rate": 3.7625499247886303e-06, + "logits/chosen": -1.9772634506225586, + "logits/rejected": -2.545231342315674, + "logps/chosen": -149.25698852539062, + "logps/rejected": -347.1676025390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.805368423461914, + "rewards/margins": 10.276962280273438, + "rewards/rejected": -17.08233070373535, + "step": 14159 + }, + { + "epoch": 2.2, + "learning_rate": 3.761816484257482e-06, + "logits/chosen": -2.825732946395874, + "logits/rejected": -2.346496343612671, + "logps/chosen": -776.9562377929688, + "logps/rejected": -563.2216186523438, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.478649139404297, + "rewards/margins": 6.972213268280029, + "rewards/rejected": -16.450862884521484, + "step": 14160 + }, + { + "epoch": 2.2, + "learning_rate": 3.761083043726334e-06, + "logits/chosen": -2.92042875289917, + "logits/rejected": -2.2572107315063477, + "logps/chosen": -170.44070434570312, + "logps/rejected": -192.28509521484375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.695106506347656, + "rewards/margins": 8.396589279174805, + "rewards/rejected": -15.091694831848145, + "step": 14161 + }, + { + "epoch": 2.2, + "learning_rate": 3.7603496031951863e-06, + "logits/chosen": -2.636934757232666, + "logits/rejected": -2.2755751609802246, + "logps/chosen": -221.06741333007812, + "logps/rejected": -374.1501159667969, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.650291442871094, + "rewards/margins": 7.301462173461914, + "rewards/rejected": -16.951753616333008, + "step": 14162 + }, + { + "epoch": 2.2, + "learning_rate": 3.759616162664039e-06, + "logits/chosen": -2.5170741081237793, + "logits/rejected": -2.4360716342926025, + "logps/chosen": -247.3551025390625, + "logps/rejected": -306.4747314453125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.368309020996094, + "rewards/margins": 7.254269123077393, + "rewards/rejected": -17.622577667236328, + "step": 14163 + }, + { + "epoch": 2.2, + "learning_rate": 3.758882722132891e-06, + "logits/chosen": -2.2750356197357178, + "logits/rejected": -2.4271724224090576, + "logps/chosen": -157.92755126953125, + "logps/rejected": -362.9764099121094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7551751136779785, + "rewards/margins": 12.30950927734375, + "rewards/rejected": -20.06468391418457, + "step": 14164 + }, + { + "epoch": 2.2, + "learning_rate": 3.758149281601743e-06, + "logits/chosen": -1.9733206033706665, + "logits/rejected": -2.6358704566955566, + "logps/chosen": -204.3235626220703, + "logps/rejected": -462.1626892089844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.818907737731934, + "rewards/margins": 11.789016723632812, + "rewards/rejected": -18.607925415039062, + "step": 14165 + }, + { + "epoch": 2.2, + "learning_rate": 3.7574158410705947e-06, + "logits/chosen": -2.0017576217651367, + "logits/rejected": -2.7534759044647217, + "logps/chosen": -312.9435119628906, + "logps/rejected": -522.7193603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.855915069580078, + "rewards/margins": 11.124763488769531, + "rewards/rejected": -19.98067855834961, + "step": 14166 + }, + { + "epoch": 2.2, + "learning_rate": 3.7566824005394474e-06, + "logits/chosen": -2.4135913848876953, + "logits/rejected": -2.8486664295196533, + "logps/chosen": -128.26722717285156, + "logps/rejected": -337.2611083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.297321796417236, + "rewards/margins": 11.156861305236816, + "rewards/rejected": -18.454181671142578, + "step": 14167 + }, + { + "epoch": 2.2, + "learning_rate": 3.7559489600082993e-06, + "logits/chosen": -2.726491928100586, + "logits/rejected": -2.5551488399505615, + "logps/chosen": -139.89527893066406, + "logps/rejected": -373.5196533203125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.208236694335938, + "rewards/margins": 11.716673851013184, + "rewards/rejected": -19.924911499023438, + "step": 14168 + }, + { + "epoch": 2.2, + "learning_rate": 3.755215519477151e-06, + "logits/chosen": -2.8067455291748047, + "logits/rejected": -2.893897294998169, + "logps/chosen": -184.4805145263672, + "logps/rejected": -462.2718811035156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.83358383178711, + "rewards/margins": 9.155224800109863, + "rewards/rejected": -19.988807678222656, + "step": 14169 + }, + { + "epoch": 2.2, + "learning_rate": 3.754482078946003e-06, + "logits/chosen": -1.57496976852417, + "logits/rejected": -2.6950740814208984, + "logps/chosen": -184.24978637695312, + "logps/rejected": -606.6690063476562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.033595561981201, + "rewards/margins": 15.018756866455078, + "rewards/rejected": -22.052352905273438, + "step": 14170 + }, + { + "epoch": 2.2, + "learning_rate": 3.7537486384148558e-06, + "logits/chosen": -1.9962586164474487, + "logits/rejected": -2.276947259902954, + "logps/chosen": -313.508544921875, + "logps/rejected": -456.5558166503906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.033676147460938, + "rewards/margins": 11.098482131958008, + "rewards/rejected": -20.132158279418945, + "step": 14171 + }, + { + "epoch": 2.2, + "learning_rate": 3.753015197883708e-06, + "logits/chosen": -0.6877034902572632, + "logits/rejected": -2.442587375640869, + "logps/chosen": -95.84017944335938, + "logps/rejected": -309.4677429199219, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.728724002838135, + "rewards/margins": 7.093213081359863, + "rewards/rejected": -14.821937561035156, + "step": 14172 + }, + { + "epoch": 2.2, + "learning_rate": 3.75228175735256e-06, + "logits/chosen": -2.9181997776031494, + "logits/rejected": -2.4048993587493896, + "logps/chosen": -747.5344848632812, + "logps/rejected": -440.13861083984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.755683898925781, + "rewards/margins": 7.621621131896973, + "rewards/rejected": -16.377304077148438, + "step": 14173 + }, + { + "epoch": 2.2, + "learning_rate": 3.751548316821412e-06, + "logits/chosen": -2.5320451259613037, + "logits/rejected": -2.695889472961426, + "logps/chosen": -206.83172607421875, + "logps/rejected": -495.68975830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.931740760803223, + "rewards/margins": 10.925939559936523, + "rewards/rejected": -18.857681274414062, + "step": 14174 + }, + { + "epoch": 2.2, + "learning_rate": 3.7508148762902637e-06, + "logits/chosen": -1.7614258527755737, + "logits/rejected": -2.908841609954834, + "logps/chosen": -370.9127197265625, + "logps/rejected": -716.9622192382812, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.840935707092285, + "rewards/margins": 13.778388023376465, + "rewards/rejected": -24.61932373046875, + "step": 14175 + }, + { + "epoch": 2.2, + "learning_rate": 3.7500814357591164e-06, + "logits/chosen": -2.0958809852600098, + "logits/rejected": -2.739387035369873, + "logps/chosen": -214.286376953125, + "logps/rejected": -414.3133850097656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.704939842224121, + "rewards/margins": 8.79039478302002, + "rewards/rejected": -16.49533462524414, + "step": 14176 + }, + { + "epoch": 2.2, + "learning_rate": 3.7493479952279683e-06, + "logits/chosen": -2.698429822921753, + "logits/rejected": -2.386850357055664, + "logps/chosen": -456.54071044921875, + "logps/rejected": -610.8685913085938, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.385004043579102, + "rewards/margins": 6.900015830993652, + "rewards/rejected": -13.285018920898438, + "step": 14177 + }, + { + "epoch": 2.2, + "learning_rate": 3.74861455469682e-06, + "logits/chosen": -2.3802363872528076, + "logits/rejected": -2.553220510482788, + "logps/chosen": -124.51240539550781, + "logps/rejected": -297.84320068359375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.384085655212402, + "rewards/margins": 7.425829887390137, + "rewards/rejected": -17.80991554260254, + "step": 14178 + }, + { + "epoch": 2.21, + "learning_rate": 3.7478811141656725e-06, + "logits/chosen": -2.376352310180664, + "logits/rejected": -2.4601552486419678, + "logps/chosen": -55.9710807800293, + "logps/rejected": -302.59429931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.987417697906494, + "rewards/margins": 12.331783294677734, + "rewards/rejected": -16.319202423095703, + "step": 14179 + }, + { + "epoch": 2.21, + "learning_rate": 3.747147673634525e-06, + "logits/chosen": -1.9288872480392456, + "logits/rejected": -2.1280107498168945, + "logps/chosen": -616.1966552734375, + "logps/rejected": -626.9150390625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.168205261230469, + "rewards/margins": 14.027894020080566, + "rewards/rejected": -23.19609832763672, + "step": 14180 + }, + { + "epoch": 2.21, + "learning_rate": 3.746414233103377e-06, + "logits/chosen": -2.336463451385498, + "logits/rejected": -2.3629770278930664, + "logps/chosen": -253.19155883789062, + "logps/rejected": -445.61724853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.50239372253418, + "rewards/margins": 12.881783485412598, + "rewards/rejected": -23.384178161621094, + "step": 14181 + }, + { + "epoch": 2.21, + "learning_rate": 3.745680792572229e-06, + "logits/chosen": -1.2455024719238281, + "logits/rejected": -2.168888568878174, + "logps/chosen": -240.97955322265625, + "logps/rejected": -559.6768798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.904788970947266, + "rewards/margins": 17.770038604736328, + "rewards/rejected": -24.674827575683594, + "step": 14182 + }, + { + "epoch": 2.21, + "learning_rate": 3.744947352041081e-06, + "logits/chosen": -2.6679515838623047, + "logits/rejected": -2.140288829803467, + "logps/chosen": -142.10372924804688, + "logps/rejected": -257.75897216796875, + "loss": 0.6119, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.863808631896973, + "rewards/margins": 4.946451663970947, + "rewards/rejected": -12.810260772705078, + "step": 14183 + }, + { + "epoch": 2.21, + "learning_rate": 3.7442139115099327e-06, + "logits/chosen": -2.605172872543335, + "logits/rejected": -2.176386594772339, + "logps/chosen": -642.7794189453125, + "logps/rejected": -597.4822998046875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.111785888671875, + "rewards/margins": 8.619641304016113, + "rewards/rejected": -16.731426239013672, + "step": 14184 + }, + { + "epoch": 2.21, + "learning_rate": 3.7434804709787854e-06, + "logits/chosen": -1.2681061029434204, + "logits/rejected": -2.4754183292388916, + "logps/chosen": -175.59112548828125, + "logps/rejected": -455.9936828613281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.616838455200195, + "rewards/margins": 10.239635467529297, + "rewards/rejected": -22.856473922729492, + "step": 14185 + }, + { + "epoch": 2.21, + "learning_rate": 3.7427470304476373e-06, + "logits/chosen": -2.7277426719665527, + "logits/rejected": -1.7294222116470337, + "logps/chosen": -264.3673095703125, + "logps/rejected": -283.1415100097656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6385297775268555, + "rewards/margins": 9.558095932006836, + "rewards/rejected": -17.196624755859375, + "step": 14186 + }, + { + "epoch": 2.21, + "learning_rate": 3.742013589916489e-06, + "logits/chosen": -2.7536749839782715, + "logits/rejected": -1.947136640548706, + "logps/chosen": -452.7605285644531, + "logps/rejected": -608.3325805664062, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.89322566986084, + "rewards/margins": 7.699565887451172, + "rewards/rejected": -18.592792510986328, + "step": 14187 + }, + { + "epoch": 2.21, + "learning_rate": 3.7412801493853415e-06, + "logits/chosen": -2.939971446990967, + "logits/rejected": -2.820404529571533, + "logps/chosen": -613.224853515625, + "logps/rejected": -625.38134765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.160696983337402, + "rewards/margins": 12.04985523223877, + "rewards/rejected": -19.210552215576172, + "step": 14188 + }, + { + "epoch": 2.21, + "learning_rate": 3.7405467088541942e-06, + "logits/chosen": -1.9173990488052368, + "logits/rejected": -2.5953540802001953, + "logps/chosen": -272.14935302734375, + "logps/rejected": -416.84783935546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.583169937133789, + "rewards/margins": 8.402106285095215, + "rewards/rejected": -16.985275268554688, + "step": 14189 + }, + { + "epoch": 2.21, + "learning_rate": 3.739813268323046e-06, + "logits/chosen": -2.807979106903076, + "logits/rejected": -2.2941834926605225, + "logps/chosen": -307.2878723144531, + "logps/rejected": -323.42645263671875, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.568578720092773, + "rewards/margins": 7.166726589202881, + "rewards/rejected": -16.735305786132812, + "step": 14190 + }, + { + "epoch": 2.21, + "learning_rate": 3.739079827791898e-06, + "logits/chosen": -2.8453779220581055, + "logits/rejected": -2.170966863632202, + "logps/chosen": -329.65802001953125, + "logps/rejected": -330.17193603515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.200017929077148, + "rewards/margins": 9.822120666503906, + "rewards/rejected": -19.022138595581055, + "step": 14191 + }, + { + "epoch": 2.21, + "learning_rate": 3.73834638726075e-06, + "logits/chosen": -2.6155102252960205, + "logits/rejected": -2.3729088306427, + "logps/chosen": -341.594482421875, + "logps/rejected": -415.4993591308594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.619596481323242, + "rewards/margins": 11.142515182495117, + "rewards/rejected": -18.76211166381836, + "step": 14192 + }, + { + "epoch": 2.21, + "learning_rate": 3.7376129467296017e-06, + "logits/chosen": -2.2788758277893066, + "logits/rejected": -2.0404162406921387, + "logps/chosen": -173.5901336669922, + "logps/rejected": -375.8003234863281, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.75475549697876, + "rewards/margins": 14.39813232421875, + "rewards/rejected": -21.15288734436035, + "step": 14193 + }, + { + "epoch": 2.21, + "learning_rate": 3.7368795061984545e-06, + "logits/chosen": -1.5003470182418823, + "logits/rejected": -2.6296563148498535, + "logps/chosen": -225.44874572753906, + "logps/rejected": -552.8713989257812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.785254955291748, + "rewards/margins": 11.914865493774414, + "rewards/rejected": -19.70012092590332, + "step": 14194 + }, + { + "epoch": 2.21, + "learning_rate": 3.7361460656673063e-06, + "logits/chosen": -1.790585994720459, + "logits/rejected": -2.307645082473755, + "logps/chosen": -130.22740173339844, + "logps/rejected": -361.27984619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5072736740112305, + "rewards/margins": 14.878782272338867, + "rewards/rejected": -21.38605499267578, + "step": 14195 + }, + { + "epoch": 2.21, + "learning_rate": 3.7354126251361586e-06, + "logits/chosen": -1.1209213733673096, + "logits/rejected": -2.5348827838897705, + "logps/chosen": -210.54754638671875, + "logps/rejected": -784.226806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.515663146972656, + "rewards/margins": 14.919333457946777, + "rewards/rejected": -27.43499755859375, + "step": 14196 + }, + { + "epoch": 2.21, + "learning_rate": 3.7346791846050105e-06, + "logits/chosen": -1.998818278312683, + "logits/rejected": -2.434828519821167, + "logps/chosen": -370.6382751464844, + "logps/rejected": -525.6668701171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.817415237426758, + "rewards/margins": 12.882301330566406, + "rewards/rejected": -21.699716567993164, + "step": 14197 + }, + { + "epoch": 2.21, + "learning_rate": 3.7339457440738633e-06, + "logits/chosen": -0.9462173581123352, + "logits/rejected": -2.578855037689209, + "logps/chosen": -138.5475616455078, + "logps/rejected": -727.4324340820312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.82602596282959, + "rewards/margins": 14.531128883361816, + "rewards/rejected": -23.357154846191406, + "step": 14198 + }, + { + "epoch": 2.21, + "learning_rate": 3.733212303542715e-06, + "logits/chosen": -1.8106991052627563, + "logits/rejected": -2.2644410133361816, + "logps/chosen": -334.7044982910156, + "logps/rejected": -467.5780029296875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.555150032043457, + "rewards/margins": 7.564481735229492, + "rewards/rejected": -19.119632720947266, + "step": 14199 + }, + { + "epoch": 2.21, + "learning_rate": 3.732478863011567e-06, + "logits/chosen": -2.2061126232147217, + "logits/rejected": -2.6741528511047363, + "logps/chosen": -194.23861694335938, + "logps/rejected": -540.4422607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.85661506652832, + "rewards/margins": 17.467241287231445, + "rewards/rejected": -22.323856353759766, + "step": 14200 + }, + { + "epoch": 2.21, + "learning_rate": 3.731745422480419e-06, + "logits/chosen": -2.4802045822143555, + "logits/rejected": -2.7938733100891113, + "logps/chosen": -362.8177185058594, + "logps/rejected": -466.7996826171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.37693977355957, + "rewards/margins": 10.872635841369629, + "rewards/rejected": -16.249576568603516, + "step": 14201 + }, + { + "epoch": 2.21, + "learning_rate": 3.7310119819492708e-06, + "logits/chosen": -2.3398637771606445, + "logits/rejected": -2.759493112564087, + "logps/chosen": -201.51901245117188, + "logps/rejected": -408.3113098144531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.816816329956055, + "rewards/margins": 10.142875671386719, + "rewards/rejected": -20.95969009399414, + "step": 14202 + }, + { + "epoch": 2.21, + "learning_rate": 3.7302785414181235e-06, + "logits/chosen": -1.2927641868591309, + "logits/rejected": -1.918418288230896, + "logps/chosen": -280.1437683105469, + "logps/rejected": -494.244873046875, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.657083988189697, + "rewards/margins": 11.030366897583008, + "rewards/rejected": -18.687450408935547, + "step": 14203 + }, + { + "epoch": 2.21, + "learning_rate": 3.7295451008869754e-06, + "logits/chosen": -2.6511147022247314, + "logits/rejected": -1.4479765892028809, + "logps/chosen": -921.2071533203125, + "logps/rejected": -532.3826293945312, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.244260787963867, + "rewards/margins": 9.924083709716797, + "rewards/rejected": -21.168346405029297, + "step": 14204 + }, + { + "epoch": 2.21, + "learning_rate": 3.7288116603558277e-06, + "logits/chosen": -2.5421805381774902, + "logits/rejected": -1.6714891195297241, + "logps/chosen": -355.9095153808594, + "logps/rejected": -576.46630859375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.735376358032227, + "rewards/margins": 10.736088752746582, + "rewards/rejected": -24.471466064453125, + "step": 14205 + }, + { + "epoch": 2.21, + "learning_rate": 3.7280782198246795e-06, + "logits/chosen": -0.5311307907104492, + "logits/rejected": -1.9183508157730103, + "logps/chosen": -139.7020721435547, + "logps/rejected": -588.3214111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.195176124572754, + "rewards/margins": 14.970378875732422, + "rewards/rejected": -24.165555953979492, + "step": 14206 + }, + { + "epoch": 2.21, + "learning_rate": 3.7273447792935323e-06, + "logits/chosen": -1.8035820722579956, + "logits/rejected": -1.6839849948883057, + "logps/chosen": -360.3737487792969, + "logps/rejected": -599.9354248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.945794105529785, + "rewards/margins": 15.049230575561523, + "rewards/rejected": -25.995025634765625, + "step": 14207 + }, + { + "epoch": 2.21, + "learning_rate": 3.726611338762384e-06, + "logits/chosen": -2.6810622215270996, + "logits/rejected": -2.6337761878967285, + "logps/chosen": -163.72515869140625, + "logps/rejected": -340.27471923828125, + "loss": 0.0335, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.629282474517822, + "rewards/margins": 8.237336158752441, + "rewards/rejected": -14.866618156433105, + "step": 14208 + }, + { + "epoch": 2.21, + "learning_rate": 3.725877898231236e-06, + "logits/chosen": -1.0136258602142334, + "logits/rejected": -2.5767579078674316, + "logps/chosen": -222.5556640625, + "logps/rejected": -646.5177001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.379533767700195, + "rewards/margins": 12.228761672973633, + "rewards/rejected": -24.608295440673828, + "step": 14209 + }, + { + "epoch": 2.21, + "learning_rate": 3.725144457700088e-06, + "logits/chosen": -2.8541817665100098, + "logits/rejected": -2.396519184112549, + "logps/chosen": -695.3484497070312, + "logps/rejected": -678.7762451171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.769279479980469, + "rewards/margins": 10.127995491027832, + "rewards/rejected": -18.897274017333984, + "step": 14210 + }, + { + "epoch": 2.21, + "learning_rate": 3.7244110171689398e-06, + "logits/chosen": -1.6191262006759644, + "logits/rejected": -2.4649641513824463, + "logps/chosen": -257.00506591796875, + "logps/rejected": -520.319580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.965841293334961, + "rewards/margins": 11.91610336303711, + "rewards/rejected": -21.88194465637207, + "step": 14211 + }, + { + "epoch": 2.21, + "learning_rate": 3.7236775766377925e-06, + "logits/chosen": -2.3769776821136475, + "logits/rejected": -2.586756467819214, + "logps/chosen": -789.6124267578125, + "logps/rejected": -919.4717407226562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.394200325012207, + "rewards/margins": 17.159931182861328, + "rewards/rejected": -29.55413246154785, + "step": 14212 + }, + { + "epoch": 2.21, + "learning_rate": 3.722944136106645e-06, + "logits/chosen": -2.518932342529297, + "logits/rejected": -2.5916106700897217, + "logps/chosen": -231.646728515625, + "logps/rejected": -268.90020751953125, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.389747619628906, + "rewards/margins": 5.681936264038086, + "rewards/rejected": -14.071683883666992, + "step": 14213 + }, + { + "epoch": 2.21, + "learning_rate": 3.7222106955754967e-06, + "logits/chosen": -2.253411293029785, + "logits/rejected": -2.7244679927825928, + "logps/chosen": -155.55886840820312, + "logps/rejected": -371.19891357421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.381011962890625, + "rewards/margins": 10.30186653137207, + "rewards/rejected": -19.682880401611328, + "step": 14214 + }, + { + "epoch": 2.21, + "learning_rate": 3.7214772550443486e-06, + "logits/chosen": -2.4167091846466064, + "logits/rejected": -2.7057669162750244, + "logps/chosen": -114.18220520019531, + "logps/rejected": -363.36419677734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.972464561462402, + "rewards/margins": 10.357809066772461, + "rewards/rejected": -16.330272674560547, + "step": 14215 + }, + { + "epoch": 2.21, + "learning_rate": 3.7207438145132013e-06, + "logits/chosen": -1.8564289808273315, + "logits/rejected": -2.5134265422821045, + "logps/chosen": -268.33746337890625, + "logps/rejected": -454.6543273925781, + "loss": 0.3841, + "rewards/accuracies": 0.5, + "rewards/chosen": -14.98220443725586, + "rewards/margins": 8.758489608764648, + "rewards/rejected": -23.740692138671875, + "step": 14216 + }, + { + "epoch": 2.21, + "learning_rate": 3.720010373982053e-06, + "logits/chosen": -2.216264247894287, + "logits/rejected": -2.315507411956787, + "logps/chosen": -352.4291687011719, + "logps/rejected": -590.1591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.012237071990967, + "rewards/margins": 14.831056594848633, + "rewards/rejected": -21.843294143676758, + "step": 14217 + }, + { + "epoch": 2.21, + "learning_rate": 3.719276933450905e-06, + "logits/chosen": -1.0563936233520508, + "logits/rejected": -2.5014448165893555, + "logps/chosen": -169.43026733398438, + "logps/rejected": -405.90142822265625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.738362312316895, + "rewards/margins": 6.284582614898682, + "rewards/rejected": -16.022945404052734, + "step": 14218 + }, + { + "epoch": 2.21, + "learning_rate": 3.718543492919757e-06, + "logits/chosen": -2.8169219493865967, + "logits/rejected": -2.7728466987609863, + "logps/chosen": -260.46075439453125, + "logps/rejected": -286.0337219238281, + "loss": 1.3619, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.858219146728516, + "rewards/margins": 1.4519169330596924, + "rewards/rejected": -13.310136795043945, + "step": 14219 + }, + { + "epoch": 2.21, + "learning_rate": 3.7178100523886097e-06, + "logits/chosen": -2.323376178741455, + "logits/rejected": -2.833214282989502, + "logps/chosen": -196.8271026611328, + "logps/rejected": -375.93927001953125, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.95777702331543, + "rewards/margins": 9.789953231811523, + "rewards/rejected": -19.747730255126953, + "step": 14220 + }, + { + "epoch": 2.21, + "learning_rate": 3.7170766118574615e-06, + "logits/chosen": -1.5352272987365723, + "logits/rejected": -2.6776883602142334, + "logps/chosen": -307.63421630859375, + "logps/rejected": -420.97613525390625, + "loss": 0.0314, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7619171142578125, + "rewards/margins": 5.544921875, + "rewards/rejected": -12.306838989257812, + "step": 14221 + }, + { + "epoch": 2.21, + "learning_rate": 3.716343171326314e-06, + "logits/chosen": -2.0266740322113037, + "logits/rejected": -2.831958293914795, + "logps/chosen": -134.43649291992188, + "logps/rejected": -364.80828857421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.274797439575195, + "rewards/margins": 10.337135314941406, + "rewards/rejected": -18.6119327545166, + "step": 14222 + }, + { + "epoch": 2.21, + "learning_rate": 3.7156097307951657e-06, + "logits/chosen": -2.6597230434417725, + "logits/rejected": -2.1005821228027344, + "logps/chosen": -516.397705078125, + "logps/rejected": -472.756103515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.63971996307373, + "rewards/margins": 11.587163925170898, + "rewards/rejected": -22.226882934570312, + "step": 14223 + }, + { + "epoch": 2.21, + "learning_rate": 3.7148762902640176e-06, + "logits/chosen": -2.6718621253967285, + "logits/rejected": -2.094449043273926, + "logps/chosen": -610.7457275390625, + "logps/rejected": -501.8061828613281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.531211853027344, + "rewards/margins": 15.134367942810059, + "rewards/rejected": -21.66558074951172, + "step": 14224 + }, + { + "epoch": 2.21, + "learning_rate": 3.7141428497328703e-06, + "logits/chosen": -2.778536558151245, + "logits/rejected": -2.6982195377349854, + "logps/chosen": -190.92855834960938, + "logps/rejected": -453.1837158203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.674442291259766, + "rewards/margins": 9.53817367553711, + "rewards/rejected": -21.212615966796875, + "step": 14225 + }, + { + "epoch": 2.21, + "learning_rate": 3.713409409201722e-06, + "logits/chosen": -2.80580735206604, + "logits/rejected": -2.929903507232666, + "logps/chosen": -107.8332290649414, + "logps/rejected": -262.925537109375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.811677932739258, + "rewards/margins": 7.906367301940918, + "rewards/rejected": -17.71804428100586, + "step": 14226 + }, + { + "epoch": 2.21, + "learning_rate": 3.712675968670574e-06, + "logits/chosen": -1.2305774688720703, + "logits/rejected": -2.5891435146331787, + "logps/chosen": -194.65721130371094, + "logps/rejected": -522.841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.382405281066895, + "rewards/margins": 12.090794563293457, + "rewards/rejected": -21.47319984436035, + "step": 14227 + }, + { + "epoch": 2.21, + "learning_rate": 3.711942528139426e-06, + "logits/chosen": -2.666714906692505, + "logits/rejected": -2.8069024085998535, + "logps/chosen": -209.74427795410156, + "logps/rejected": -211.33001708984375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.734495639801025, + "rewards/margins": 6.999009609222412, + "rewards/rejected": -14.733505249023438, + "step": 14228 + }, + { + "epoch": 2.21, + "learning_rate": 3.7112090876082787e-06, + "logits/chosen": -2.3584227561950684, + "logits/rejected": -2.4767062664031982, + "logps/chosen": -339.8000793457031, + "logps/rejected": -507.8902893066406, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.070404052734375, + "rewards/margins": 9.992016792297363, + "rewards/rejected": -19.062419891357422, + "step": 14229 + }, + { + "epoch": 2.21, + "learning_rate": 3.710475647077131e-06, + "logits/chosen": -2.6000099182128906, + "logits/rejected": -2.623394250869751, + "logps/chosen": -452.3099365234375, + "logps/rejected": -535.7689819335938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.607484340667725, + "rewards/margins": 11.4786958694458, + "rewards/rejected": -19.086179733276367, + "step": 14230 + }, + { + "epoch": 2.21, + "learning_rate": 3.709742206545983e-06, + "logits/chosen": -1.1323429346084595, + "logits/rejected": -2.5215651988983154, + "logps/chosen": -116.81504821777344, + "logps/rejected": -474.64410400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.427435874938965, + "rewards/margins": 13.991454124450684, + "rewards/rejected": -21.41888999938965, + "step": 14231 + }, + { + "epoch": 2.21, + "learning_rate": 3.7090087660148347e-06, + "logits/chosen": -2.253589630126953, + "logits/rejected": -2.551929473876953, + "logps/chosen": -119.31814575195312, + "logps/rejected": -297.93536376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.910719871520996, + "rewards/margins": 10.212408065795898, + "rewards/rejected": -18.12312889099121, + "step": 14232 + }, + { + "epoch": 2.21, + "learning_rate": 3.7082753254836866e-06, + "logits/chosen": -2.6678225994110107, + "logits/rejected": -1.250523567199707, + "logps/chosen": -489.7813415527344, + "logps/rejected": -245.8104248046875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.098365783691406, + "rewards/margins": 10.31191635131836, + "rewards/rejected": -15.410282135009766, + "step": 14233 + }, + { + "epoch": 2.21, + "learning_rate": 3.7075418849525393e-06, + "logits/chosen": -2.645364999771118, + "logits/rejected": -2.328117847442627, + "logps/chosen": -472.03399658203125, + "logps/rejected": -328.49896240234375, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.30524730682373, + "rewards/margins": 5.132405757904053, + "rewards/rejected": -14.437652587890625, + "step": 14234 + }, + { + "epoch": 2.21, + "learning_rate": 3.7068084444213912e-06, + "logits/chosen": -1.749352216720581, + "logits/rejected": -2.8345553874969482, + "logps/chosen": -210.5184326171875, + "logps/rejected": -378.28369140625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.174814224243164, + "rewards/margins": 8.079554557800293, + "rewards/rejected": -16.25436782836914, + "step": 14235 + }, + { + "epoch": 2.21, + "learning_rate": 3.706075003890243e-06, + "logits/chosen": -2.6989593505859375, + "logits/rejected": -1.7903481721878052, + "logps/chosen": -243.4850311279297, + "logps/rejected": -226.4564208984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.676642417907715, + "rewards/margins": 8.265277862548828, + "rewards/rejected": -13.94192123413086, + "step": 14236 + }, + { + "epoch": 2.21, + "learning_rate": 3.705341563359095e-06, + "logits/chosen": -2.3577799797058105, + "logits/rejected": -1.9668455123901367, + "logps/chosen": -219.18174743652344, + "logps/rejected": -388.7021484375, + "loss": 0.0461, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.445039749145508, + "rewards/margins": 6.11285924911499, + "rewards/rejected": -18.557899475097656, + "step": 14237 + }, + { + "epoch": 2.21, + "learning_rate": 3.7046081228279477e-06, + "logits/chosen": -2.700270652770996, + "logits/rejected": -2.025721549987793, + "logps/chosen": -288.6302795410156, + "logps/rejected": -529.76513671875, + "loss": 0.1954, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.025185585021973, + "rewards/margins": 10.647649765014648, + "rewards/rejected": -21.672836303710938, + "step": 14238 + }, + { + "epoch": 2.21, + "learning_rate": 3.7038746822968e-06, + "logits/chosen": -2.570270299911499, + "logits/rejected": -1.3525631427764893, + "logps/chosen": -206.4052734375, + "logps/rejected": -296.0985107421875, + "loss": 0.349, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.669051170349121, + "rewards/margins": 6.811382293701172, + "rewards/rejected": -15.480433464050293, + "step": 14239 + }, + { + "epoch": 2.21, + "learning_rate": 3.703141241765652e-06, + "logits/chosen": -1.3707636594772339, + "logits/rejected": -2.610990047454834, + "logps/chosen": -340.3877868652344, + "logps/rejected": -408.44146728515625, + "loss": 3.4307, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.865193367004395, + "rewards/margins": 4.465243339538574, + "rewards/rejected": -18.33043670654297, + "step": 14240 + }, + { + "epoch": 2.21, + "learning_rate": 3.7024078012345038e-06, + "logits/chosen": -2.421398401260376, + "logits/rejected": -2.83817195892334, + "logps/chosen": -219.89239501953125, + "logps/rejected": -228.9817657470703, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.941872596740723, + "rewards/margins": 8.424386978149414, + "rewards/rejected": -17.366260528564453, + "step": 14241 + }, + { + "epoch": 2.21, + "learning_rate": 3.7016743607033556e-06, + "logits/chosen": -1.9998935461044312, + "logits/rejected": -2.2622880935668945, + "logps/chosen": -254.7082061767578, + "logps/rejected": -498.53619384765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.746444702148438, + "rewards/margins": 7.9550604820251465, + "rewards/rejected": -19.701505661010742, + "step": 14242 + }, + { + "epoch": 2.22, + "learning_rate": 3.7009409201722084e-06, + "logits/chosen": -2.169928550720215, + "logits/rejected": -2.3515818119049072, + "logps/chosen": -264.8088684082031, + "logps/rejected": -433.12744140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.04905891418457, + "rewards/margins": 8.758940696716309, + "rewards/rejected": -18.807998657226562, + "step": 14243 + }, + { + "epoch": 2.22, + "learning_rate": 3.7002074796410602e-06, + "logits/chosen": -2.8003766536712646, + "logits/rejected": -2.9470150470733643, + "logps/chosen": -157.44882202148438, + "logps/rejected": -350.15069580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.820708274841309, + "rewards/margins": 13.041786193847656, + "rewards/rejected": -18.86249351501465, + "step": 14244 + }, + { + "epoch": 2.22, + "learning_rate": 3.699474039109912e-06, + "logits/chosen": -2.18701171875, + "logits/rejected": -2.706599712371826, + "logps/chosen": -228.16897583007812, + "logps/rejected": -452.5447998046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.833018779754639, + "rewards/margins": 12.203363418579102, + "rewards/rejected": -20.036380767822266, + "step": 14245 + }, + { + "epoch": 2.22, + "learning_rate": 3.6987405985787644e-06, + "logits/chosen": -2.9713501930236816, + "logits/rejected": -1.5477516651153564, + "logps/chosen": -509.0454406738281, + "logps/rejected": -392.795654296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.114250183105469, + "rewards/margins": 9.227594375610352, + "rewards/rejected": -17.341842651367188, + "step": 14246 + }, + { + "epoch": 2.22, + "learning_rate": 3.698007158047617e-06, + "logits/chosen": -0.8209309577941895, + "logits/rejected": -2.29333233833313, + "logps/chosen": -261.55816650390625, + "logps/rejected": -635.8421630859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.519908905029297, + "rewards/margins": 18.08962631225586, + "rewards/rejected": -24.609535217285156, + "step": 14247 + }, + { + "epoch": 2.22, + "learning_rate": 3.697273717516469e-06, + "logits/chosen": -2.52797269821167, + "logits/rejected": -2.146799087524414, + "logps/chosen": -703.602294921875, + "logps/rejected": -525.557861328125, + "loss": 0.1122, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.018818855285645, + "rewards/margins": 7.088199615478516, + "rewards/rejected": -17.107017517089844, + "step": 14248 + }, + { + "epoch": 2.22, + "learning_rate": 3.696540276985321e-06, + "logits/chosen": -1.2035998106002808, + "logits/rejected": -2.6413304805755615, + "logps/chosen": -194.17100524902344, + "logps/rejected": -331.0830078125, + "loss": 0.0912, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.386219024658203, + "rewards/margins": 5.151037693023682, + "rewards/rejected": -17.53725814819336, + "step": 14249 + }, + { + "epoch": 2.22, + "learning_rate": 3.6958068364541728e-06, + "logits/chosen": -2.5034971237182617, + "logits/rejected": -1.9570127725601196, + "logps/chosen": -342.1130065917969, + "logps/rejected": -501.11383056640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.105790138244629, + "rewards/margins": 10.035367965698242, + "rewards/rejected": -21.141159057617188, + "step": 14250 + }, + { + "epoch": 2.22, + "learning_rate": 3.6950733959230247e-06, + "logits/chosen": -1.1891615390777588, + "logits/rejected": -2.5722815990448, + "logps/chosen": -259.4195556640625, + "logps/rejected": -510.22589111328125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.56531047821045, + "rewards/margins": 7.636999130249023, + "rewards/rejected": -17.202308654785156, + "step": 14251 + }, + { + "epoch": 2.22, + "learning_rate": 3.6943399553918774e-06, + "logits/chosen": -2.707941770553589, + "logits/rejected": -1.9367696046829224, + "logps/chosen": -268.551025390625, + "logps/rejected": -481.2637023925781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.820474624633789, + "rewards/margins": 10.421772956848145, + "rewards/rejected": -22.24224853515625, + "step": 14252 + }, + { + "epoch": 2.22, + "learning_rate": 3.6936065148607293e-06, + "logits/chosen": -2.617598533630371, + "logits/rejected": -1.281733751296997, + "logps/chosen": -583.8507080078125, + "logps/rejected": -536.346435546875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.535106658935547, + "rewards/margins": 7.514012813568115, + "rewards/rejected": -18.04911994934082, + "step": 14253 + }, + { + "epoch": 2.22, + "learning_rate": 3.692873074329581e-06, + "logits/chosen": -1.1494590044021606, + "logits/rejected": -2.4457459449768066, + "logps/chosen": -156.4898681640625, + "logps/rejected": -497.3156433105469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.08284854888916, + "rewards/margins": 10.827717781066895, + "rewards/rejected": -18.910566329956055, + "step": 14254 + }, + { + "epoch": 2.22, + "learning_rate": 3.6921396337984334e-06, + "logits/chosen": -1.5913525819778442, + "logits/rejected": -2.6345343589782715, + "logps/chosen": -148.93246459960938, + "logps/rejected": -398.3428039550781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.799558639526367, + "rewards/margins": 8.109012603759766, + "rewards/rejected": -17.908571243286133, + "step": 14255 + }, + { + "epoch": 2.22, + "learning_rate": 3.691406193267286e-06, + "logits/chosen": -1.1037991046905518, + "logits/rejected": -2.521897792816162, + "logps/chosen": -178.8219451904297, + "logps/rejected": -374.0434265136719, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.682317733764648, + "rewards/margins": 7.465238094329834, + "rewards/rejected": -17.14755630493164, + "step": 14256 + }, + { + "epoch": 2.22, + "learning_rate": 3.690672752736138e-06, + "logits/chosen": -2.2106616497039795, + "logits/rejected": -2.5503151416778564, + "logps/chosen": -379.7635498046875, + "logps/rejected": -562.134033203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.809815406799316, + "rewards/margins": 11.262212753295898, + "rewards/rejected": -20.07202911376953, + "step": 14257 + }, + { + "epoch": 2.22, + "learning_rate": 3.68993931220499e-06, + "logits/chosen": -2.402439594268799, + "logits/rejected": -2.2087035179138184, + "logps/chosen": -122.01084899902344, + "logps/rejected": -315.609130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.833591461181641, + "rewards/margins": 11.878567695617676, + "rewards/rejected": -17.712158203125, + "step": 14258 + }, + { + "epoch": 2.22, + "learning_rate": 3.689205871673842e-06, + "logits/chosen": -1.5966265201568604, + "logits/rejected": -2.505326271057129, + "logps/chosen": -223.877197265625, + "logps/rejected": -671.6388549804688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.593282699584961, + "rewards/margins": 18.577287673950195, + "rewards/rejected": -27.170570373535156, + "step": 14259 + }, + { + "epoch": 2.22, + "learning_rate": 3.6884724311426945e-06, + "logits/chosen": -2.028581380844116, + "logits/rejected": -2.6325302124023438, + "logps/chosen": -136.17921447753906, + "logps/rejected": -377.1144104003906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.272745132446289, + "rewards/margins": 11.693572998046875, + "rewards/rejected": -20.96631622314453, + "step": 14260 + }, + { + "epoch": 2.22, + "learning_rate": 3.6877389906115464e-06, + "logits/chosen": -1.5979692935943604, + "logits/rejected": -2.4220619201660156, + "logps/chosen": -132.23814392089844, + "logps/rejected": -497.387451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2246623039245605, + "rewards/margins": 19.72079849243164, + "rewards/rejected": -26.945459365844727, + "step": 14261 + }, + { + "epoch": 2.22, + "learning_rate": 3.6870055500803983e-06, + "logits/chosen": -2.27524471282959, + "logits/rejected": -2.4580063819885254, + "logps/chosen": -120.94708251953125, + "logps/rejected": -183.33447265625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.25992202758789, + "rewards/margins": 5.938107490539551, + "rewards/rejected": -15.198029518127441, + "step": 14262 + }, + { + "epoch": 2.22, + "learning_rate": 3.6862721095492506e-06, + "logits/chosen": -2.8456966876983643, + "logits/rejected": -2.8235890865325928, + "logps/chosen": -163.1201934814453, + "logps/rejected": -241.70858764648438, + "loss": 0.0396, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.089313507080078, + "rewards/margins": 8.096270561218262, + "rewards/rejected": -18.185585021972656, + "step": 14263 + }, + { + "epoch": 2.22, + "learning_rate": 3.6855386690181025e-06, + "logits/chosen": -2.7003350257873535, + "logits/rejected": -2.8122997283935547, + "logps/chosen": -297.83306884765625, + "logps/rejected": -420.7570495605469, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.02655029296875, + "rewards/margins": 9.4780855178833, + "rewards/rejected": -15.504636764526367, + "step": 14264 + }, + { + "epoch": 2.22, + "learning_rate": 3.684805228486955e-06, + "logits/chosen": -1.3138214349746704, + "logits/rejected": -2.309494972229004, + "logps/chosen": -235.2804412841797, + "logps/rejected": -609.1806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.401310920715332, + "rewards/margins": 15.573200225830078, + "rewards/rejected": -23.974510192871094, + "step": 14265 + }, + { + "epoch": 2.22, + "learning_rate": 3.684071787955807e-06, + "logits/chosen": -2.4415509700775146, + "logits/rejected": -1.8104373216629028, + "logps/chosen": -339.4053649902344, + "logps/rejected": -301.0045166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.925283432006836, + "rewards/margins": 8.886009216308594, + "rewards/rejected": -14.81129264831543, + "step": 14266 + }, + { + "epoch": 2.22, + "learning_rate": 3.683338347424659e-06, + "logits/chosen": -2.729120969772339, + "logits/rejected": -2.101226568222046, + "logps/chosen": -401.90447998046875, + "logps/rejected": -544.3948974609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.092976570129395, + "rewards/margins": 11.890890121459961, + "rewards/rejected": -22.983867645263672, + "step": 14267 + }, + { + "epoch": 2.22, + "learning_rate": 3.682604906893511e-06, + "logits/chosen": -2.817671775817871, + "logits/rejected": -2.781010627746582, + "logps/chosen": -133.034912109375, + "logps/rejected": -251.16729736328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.493589401245117, + "rewards/margins": 9.129161834716797, + "rewards/rejected": -14.622751235961914, + "step": 14268 + }, + { + "epoch": 2.22, + "learning_rate": 3.6818714663623636e-06, + "logits/chosen": -2.63812518119812, + "logits/rejected": -1.6477937698364258, + "logps/chosen": -612.0272827148438, + "logps/rejected": -585.245849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.761621475219727, + "rewards/margins": 15.100772857666016, + "rewards/rejected": -24.862394332885742, + "step": 14269 + }, + { + "epoch": 2.22, + "learning_rate": 3.6811380258312154e-06, + "logits/chosen": -2.795975923538208, + "logits/rejected": -2.382932186126709, + "logps/chosen": -212.72445678710938, + "logps/rejected": -287.16424560546875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.650942802429199, + "rewards/margins": 9.268359184265137, + "rewards/rejected": -15.919301986694336, + "step": 14270 + }, + { + "epoch": 2.22, + "learning_rate": 3.6804045853000673e-06, + "logits/chosen": -1.9572930335998535, + "logits/rejected": -2.5211005210876465, + "logps/chosen": -305.2540588378906, + "logps/rejected": -447.96368408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.072172164916992, + "rewards/margins": 10.954950332641602, + "rewards/rejected": -19.027122497558594, + "step": 14271 + }, + { + "epoch": 2.22, + "learning_rate": 3.6796711447689196e-06, + "logits/chosen": -1.2661705017089844, + "logits/rejected": -2.580009698867798, + "logps/chosen": -270.3305969238281, + "logps/rejected": -550.937255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.239330291748047, + "rewards/margins": 12.017599105834961, + "rewards/rejected": -20.256927490234375, + "step": 14272 + }, + { + "epoch": 2.22, + "learning_rate": 3.6789377042377715e-06, + "logits/chosen": -1.8235937356948853, + "logits/rejected": -2.2803478240966797, + "logps/chosen": -165.62611389160156, + "logps/rejected": -449.0693664550781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.126931190490723, + "rewards/margins": 10.09738540649414, + "rewards/rejected": -17.224315643310547, + "step": 14273 + }, + { + "epoch": 2.22, + "learning_rate": 3.6782042637066242e-06, + "logits/chosen": -1.963847279548645, + "logits/rejected": -2.628182888031006, + "logps/chosen": -550.3739624023438, + "logps/rejected": -586.3935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.162822723388672, + "rewards/margins": 12.91122817993164, + "rewards/rejected": -23.074050903320312, + "step": 14274 + }, + { + "epoch": 2.22, + "learning_rate": 3.677470823175476e-06, + "logits/chosen": -2.556077718734741, + "logits/rejected": -2.9886958599090576, + "logps/chosen": -120.03034973144531, + "logps/rejected": -283.08349609375, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.784101486206055, + "rewards/margins": 4.212811470031738, + "rewards/rejected": -14.996912956237793, + "step": 14275 + }, + { + "epoch": 2.22, + "learning_rate": 3.676737382644328e-06, + "logits/chosen": -1.221585988998413, + "logits/rejected": -2.3387985229492188, + "logps/chosen": -175.77285766601562, + "logps/rejected": -523.1732177734375, + "loss": 0.0904, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.851245880126953, + "rewards/margins": 7.8476786613464355, + "rewards/rejected": -18.698925018310547, + "step": 14276 + }, + { + "epoch": 2.22, + "learning_rate": 3.67600394211318e-06, + "logits/chosen": -2.7359378337860107, + "logits/rejected": -2.9582114219665527, + "logps/chosen": -174.31072998046875, + "logps/rejected": -346.6751708984375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.455777168273926, + "rewards/margins": 7.30781364440918, + "rewards/rejected": -14.763590812683105, + "step": 14277 + }, + { + "epoch": 2.22, + "learning_rate": 3.6752705015820326e-06, + "logits/chosen": -1.5001778602600098, + "logits/rejected": -2.810521364212036, + "logps/chosen": -213.66146850585938, + "logps/rejected": -327.12591552734375, + "loss": 0.2444, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.076835632324219, + "rewards/margins": 6.692471981048584, + "rewards/rejected": -13.769308090209961, + "step": 14278 + }, + { + "epoch": 2.22, + "learning_rate": 3.6745370610508845e-06, + "logits/chosen": -2.0019712448120117, + "logits/rejected": -2.2485172748565674, + "logps/chosen": -194.48663330078125, + "logps/rejected": -263.7187194824219, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.633988380432129, + "rewards/margins": 6.728599548339844, + "rewards/rejected": -17.362586975097656, + "step": 14279 + }, + { + "epoch": 2.22, + "learning_rate": 3.6738036205197368e-06, + "logits/chosen": -2.179093360900879, + "logits/rejected": -2.930590867996216, + "logps/chosen": -461.4273681640625, + "logps/rejected": -479.97637939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.266793251037598, + "rewards/margins": 11.546504020690918, + "rewards/rejected": -18.813297271728516, + "step": 14280 + }, + { + "epoch": 2.22, + "learning_rate": 3.6730701799885886e-06, + "logits/chosen": -1.5545296669006348, + "logits/rejected": -1.3477898836135864, + "logps/chosen": -245.09902954101562, + "logps/rejected": -452.896728515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.927467346191406, + "rewards/margins": 9.490477561950684, + "rewards/rejected": -20.417945861816406, + "step": 14281 + }, + { + "epoch": 2.22, + "learning_rate": 3.6723367394574405e-06, + "logits/chosen": -2.6688389778137207, + "logits/rejected": -2.159968137741089, + "logps/chosen": -440.0504150390625, + "logps/rejected": -383.54925537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.216522693634033, + "rewards/margins": 10.703468322753906, + "rewards/rejected": -15.919990539550781, + "step": 14282 + }, + { + "epoch": 2.22, + "learning_rate": 3.6716032989262932e-06, + "logits/chosen": -2.7747507095336914, + "logits/rejected": -1.7889045476913452, + "logps/chosen": -631.5099487304688, + "logps/rejected": -457.5933532714844, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.543885231018066, + "rewards/margins": 9.468420028686523, + "rewards/rejected": -18.012304306030273, + "step": 14283 + }, + { + "epoch": 2.22, + "learning_rate": 3.670869858395145e-06, + "logits/chosen": -2.7215821743011475, + "logits/rejected": -1.6587533950805664, + "logps/chosen": -685.7199096679688, + "logps/rejected": -649.7628173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.73786735534668, + "rewards/margins": 11.024850845336914, + "rewards/rejected": -21.762718200683594, + "step": 14284 + }, + { + "epoch": 2.22, + "learning_rate": 3.670136417863997e-06, + "logits/chosen": -2.5931038856506348, + "logits/rejected": -2.4908945560455322, + "logps/chosen": -684.327880859375, + "logps/rejected": -659.34521484375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.049762725830078, + "rewards/margins": 7.800405025482178, + "rewards/rejected": -19.850168228149414, + "step": 14285 + }, + { + "epoch": 2.22, + "learning_rate": 3.669402977332849e-06, + "logits/chosen": -1.9279029369354248, + "logits/rejected": -2.8608763217926025, + "logps/chosen": -212.68557739257812, + "logps/rejected": -531.76220703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.770263671875, + "rewards/margins": 13.489459991455078, + "rewards/rejected": -20.259723663330078, + "step": 14286 + }, + { + "epoch": 2.22, + "learning_rate": 3.6686695368017016e-06, + "logits/chosen": -2.468968391418457, + "logits/rejected": -2.6483523845672607, + "logps/chosen": -343.38348388671875, + "logps/rejected": -385.95880126953125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.092357635498047, + "rewards/margins": 7.2805094718933105, + "rewards/rejected": -16.372867584228516, + "step": 14287 + }, + { + "epoch": 2.22, + "learning_rate": 3.6679360962705535e-06, + "logits/chosen": -2.747368335723877, + "logits/rejected": -2.638659715652466, + "logps/chosen": -399.662841796875, + "logps/rejected": -532.615478515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.7665433883667, + "rewards/margins": 9.511950492858887, + "rewards/rejected": -19.278493881225586, + "step": 14288 + }, + { + "epoch": 2.22, + "learning_rate": 3.6672026557394058e-06, + "logits/chosen": -0.8514009118080139, + "logits/rejected": -2.4802141189575195, + "logps/chosen": -202.37060546875, + "logps/rejected": -585.7012939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.049817085266113, + "rewards/margins": 11.449999809265137, + "rewards/rejected": -18.49981689453125, + "step": 14289 + }, + { + "epoch": 2.22, + "learning_rate": 3.6664692152082577e-06, + "logits/chosen": -1.2714868783950806, + "logits/rejected": -2.7431676387786865, + "logps/chosen": -138.93511962890625, + "logps/rejected": -432.1888427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.129644393920898, + "rewards/margins": 9.035274505615234, + "rewards/rejected": -18.164920806884766, + "step": 14290 + }, + { + "epoch": 2.22, + "learning_rate": 3.6657357746771095e-06, + "logits/chosen": -2.4391794204711914, + "logits/rejected": -2.807401657104492, + "logps/chosen": -267.16864013671875, + "logps/rejected": -243.9594268798828, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.651552200317383, + "rewards/margins": 8.280527114868164, + "rewards/rejected": -14.932080268859863, + "step": 14291 + }, + { + "epoch": 2.22, + "learning_rate": 3.6650023341459623e-06, + "logits/chosen": -2.4134833812713623, + "logits/rejected": -2.5173490047454834, + "logps/chosen": -175.40597534179688, + "logps/rejected": -352.88348388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.404817581176758, + "rewards/margins": 11.520304679870605, + "rewards/rejected": -20.925121307373047, + "step": 14292 + }, + { + "epoch": 2.22, + "learning_rate": 3.664268893614814e-06, + "logits/chosen": -2.6642816066741943, + "logits/rejected": -2.9568209648132324, + "logps/chosen": -166.05081176757812, + "logps/rejected": -337.2341613769531, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1705641746521, + "rewards/margins": 8.361677169799805, + "rewards/rejected": -12.532240867614746, + "step": 14293 + }, + { + "epoch": 2.22, + "learning_rate": 3.663535453083666e-06, + "logits/chosen": -2.6489856243133545, + "logits/rejected": -2.0344059467315674, + "logps/chosen": -486.5101623535156, + "logps/rejected": -380.1048583984375, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.631902694702148, + "rewards/margins": 8.752538681030273, + "rewards/rejected": -19.384441375732422, + "step": 14294 + }, + { + "epoch": 2.22, + "learning_rate": 3.662802012552518e-06, + "logits/chosen": -2.6454882621765137, + "logits/rejected": -1.607938289642334, + "logps/chosen": -787.96337890625, + "logps/rejected": -472.9828796386719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.418159484863281, + "rewards/margins": 10.995597839355469, + "rewards/rejected": -15.41375732421875, + "step": 14295 + }, + { + "epoch": 2.22, + "learning_rate": 3.6620685720213706e-06, + "logits/chosen": -2.257791042327881, + "logits/rejected": -2.587448835372925, + "logps/chosen": -232.54171752929688, + "logps/rejected": -387.1126708984375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.150059700012207, + "rewards/margins": 7.5137939453125, + "rewards/rejected": -18.66385269165039, + "step": 14296 + }, + { + "epoch": 2.22, + "learning_rate": 3.6613351314902225e-06, + "logits/chosen": -1.6468145847320557, + "logits/rejected": -2.472749710083008, + "logps/chosen": -113.0544662475586, + "logps/rejected": -300.16583251953125, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.175907135009766, + "rewards/margins": 7.437686920166016, + "rewards/rejected": -15.613594055175781, + "step": 14297 + }, + { + "epoch": 2.22, + "learning_rate": 3.660601690959075e-06, + "logits/chosen": -2.7774338722229004, + "logits/rejected": -1.421709418296814, + "logps/chosen": -253.91554260253906, + "logps/rejected": -173.6241455078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.702266693115234, + "rewards/margins": 8.756406784057617, + "rewards/rejected": -15.458673477172852, + "step": 14298 + }, + { + "epoch": 2.22, + "learning_rate": 3.6598682504279267e-06, + "logits/chosen": -3.006056308746338, + "logits/rejected": -2.7218077182769775, + "logps/chosen": -258.25445556640625, + "logps/rejected": -310.3380126953125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.906728744506836, + "rewards/margins": 9.086226463317871, + "rewards/rejected": -14.99295425415039, + "step": 14299 + }, + { + "epoch": 2.22, + "learning_rate": 3.6591348098967786e-06, + "logits/chosen": -2.311220645904541, + "logits/rejected": -2.819467067718506, + "logps/chosen": -140.20159912109375, + "logps/rejected": -334.0412292480469, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.258875846862793, + "rewards/margins": 9.007871627807617, + "rewards/rejected": -15.266748428344727, + "step": 14300 + }, + { + "epoch": 2.22, + "learning_rate": 3.6584013693656313e-06, + "logits/chosen": -2.668125629425049, + "logits/rejected": -2.491137742996216, + "logps/chosen": -284.12164306640625, + "logps/rejected": -377.3024597167969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.078770637512207, + "rewards/margins": 10.035102844238281, + "rewards/rejected": -18.113874435424805, + "step": 14301 + }, + { + "epoch": 2.22, + "learning_rate": 3.657667928834483e-06, + "logits/chosen": -1.562004804611206, + "logits/rejected": -2.469054698944092, + "logps/chosen": -141.46536254882812, + "logps/rejected": -368.03173828125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.352958679199219, + "rewards/margins": 9.491037368774414, + "rewards/rejected": -17.843996047973633, + "step": 14302 + }, + { + "epoch": 2.22, + "learning_rate": 3.656934488303335e-06, + "logits/chosen": -2.8149662017822266, + "logits/rejected": -2.3403728008270264, + "logps/chosen": -739.5999145507812, + "logps/rejected": -546.96044921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.343184947967529, + "rewards/margins": 8.73812198638916, + "rewards/rejected": -14.081307411193848, + "step": 14303 + }, + { + "epoch": 2.22, + "learning_rate": 3.656201047772187e-06, + "logits/chosen": -1.7399314641952515, + "logits/rejected": -2.8622615337371826, + "logps/chosen": -107.38748168945312, + "logps/rejected": -607.54443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.43575668334961, + "rewards/margins": 11.94681453704834, + "rewards/rejected": -21.382572174072266, + "step": 14304 + }, + { + "epoch": 2.22, + "learning_rate": 3.6554676072410396e-06, + "logits/chosen": -1.6588021516799927, + "logits/rejected": -2.7030951976776123, + "logps/chosen": -321.9091796875, + "logps/rejected": -682.4788208007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.069454193115234, + "rewards/margins": 13.383947372436523, + "rewards/rejected": -19.453401565551758, + "step": 14305 + }, + { + "epoch": 2.22, + "learning_rate": 3.654734166709892e-06, + "logits/chosen": -2.9420061111450195, + "logits/rejected": -2.9157207012176514, + "logps/chosen": -387.6944885253906, + "logps/rejected": -371.2826232910156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.506120204925537, + "rewards/margins": 8.234951972961426, + "rewards/rejected": -14.741071701049805, + "step": 14306 + }, + { + "epoch": 2.23, + "learning_rate": 3.654000726178744e-06, + "logits/chosen": -1.1973868608474731, + "logits/rejected": -2.4930660724639893, + "logps/chosen": -390.2630310058594, + "logps/rejected": -636.8125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.430817604064941, + "rewards/margins": 13.753600120544434, + "rewards/rejected": -25.184417724609375, + "step": 14307 + }, + { + "epoch": 2.23, + "learning_rate": 3.6532672856475957e-06, + "logits/chosen": -2.5736782550811768, + "logits/rejected": -2.7485413551330566, + "logps/chosen": -557.6233520507812, + "logps/rejected": -694.239013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.576076030731201, + "rewards/margins": 9.842842102050781, + "rewards/rejected": -16.41891860961914, + "step": 14308 + }, + { + "epoch": 2.23, + "learning_rate": 3.6525338451164484e-06, + "logits/chosen": -1.4065111875534058, + "logits/rejected": -2.4725112915039062, + "logps/chosen": -410.0609130859375, + "logps/rejected": -453.49053955078125, + "loss": 1.0585, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.23980712890625, + "rewards/margins": 5.309171676635742, + "rewards/rejected": -17.548978805541992, + "step": 14309 + }, + { + "epoch": 2.23, + "learning_rate": 3.6518004045853003e-06, + "logits/chosen": -2.5213074684143066, + "logits/rejected": -1.5172325372695923, + "logps/chosen": -363.32794189453125, + "logps/rejected": -376.8585205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.812599182128906, + "rewards/margins": 10.485973358154297, + "rewards/rejected": -22.298572540283203, + "step": 14310 + }, + { + "epoch": 2.23, + "learning_rate": 3.651066964054152e-06, + "logits/chosen": -1.6042389869689941, + "logits/rejected": -1.3842434883117676, + "logps/chosen": -645.51025390625, + "logps/rejected": -424.8390808105469, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.743377685546875, + "rewards/margins": 8.682619094848633, + "rewards/rejected": -19.425996780395508, + "step": 14311 + }, + { + "epoch": 2.23, + "learning_rate": 3.650333523523004e-06, + "logits/chosen": -1.996827244758606, + "logits/rejected": -2.5849099159240723, + "logps/chosen": -346.2237854003906, + "logps/rejected": -372.7027282714844, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.904956817626953, + "rewards/margins": 10.342967987060547, + "rewards/rejected": -19.2479248046875, + "step": 14312 + }, + { + "epoch": 2.23, + "learning_rate": 3.649600082991856e-06, + "logits/chosen": -2.78020977973938, + "logits/rejected": -1.860718846321106, + "logps/chosen": -263.7217712402344, + "logps/rejected": -202.68008422851562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0260703563690186, + "rewards/margins": 12.912878036499023, + "rewards/rejected": -15.938948631286621, + "step": 14313 + }, + { + "epoch": 2.23, + "learning_rate": 3.6488666424607087e-06, + "logits/chosen": -2.2330427169799805, + "logits/rejected": -2.7824676036834717, + "logps/chosen": -189.90908813476562, + "logps/rejected": -531.60107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.717261791229248, + "rewards/margins": 12.942924499511719, + "rewards/rejected": -20.660186767578125, + "step": 14314 + }, + { + "epoch": 2.23, + "learning_rate": 3.648133201929561e-06, + "logits/chosen": -1.3707079887390137, + "logits/rejected": -2.5776283740997314, + "logps/chosen": -162.53524780273438, + "logps/rejected": -495.4432373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.092886924743652, + "rewards/margins": 10.403878211975098, + "rewards/rejected": -21.49676513671875, + "step": 14315 + }, + { + "epoch": 2.23, + "learning_rate": 3.647399761398413e-06, + "logits/chosen": -2.2839815616607666, + "logits/rejected": -2.7768194675445557, + "logps/chosen": -296.3623962402344, + "logps/rejected": -479.4383544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.636206150054932, + "rewards/margins": 10.960028648376465, + "rewards/rejected": -17.596235275268555, + "step": 14316 + }, + { + "epoch": 2.23, + "learning_rate": 3.6466663208672647e-06, + "logits/chosen": -2.766084909439087, + "logits/rejected": -1.9267446994781494, + "logps/chosen": -687.4951171875, + "logps/rejected": -478.7244873046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.844268798828125, + "rewards/margins": 8.97757625579834, + "rewards/rejected": -17.82184600830078, + "step": 14317 + }, + { + "epoch": 2.23, + "learning_rate": 3.6459328803361174e-06, + "logits/chosen": -2.8336715698242188, + "logits/rejected": -1.9180548191070557, + "logps/chosen": -1265.6495361328125, + "logps/rejected": -782.2359008789062, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.41879653930664, + "rewards/margins": 7.52418851852417, + "rewards/rejected": -17.94298553466797, + "step": 14318 + }, + { + "epoch": 2.23, + "learning_rate": 3.6451994398049693e-06, + "logits/chosen": -1.9917012453079224, + "logits/rejected": -2.485814332962036, + "logps/chosen": -95.37309265136719, + "logps/rejected": -301.38287353515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.17371940612793, + "rewards/margins": 10.560628890991211, + "rewards/rejected": -17.73434829711914, + "step": 14319 + }, + { + "epoch": 2.23, + "learning_rate": 3.644465999273821e-06, + "logits/chosen": -2.653005599975586, + "logits/rejected": -2.1385068893432617, + "logps/chosen": -210.52442932128906, + "logps/rejected": -325.6549072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.710793495178223, + "rewards/margins": 11.668172836303711, + "rewards/rejected": -17.37896728515625, + "step": 14320 + }, + { + "epoch": 2.23, + "learning_rate": 3.643732558742673e-06, + "logits/chosen": -2.2378039360046387, + "logits/rejected": -2.920494556427002, + "logps/chosen": -205.96788024902344, + "logps/rejected": -424.59259033203125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.174047470092773, + "rewards/margins": 9.027181625366211, + "rewards/rejected": -17.201229095458984, + "step": 14321 + }, + { + "epoch": 2.23, + "learning_rate": 3.6429991182115254e-06, + "logits/chosen": -2.428870677947998, + "logits/rejected": -2.717377185821533, + "logps/chosen": -81.34772491455078, + "logps/rejected": -247.13307189941406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.441573143005371, + "rewards/margins": 9.747523307800293, + "rewards/rejected": -16.189096450805664, + "step": 14322 + }, + { + "epoch": 2.23, + "learning_rate": 3.642265677680378e-06, + "logits/chosen": -2.3771438598632812, + "logits/rejected": -2.4912827014923096, + "logps/chosen": -303.45458984375, + "logps/rejected": -368.1234130859375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.942834854125977, + "rewards/margins": 8.896339416503906, + "rewards/rejected": -17.839174270629883, + "step": 14323 + }, + { + "epoch": 2.23, + "learning_rate": 3.64153223714923e-06, + "logits/chosen": -2.2368390560150146, + "logits/rejected": -2.844331979751587, + "logps/chosen": -139.66116333007812, + "logps/rejected": -318.39691162109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7283101081848145, + "rewards/margins": 8.849857330322266, + "rewards/rejected": -15.578167915344238, + "step": 14324 + }, + { + "epoch": 2.23, + "learning_rate": 3.640798796618082e-06, + "logits/chosen": -2.653268575668335, + "logits/rejected": -1.9541982412338257, + "logps/chosen": -591.4683837890625, + "logps/rejected": -513.2996826171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.118062019348145, + "rewards/margins": 9.48398208618164, + "rewards/rejected": -19.6020450592041, + "step": 14325 + }, + { + "epoch": 2.23, + "learning_rate": 3.6400653560869337e-06, + "logits/chosen": -2.340419292449951, + "logits/rejected": -2.9823877811431885, + "logps/chosen": -316.0088806152344, + "logps/rejected": -686.3228149414062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.634298324584961, + "rewards/margins": 14.039106369018555, + "rewards/rejected": -21.673404693603516, + "step": 14326 + }, + { + "epoch": 2.23, + "learning_rate": 3.6393319155557865e-06, + "logits/chosen": -2.3366568088531494, + "logits/rejected": -2.8162734508514404, + "logps/chosen": -92.98033905029297, + "logps/rejected": -308.2074279785156, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.23158073425293, + "rewards/margins": 7.934796333312988, + "rewards/rejected": -16.166378021240234, + "step": 14327 + }, + { + "epoch": 2.23, + "learning_rate": 3.6385984750246383e-06, + "logits/chosen": -2.320197105407715, + "logits/rejected": -2.7773473262786865, + "logps/chosen": -497.68951416015625, + "logps/rejected": -622.06201171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.744926452636719, + "rewards/margins": 10.545927047729492, + "rewards/rejected": -17.29085350036621, + "step": 14328 + }, + { + "epoch": 2.23, + "learning_rate": 3.6378650344934902e-06, + "logits/chosen": -2.0509607791900635, + "logits/rejected": -1.98576819896698, + "logps/chosen": -160.39231872558594, + "logps/rejected": -299.12548828125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.869510650634766, + "rewards/margins": 10.14377212524414, + "rewards/rejected": -19.013282775878906, + "step": 14329 + }, + { + "epoch": 2.23, + "learning_rate": 3.637131593962342e-06, + "logits/chosen": -1.549738883972168, + "logits/rejected": -2.2747702598571777, + "logps/chosen": -215.91925048828125, + "logps/rejected": -468.0688781738281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.026122093200684, + "rewards/margins": 10.185519218444824, + "rewards/rejected": -21.211641311645508, + "step": 14330 + }, + { + "epoch": 2.23, + "learning_rate": 3.6363981534311944e-06, + "logits/chosen": -2.213949203491211, + "logits/rejected": -2.441728353500366, + "logps/chosen": -252.18865966796875, + "logps/rejected": -360.03125, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.635262489318848, + "rewards/margins": 5.134825706481934, + "rewards/rejected": -20.77008819580078, + "step": 14331 + }, + { + "epoch": 2.23, + "learning_rate": 3.635664712900047e-06, + "logits/chosen": -2.5595481395721436, + "logits/rejected": -2.705890417098999, + "logps/chosen": -559.0598754882812, + "logps/rejected": -594.2952880859375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.889958381652832, + "rewards/margins": 9.021757125854492, + "rewards/rejected": -17.91171646118164, + "step": 14332 + }, + { + "epoch": 2.23, + "learning_rate": 3.634931272368899e-06, + "logits/chosen": -1.323687195777893, + "logits/rejected": -2.641024351119995, + "logps/chosen": -120.0924301147461, + "logps/rejected": -328.8820495605469, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.020570755004883, + "rewards/margins": 9.387998580932617, + "rewards/rejected": -15.4085693359375, + "step": 14333 + }, + { + "epoch": 2.23, + "learning_rate": 3.634197831837751e-06, + "logits/chosen": -2.616942882537842, + "logits/rejected": -2.7929415702819824, + "logps/chosen": -271.46856689453125, + "logps/rejected": -368.9788818359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.173767566680908, + "rewards/margins": 9.107666969299316, + "rewards/rejected": -16.281435012817383, + "step": 14334 + }, + { + "epoch": 2.23, + "learning_rate": 3.6334643913066028e-06, + "logits/chosen": -2.5602476596832275, + "logits/rejected": -1.7549046277999878, + "logps/chosen": -314.4571838378906, + "logps/rejected": -355.2314758300781, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.656354904174805, + "rewards/margins": 9.076139450073242, + "rewards/rejected": -18.732494354248047, + "step": 14335 + }, + { + "epoch": 2.23, + "learning_rate": 3.6327309507754555e-06, + "logits/chosen": -2.632996082305908, + "logits/rejected": -2.2116036415100098, + "logps/chosen": -433.6875305175781, + "logps/rejected": -673.235107421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.219152450561523, + "rewards/margins": 13.023380279541016, + "rewards/rejected": -22.24253273010254, + "step": 14336 + }, + { + "epoch": 2.23, + "learning_rate": 3.6319975102443074e-06, + "logits/chosen": -2.787266492843628, + "logits/rejected": -2.6061971187591553, + "logps/chosen": -224.65731811523438, + "logps/rejected": -434.2146301269531, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.048219680786133, + "rewards/margins": 9.577777862548828, + "rewards/rejected": -18.625999450683594, + "step": 14337 + }, + { + "epoch": 2.23, + "learning_rate": 3.6312640697131592e-06, + "logits/chosen": -2.5977351665496826, + "logits/rejected": -1.9515347480773926, + "logps/chosen": -642.12744140625, + "logps/rejected": -608.9258422851562, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.398053169250488, + "rewards/margins": 7.543643474578857, + "rewards/rejected": -17.941696166992188, + "step": 14338 + }, + { + "epoch": 2.23, + "learning_rate": 3.6305306291820115e-06, + "logits/chosen": -2.0609145164489746, + "logits/rejected": -2.7686169147491455, + "logps/chosen": -154.0039520263672, + "logps/rejected": -226.33953857421875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2735090255737305, + "rewards/margins": 7.264049053192139, + "rewards/rejected": -13.537558555603027, + "step": 14339 + }, + { + "epoch": 2.23, + "learning_rate": 3.6297971886508634e-06, + "logits/chosen": -2.5744917392730713, + "logits/rejected": -1.718118667602539, + "logps/chosen": -242.35328674316406, + "logps/rejected": -318.65399169921875, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.741840362548828, + "rewards/margins": 9.146110534667969, + "rewards/rejected": -20.887950897216797, + "step": 14340 + }, + { + "epoch": 2.23, + "learning_rate": 3.629063748119716e-06, + "logits/chosen": -2.1237480640411377, + "logits/rejected": -2.909344434738159, + "logps/chosen": -210.59271240234375, + "logps/rejected": -366.5152893066406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.733401775360107, + "rewards/margins": 12.323883056640625, + "rewards/rejected": -19.05728530883789, + "step": 14341 + }, + { + "epoch": 2.23, + "learning_rate": 3.628330307588568e-06, + "logits/chosen": -2.7802505493164062, + "logits/rejected": -2.648735284805298, + "logps/chosen": -465.5549621582031, + "logps/rejected": -216.0559539794922, + "loss": 0.8505, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.819036483764648, + "rewards/margins": 3.1310224533081055, + "rewards/rejected": -12.950057983398438, + "step": 14342 + }, + { + "epoch": 2.23, + "learning_rate": 3.62759686705742e-06, + "logits/chosen": -2.6360466480255127, + "logits/rejected": -2.6907105445861816, + "logps/chosen": -355.23980712890625, + "logps/rejected": -531.705078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.780409812927246, + "rewards/margins": 14.39150619506836, + "rewards/rejected": -20.171916961669922, + "step": 14343 + }, + { + "epoch": 2.23, + "learning_rate": 3.6268634265262718e-06, + "logits/chosen": -2.5849382877349854, + "logits/rejected": -2.724822759628296, + "logps/chosen": -204.70281982421875, + "logps/rejected": -320.90087890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.155379295349121, + "rewards/margins": 11.211084365844727, + "rewards/rejected": -16.36646270751953, + "step": 14344 + }, + { + "epoch": 2.23, + "learning_rate": 3.6261299859951245e-06, + "logits/chosen": -0.7890562415122986, + "logits/rejected": -2.6258392333984375, + "logps/chosen": -93.35359954833984, + "logps/rejected": -364.95721435546875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.09396743774414, + "rewards/margins": 8.265228271484375, + "rewards/rejected": -16.359195709228516, + "step": 14345 + }, + { + "epoch": 2.23, + "learning_rate": 3.6253965454639764e-06, + "logits/chosen": -2.011009693145752, + "logits/rejected": -2.7730252742767334, + "logps/chosen": -394.6584167480469, + "logps/rejected": -655.0301513671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.452357292175293, + "rewards/margins": 8.970915794372559, + "rewards/rejected": -18.42327308654785, + "step": 14346 + }, + { + "epoch": 2.23, + "learning_rate": 3.6246631049328283e-06, + "logits/chosen": -2.519479751586914, + "logits/rejected": -2.6738779544830322, + "logps/chosen": -394.017578125, + "logps/rejected": -515.146240234375, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.027813911437988, + "rewards/margins": 3.5787277221679688, + "rewards/rejected": -13.606541633605957, + "step": 14347 + }, + { + "epoch": 2.23, + "learning_rate": 3.6239296644016806e-06, + "logits/chosen": -2.5741052627563477, + "logits/rejected": -2.6396067142486572, + "logps/chosen": -210.1096649169922, + "logps/rejected": -517.344482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.651812553405762, + "rewards/margins": 13.907295227050781, + "rewards/rejected": -21.559106826782227, + "step": 14348 + }, + { + "epoch": 2.23, + "learning_rate": 3.6231962238705324e-06, + "logits/chosen": -2.7862775325775146, + "logits/rejected": -2.8261046409606934, + "logps/chosen": -563.9072875976562, + "logps/rejected": -507.87322998046875, + "loss": 0.2585, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.120004653930664, + "rewards/margins": 7.011201858520508, + "rewards/rejected": -16.131206512451172, + "step": 14349 + }, + { + "epoch": 2.23, + "learning_rate": 3.622462783339385e-06, + "logits/chosen": -2.7733705043792725, + "logits/rejected": -0.5899451971054077, + "logps/chosen": -522.5032348632812, + "logps/rejected": -163.66172790527344, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7779669761657715, + "rewards/margins": 8.20042610168457, + "rewards/rejected": -10.978392601013184, + "step": 14350 + }, + { + "epoch": 2.23, + "learning_rate": 3.621729342808237e-06, + "logits/chosen": -2.0759689807891846, + "logits/rejected": -1.8116130828857422, + "logps/chosen": -155.86648559570312, + "logps/rejected": -367.87811279296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.028345108032227, + "rewards/margins": 11.94507122039795, + "rewards/rejected": -21.97341537475586, + "step": 14351 + }, + { + "epoch": 2.23, + "learning_rate": 3.620995902277089e-06, + "logits/chosen": -2.518759250640869, + "logits/rejected": -2.8107893466949463, + "logps/chosen": -326.4866027832031, + "logps/rejected": -442.8203430175781, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.585422039031982, + "rewards/margins": 9.760727882385254, + "rewards/rejected": -15.346149444580078, + "step": 14352 + }, + { + "epoch": 2.23, + "learning_rate": 3.620262461745941e-06, + "logits/chosen": -2.5963399410247803, + "logits/rejected": -2.816816806793213, + "logps/chosen": -249.35946655273438, + "logps/rejected": -576.766357421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.193721771240234, + "rewards/margins": 10.120572090148926, + "rewards/rejected": -19.314292907714844, + "step": 14353 + }, + { + "epoch": 2.23, + "learning_rate": 3.6195290212147935e-06, + "logits/chosen": -1.86247980594635, + "logits/rejected": -2.7100257873535156, + "logps/chosen": -305.1604309082031, + "logps/rejected": -603.4263916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.705636024475098, + "rewards/margins": 13.90847396850586, + "rewards/rejected": -23.61410903930664, + "step": 14354 + }, + { + "epoch": 2.23, + "learning_rate": 3.6187955806836454e-06, + "logits/chosen": -1.383514165878296, + "logits/rejected": -2.2355642318725586, + "logps/chosen": -164.55821228027344, + "logps/rejected": -391.52471923828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.413827896118164, + "rewards/margins": 9.104900360107422, + "rewards/rejected": -19.518728256225586, + "step": 14355 + }, + { + "epoch": 2.23, + "learning_rate": 3.6180621401524977e-06, + "logits/chosen": -2.679368495941162, + "logits/rejected": -2.304884910583496, + "logps/chosen": -520.1529541015625, + "logps/rejected": -490.44317626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.522481918334961, + "rewards/margins": 10.321342468261719, + "rewards/rejected": -17.843822479248047, + "step": 14356 + }, + { + "epoch": 2.23, + "learning_rate": 3.6173286996213496e-06, + "logits/chosen": -2.232231855392456, + "logits/rejected": -2.7646687030792236, + "logps/chosen": -237.7098388671875, + "logps/rejected": -414.8080749511719, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.002662658691406, + "rewards/margins": 5.757164478302002, + "rewards/rejected": -16.75982666015625, + "step": 14357 + }, + { + "epoch": 2.23, + "learning_rate": 3.6165952590902023e-06, + "logits/chosen": -1.1852741241455078, + "logits/rejected": -2.397367238998413, + "logps/chosen": -272.9794921875, + "logps/rejected": -685.1485595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.424470901489258, + "rewards/margins": 14.623695373535156, + "rewards/rejected": -26.048166275024414, + "step": 14358 + }, + { + "epoch": 2.23, + "learning_rate": 3.615861818559054e-06, + "logits/chosen": -2.3923757076263428, + "logits/rejected": -3.0053560733795166, + "logps/chosen": -80.62162780761719, + "logps/rejected": -337.039306640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.19654655456543, + "rewards/margins": 9.811059951782227, + "rewards/rejected": -17.007606506347656, + "step": 14359 + }, + { + "epoch": 2.23, + "learning_rate": 3.615128378027906e-06, + "logits/chosen": -1.7076927423477173, + "logits/rejected": -2.244163751602173, + "logps/chosen": -327.269775390625, + "logps/rejected": -499.76751708984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.498138427734375, + "rewards/margins": 10.352951049804688, + "rewards/rejected": -16.851089477539062, + "step": 14360 + }, + { + "epoch": 2.23, + "learning_rate": 3.614394937496758e-06, + "logits/chosen": -2.4214730262756348, + "logits/rejected": -1.0511250495910645, + "logps/chosen": -211.26751708984375, + "logps/rejected": -189.72296142578125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.646561622619629, + "rewards/margins": 7.076936721801758, + "rewards/rejected": -13.723498344421387, + "step": 14361 + }, + { + "epoch": 2.23, + "learning_rate": 3.61366149696561e-06, + "logits/chosen": -1.9270946979522705, + "logits/rejected": -2.4983932971954346, + "logps/chosen": -105.52783966064453, + "logps/rejected": -304.07525634765625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.071364402770996, + "rewards/margins": 9.338380813598633, + "rewards/rejected": -17.409744262695312, + "step": 14362 + }, + { + "epoch": 2.23, + "learning_rate": 3.6129280564344626e-06, + "logits/chosen": -2.4522268772125244, + "logits/rejected": -2.7788662910461426, + "logps/chosen": -93.3963851928711, + "logps/rejected": -276.1378479003906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.858357906341553, + "rewards/margins": 11.224284172058105, + "rewards/rejected": -18.0826416015625, + "step": 14363 + }, + { + "epoch": 2.23, + "learning_rate": 3.6121946159033144e-06, + "logits/chosen": -1.6687860488891602, + "logits/rejected": -2.9718258380889893, + "logps/chosen": -158.78614807128906, + "logps/rejected": -652.7570190429688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.361869812011719, + "rewards/margins": 10.352109909057617, + "rewards/rejected": -15.713979721069336, + "step": 14364 + }, + { + "epoch": 2.23, + "learning_rate": 3.6114611753721667e-06, + "logits/chosen": -2.6288185119628906, + "logits/rejected": -2.9648642539978027, + "logps/chosen": -134.99465942382812, + "logps/rejected": -321.37933349609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9519968032836914, + "rewards/margins": 10.145878791809082, + "rewards/rejected": -14.097875595092773, + "step": 14365 + }, + { + "epoch": 2.23, + "learning_rate": 3.6107277348410186e-06, + "logits/chosen": -2.4462530612945557, + "logits/rejected": -2.00238037109375, + "logps/chosen": -264.7502136230469, + "logps/rejected": -288.57330322265625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.560051918029785, + "rewards/margins": 8.603315353393555, + "rewards/rejected": -15.163368225097656, + "step": 14366 + }, + { + "epoch": 2.23, + "learning_rate": 3.6099942943098713e-06, + "logits/chosen": -1.9999585151672363, + "logits/rejected": -2.6300747394561768, + "logps/chosen": -296.45135498046875, + "logps/rejected": -367.97833251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.345499515533447, + "rewards/margins": 10.490232467651367, + "rewards/rejected": -15.835731506347656, + "step": 14367 + }, + { + "epoch": 2.23, + "learning_rate": 3.6092608537787232e-06, + "logits/chosen": -2.4662230014801025, + "logits/rejected": -2.7300398349761963, + "logps/chosen": -83.77963256835938, + "logps/rejected": -246.40090942382812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.21541690826416, + "rewards/margins": 9.365979194641113, + "rewards/rejected": -13.581396102905273, + "step": 14368 + }, + { + "epoch": 2.23, + "learning_rate": 3.608527413247575e-06, + "logits/chosen": -2.3077309131622314, + "logits/rejected": -2.7508316040039062, + "logps/chosen": -309.00079345703125, + "logps/rejected": -730.1533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.736030578613281, + "rewards/margins": 19.321823120117188, + "rewards/rejected": -29.05785369873047, + "step": 14369 + }, + { + "epoch": 2.23, + "learning_rate": 3.607793972716427e-06, + "logits/chosen": -2.6130874156951904, + "logits/rejected": -1.7006622552871704, + "logps/chosen": -345.02349853515625, + "logps/rejected": -363.3708190917969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.608657836914062, + "rewards/margins": 12.399559020996094, + "rewards/rejected": -23.008216857910156, + "step": 14370 + }, + { + "epoch": 2.23, + "learning_rate": 3.607060532185279e-06, + "logits/chosen": -2.841362714767456, + "logits/rejected": -2.143754005432129, + "logps/chosen": -334.12860107421875, + "logps/rejected": -391.9423828125, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.117386817932129, + "rewards/margins": 8.528214454650879, + "rewards/rejected": -17.645601272583008, + "step": 14371 + }, + { + "epoch": 2.24, + "learning_rate": 3.6063270916541316e-06, + "logits/chosen": -2.198502779006958, + "logits/rejected": -2.7512762546539307, + "logps/chosen": -580.519287109375, + "logps/rejected": -1364.00244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.376920700073242, + "rewards/margins": 17.51279640197754, + "rewards/rejected": -25.88971710205078, + "step": 14372 + }, + { + "epoch": 2.24, + "learning_rate": 3.605593651122984e-06, + "logits/chosen": -1.5882865190505981, + "logits/rejected": -2.6563968658447266, + "logps/chosen": -191.63751220703125, + "logps/rejected": -597.3372802734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.400136947631836, + "rewards/margins": 15.762434005737305, + "rewards/rejected": -24.16257095336914, + "step": 14373 + }, + { + "epoch": 2.24, + "learning_rate": 3.6048602105918358e-06, + "logits/chosen": -2.583664894104004, + "logits/rejected": -1.7809828519821167, + "logps/chosen": -1020.1624145507812, + "logps/rejected": -687.460693359375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.727118492126465, + "rewards/margins": 7.833586692810059, + "rewards/rejected": -19.560705184936523, + "step": 14374 + }, + { + "epoch": 2.24, + "learning_rate": 3.6041267700606876e-06, + "logits/chosen": -1.6186751127243042, + "logits/rejected": -2.29052996635437, + "logps/chosen": -220.51455688476562, + "logps/rejected": -407.5453186035156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.698943138122559, + "rewards/margins": 11.410276412963867, + "rewards/rejected": -21.10921859741211, + "step": 14375 + }, + { + "epoch": 2.24, + "learning_rate": 3.6033933295295404e-06, + "logits/chosen": -2.345810651779175, + "logits/rejected": -1.8793734312057495, + "logps/chosen": -596.89599609375, + "logps/rejected": -780.0274658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.556535720825195, + "rewards/margins": 16.933773040771484, + "rewards/rejected": -27.490306854248047, + "step": 14376 + }, + { + "epoch": 2.24, + "learning_rate": 3.6026598889983922e-06, + "logits/chosen": -1.4113534688949585, + "logits/rejected": -2.748081922531128, + "logps/chosen": -468.75457763671875, + "logps/rejected": -806.27001953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.968244552612305, + "rewards/margins": 9.654930114746094, + "rewards/rejected": -19.62317657470703, + "step": 14377 + }, + { + "epoch": 2.24, + "learning_rate": 3.601926448467244e-06, + "logits/chosen": -2.19549822807312, + "logits/rejected": -2.482236862182617, + "logps/chosen": -258.7115478515625, + "logps/rejected": -410.91357421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.261505126953125, + "rewards/margins": 9.083209991455078, + "rewards/rejected": -21.344715118408203, + "step": 14378 + }, + { + "epoch": 2.24, + "learning_rate": 3.601193007936096e-06, + "logits/chosen": -2.4732253551483154, + "logits/rejected": -2.8384621143341064, + "logps/chosen": -163.02525329589844, + "logps/rejected": -258.47784423828125, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.428769111633301, + "rewards/margins": 6.585544109344482, + "rewards/rejected": -14.014312744140625, + "step": 14379 + }, + { + "epoch": 2.24, + "learning_rate": 3.600459567404948e-06, + "logits/chosen": -2.7787792682647705, + "logits/rejected": -2.176560401916504, + "logps/chosen": -452.6396484375, + "logps/rejected": -272.3739318847656, + "loss": 3.7958, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.114578247070312, + "rewards/margins": 1.1528663635253906, + "rewards/rejected": -11.26744556427002, + "step": 14380 + }, + { + "epoch": 2.24, + "learning_rate": 3.5997261268738006e-06, + "logits/chosen": -2.599546432495117, + "logits/rejected": -2.7541680335998535, + "logps/chosen": -239.18008422851562, + "logps/rejected": -268.17132568359375, + "loss": 0.0366, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.907968521118164, + "rewards/margins": 6.375949859619141, + "rewards/rejected": -16.283918380737305, + "step": 14381 + }, + { + "epoch": 2.24, + "learning_rate": 3.598992686342653e-06, + "logits/chosen": -2.2142632007598877, + "logits/rejected": -2.3237457275390625, + "logps/chosen": -172.11117553710938, + "logps/rejected": -378.0851135253906, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.09843635559082, + "rewards/margins": 7.807399749755859, + "rewards/rejected": -16.90583610534668, + "step": 14382 + }, + { + "epoch": 2.24, + "learning_rate": 3.5982592458115048e-06, + "logits/chosen": -2.7035446166992188, + "logits/rejected": -1.95685613155365, + "logps/chosen": -146.9882049560547, + "logps/rejected": -223.5885009765625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.97805118560791, + "rewards/margins": 8.044275283813477, + "rewards/rejected": -17.022327423095703, + "step": 14383 + }, + { + "epoch": 2.24, + "learning_rate": 3.5975258052803567e-06, + "logits/chosen": -2.608642578125, + "logits/rejected": -2.793198585510254, + "logps/chosen": -277.52972412109375, + "logps/rejected": -443.51092529296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.424335479736328, + "rewards/margins": 10.803417205810547, + "rewards/rejected": -17.227752685546875, + "step": 14384 + }, + { + "epoch": 2.24, + "learning_rate": 3.5967923647492094e-06, + "logits/chosen": -2.6754634380340576, + "logits/rejected": -2.6827900409698486, + "logps/chosen": -711.9068603515625, + "logps/rejected": -549.788818359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.422705173492432, + "rewards/margins": 8.751977920532227, + "rewards/rejected": -15.1746826171875, + "step": 14385 + }, + { + "epoch": 2.24, + "learning_rate": 3.5960589242180613e-06, + "logits/chosen": -2.306464672088623, + "logits/rejected": -1.6083637475967407, + "logps/chosen": -310.45703125, + "logps/rejected": -296.01788330078125, + "loss": 0.2001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.589746475219727, + "rewards/margins": 6.383618354797363, + "rewards/rejected": -13.97336483001709, + "step": 14386 + }, + { + "epoch": 2.24, + "learning_rate": 3.595325483686913e-06, + "logits/chosen": -2.23079776763916, + "logits/rejected": -2.1685287952423096, + "logps/chosen": -222.891845703125, + "logps/rejected": -372.79595947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.601506233215332, + "rewards/margins": 13.450349807739258, + "rewards/rejected": -19.051856994628906, + "step": 14387 + }, + { + "epoch": 2.24, + "learning_rate": 3.594592043155765e-06, + "logits/chosen": -2.6983909606933594, + "logits/rejected": -2.4194297790527344, + "logps/chosen": -210.14483642578125, + "logps/rejected": -383.7326354980469, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.949317932128906, + "rewards/margins": 9.467899322509766, + "rewards/rejected": -17.417217254638672, + "step": 14388 + }, + { + "epoch": 2.24, + "learning_rate": 3.5938586026246173e-06, + "logits/chosen": -2.4191431999206543, + "logits/rejected": -1.2550365924835205, + "logps/chosen": -162.739990234375, + "logps/rejected": -171.33209228515625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.147847175598145, + "rewards/margins": 6.889565467834473, + "rewards/rejected": -15.037412643432617, + "step": 14389 + }, + { + "epoch": 2.24, + "learning_rate": 3.59312516209347e-06, + "logits/chosen": -2.8664474487304688, + "logits/rejected": -2.9258556365966797, + "logps/chosen": -290.69403076171875, + "logps/rejected": -283.40057373046875, + "loss": 0.1988, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.238566398620605, + "rewards/margins": 5.428387641906738, + "rewards/rejected": -13.666954040527344, + "step": 14390 + }, + { + "epoch": 2.24, + "learning_rate": 3.592391721562322e-06, + "logits/chosen": -2.166374683380127, + "logits/rejected": -2.414604663848877, + "logps/chosen": -139.90628051757812, + "logps/rejected": -234.67466735839844, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.662878036499023, + "rewards/margins": 5.462985992431641, + "rewards/rejected": -17.125864028930664, + "step": 14391 + }, + { + "epoch": 2.24, + "learning_rate": 3.591658281031174e-06, + "logits/chosen": -2.865793228149414, + "logits/rejected": -2.300743579864502, + "logps/chosen": -529.5982055664062, + "logps/rejected": -426.51629638671875, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.221892356872559, + "rewards/margins": 6.854839324951172, + "rewards/rejected": -14.07673168182373, + "step": 14392 + }, + { + "epoch": 2.24, + "learning_rate": 3.5909248405000257e-06, + "logits/chosen": -2.387845516204834, + "logits/rejected": -2.7859227657318115, + "logps/chosen": -199.7215576171875, + "logps/rejected": -313.904052734375, + "loss": 0.0444, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.4518461227417, + "rewards/margins": 8.321558952331543, + "rewards/rejected": -21.773405075073242, + "step": 14393 + }, + { + "epoch": 2.24, + "learning_rate": 3.5901913999688784e-06, + "logits/chosen": -2.4222874641418457, + "logits/rejected": -2.8075122833251953, + "logps/chosen": -416.6814270019531, + "logps/rejected": -616.326416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.411599159240723, + "rewards/margins": 14.179141998291016, + "rewards/rejected": -21.590740203857422, + "step": 14394 + }, + { + "epoch": 2.24, + "learning_rate": 3.5894579594377303e-06, + "logits/chosen": -2.767401933670044, + "logits/rejected": -2.9313294887542725, + "logps/chosen": -242.30874633789062, + "logps/rejected": -433.41229248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.240423679351807, + "rewards/margins": 12.45566177368164, + "rewards/rejected": -17.69608497619629, + "step": 14395 + }, + { + "epoch": 2.24, + "learning_rate": 3.588724518906582e-06, + "logits/chosen": -1.1852920055389404, + "logits/rejected": -1.931254267692566, + "logps/chosen": -211.51693725585938, + "logps/rejected": -694.2946166992188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.527992248535156, + "rewards/margins": 18.273000717163086, + "rewards/rejected": -25.800994873046875, + "step": 14396 + }, + { + "epoch": 2.24, + "learning_rate": 3.587991078375434e-06, + "logits/chosen": -2.7299792766571045, + "logits/rejected": -2.879755973815918, + "logps/chosen": -385.95733642578125, + "logps/rejected": -306.8409729003906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.544349193572998, + "rewards/margins": 8.695509910583496, + "rewards/rejected": -16.239858627319336, + "step": 14397 + }, + { + "epoch": 2.24, + "learning_rate": 3.5872576378442863e-06, + "logits/chosen": -2.6177403926849365, + "logits/rejected": -2.490539789199829, + "logps/chosen": -439.22137451171875, + "logps/rejected": -468.8555603027344, + "loss": 0.4051, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.246899604797363, + "rewards/margins": 3.776106834411621, + "rewards/rejected": -17.023006439208984, + "step": 14398 + }, + { + "epoch": 2.24, + "learning_rate": 3.586524197313139e-06, + "logits/chosen": -0.5128933191299438, + "logits/rejected": -1.4369001388549805, + "logps/chosen": -151.96563720703125, + "logps/rejected": -512.1778564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.430691719055176, + "rewards/margins": 11.828052520751953, + "rewards/rejected": -21.258743286132812, + "step": 14399 + }, + { + "epoch": 2.24, + "learning_rate": 3.585790756781991e-06, + "logits/chosen": -2.7199866771698, + "logits/rejected": -2.157543420791626, + "logps/chosen": -399.6163635253906, + "logps/rejected": -517.2105712890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.47538948059082, + "rewards/margins": 8.90206527709961, + "rewards/rejected": -17.377456665039062, + "step": 14400 + }, + { + "epoch": 2.24, + "learning_rate": 3.585057316250843e-06, + "logits/chosen": -1.337674856185913, + "logits/rejected": -1.8451449871063232, + "logps/chosen": -170.60279846191406, + "logps/rejected": -280.610107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.221763610839844, + "rewards/margins": 10.016960144042969, + "rewards/rejected": -21.238723754882812, + "step": 14401 + }, + { + "epoch": 2.24, + "learning_rate": 3.5843238757196947e-06, + "logits/chosen": -2.6447055339813232, + "logits/rejected": -2.0573630332946777, + "logps/chosen": -342.68475341796875, + "logps/rejected": -339.6106872558594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.934289932250977, + "rewards/margins": 12.301065444946289, + "rewards/rejected": -18.235355377197266, + "step": 14402 + }, + { + "epoch": 2.24, + "learning_rate": 3.5835904351885474e-06, + "logits/chosen": -2.5076911449432373, + "logits/rejected": -2.753474235534668, + "logps/chosen": -154.64422607421875, + "logps/rejected": -247.53561401367188, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.192647933959961, + "rewards/margins": 7.223546504974365, + "rewards/rejected": -17.416194915771484, + "step": 14403 + }, + { + "epoch": 2.24, + "learning_rate": 3.5828569946573993e-06, + "logits/chosen": -1.2768546342849731, + "logits/rejected": -2.0514867305755615, + "logps/chosen": -187.41192626953125, + "logps/rejected": -435.13165283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.723156929016113, + "rewards/margins": 10.183881759643555, + "rewards/rejected": -20.907039642333984, + "step": 14404 + }, + { + "epoch": 2.24, + "learning_rate": 3.582123554126251e-06, + "logits/chosen": -2.723545551300049, + "logits/rejected": -2.861572265625, + "logps/chosen": -168.61331176757812, + "logps/rejected": -315.9200134277344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.678061485290527, + "rewards/margins": 8.575542449951172, + "rewards/rejected": -16.253604888916016, + "step": 14405 + }, + { + "epoch": 2.24, + "learning_rate": 3.5813901135951035e-06, + "logits/chosen": -2.663928985595703, + "logits/rejected": -2.489966630935669, + "logps/chosen": -557.0775756835938, + "logps/rejected": -530.9979858398438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.637125015258789, + "rewards/margins": 15.22913932800293, + "rewards/rejected": -21.86626434326172, + "step": 14406 + }, + { + "epoch": 2.24, + "learning_rate": 3.5806566730639562e-06, + "logits/chosen": -2.0882315635681152, + "logits/rejected": -2.8315820693969727, + "logps/chosen": -228.86073303222656, + "logps/rejected": -465.0841064453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.101099967956543, + "rewards/margins": 9.657470703125, + "rewards/rejected": -18.75857162475586, + "step": 14407 + }, + { + "epoch": 2.24, + "learning_rate": 3.579923232532808e-06, + "logits/chosen": -1.900313377380371, + "logits/rejected": -2.9350430965423584, + "logps/chosen": -439.17547607421875, + "logps/rejected": -585.2705688476562, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.628101348876953, + "rewards/margins": 5.973515510559082, + "rewards/rejected": -17.60161590576172, + "step": 14408 + }, + { + "epoch": 2.24, + "learning_rate": 3.57918979200166e-06, + "logits/chosen": -2.221583366394043, + "logits/rejected": -2.597498893737793, + "logps/chosen": -281.26922607421875, + "logps/rejected": -444.05731201171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.739415168762207, + "rewards/margins": 9.982829093933105, + "rewards/rejected": -18.722244262695312, + "step": 14409 + }, + { + "epoch": 2.24, + "learning_rate": 3.578456351470512e-06, + "logits/chosen": -2.589129686355591, + "logits/rejected": -1.3634065389633179, + "logps/chosen": -188.7611083984375, + "logps/rejected": -197.6090087890625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.057489395141602, + "rewards/margins": 7.626871109008789, + "rewards/rejected": -15.68436050415039, + "step": 14410 + }, + { + "epoch": 2.24, + "learning_rate": 3.5777229109393637e-06, + "logits/chosen": -2.409698963165283, + "logits/rejected": -2.7414298057556152, + "logps/chosen": -142.64077758789062, + "logps/rejected": -247.11549377441406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3362226486206055, + "rewards/margins": 9.367897033691406, + "rewards/rejected": -12.704120635986328, + "step": 14411 + }, + { + "epoch": 2.24, + "learning_rate": 3.5769894704082165e-06, + "logits/chosen": -1.653251051902771, + "logits/rejected": -2.428211212158203, + "logps/chosen": -274.51092529296875, + "logps/rejected": -439.16607666015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.40509033203125, + "rewards/margins": 9.889776229858398, + "rewards/rejected": -20.29486846923828, + "step": 14412 + }, + { + "epoch": 2.24, + "learning_rate": 3.5762560298770683e-06, + "logits/chosen": -2.4987361431121826, + "logits/rejected": -2.1810810565948486, + "logps/chosen": -284.658935546875, + "logps/rejected": -464.28338623046875, + "loss": 0.2169, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.383943557739258, + "rewards/margins": 5.693294525146484, + "rewards/rejected": -19.077238082885742, + "step": 14413 + }, + { + "epoch": 2.24, + "learning_rate": 3.57552258934592e-06, + "logits/chosen": -1.0744913816452026, + "logits/rejected": -2.348177909851074, + "logps/chosen": -224.13250732421875, + "logps/rejected": -681.9033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0301666259765625, + "rewards/margins": 20.30840301513672, + "rewards/rejected": -27.33856964111328, + "step": 14414 + }, + { + "epoch": 2.24, + "learning_rate": 3.5747891488147725e-06, + "logits/chosen": -1.0888057947158813, + "logits/rejected": -1.529687523841858, + "logps/chosen": -140.26531982421875, + "logps/rejected": -302.1977844238281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.826936721801758, + "rewards/margins": 8.848703384399414, + "rewards/rejected": -17.675640106201172, + "step": 14415 + }, + { + "epoch": 2.24, + "learning_rate": 3.5740557082836252e-06, + "logits/chosen": -2.4651455879211426, + "logits/rejected": -1.9264910221099854, + "logps/chosen": -415.2164001464844, + "logps/rejected": -529.7408447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.519158363342285, + "rewards/margins": 9.644970893859863, + "rewards/rejected": -19.16412925720215, + "step": 14416 + }, + { + "epoch": 2.24, + "learning_rate": 3.573322267752477e-06, + "logits/chosen": -1.4489893913269043, + "logits/rejected": -2.0479624271392822, + "logps/chosen": -210.10317993164062, + "logps/rejected": -384.3607177734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.45471477508545, + "rewards/margins": 9.681339263916016, + "rewards/rejected": -21.13605499267578, + "step": 14417 + }, + { + "epoch": 2.24, + "learning_rate": 3.572588827221329e-06, + "logits/chosen": -2.405682325363159, + "logits/rejected": -2.8179209232330322, + "logps/chosen": -256.1959533691406, + "logps/rejected": -533.95947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0282793045043945, + "rewards/margins": 9.287382125854492, + "rewards/rejected": -16.315662384033203, + "step": 14418 + }, + { + "epoch": 2.24, + "learning_rate": 3.571855386690181e-06, + "logits/chosen": -1.5603023767471313, + "logits/rejected": -2.6260180473327637, + "logps/chosen": -160.2084197998047, + "logps/rejected": -472.21636962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.097413539886475, + "rewards/margins": 11.938787460327148, + "rewards/rejected": -19.03620147705078, + "step": 14419 + }, + { + "epoch": 2.24, + "learning_rate": 3.5711219461590327e-06, + "logits/chosen": -2.1041195392608643, + "logits/rejected": -2.950608968734741, + "logps/chosen": -138.93397521972656, + "logps/rejected": -715.0631713867188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.481623649597168, + "rewards/margins": 17.733232498168945, + "rewards/rejected": -26.214855194091797, + "step": 14420 + }, + { + "epoch": 2.24, + "learning_rate": 3.5703885056278855e-06, + "logits/chosen": -2.6671340465545654, + "logits/rejected": -2.8765695095062256, + "logps/chosen": -98.26626586914062, + "logps/rejected": -337.4151916503906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.469482421875, + "rewards/margins": 11.548685073852539, + "rewards/rejected": -20.018169403076172, + "step": 14421 + }, + { + "epoch": 2.24, + "learning_rate": 3.5696550650967374e-06, + "logits/chosen": -2.18270206451416, + "logits/rejected": -1.8874582052230835, + "logps/chosen": -204.10720825195312, + "logps/rejected": -217.29476928710938, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.732915878295898, + "rewards/margins": 6.067158222198486, + "rewards/rejected": -15.800073623657227, + "step": 14422 + }, + { + "epoch": 2.24, + "learning_rate": 3.5689216245655897e-06, + "logits/chosen": -2.7993156909942627, + "logits/rejected": -2.200091600418091, + "logps/chosen": -301.82476806640625, + "logps/rejected": -259.42486572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.687330722808838, + "rewards/margins": 9.043424606323242, + "rewards/rejected": -15.730754852294922, + "step": 14423 + }, + { + "epoch": 2.24, + "learning_rate": 3.5681881840344415e-06, + "logits/chosen": -2.514594554901123, + "logits/rejected": -2.6193950176239014, + "logps/chosen": -288.9764099121094, + "logps/rejected": -340.6079406738281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.619940757751465, + "rewards/margins": 8.946640014648438, + "rewards/rejected": -17.566579818725586, + "step": 14424 + }, + { + "epoch": 2.24, + "learning_rate": 3.5674547435032943e-06, + "logits/chosen": -2.3637442588806152, + "logits/rejected": -2.59445858001709, + "logps/chosen": -555.539306640625, + "logps/rejected": -423.1014404296875, + "loss": 0.2922, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.03390884399414, + "rewards/margins": 5.211262226104736, + "rewards/rejected": -17.24517059326172, + "step": 14425 + }, + { + "epoch": 2.24, + "learning_rate": 3.566721302972146e-06, + "logits/chosen": -1.219132423400879, + "logits/rejected": -2.786367416381836, + "logps/chosen": -203.47694396972656, + "logps/rejected": -629.480224609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.823871612548828, + "rewards/margins": 8.760473251342773, + "rewards/rejected": -18.5843448638916, + "step": 14426 + }, + { + "epoch": 2.24, + "learning_rate": 3.565987862440998e-06, + "logits/chosen": -2.5600993633270264, + "logits/rejected": -2.691330671310425, + "logps/chosen": -144.26626586914062, + "logps/rejected": -547.2987670898438, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.985454559326172, + "rewards/margins": 10.674436569213867, + "rewards/rejected": -18.65989112854004, + "step": 14427 + }, + { + "epoch": 2.24, + "learning_rate": 3.56525442190985e-06, + "logits/chosen": -2.8603973388671875, + "logits/rejected": -2.1089909076690674, + "logps/chosen": -312.8780517578125, + "logps/rejected": -229.5418243408203, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.915677070617676, + "rewards/margins": 11.462556838989258, + "rewards/rejected": -16.378232955932617, + "step": 14428 + }, + { + "epoch": 2.24, + "learning_rate": 3.5645209813787018e-06, + "logits/chosen": -1.349896788597107, + "logits/rejected": -2.2550466060638428, + "logps/chosen": -240.64630126953125, + "logps/rejected": -492.91943359375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.380609512329102, + "rewards/margins": 7.471774101257324, + "rewards/rejected": -15.852383613586426, + "step": 14429 + }, + { + "epoch": 2.24, + "learning_rate": 3.5637875408475545e-06, + "logits/chosen": -1.297546148300171, + "logits/rejected": -2.4987425804138184, + "logps/chosen": -260.961181640625, + "logps/rejected": -559.2305908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.84242057800293, + "rewards/margins": 13.619965553283691, + "rewards/rejected": -21.462387084960938, + "step": 14430 + }, + { + "epoch": 2.24, + "learning_rate": 3.5630541003164064e-06, + "logits/chosen": -2.0635299682617188, + "logits/rejected": -2.1998507976531982, + "logps/chosen": -281.30523681640625, + "logps/rejected": -415.8626708984375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.338766098022461, + "rewards/margins": 7.8000922203063965, + "rewards/rejected": -17.138858795166016, + "step": 14431 + }, + { + "epoch": 2.24, + "learning_rate": 3.5623206597852587e-06, + "logits/chosen": -2.569526195526123, + "logits/rejected": -2.1381163597106934, + "logps/chosen": -392.5500183105469, + "logps/rejected": -514.9623413085938, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.662491798400879, + "rewards/margins": 11.615782737731934, + "rewards/rejected": -22.278274536132812, + "step": 14432 + }, + { + "epoch": 2.24, + "learning_rate": 3.5615872192541106e-06, + "logits/chosen": -1.296937108039856, + "logits/rejected": -2.3752496242523193, + "logps/chosen": -192.8469696044922, + "logps/rejected": -388.1640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.682245254516602, + "rewards/margins": 12.522745132446289, + "rewards/rejected": -22.20499038696289, + "step": 14433 + }, + { + "epoch": 2.24, + "learning_rate": 3.5608537787229633e-06, + "logits/chosen": -2.560930013656616, + "logits/rejected": -2.1735129356384277, + "logps/chosen": -271.8507995605469, + "logps/rejected": -395.16485595703125, + "loss": 0.0733, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.55870532989502, + "rewards/margins": 5.3155694007873535, + "rewards/rejected": -15.874275207519531, + "step": 14434 + }, + { + "epoch": 2.24, + "learning_rate": 3.560120338191815e-06, + "logits/chosen": -2.638357639312744, + "logits/rejected": -1.4470583200454712, + "logps/chosen": -193.36575317382812, + "logps/rejected": -254.7796173095703, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.534608840942383, + "rewards/margins": 9.281976699829102, + "rewards/rejected": -15.816585540771484, + "step": 14435 + }, + { + "epoch": 2.25, + "learning_rate": 3.559386897660667e-06, + "logits/chosen": -1.9918185472488403, + "logits/rejected": -2.5418052673339844, + "logps/chosen": -131.2543182373047, + "logps/rejected": -369.2413024902344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.320561408996582, + "rewards/margins": 11.235431671142578, + "rewards/rejected": -18.555992126464844, + "step": 14436 + }, + { + "epoch": 2.25, + "learning_rate": 3.558653457129519e-06, + "logits/chosen": -2.870213031768799, + "logits/rejected": -2.4263908863067627, + "logps/chosen": -269.4134521484375, + "logps/rejected": -354.4642333984375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.535138130187988, + "rewards/margins": 10.993131637573242, + "rewards/rejected": -17.528270721435547, + "step": 14437 + }, + { + "epoch": 2.25, + "learning_rate": 3.557920016598371e-06, + "logits/chosen": -2.111929416656494, + "logits/rejected": -2.4296185970306396, + "logps/chosen": -146.1781005859375, + "logps/rejected": -345.4795837402344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.332284927368164, + "rewards/margins": 11.007433891296387, + "rewards/rejected": -22.339717864990234, + "step": 14438 + }, + { + "epoch": 2.25, + "learning_rate": 3.5571865760672235e-06, + "logits/chosen": -1.7078684568405151, + "logits/rejected": -2.5745465755462646, + "logps/chosen": -202.62673950195312, + "logps/rejected": -503.50640869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.300942420959473, + "rewards/margins": 14.087011337280273, + "rewards/rejected": -23.387954711914062, + "step": 14439 + }, + { + "epoch": 2.25, + "learning_rate": 3.556453135536076e-06, + "logits/chosen": -2.2819366455078125, + "logits/rejected": -2.610581636428833, + "logps/chosen": -176.02020263671875, + "logps/rejected": -493.127685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.296089172363281, + "rewards/margins": 15.075191497802734, + "rewards/rejected": -23.371280670166016, + "step": 14440 + }, + { + "epoch": 2.25, + "learning_rate": 3.5557196950049277e-06, + "logits/chosen": -2.8684866428375244, + "logits/rejected": -2.870656728744507, + "logps/chosen": -214.4659423828125, + "logps/rejected": -418.4544677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.378166198730469, + "rewards/margins": 10.531230926513672, + "rewards/rejected": -16.90939712524414, + "step": 14441 + }, + { + "epoch": 2.25, + "learning_rate": 3.5549862544737796e-06, + "logits/chosen": -2.6137020587921143, + "logits/rejected": -1.8841474056243896, + "logps/chosen": -293.31158447265625, + "logps/rejected": -319.7626037597656, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.17204475402832, + "rewards/margins": 8.652653694152832, + "rewards/rejected": -17.82469940185547, + "step": 14442 + }, + { + "epoch": 2.25, + "learning_rate": 3.5542528139426323e-06, + "logits/chosen": -2.599574089050293, + "logits/rejected": -2.202371120452881, + "logps/chosen": -305.6259460449219, + "logps/rejected": -362.3894958496094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.429869651794434, + "rewards/margins": 10.827861785888672, + "rewards/rejected": -18.257732391357422, + "step": 14443 + }, + { + "epoch": 2.25, + "learning_rate": 3.553519373411484e-06, + "logits/chosen": -2.41758394241333, + "logits/rejected": -2.620059013366699, + "logps/chosen": -574.8692626953125, + "logps/rejected": -682.4583129882812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3538665771484375, + "rewards/margins": 11.222152709960938, + "rewards/rejected": -17.576019287109375, + "step": 14444 + }, + { + "epoch": 2.25, + "learning_rate": 3.552785932880336e-06, + "logits/chosen": -2.011561155319214, + "logits/rejected": -2.253528356552124, + "logps/chosen": -329.3335876464844, + "logps/rejected": -325.004638671875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.961766242980957, + "rewards/margins": 8.057474136352539, + "rewards/rejected": -17.019241333007812, + "step": 14445 + }, + { + "epoch": 2.25, + "learning_rate": 3.552052492349188e-06, + "logits/chosen": -2.0899155139923096, + "logits/rejected": -3.055823802947998, + "logps/chosen": -185.33587646484375, + "logps/rejected": -250.1513671875, + "loss": 0.6838, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.881551265716553, + "rewards/margins": 3.795289993286133, + "rewards/rejected": -10.676840782165527, + "step": 14446 + }, + { + "epoch": 2.25, + "learning_rate": 3.5513190518180407e-06, + "logits/chosen": -2.4450461864471436, + "logits/rejected": -2.985435724258423, + "logps/chosen": -130.79859924316406, + "logps/rejected": -346.4980773925781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.580562114715576, + "rewards/margins": 8.635452270507812, + "rewards/rejected": -14.216014862060547, + "step": 14447 + }, + { + "epoch": 2.25, + "learning_rate": 3.5505856112868925e-06, + "logits/chosen": -2.409825563430786, + "logits/rejected": -2.414731979370117, + "logps/chosen": -313.69061279296875, + "logps/rejected": -295.07470703125, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.624149322509766, + "rewards/margins": 6.304567813873291, + "rewards/rejected": -17.92871856689453, + "step": 14448 + }, + { + "epoch": 2.25, + "learning_rate": 3.549852170755745e-06, + "logits/chosen": -1.747355580329895, + "logits/rejected": -2.3700554370880127, + "logps/chosen": -225.16871643066406, + "logps/rejected": -544.46337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.691402435302734, + "rewards/margins": 13.905311584472656, + "rewards/rejected": -21.59671401977539, + "step": 14449 + }, + { + "epoch": 2.25, + "learning_rate": 3.5491187302245967e-06, + "logits/chosen": -1.7803609371185303, + "logits/rejected": -2.58811616897583, + "logps/chosen": -192.3453826904297, + "logps/rejected": -432.69677734375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.304842948913574, + "rewards/margins": 6.487414836883545, + "rewards/rejected": -15.792257308959961, + "step": 14450 + }, + { + "epoch": 2.25, + "learning_rate": 3.5483852896934486e-06, + "logits/chosen": -2.7911550998687744, + "logits/rejected": -2.6950011253356934, + "logps/chosen": -319.8371276855469, + "logps/rejected": -254.7606964111328, + "loss": 0.8803, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.926335334777832, + "rewards/margins": 3.913145065307617, + "rewards/rejected": -12.83948040008545, + "step": 14451 + }, + { + "epoch": 2.25, + "learning_rate": 3.5476518491623013e-06, + "logits/chosen": -1.5433077812194824, + "logits/rejected": -2.7361321449279785, + "logps/chosen": -318.2078857421875, + "logps/rejected": -662.5462036132812, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.521446228027344, + "rewards/margins": 11.990253448486328, + "rewards/rejected": -20.511699676513672, + "step": 14452 + }, + { + "epoch": 2.25, + "learning_rate": 3.546918408631153e-06, + "logits/chosen": -2.5358290672302246, + "logits/rejected": -1.7194687128067017, + "logps/chosen": -236.21487426757812, + "logps/rejected": -343.4713134765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2672810554504395, + "rewards/margins": 10.333374977111816, + "rewards/rejected": -17.600656509399414, + "step": 14453 + }, + { + "epoch": 2.25, + "learning_rate": 3.546184968100005e-06, + "logits/chosen": -2.4948983192443848, + "logits/rejected": -2.7679970264434814, + "logps/chosen": -558.1951904296875, + "logps/rejected": -674.338134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.077884197235107, + "rewards/margins": 15.07443618774414, + "rewards/rejected": -21.152320861816406, + "step": 14454 + }, + { + "epoch": 2.25, + "learning_rate": 3.545451527568857e-06, + "logits/chosen": -2.8909451961517334, + "logits/rejected": -2.4865353107452393, + "logps/chosen": -1036.3929443359375, + "logps/rejected": -733.1690673828125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.235849380493164, + "rewards/margins": 6.977052211761475, + "rewards/rejected": -17.212902069091797, + "step": 14455 + }, + { + "epoch": 2.25, + "learning_rate": 3.5447180870377097e-06, + "logits/chosen": -2.410778045654297, + "logits/rejected": -2.8365423679351807, + "logps/chosen": -944.8190307617188, + "logps/rejected": -822.5721435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.639286041259766, + "rewards/margins": 11.130199432373047, + "rewards/rejected": -20.769485473632812, + "step": 14456 + }, + { + "epoch": 2.25, + "learning_rate": 3.543984646506562e-06, + "logits/chosen": -2.460082769393921, + "logits/rejected": -2.7586240768432617, + "logps/chosen": -593.2909545898438, + "logps/rejected": -769.57958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.335637092590332, + "rewards/margins": 15.337356567382812, + "rewards/rejected": -24.67299461364746, + "step": 14457 + }, + { + "epoch": 2.25, + "learning_rate": 3.543251205975414e-06, + "logits/chosen": -2.1486058235168457, + "logits/rejected": -2.733142137527466, + "logps/chosen": -610.2272338867188, + "logps/rejected": -684.3172607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.248327255249023, + "rewards/margins": 15.03553581237793, + "rewards/rejected": -28.283863067626953, + "step": 14458 + }, + { + "epoch": 2.25, + "learning_rate": 3.5425177654442657e-06, + "logits/chosen": -0.9906843304634094, + "logits/rejected": -2.548356771469116, + "logps/chosen": -168.60374450683594, + "logps/rejected": -409.28802490234375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.407188415527344, + "rewards/margins": 8.824773788452148, + "rewards/rejected": -19.231962203979492, + "step": 14459 + }, + { + "epoch": 2.25, + "learning_rate": 3.5417843249131176e-06, + "logits/chosen": -2.8851566314697266, + "logits/rejected": -2.549405336380005, + "logps/chosen": -168.3609161376953, + "logps/rejected": -383.5941162109375, + "loss": 0.2156, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.806105136871338, + "rewards/margins": 10.342551231384277, + "rewards/rejected": -18.148656845092773, + "step": 14460 + }, + { + "epoch": 2.25, + "learning_rate": 3.5410508843819703e-06, + "logits/chosen": -2.6342086791992188, + "logits/rejected": -1.6815840005874634, + "logps/chosen": -375.2530822753906, + "logps/rejected": -264.8885498046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.859627723693848, + "rewards/margins": 9.065235137939453, + "rewards/rejected": -15.924861907958984, + "step": 14461 + }, + { + "epoch": 2.25, + "learning_rate": 3.5403174438508222e-06, + "logits/chosen": -2.714817762374878, + "logits/rejected": -2.8383734226226807, + "logps/chosen": -139.0692596435547, + "logps/rejected": -243.87399291992188, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.264923572540283, + "rewards/margins": 9.098959922790527, + "rewards/rejected": -15.363883972167969, + "step": 14462 + }, + { + "epoch": 2.25, + "learning_rate": 3.539584003319674e-06, + "logits/chosen": -2.9143595695495605, + "logits/rejected": -1.619235873222351, + "logps/chosen": -451.4163818359375, + "logps/rejected": -242.5791778564453, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.162508964538574, + "rewards/margins": 9.1885347366333, + "rewards/rejected": -17.351043701171875, + "step": 14463 + }, + { + "epoch": 2.25, + "learning_rate": 3.538850562788526e-06, + "logits/chosen": -0.7371146082878113, + "logits/rejected": -1.9218535423278809, + "logps/chosen": -134.5644073486328, + "logps/rejected": -328.2096252441406, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.68893051147461, + "rewards/margins": 8.574623107910156, + "rewards/rejected": -19.263553619384766, + "step": 14464 + }, + { + "epoch": 2.25, + "learning_rate": 3.5381171222573787e-06, + "logits/chosen": -2.1890616416931152, + "logits/rejected": -2.480167865753174, + "logps/chosen": -156.0247802734375, + "logps/rejected": -304.69427490234375, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.958907127380371, + "rewards/margins": 8.064295768737793, + "rewards/rejected": -19.023202896118164, + "step": 14465 + }, + { + "epoch": 2.25, + "learning_rate": 3.537383681726231e-06, + "logits/chosen": -2.6127469539642334, + "logits/rejected": -2.4103879928588867, + "logps/chosen": -657.0550537109375, + "logps/rejected": -621.0667114257812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.225091934204102, + "rewards/margins": 12.478483200073242, + "rewards/rejected": -18.703575134277344, + "step": 14466 + }, + { + "epoch": 2.25, + "learning_rate": 3.536650241195083e-06, + "logits/chosen": -2.8106751441955566, + "logits/rejected": -2.919621706008911, + "logps/chosen": -141.71429443359375, + "logps/rejected": -206.72378540039062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.174321174621582, + "rewards/margins": 10.473223686218262, + "rewards/rejected": -17.647544860839844, + "step": 14467 + }, + { + "epoch": 2.25, + "learning_rate": 3.5359168006639348e-06, + "logits/chosen": -1.7821511030197144, + "logits/rejected": -2.730722665786743, + "logps/chosen": -390.1976623535156, + "logps/rejected": -811.3013916015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.389402389526367, + "rewards/margins": 11.98240852355957, + "rewards/rejected": -21.371810913085938, + "step": 14468 + }, + { + "epoch": 2.25, + "learning_rate": 3.5351833601327866e-06, + "logits/chosen": -1.8667150735855103, + "logits/rejected": -2.671908140182495, + "logps/chosen": -333.7415466308594, + "logps/rejected": -540.8612670898438, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.642715454101562, + "rewards/margins": 9.323755264282227, + "rewards/rejected": -18.96647071838379, + "step": 14469 + }, + { + "epoch": 2.25, + "learning_rate": 3.534449919601639e-06, + "logits/chosen": -2.5745506286621094, + "logits/rejected": -2.135291337966919, + "logps/chosen": -207.1228485107422, + "logps/rejected": -290.31439208984375, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.076567649841309, + "rewards/margins": 7.158049583435059, + "rewards/rejected": -16.234617233276367, + "step": 14470 + }, + { + "epoch": 2.25, + "learning_rate": 3.5337164790704912e-06, + "logits/chosen": -1.3398747444152832, + "logits/rejected": -2.8868656158447266, + "logps/chosen": -316.5224304199219, + "logps/rejected": -349.35455322265625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7747650146484375, + "rewards/margins": 12.032390594482422, + "rewards/rejected": -17.80715560913086, + "step": 14471 + }, + { + "epoch": 2.25, + "learning_rate": 3.532983038539343e-06, + "logits/chosen": -2.5393691062927246, + "logits/rejected": -1.9326454401016235, + "logps/chosen": -324.4493713378906, + "logps/rejected": -351.12774658203125, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.606088638305664, + "rewards/margins": 6.805803298950195, + "rewards/rejected": -16.41189193725586, + "step": 14472 + }, + { + "epoch": 2.25, + "learning_rate": 3.5322495980081954e-06, + "logits/chosen": -2.6121904850006104, + "logits/rejected": -2.936950922012329, + "logps/chosen": -384.5139465332031, + "logps/rejected": -463.99456787109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.591493606567383, + "rewards/margins": 7.026633262634277, + "rewards/rejected": -15.618127822875977, + "step": 14473 + }, + { + "epoch": 2.25, + "learning_rate": 3.5315161574770477e-06, + "logits/chosen": -1.6538376808166504, + "logits/rejected": -2.4093456268310547, + "logps/chosen": -161.90582275390625, + "logps/rejected": -253.27267456054688, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.53511905670166, + "rewards/margins": 7.0758795738220215, + "rewards/rejected": -17.610998153686523, + "step": 14474 + }, + { + "epoch": 2.25, + "learning_rate": 3.5307827169459e-06, + "logits/chosen": -1.5377833843231201, + "logits/rejected": -2.4753689765930176, + "logps/chosen": -176.27891540527344, + "logps/rejected": -384.0799560546875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.566815376281738, + "rewards/margins": 10.47122859954834, + "rewards/rejected": -18.038043975830078, + "step": 14475 + }, + { + "epoch": 2.25, + "learning_rate": 3.530049276414752e-06, + "logits/chosen": -2.3686089515686035, + "logits/rejected": -2.4278929233551025, + "logps/chosen": -312.42828369140625, + "logps/rejected": -290.21014404296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.156283378601074, + "rewards/margins": 8.201417922973633, + "rewards/rejected": -15.35770034790039, + "step": 14476 + }, + { + "epoch": 2.25, + "learning_rate": 3.5293158358836038e-06, + "logits/chosen": -2.0434277057647705, + "logits/rejected": -2.7530908584594727, + "logps/chosen": -128.52886962890625, + "logps/rejected": -419.7317199707031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.013691425323486, + "rewards/margins": 14.668741226196289, + "rewards/rejected": -19.68243408203125, + "step": 14477 + }, + { + "epoch": 2.25, + "learning_rate": 3.528582395352456e-06, + "logits/chosen": -1.6123768091201782, + "logits/rejected": -2.486640453338623, + "logps/chosen": -259.91259765625, + "logps/rejected": -429.18792724609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.066319465637207, + "rewards/margins": 9.170380592346191, + "rewards/rejected": -18.2367000579834, + "step": 14478 + }, + { + "epoch": 2.25, + "learning_rate": 3.527848954821308e-06, + "logits/chosen": -1.3294066190719604, + "logits/rejected": -2.4876513481140137, + "logps/chosen": -275.43280029296875, + "logps/rejected": -367.54925537109375, + "loss": 0.084, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.011463165283203, + "rewards/margins": 10.071395874023438, + "rewards/rejected": -19.08285903930664, + "step": 14479 + }, + { + "epoch": 2.25, + "learning_rate": 3.5271155142901603e-06, + "logits/chosen": -2.3786327838897705, + "logits/rejected": -2.7428805828094482, + "logps/chosen": -123.68788146972656, + "logps/rejected": -369.9554138183594, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.559374809265137, + "rewards/margins": 5.313908100128174, + "rewards/rejected": -12.873283386230469, + "step": 14480 + }, + { + "epoch": 2.25, + "learning_rate": 3.526382073759012e-06, + "logits/chosen": -0.9947076439857483, + "logits/rejected": -1.4740086793899536, + "logps/chosen": -250.24630737304688, + "logps/rejected": -572.6063842773438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.865549087524414, + "rewards/margins": 11.989568710327148, + "rewards/rejected": -21.855117797851562, + "step": 14481 + }, + { + "epoch": 2.25, + "learning_rate": 3.5256486332278644e-06, + "logits/chosen": -2.731170892715454, + "logits/rejected": -1.9385281801223755, + "logps/chosen": -277.12139892578125, + "logps/rejected": -368.51116943359375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.914751052856445, + "rewards/margins": 9.543437004089355, + "rewards/rejected": -20.458187103271484, + "step": 14482 + }, + { + "epoch": 2.25, + "learning_rate": 3.5249151926967168e-06, + "logits/chosen": -1.4269065856933594, + "logits/rejected": -2.105295419692993, + "logps/chosen": -206.51025390625, + "logps/rejected": -382.2390441894531, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.995563507080078, + "rewards/margins": 8.389708518981934, + "rewards/rejected": -20.385272979736328, + "step": 14483 + }, + { + "epoch": 2.25, + "learning_rate": 3.524181752165569e-06, + "logits/chosen": -2.4353482723236084, + "logits/rejected": -2.9977569580078125, + "logps/chosen": -59.30706024169922, + "logps/rejected": -361.34326171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.622917175292969, + "rewards/margins": 11.076318740844727, + "rewards/rejected": -15.699235916137695, + "step": 14484 + }, + { + "epoch": 2.25, + "learning_rate": 3.523448311634421e-06, + "logits/chosen": -2.8451523780822754, + "logits/rejected": -2.241406202316284, + "logps/chosen": -321.7102355957031, + "logps/rejected": -305.1305236816406, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6316728591918945, + "rewards/margins": 6.783533573150635, + "rewards/rejected": -14.415206909179688, + "step": 14485 + }, + { + "epoch": 2.25, + "learning_rate": 3.5227148711032732e-06, + "logits/chosen": -2.682856321334839, + "logits/rejected": -2.6787540912628174, + "logps/chosen": -142.367919921875, + "logps/rejected": -271.4255065917969, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.41459846496582, + "rewards/margins": 8.456137657165527, + "rewards/rejected": -17.87073516845703, + "step": 14486 + }, + { + "epoch": 2.25, + "learning_rate": 3.521981430572125e-06, + "logits/chosen": -2.5687220096588135, + "logits/rejected": -2.755446434020996, + "logps/chosen": -137.4880828857422, + "logps/rejected": -331.66241455078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.063579559326172, + "rewards/margins": 10.38292121887207, + "rewards/rejected": -16.446500778198242, + "step": 14487 + }, + { + "epoch": 2.25, + "learning_rate": 3.521247990040977e-06, + "logits/chosen": -2.8375232219696045, + "logits/rejected": -2.7292561531066895, + "logps/chosen": -718.0584106445312, + "logps/rejected": -624.9692993164062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.996960639953613, + "rewards/margins": 10.941522598266602, + "rewards/rejected": -17.93848419189453, + "step": 14488 + }, + { + "epoch": 2.25, + "learning_rate": 3.5205145495098293e-06, + "logits/chosen": -2.0042195320129395, + "logits/rejected": -2.679560899734497, + "logps/chosen": -394.3148498535156, + "logps/rejected": -583.150634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.052474975585938, + "rewards/margins": 14.262903213500977, + "rewards/rejected": -23.315380096435547, + "step": 14489 + }, + { + "epoch": 2.25, + "learning_rate": 3.519781108978681e-06, + "logits/chosen": -2.261090040206909, + "logits/rejected": -2.6078972816467285, + "logps/chosen": -375.48150634765625, + "logps/rejected": -693.0377197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.265242576599121, + "rewards/margins": 13.366020202636719, + "rewards/rejected": -19.631263732910156, + "step": 14490 + }, + { + "epoch": 2.25, + "learning_rate": 3.519047668447534e-06, + "logits/chosen": -2.6985957622528076, + "logits/rejected": -2.577731132507324, + "logps/chosen": -116.83131408691406, + "logps/rejected": -229.4879150390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.479429721832275, + "rewards/margins": 10.133678436279297, + "rewards/rejected": -16.613109588623047, + "step": 14491 + }, + { + "epoch": 2.25, + "learning_rate": 3.5183142279163858e-06, + "logits/chosen": -2.051236391067505, + "logits/rejected": -2.6318037509918213, + "logps/chosen": -331.70703125, + "logps/rejected": -683.8952026367188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.40482234954834, + "rewards/margins": 14.454244613647461, + "rewards/rejected": -19.859066009521484, + "step": 14492 + }, + { + "epoch": 2.25, + "learning_rate": 3.517580787385238e-06, + "logits/chosen": -2.3092803955078125, + "logits/rejected": -2.6970551013946533, + "logps/chosen": -332.5094299316406, + "logps/rejected": -408.82452392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.274166107177734, + "rewards/margins": 12.645072937011719, + "rewards/rejected": -21.919239044189453, + "step": 14493 + }, + { + "epoch": 2.25, + "learning_rate": 3.51684734685409e-06, + "logits/chosen": -1.942631721496582, + "logits/rejected": -2.7816550731658936, + "logps/chosen": -144.18017578125, + "logps/rejected": -403.2466125488281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.673315048217773, + "rewards/margins": 14.224899291992188, + "rewards/rejected": -21.89821434020996, + "step": 14494 + }, + { + "epoch": 2.25, + "learning_rate": 3.5161139063229423e-06, + "logits/chosen": -2.649942636489868, + "logits/rejected": -2.388524055480957, + "logps/chosen": -188.27210998535156, + "logps/rejected": -508.35284423828125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.304636001586914, + "rewards/margins": 9.197561264038086, + "rewards/rejected": -19.502197265625, + "step": 14495 + }, + { + "epoch": 2.25, + "learning_rate": 3.515380465791794e-06, + "logits/chosen": -2.2905118465423584, + "logits/rejected": -2.6712095737457275, + "logps/chosen": -265.9164123535156, + "logps/rejected": -380.1057434082031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.868703365325928, + "rewards/margins": 11.793834686279297, + "rewards/rejected": -18.662538528442383, + "step": 14496 + }, + { + "epoch": 2.25, + "learning_rate": 3.514647025260646e-06, + "logits/chosen": -2.824495792388916, + "logits/rejected": -2.3211045265197754, + "logps/chosen": -738.5869140625, + "logps/rejected": -490.82684326171875, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.266566276550293, + "rewards/margins": 6.163788795471191, + "rewards/rejected": -15.430355072021484, + "step": 14497 + }, + { + "epoch": 2.25, + "learning_rate": 3.5139135847294983e-06, + "logits/chosen": -2.2677500247955322, + "logits/rejected": -2.6683576107025146, + "logps/chosen": -151.2490234375, + "logps/rejected": -225.3352813720703, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.665058135986328, + "rewards/margins": 9.253904342651367, + "rewards/rejected": -16.918962478637695, + "step": 14498 + }, + { + "epoch": 2.25, + "learning_rate": 3.5131801441983506e-06, + "logits/chosen": -2.585810899734497, + "logits/rejected": -2.3942134380340576, + "logps/chosen": -101.87547302246094, + "logps/rejected": -235.85137939453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.330040454864502, + "rewards/margins": 9.990816116333008, + "rewards/rejected": -17.32085609436035, + "step": 14499 + }, + { + "epoch": 2.26, + "learning_rate": 3.512446703667203e-06, + "logits/chosen": -2.895156145095825, + "logits/rejected": -2.863419771194458, + "logps/chosen": -608.5357666015625, + "logps/rejected": -669.9867553710938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.646764278411865, + "rewards/margins": 11.510875701904297, + "rewards/rejected": -19.15764045715332, + "step": 14500 + }, + { + "epoch": 2.26, + "learning_rate": 3.511713263136055e-06, + "logits/chosen": -2.373688220977783, + "logits/rejected": -2.8494529724121094, + "logps/chosen": -401.40655517578125, + "logps/rejected": -541.6888427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.925410747528076, + "rewards/margins": 14.085700988769531, + "rewards/rejected": -22.011112213134766, + "step": 14501 + }, + { + "epoch": 2.26, + "learning_rate": 3.510979822604907e-06, + "logits/chosen": -2.8295981884002686, + "logits/rejected": -2.101574420928955, + "logps/chosen": -532.4553833007812, + "logps/rejected": -551.3336181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.398256301879883, + "rewards/margins": 11.370065689086914, + "rewards/rejected": -20.768321990966797, + "step": 14502 + }, + { + "epoch": 2.26, + "learning_rate": 3.510246382073759e-06, + "logits/chosen": -2.0497591495513916, + "logits/rejected": -2.724490165710449, + "logps/chosen": -204.4630126953125, + "logps/rejected": -390.04443359375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.597392082214355, + "rewards/margins": 7.777050495147705, + "rewards/rejected": -17.37444305419922, + "step": 14503 + }, + { + "epoch": 2.26, + "learning_rate": 3.5095129415426113e-06, + "logits/chosen": -2.7700531482696533, + "logits/rejected": -2.976741075515747, + "logps/chosen": -168.52125549316406, + "logps/rejected": -704.2285766601562, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.503952980041504, + "rewards/margins": 7.318736553192139, + "rewards/rejected": -17.822689056396484, + "step": 14504 + }, + { + "epoch": 2.26, + "learning_rate": 3.508779501011463e-06, + "logits/chosen": -2.567770481109619, + "logits/rejected": -1.941672682762146, + "logps/chosen": -337.7322082519531, + "logps/rejected": -511.78070068359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.77917194366455, + "rewards/margins": 8.464864730834961, + "rewards/rejected": -19.244037628173828, + "step": 14505 + }, + { + "epoch": 2.26, + "learning_rate": 3.5080460604803155e-06, + "logits/chosen": -2.6959588527679443, + "logits/rejected": -2.761230945587158, + "logps/chosen": -199.96884155273438, + "logps/rejected": -229.37496948242188, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.041297912597656, + "rewards/margins": 7.299530982971191, + "rewards/rejected": -17.340829849243164, + "step": 14506 + }, + { + "epoch": 2.26, + "learning_rate": 3.5073126199491673e-06, + "logits/chosen": -2.0925023555755615, + "logits/rejected": -2.839888095855713, + "logps/chosen": -207.8416748046875, + "logps/rejected": -300.2137756347656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.875260353088379, + "rewards/margins": 13.744053840637207, + "rewards/rejected": -18.619314193725586, + "step": 14507 + }, + { + "epoch": 2.26, + "learning_rate": 3.5065791794180196e-06, + "logits/chosen": -0.9509340524673462, + "logits/rejected": -2.106553316116333, + "logps/chosen": -295.9587707519531, + "logps/rejected": -695.6236572265625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.275871276855469, + "rewards/margins": 14.86435604095459, + "rewards/rejected": -24.140226364135742, + "step": 14508 + }, + { + "epoch": 2.26, + "learning_rate": 3.505845738886872e-06, + "logits/chosen": -2.719069480895996, + "logits/rejected": -2.9204487800598145, + "logps/chosen": -172.7237548828125, + "logps/rejected": -344.9004211425781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.62932014465332, + "rewards/margins": 8.640165328979492, + "rewards/rejected": -15.269485473632812, + "step": 14509 + }, + { + "epoch": 2.26, + "learning_rate": 3.505112298355724e-06, + "logits/chosen": -2.6279289722442627, + "logits/rejected": -1.682569980621338, + "logps/chosen": -268.08856201171875, + "logps/rejected": -311.8824462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.232558250427246, + "rewards/margins": 12.633636474609375, + "rewards/rejected": -19.866195678710938, + "step": 14510 + }, + { + "epoch": 2.26, + "learning_rate": 3.504378857824576e-06, + "logits/chosen": -2.4986820220947266, + "logits/rejected": -2.7330737113952637, + "logps/chosen": -193.12709045410156, + "logps/rejected": -393.403076171875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9624223709106445, + "rewards/margins": 6.89930534362793, + "rewards/rejected": -13.86172866821289, + "step": 14511 + }, + { + "epoch": 2.26, + "learning_rate": 3.503645417293428e-06, + "logits/chosen": -1.5598725080490112, + "logits/rejected": -2.6855318546295166, + "logps/chosen": -205.67059326171875, + "logps/rejected": -792.4177856445312, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.811810493469238, + "rewards/margins": 7.750885963439941, + "rewards/rejected": -20.56269645690918, + "step": 14512 + }, + { + "epoch": 2.26, + "learning_rate": 3.5029119767622803e-06, + "logits/chosen": -2.614650011062622, + "logits/rejected": -1.8200793266296387, + "logps/chosen": -543.3984985351562, + "logps/rejected": -375.4256286621094, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.295351028442383, + "rewards/margins": 5.545175552368164, + "rewards/rejected": -14.840526580810547, + "step": 14513 + }, + { + "epoch": 2.26, + "learning_rate": 3.502178536231132e-06, + "logits/chosen": -2.774869441986084, + "logits/rejected": -1.768330454826355, + "logps/chosen": -365.75225830078125, + "logps/rejected": -353.3232421875, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.524148941040039, + "rewards/margins": 7.510335922241211, + "rewards/rejected": -15.03448486328125, + "step": 14514 + }, + { + "epoch": 2.26, + "learning_rate": 3.5014450956999845e-06, + "logits/chosen": -1.7470531463623047, + "logits/rejected": -2.7561216354370117, + "logps/chosen": -193.81942749023438, + "logps/rejected": -423.4215087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.135173797607422, + "rewards/margins": 12.170238494873047, + "rewards/rejected": -17.30541229248047, + "step": 14515 + }, + { + "epoch": 2.26, + "learning_rate": 3.5007116551688368e-06, + "logits/chosen": -2.226351022720337, + "logits/rejected": -2.633387804031372, + "logps/chosen": -171.86758422851562, + "logps/rejected": -360.8842468261719, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.13498592376709, + "rewards/margins": 8.779898643493652, + "rewards/rejected": -17.914884567260742, + "step": 14516 + }, + { + "epoch": 2.26, + "learning_rate": 3.4999782146376887e-06, + "logits/chosen": -0.9453277587890625, + "logits/rejected": -2.3856492042541504, + "logps/chosen": -141.5835418701172, + "logps/rejected": -369.8159484863281, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.317350387573242, + "rewards/margins": 7.382678985595703, + "rewards/rejected": -16.700029373168945, + "step": 14517 + }, + { + "epoch": 2.26, + "learning_rate": 3.499244774106541e-06, + "logits/chosen": -2.5215868949890137, + "logits/rejected": -2.2688863277435303, + "logps/chosen": -592.3012084960938, + "logps/rejected": -709.2666625976562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0628252029418945, + "rewards/margins": 14.892358779907227, + "rewards/rejected": -20.955184936523438, + "step": 14518 + }, + { + "epoch": 2.26, + "learning_rate": 3.498511333575393e-06, + "logits/chosen": -2.5178163051605225, + "logits/rejected": -1.7597949504852295, + "logps/chosen": -238.04110717773438, + "logps/rejected": -196.44467163085938, + "loss": 1.2138, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.47018051147461, + "rewards/margins": 3.2641096115112305, + "rewards/rejected": -12.734289169311523, + "step": 14519 + }, + { + "epoch": 2.26, + "learning_rate": 3.497777893044245e-06, + "logits/chosen": -2.2767064571380615, + "logits/rejected": -2.616209030151367, + "logps/chosen": -482.7255859375, + "logps/rejected": -603.0632934570312, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.276244163513184, + "rewards/margins": 10.43718433380127, + "rewards/rejected": -18.713428497314453, + "step": 14520 + }, + { + "epoch": 2.26, + "learning_rate": 3.497044452513097e-06, + "logits/chosen": -2.676182985305786, + "logits/rejected": -2.8491640090942383, + "logps/chosen": -220.3846435546875, + "logps/rejected": -394.8167724609375, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.734375953674316, + "rewards/margins": 5.888936996459961, + "rewards/rejected": -14.623312950134277, + "step": 14521 + }, + { + "epoch": 2.26, + "learning_rate": 3.4963110119819493e-06, + "logits/chosen": -2.3431291580200195, + "logits/rejected": -2.6478819847106934, + "logps/chosen": -173.52349853515625, + "logps/rejected": -515.101318359375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.056461334228516, + "rewards/margins": 6.960737228393555, + "rewards/rejected": -17.01719856262207, + "step": 14522 + }, + { + "epoch": 2.26, + "learning_rate": 3.495577571450801e-06, + "logits/chosen": -2.2428317070007324, + "logits/rejected": -2.7248618602752686, + "logps/chosen": -555.6427612304688, + "logps/rejected": -604.16650390625, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.23714542388916, + "rewards/margins": 8.901829719543457, + "rewards/rejected": -20.138975143432617, + "step": 14523 + }, + { + "epoch": 2.26, + "learning_rate": 3.4948441309196535e-06, + "logits/chosen": -2.347707748413086, + "logits/rejected": -2.764317035675049, + "logps/chosen": -135.3447265625, + "logps/rejected": -355.918212890625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.076922416687012, + "rewards/margins": 8.487458229064941, + "rewards/rejected": -17.564380645751953, + "step": 14524 + }, + { + "epoch": 2.26, + "learning_rate": 3.494110690388506e-06, + "logits/chosen": -2.951737642288208, + "logits/rejected": -1.9817283153533936, + "logps/chosen": -443.4853515625, + "logps/rejected": -145.71786499023438, + "loss": 0.1128, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.10617733001709, + "rewards/margins": 3.326038122177124, + "rewards/rejected": -10.432214736938477, + "step": 14525 + }, + { + "epoch": 2.26, + "learning_rate": 3.4933772498573577e-06, + "logits/chosen": -2.368572235107422, + "logits/rejected": -2.874662160873413, + "logps/chosen": -384.8709716796875, + "logps/rejected": -499.2149658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1896286010742188, + "rewards/margins": 13.22362995147705, + "rewards/rejected": -15.41325855255127, + "step": 14526 + }, + { + "epoch": 2.26, + "learning_rate": 3.49264380932621e-06, + "logits/chosen": -2.152344226837158, + "logits/rejected": -1.8040180206298828, + "logps/chosen": -169.5244140625, + "logps/rejected": -219.19696044921875, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.334405899047852, + "rewards/margins": 4.555486679077148, + "rewards/rejected": -12.889892578125, + "step": 14527 + }, + { + "epoch": 2.26, + "learning_rate": 3.491910368795062e-06, + "logits/chosen": -2.9216041564941406, + "logits/rejected": -2.661989212036133, + "logps/chosen": -685.4432373046875, + "logps/rejected": -525.2863159179688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.368649482727051, + "rewards/margins": 16.71442985534668, + "rewards/rejected": -22.083078384399414, + "step": 14528 + }, + { + "epoch": 2.26, + "learning_rate": 3.491176928263914e-06, + "logits/chosen": -2.4945168495178223, + "logits/rejected": -2.8915624618530273, + "logps/chosen": -119.57492065429688, + "logps/rejected": -410.9533386230469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.08123254776001, + "rewards/margins": 11.233223915100098, + "rewards/rejected": -17.314456939697266, + "step": 14529 + }, + { + "epoch": 2.26, + "learning_rate": 3.490443487732766e-06, + "logits/chosen": -2.733752489089966, + "logits/rejected": -2.948482036590576, + "logps/chosen": -112.72676849365234, + "logps/rejected": -280.9375305175781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.397615432739258, + "rewards/margins": 9.43007755279541, + "rewards/rejected": -13.827692985534668, + "step": 14530 + }, + { + "epoch": 2.26, + "learning_rate": 3.4897100472016183e-06, + "logits/chosen": -2.9805057048797607, + "logits/rejected": -3.103801965713501, + "logps/chosen": -108.9955062866211, + "logps/rejected": -178.40382385253906, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.369034767150879, + "rewards/margins": 7.961704254150391, + "rewards/rejected": -13.330738067626953, + "step": 14531 + }, + { + "epoch": 2.26, + "learning_rate": 3.4889766066704702e-06, + "logits/chosen": -2.369053363800049, + "logits/rejected": -2.430814027786255, + "logps/chosen": -504.9053039550781, + "logps/rejected": -617.735595703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.44837760925293, + "rewards/margins": 9.87038803100586, + "rewards/rejected": -18.31876564025879, + "step": 14532 + }, + { + "epoch": 2.26, + "learning_rate": 3.488243166139323e-06, + "logits/chosen": -1.1819194555282593, + "logits/rejected": -2.604008436203003, + "logps/chosen": -445.9976806640625, + "logps/rejected": -546.051025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.577430725097656, + "rewards/margins": 14.524646759033203, + "rewards/rejected": -22.10207748413086, + "step": 14533 + }, + { + "epoch": 2.26, + "learning_rate": 3.487509725608175e-06, + "logits/chosen": -1.7906672954559326, + "logits/rejected": -2.7035982608795166, + "logps/chosen": -217.16656494140625, + "logps/rejected": -381.3124694824219, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7887444496154785, + "rewards/margins": 8.398648262023926, + "rewards/rejected": -14.187393188476562, + "step": 14534 + }, + { + "epoch": 2.26, + "learning_rate": 3.486776285077027e-06, + "logits/chosen": -1.795459270477295, + "logits/rejected": -2.340134620666504, + "logps/chosen": -175.18093872070312, + "logps/rejected": -321.1790466308594, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.291285514831543, + "rewards/margins": 7.819413185119629, + "rewards/rejected": -19.110698699951172, + "step": 14535 + }, + { + "epoch": 2.26, + "learning_rate": 3.486042844545879e-06, + "logits/chosen": -2.212235927581787, + "logits/rejected": -2.93129563331604, + "logps/chosen": -199.7721405029297, + "logps/rejected": -393.1568298339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.800206184387207, + "rewards/margins": 11.998784065246582, + "rewards/rejected": -17.79899024963379, + "step": 14536 + }, + { + "epoch": 2.26, + "learning_rate": 3.485309404014731e-06, + "logits/chosen": -2.808015823364258, + "logits/rejected": -1.817637324333191, + "logps/chosen": -342.8887023925781, + "logps/rejected": -202.0543212890625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.222505569458008, + "rewards/margins": 8.230478286743164, + "rewards/rejected": -14.452983856201172, + "step": 14537 + }, + { + "epoch": 2.26, + "learning_rate": 3.484575963483583e-06, + "logits/chosen": -2.769878387451172, + "logits/rejected": -2.7772576808929443, + "logps/chosen": -80.38957977294922, + "logps/rejected": -261.059814453125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.542614936828613, + "rewards/margins": 8.03314208984375, + "rewards/rejected": -14.575757026672363, + "step": 14538 + }, + { + "epoch": 2.26, + "learning_rate": 3.483842522952435e-06, + "logits/chosen": -1.8740960359573364, + "logits/rejected": -2.684279680252075, + "logps/chosen": -275.5532531738281, + "logps/rejected": -541.4015502929688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.19188404083252, + "rewards/margins": 15.245111465454102, + "rewards/rejected": -23.436994552612305, + "step": 14539 + }, + { + "epoch": 2.26, + "learning_rate": 3.4831090824212874e-06, + "logits/chosen": -2.6939563751220703, + "logits/rejected": -2.337085723876953, + "logps/chosen": -301.9962158203125, + "logps/rejected": -316.3104248046875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.009146690368652, + "rewards/margins": 5.419610500335693, + "rewards/rejected": -15.428756713867188, + "step": 14540 + }, + { + "epoch": 2.26, + "learning_rate": 3.4823756418901397e-06, + "logits/chosen": -2.811415433883667, + "logits/rejected": -2.4070065021514893, + "logps/chosen": -329.757080078125, + "logps/rejected": -222.65069580078125, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.221227645874023, + "rewards/margins": 5.987853050231934, + "rewards/rejected": -15.209081649780273, + "step": 14541 + }, + { + "epoch": 2.26, + "learning_rate": 3.481642201358992e-06, + "logits/chosen": -1.6975094079971313, + "logits/rejected": -2.2203209400177, + "logps/chosen": -135.30160522460938, + "logps/rejected": -317.90960693359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.464323997497559, + "rewards/margins": 10.270838737487793, + "rewards/rejected": -17.73516273498535, + "step": 14542 + }, + { + "epoch": 2.26, + "learning_rate": 3.480908760827844e-06, + "logits/chosen": -2.423042058944702, + "logits/rejected": -2.1876800060272217, + "logps/chosen": -433.5766296386719, + "logps/rejected": -405.6247253417969, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.005250930786133, + "rewards/margins": 11.500774383544922, + "rewards/rejected": -20.506025314331055, + "step": 14543 + }, + { + "epoch": 2.26, + "learning_rate": 3.480175320296696e-06, + "logits/chosen": -2.4820005893707275, + "logits/rejected": -2.6644482612609863, + "logps/chosen": -360.9627685546875, + "logps/rejected": -426.9886474609375, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.819392204284668, + "rewards/margins": 10.995716094970703, + "rewards/rejected": -20.815109252929688, + "step": 14544 + }, + { + "epoch": 2.26, + "learning_rate": 3.479441879765548e-06, + "logits/chosen": -2.51901912689209, + "logits/rejected": -1.601825475692749, + "logps/chosen": -1944.674072265625, + "logps/rejected": -606.1585693359375, + "loss": 0.0418, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.234651565551758, + "rewards/margins": 6.685351848602295, + "rewards/rejected": -21.920001983642578, + "step": 14545 + }, + { + "epoch": 2.26, + "learning_rate": 3.4787084392344e-06, + "logits/chosen": -2.542687177658081, + "logits/rejected": -2.8965559005737305, + "logps/chosen": -328.1201477050781, + "logps/rejected": -296.81207275390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.521341323852539, + "rewards/margins": 8.451533317565918, + "rewards/rejected": -13.972874641418457, + "step": 14546 + }, + { + "epoch": 2.26, + "learning_rate": 3.477974998703252e-06, + "logits/chosen": -1.5737801790237427, + "logits/rejected": -2.4858486652374268, + "logps/chosen": -265.1311950683594, + "logps/rejected": -581.6612548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.714097023010254, + "rewards/margins": 14.773488998413086, + "rewards/rejected": -21.487586975097656, + "step": 14547 + }, + { + "epoch": 2.26, + "learning_rate": 3.477241558172104e-06, + "logits/chosen": -2.390446186065674, + "logits/rejected": -2.7274184226989746, + "logps/chosen": -494.28314208984375, + "logps/rejected": -598.5220947265625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5171074867248535, + "rewards/margins": 10.266892433166504, + "rewards/rejected": -16.784000396728516, + "step": 14548 + }, + { + "epoch": 2.26, + "learning_rate": 3.4765081176409564e-06, + "logits/chosen": -1.25263512134552, + "logits/rejected": -2.463198661804199, + "logps/chosen": -221.26971435546875, + "logps/rejected": -492.52734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.246596336364746, + "rewards/margins": 10.995165824890137, + "rewards/rejected": -23.241762161254883, + "step": 14549 + }, + { + "epoch": 2.26, + "learning_rate": 3.4757746771098087e-06, + "logits/chosen": -1.9476290941238403, + "logits/rejected": -2.815978765487671, + "logps/chosen": -170.5438995361328, + "logps/rejected": -368.870361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.092761993408203, + "rewards/margins": 12.909151077270508, + "rewards/rejected": -23.00191307067871, + "step": 14550 + }, + { + "epoch": 2.26, + "learning_rate": 3.475041236578661e-06, + "logits/chosen": -2.6649436950683594, + "logits/rejected": -1.967574119567871, + "logps/chosen": -356.615478515625, + "logps/rejected": -267.09515380859375, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.760554313659668, + "rewards/margins": 6.380327224731445, + "rewards/rejected": -14.140881538391113, + "step": 14551 + }, + { + "epoch": 2.26, + "learning_rate": 3.474307796047513e-06, + "logits/chosen": -1.8335387706756592, + "logits/rejected": -2.573777914047241, + "logps/chosen": -230.00489807128906, + "logps/rejected": -382.26080322265625, + "loss": 0.0613, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.487728118896484, + "rewards/margins": 7.95115852355957, + "rewards/rejected": -18.438886642456055, + "step": 14552 + }, + { + "epoch": 2.26, + "learning_rate": 3.473574355516365e-06, + "logits/chosen": -2.5586557388305664, + "logits/rejected": -2.035428285598755, + "logps/chosen": -344.6419677734375, + "logps/rejected": -434.40045166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.447112083435059, + "rewards/margins": 10.675661087036133, + "rewards/rejected": -20.122772216796875, + "step": 14553 + }, + { + "epoch": 2.26, + "learning_rate": 3.472840914985217e-06, + "logits/chosen": -2.196512222290039, + "logits/rejected": -2.889624834060669, + "logps/chosen": -122.78173828125, + "logps/rejected": -320.34942626953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.616703033447266, + "rewards/margins": 8.571617126464844, + "rewards/rejected": -16.18832015991211, + "step": 14554 + }, + { + "epoch": 2.26, + "learning_rate": 3.4721074744540694e-06, + "logits/chosen": -2.807544469833374, + "logits/rejected": -2.2234957218170166, + "logps/chosen": -613.5293579101562, + "logps/rejected": -488.875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.788951873779297, + "rewards/margins": 10.04989242553711, + "rewards/rejected": -22.838844299316406, + "step": 14555 + }, + { + "epoch": 2.26, + "learning_rate": 3.4713740339229212e-06, + "logits/chosen": -2.660691261291504, + "logits/rejected": -2.8411099910736084, + "logps/chosen": -321.3522644042969, + "logps/rejected": -355.7633361816406, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.744009971618652, + "rewards/margins": 6.944467544555664, + "rewards/rejected": -12.688477516174316, + "step": 14556 + }, + { + "epoch": 2.26, + "learning_rate": 3.470640593391773e-06, + "logits/chosen": -2.2936151027679443, + "logits/rejected": -0.6292222142219543, + "logps/chosen": -393.55047607421875, + "logps/rejected": -333.6741638183594, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.005636215209961, + "rewards/margins": 6.873064994812012, + "rewards/rejected": -16.878700256347656, + "step": 14557 + }, + { + "epoch": 2.26, + "learning_rate": 3.469907152860626e-06, + "logits/chosen": -2.492558002471924, + "logits/rejected": -2.071981191635132, + "logps/chosen": -239.21322631835938, + "logps/rejected": -323.3270263671875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.727654457092285, + "rewards/margins": 7.983360290527344, + "rewards/rejected": -14.711014747619629, + "step": 14558 + }, + { + "epoch": 2.26, + "learning_rate": 3.4691737123294777e-06, + "logits/chosen": -2.9462101459503174, + "logits/rejected": -1.6313202381134033, + "logps/chosen": -365.31005859375, + "logps/rejected": -294.0459899902344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4340057373046875, + "rewards/margins": 10.519577026367188, + "rewards/rejected": -14.953582763671875, + "step": 14559 + }, + { + "epoch": 2.26, + "learning_rate": 3.46844027179833e-06, + "logits/chosen": -1.695176362991333, + "logits/rejected": -2.4181740283966064, + "logps/chosen": -281.1814270019531, + "logps/rejected": -484.5252685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.528260707855225, + "rewards/margins": 11.810781478881836, + "rewards/rejected": -18.33904266357422, + "step": 14560 + }, + { + "epoch": 2.26, + "learning_rate": 3.467706831267182e-06, + "logits/chosen": -1.4852111339569092, + "logits/rejected": -2.2119078636169434, + "logps/chosen": -212.17584228515625, + "logps/rejected": -326.6358337402344, + "loss": 0.4258, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.142366409301758, + "rewards/margins": 4.460305213928223, + "rewards/rejected": -14.602670669555664, + "step": 14561 + }, + { + "epoch": 2.26, + "learning_rate": 3.466973390736034e-06, + "logits/chosen": -2.9304592609405518, + "logits/rejected": -2.483633279800415, + "logps/chosen": -629.7611083984375, + "logps/rejected": -624.736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7966814041137695, + "rewards/margins": 10.359219551086426, + "rewards/rejected": -17.155900955200195, + "step": 14562 + }, + { + "epoch": 2.26, + "learning_rate": 3.466239950204886e-06, + "logits/chosen": -2.7512595653533936, + "logits/rejected": -2.164154291152954, + "logps/chosen": -210.62527465820312, + "logps/rejected": -282.5390930175781, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.458775997161865, + "rewards/margins": 10.433683395385742, + "rewards/rejected": -14.89245891571045, + "step": 14563 + }, + { + "epoch": 2.27, + "learning_rate": 3.4655065096737384e-06, + "logits/chosen": -2.885430335998535, + "logits/rejected": -2.3575387001037598, + "logps/chosen": -290.59503173828125, + "logps/rejected": -296.4664306640625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.672273635864258, + "rewards/margins": 7.0785231590271, + "rewards/rejected": -13.750797271728516, + "step": 14564 + }, + { + "epoch": 2.27, + "learning_rate": 3.4647730691425903e-06, + "logits/chosen": -2.3522958755493164, + "logits/rejected": -2.9415087699890137, + "logps/chosen": -116.16642761230469, + "logps/rejected": -230.67799377441406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.692324638366699, + "rewards/margins": 10.044801712036133, + "rewards/rejected": -16.737125396728516, + "step": 14565 + }, + { + "epoch": 2.27, + "learning_rate": 3.4640396286114426e-06, + "logits/chosen": -2.7252633571624756, + "logits/rejected": -2.340583324432373, + "logps/chosen": -533.1334838867188, + "logps/rejected": -595.8096923828125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.352444648742676, + "rewards/margins": 10.524784088134766, + "rewards/rejected": -17.877229690551758, + "step": 14566 + }, + { + "epoch": 2.27, + "learning_rate": 3.463306188080295e-06, + "logits/chosen": -2.3118977546691895, + "logits/rejected": -2.656572103500366, + "logps/chosen": -301.2799072265625, + "logps/rejected": -572.5286865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2358503341674805, + "rewards/margins": 11.397090911865234, + "rewards/rejected": -18.63294219970703, + "step": 14567 + }, + { + "epoch": 2.27, + "learning_rate": 3.4625727475491467e-06, + "logits/chosen": -2.6412172317504883, + "logits/rejected": -1.6994975805282593, + "logps/chosen": -144.8233642578125, + "logps/rejected": -211.28257751464844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.677520751953125, + "rewards/margins": 8.6541166305542, + "rewards/rejected": -12.33163833618164, + "step": 14568 + }, + { + "epoch": 2.27, + "learning_rate": 3.461839307017999e-06, + "logits/chosen": -2.504695177078247, + "logits/rejected": -2.8060758113861084, + "logps/chosen": -190.4725341796875, + "logps/rejected": -290.4730529785156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.226174354553223, + "rewards/margins": 8.426021575927734, + "rewards/rejected": -15.652195930480957, + "step": 14569 + }, + { + "epoch": 2.27, + "learning_rate": 3.461105866486851e-06, + "logits/chosen": -1.905826449394226, + "logits/rejected": -2.55299973487854, + "logps/chosen": -334.75762939453125, + "logps/rejected": -574.46630859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.487321853637695, + "rewards/margins": 14.569987297058105, + "rewards/rejected": -21.057308197021484, + "step": 14570 + }, + { + "epoch": 2.27, + "learning_rate": 3.4603724259557032e-06, + "logits/chosen": -2.9244797229766846, + "logits/rejected": -2.435580015182495, + "logps/chosen": -994.499267578125, + "logps/rejected": -588.685791015625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.536922454833984, + "rewards/margins": 8.185304641723633, + "rewards/rejected": -16.722225189208984, + "step": 14571 + }, + { + "epoch": 2.27, + "learning_rate": 3.459638985424555e-06, + "logits/chosen": -2.6859021186828613, + "logits/rejected": -2.8951175212860107, + "logps/chosen": -439.4372863769531, + "logps/rejected": -304.76361083984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.578173637390137, + "rewards/margins": 9.224030494689941, + "rewards/rejected": -14.802204132080078, + "step": 14572 + }, + { + "epoch": 2.27, + "learning_rate": 3.4589055448934074e-06, + "logits/chosen": -1.5674917697906494, + "logits/rejected": -2.4237587451934814, + "logps/chosen": -112.82180786132812, + "logps/rejected": -286.07781982421875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.338958263397217, + "rewards/margins": 8.992796897888184, + "rewards/rejected": -14.331754684448242, + "step": 14573 + }, + { + "epoch": 2.27, + "learning_rate": 3.4581721043622593e-06, + "logits/chosen": -0.6640877723693848, + "logits/rejected": -2.3275160789489746, + "logps/chosen": -217.17417907714844, + "logps/rejected": -504.44635009765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.560770034790039, + "rewards/margins": 9.315805435180664, + "rewards/rejected": -16.876575469970703, + "step": 14574 + }, + { + "epoch": 2.27, + "learning_rate": 3.457438663831112e-06, + "logits/chosen": -2.3295977115631104, + "logits/rejected": -2.949589729309082, + "logps/chosen": -146.3663787841797, + "logps/rejected": -262.2763366699219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4530744552612305, + "rewards/margins": 8.723234176635742, + "rewards/rejected": -16.17630958557129, + "step": 14575 + }, + { + "epoch": 2.27, + "learning_rate": 3.456705223299964e-06, + "logits/chosen": -2.7013869285583496, + "logits/rejected": -1.9905787706375122, + "logps/chosen": -473.5531921386719, + "logps/rejected": -464.7649230957031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.263450622558594, + "rewards/margins": 9.286760330200195, + "rewards/rejected": -19.550209045410156, + "step": 14576 + }, + { + "epoch": 2.27, + "learning_rate": 3.4559717827688158e-06, + "logits/chosen": -2.8766071796417236, + "logits/rejected": -2.645777940750122, + "logps/chosen": -229.489990234375, + "logps/rejected": -207.90194702148438, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.445041179656982, + "rewards/margins": 6.647459983825684, + "rewards/rejected": -13.092500686645508, + "step": 14577 + }, + { + "epoch": 2.27, + "learning_rate": 3.455238342237668e-06, + "logits/chosen": -2.8463568687438965, + "logits/rejected": -2.881808280944824, + "logps/chosen": -95.7156753540039, + "logps/rejected": -190.5966796875, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.129528045654297, + "rewards/margins": 5.75042724609375, + "rewards/rejected": -12.879955291748047, + "step": 14578 + }, + { + "epoch": 2.27, + "learning_rate": 3.45450490170652e-06, + "logits/chosen": -2.2576167583465576, + "logits/rejected": -2.790583610534668, + "logps/chosen": -354.938232421875, + "logps/rejected": -512.1588134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.149882316589355, + "rewards/margins": 10.558483123779297, + "rewards/rejected": -18.70836639404297, + "step": 14579 + }, + { + "epoch": 2.27, + "learning_rate": 3.4537714611753722e-06, + "logits/chosen": -0.6504037380218506, + "logits/rejected": -2.5692386627197266, + "logps/chosen": -148.58193969726562, + "logps/rejected": -646.8802490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.094936847686768, + "rewards/margins": 14.609184265136719, + "rewards/rejected": -20.704120635986328, + "step": 14580 + }, + { + "epoch": 2.27, + "learning_rate": 3.453038020644224e-06, + "logits/chosen": -2.5646843910217285, + "logits/rejected": -2.6625823974609375, + "logps/chosen": -142.78636169433594, + "logps/rejected": -361.5346374511719, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.247186660766602, + "rewards/margins": 10.666751861572266, + "rewards/rejected": -18.913938522338867, + "step": 14581 + }, + { + "epoch": 2.27, + "learning_rate": 3.4523045801130764e-06, + "logits/chosen": -2.772698402404785, + "logits/rejected": -2.3233556747436523, + "logps/chosen": -259.899169921875, + "logps/rejected": -324.2120361328125, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.076818466186523, + "rewards/margins": 7.790997505187988, + "rewards/rejected": -16.867816925048828, + "step": 14582 + }, + { + "epoch": 2.27, + "learning_rate": 3.4515711395819287e-06, + "logits/chosen": -2.7836949825286865, + "logits/rejected": -1.8390554189682007, + "logps/chosen": -557.82177734375, + "logps/rejected": -292.6757507324219, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.203463554382324, + "rewards/margins": 4.961515426635742, + "rewards/rejected": -11.164978981018066, + "step": 14583 + }, + { + "epoch": 2.27, + "learning_rate": 3.450837699050781e-06, + "logits/chosen": -0.9097177982330322, + "logits/rejected": -2.8164069652557373, + "logps/chosen": -166.85890197753906, + "logps/rejected": -986.5739135742188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.16076946258545, + "rewards/margins": 21.78324317932129, + "rewards/rejected": -29.944011688232422, + "step": 14584 + }, + { + "epoch": 2.27, + "learning_rate": 3.450104258519633e-06, + "logits/chosen": -2.7074904441833496, + "logits/rejected": -2.950935125350952, + "logps/chosen": -679.5628662109375, + "logps/rejected": -528.94384765625, + "loss": 0.0296, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.670699119567871, + "rewards/margins": 6.3241353034973145, + "rewards/rejected": -13.994834899902344, + "step": 14585 + }, + { + "epoch": 2.27, + "learning_rate": 3.4493708179884848e-06, + "logits/chosen": -2.9493703842163086, + "logits/rejected": -2.201524019241333, + "logps/chosen": -604.1805419921875, + "logps/rejected": -1109.965576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.097412109375, + "rewards/margins": 14.11674690246582, + "rewards/rejected": -20.21415901184082, + "step": 14586 + }, + { + "epoch": 2.27, + "learning_rate": 3.448637377457337e-06, + "logits/chosen": -2.726148843765259, + "logits/rejected": -2.85988187789917, + "logps/chosen": -256.39300537109375, + "logps/rejected": -439.36236572265625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.975079536437988, + "rewards/margins": 9.574560165405273, + "rewards/rejected": -15.549638748168945, + "step": 14587 + }, + { + "epoch": 2.27, + "learning_rate": 3.447903936926189e-06, + "logits/chosen": -2.7764148712158203, + "logits/rejected": -1.6618937253952026, + "logps/chosen": -272.76434326171875, + "logps/rejected": -312.1653747558594, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.03763198852539, + "rewards/margins": 9.341541290283203, + "rewards/rejected": -17.379173278808594, + "step": 14588 + }, + { + "epoch": 2.27, + "learning_rate": 3.4471704963950413e-06, + "logits/chosen": -1.8789862394332886, + "logits/rejected": -1.9480496644973755, + "logps/chosen": -177.97250366210938, + "logps/rejected": -323.67169189453125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.17593002319336, + "rewards/margins": 7.682616233825684, + "rewards/rejected": -17.85854721069336, + "step": 14589 + }, + { + "epoch": 2.27, + "learning_rate": 3.446437055863893e-06, + "logits/chosen": -1.2077556848526, + "logits/rejected": -2.8033370971679688, + "logps/chosen": -171.13467407226562, + "logps/rejected": -732.4521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.108501434326172, + "rewards/margins": 12.64815902709961, + "rewards/rejected": -23.75666046142578, + "step": 14590 + }, + { + "epoch": 2.27, + "learning_rate": 3.4457036153327454e-06, + "logits/chosen": -1.7897484302520752, + "logits/rejected": -2.698057174682617, + "logps/chosen": -269.72021484375, + "logps/rejected": -398.80645751953125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.794431686401367, + "rewards/margins": 7.614504814147949, + "rewards/rejected": -17.408935546875, + "step": 14591 + }, + { + "epoch": 2.27, + "learning_rate": 3.4449701748015977e-06, + "logits/chosen": -1.4223779439926147, + "logits/rejected": -2.234438896179199, + "logps/chosen": -252.5412139892578, + "logps/rejected": -405.8757629394531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.225221633911133, + "rewards/margins": 9.193885803222656, + "rewards/rejected": -19.419105529785156, + "step": 14592 + }, + { + "epoch": 2.27, + "learning_rate": 3.44423673427045e-06, + "logits/chosen": -2.556666612625122, + "logits/rejected": -2.9190196990966797, + "logps/chosen": -112.62844848632812, + "logps/rejected": -227.5889129638672, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.674069881439209, + "rewards/margins": 10.824228286743164, + "rewards/rejected": -15.498297691345215, + "step": 14593 + }, + { + "epoch": 2.27, + "learning_rate": 3.443503293739302e-06, + "logits/chosen": -2.03096604347229, + "logits/rejected": -2.579167604446411, + "logps/chosen": -291.6418151855469, + "logps/rejected": -438.894775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.731340408325195, + "rewards/margins": 10.44831371307373, + "rewards/rejected": -19.17965316772461, + "step": 14594 + }, + { + "epoch": 2.27, + "learning_rate": 3.442769853208154e-06, + "logits/chosen": -2.9635329246520996, + "logits/rejected": -1.6235154867172241, + "logps/chosen": -250.279296875, + "logps/rejected": -238.43553161621094, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.364757537841797, + "rewards/margins": 8.284066200256348, + "rewards/rejected": -12.648824691772461, + "step": 14595 + }, + { + "epoch": 2.27, + "learning_rate": 3.442036412677006e-06, + "logits/chosen": -2.7571794986724854, + "logits/rejected": -2.199904680252075, + "logps/chosen": -420.5767517089844, + "logps/rejected": -534.832275390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.344621658325195, + "rewards/margins": 12.25169563293457, + "rewards/rejected": -18.596317291259766, + "step": 14596 + }, + { + "epoch": 2.27, + "learning_rate": 3.441302972145858e-06, + "logits/chosen": -1.9038026332855225, + "logits/rejected": -2.311216354370117, + "logps/chosen": -238.1341552734375, + "logps/rejected": -547.3087158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.622949600219727, + "rewards/margins": 12.168655395507812, + "rewards/rejected": -23.79160499572754, + "step": 14597 + }, + { + "epoch": 2.27, + "learning_rate": 3.4405695316147103e-06, + "logits/chosen": -2.6496660709381104, + "logits/rejected": -2.7867271900177, + "logps/chosen": -417.9765625, + "logps/rejected": -456.87237548828125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.807672500610352, + "rewards/margins": 7.786649703979492, + "rewards/rejected": -13.594322204589844, + "step": 14598 + }, + { + "epoch": 2.27, + "learning_rate": 3.439836091083562e-06, + "logits/chosen": -2.810689926147461, + "logits/rejected": -2.604661226272583, + "logps/chosen": -571.4610595703125, + "logps/rejected": -590.8978271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.802792549133301, + "rewards/margins": 13.217870712280273, + "rewards/rejected": -21.020662307739258, + "step": 14599 + }, + { + "epoch": 2.27, + "learning_rate": 3.439102650552415e-06, + "logits/chosen": -2.077059507369995, + "logits/rejected": -2.4866089820861816, + "logps/chosen": -149.4429168701172, + "logps/rejected": -362.9122314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.096752166748047, + "rewards/margins": 12.990985870361328, + "rewards/rejected": -19.087738037109375, + "step": 14600 + }, + { + "epoch": 2.27, + "learning_rate": 3.4383692100212668e-06, + "logits/chosen": -2.7252309322357178, + "logits/rejected": -1.747532606124878, + "logps/chosen": -240.59324645996094, + "logps/rejected": -215.9873504638672, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9114208221435547, + "rewards/margins": 11.941787719726562, + "rewards/rejected": -15.853209495544434, + "step": 14601 + }, + { + "epoch": 2.27, + "learning_rate": 3.437635769490119e-06, + "logits/chosen": -0.5651087760925293, + "logits/rejected": -2.080108165740967, + "logps/chosen": -178.8641815185547, + "logps/rejected": -430.1148681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.495963096618652, + "rewards/margins": 12.282651901245117, + "rewards/rejected": -20.778614044189453, + "step": 14602 + }, + { + "epoch": 2.27, + "learning_rate": 3.436902328958971e-06, + "logits/chosen": -2.45051646232605, + "logits/rejected": -2.742908477783203, + "logps/chosen": -96.42620849609375, + "logps/rejected": -381.703857421875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1476945877075195, + "rewards/margins": 9.37732219696045, + "rewards/rejected": -16.52501678466797, + "step": 14603 + }, + { + "epoch": 2.27, + "learning_rate": 3.4361688884278232e-06, + "logits/chosen": -1.4626952409744263, + "logits/rejected": -2.3319454193115234, + "logps/chosen": -298.48052978515625, + "logps/rejected": -633.6209716796875, + "loss": 0.0808, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.925785064697266, + "rewards/margins": 7.516305923461914, + "rewards/rejected": -19.44209098815918, + "step": 14604 + }, + { + "epoch": 2.27, + "learning_rate": 3.435435447896675e-06, + "logits/chosen": -2.223375082015991, + "logits/rejected": -2.722963571548462, + "logps/chosen": -475.9769287109375, + "logps/rejected": -502.22222900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.675250053405762, + "rewards/margins": 11.055044174194336, + "rewards/rejected": -18.73029327392578, + "step": 14605 + }, + { + "epoch": 2.27, + "learning_rate": 3.434702007365527e-06, + "logits/chosen": -2.390864610671997, + "logits/rejected": -2.6822807788848877, + "logps/chosen": -261.7237243652344, + "logps/rejected": -472.42010498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.73940658569336, + "rewards/margins": 10.467348098754883, + "rewards/rejected": -20.206756591796875, + "step": 14606 + }, + { + "epoch": 2.27, + "learning_rate": 3.4339685668343793e-06, + "logits/chosen": -2.905907154083252, + "logits/rejected": -1.9156255722045898, + "logps/chosen": -312.35693359375, + "logps/rejected": -254.80905151367188, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.542378902435303, + "rewards/margins": 7.343421459197998, + "rewards/rejected": -11.8858003616333, + "step": 14607 + }, + { + "epoch": 2.27, + "learning_rate": 3.4332351263032316e-06, + "logits/chosen": -2.7348859310150146, + "logits/rejected": -2.652909755706787, + "logps/chosen": -240.6036834716797, + "logps/rejected": -332.40948486328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.117697715759277, + "rewards/margins": 9.759374618530273, + "rewards/rejected": -15.87707233428955, + "step": 14608 + }, + { + "epoch": 2.27, + "learning_rate": 3.432501685772084e-06, + "logits/chosen": -2.098215341567993, + "logits/rejected": -2.5299177169799805, + "logps/chosen": -241.98193359375, + "logps/rejected": -424.50531005859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.717001914978027, + "rewards/margins": 8.663990020751953, + "rewards/rejected": -18.380992889404297, + "step": 14609 + }, + { + "epoch": 2.27, + "learning_rate": 3.4317682452409358e-06, + "logits/chosen": -2.1659834384918213, + "logits/rejected": -2.782975673675537, + "logps/chosen": -184.45883178710938, + "logps/rejected": -370.8448486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0057878494262695, + "rewards/margins": 12.48111343383789, + "rewards/rejected": -18.486902236938477, + "step": 14610 + }, + { + "epoch": 2.27, + "learning_rate": 3.431034804709788e-06, + "logits/chosen": -2.315845251083374, + "logits/rejected": -2.4930663108825684, + "logps/chosen": -382.6983947753906, + "logps/rejected": -447.25897216796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.344485282897949, + "rewards/margins": 12.238868713378906, + "rewards/rejected": -17.583354949951172, + "step": 14611 + }, + { + "epoch": 2.27, + "learning_rate": 3.43030136417864e-06, + "logits/chosen": -2.4269838333129883, + "logits/rejected": -2.8218986988067627, + "logps/chosen": -133.161376953125, + "logps/rejected": -329.98236083984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.673142433166504, + "rewards/margins": 10.440285682678223, + "rewards/rejected": -17.113428115844727, + "step": 14612 + }, + { + "epoch": 2.27, + "learning_rate": 3.4295679236474923e-06, + "logits/chosen": -1.9121772050857544, + "logits/rejected": -2.3822803497314453, + "logps/chosen": -234.1256103515625, + "logps/rejected": -352.7310791015625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.183518409729004, + "rewards/margins": 7.80042028427124, + "rewards/rejected": -12.983938217163086, + "step": 14613 + }, + { + "epoch": 2.27, + "learning_rate": 3.428834483116344e-06, + "logits/chosen": -2.070037841796875, + "logits/rejected": -2.529576539993286, + "logps/chosen": -200.28900146484375, + "logps/rejected": -466.8406982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.254241943359375, + "rewards/margins": 11.699153900146484, + "rewards/rejected": -20.95339584350586, + "step": 14614 + }, + { + "epoch": 2.27, + "learning_rate": 3.428101042585196e-06, + "logits/chosen": -1.3223177194595337, + "logits/rejected": -2.7563929557800293, + "logps/chosen": -227.75680541992188, + "logps/rejected": -442.48974609375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.157569885253906, + "rewards/margins": 8.100475311279297, + "rewards/rejected": -16.258045196533203, + "step": 14615 + }, + { + "epoch": 2.27, + "learning_rate": 3.4273676020540483e-06, + "logits/chosen": -2.331441640853882, + "logits/rejected": -2.4713850021362305, + "logps/chosen": -348.39923095703125, + "logps/rejected": -428.877685546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.10051155090332, + "rewards/margins": 11.996271133422852, + "rewards/rejected": -16.096782684326172, + "step": 14616 + }, + { + "epoch": 2.27, + "learning_rate": 3.4266341615229006e-06, + "logits/chosen": -2.8331820964813232, + "logits/rejected": -2.8843774795532227, + "logps/chosen": -194.85044860839844, + "logps/rejected": -299.7655029296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.01425552368164, + "rewards/margins": 9.77341079711914, + "rewards/rejected": -19.78766632080078, + "step": 14617 + }, + { + "epoch": 2.27, + "learning_rate": 3.425900720991753e-06, + "logits/chosen": -2.7233779430389404, + "logits/rejected": -2.746314287185669, + "logps/chosen": -439.8611755371094, + "logps/rejected": -408.127197265625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.586028099060059, + "rewards/margins": 6.946779251098633, + "rewards/rejected": -12.532808303833008, + "step": 14618 + }, + { + "epoch": 2.27, + "learning_rate": 3.425167280460605e-06, + "logits/chosen": -2.5646512508392334, + "logits/rejected": -2.7890381813049316, + "logps/chosen": -62.51215744018555, + "logps/rejected": -288.6163330078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2507829666137695, + "rewards/margins": 11.616727828979492, + "rewards/rejected": -16.867509841918945, + "step": 14619 + }, + { + "epoch": 2.27, + "learning_rate": 3.424433839929457e-06, + "logits/chosen": -1.3660506010055542, + "logits/rejected": -2.9415831565856934, + "logps/chosen": -265.24029541015625, + "logps/rejected": -637.7282104492188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.846749305725098, + "rewards/margins": 19.97167205810547, + "rewards/rejected": -25.81842041015625, + "step": 14620 + }, + { + "epoch": 2.27, + "learning_rate": 3.423700399398309e-06, + "logits/chosen": -2.487344741821289, + "logits/rejected": -2.810404062271118, + "logps/chosen": -253.57147216796875, + "logps/rejected": -327.87054443359375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.660116195678711, + "rewards/margins": 10.310083389282227, + "rewards/rejected": -19.970199584960938, + "step": 14621 + }, + { + "epoch": 2.27, + "learning_rate": 3.4229669588671613e-06, + "logits/chosen": -2.762418031692505, + "logits/rejected": -2.719467878341675, + "logps/chosen": -157.11585998535156, + "logps/rejected": -234.93002319335938, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.351980686187744, + "rewards/margins": 8.051168441772461, + "rewards/rejected": -14.403148651123047, + "step": 14622 + }, + { + "epoch": 2.27, + "learning_rate": 3.422233518336013e-06, + "logits/chosen": -2.716747522354126, + "logits/rejected": -2.7539515495300293, + "logps/chosen": -248.8310089111328, + "logps/rejected": -407.10821533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.313611030578613, + "rewards/margins": 12.269627571105957, + "rewards/rejected": -16.58323860168457, + "step": 14623 + }, + { + "epoch": 2.27, + "learning_rate": 3.4215000778048655e-06, + "logits/chosen": -2.6474156379699707, + "logits/rejected": -2.6677563190460205, + "logps/chosen": -127.56505584716797, + "logps/rejected": -259.9931640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.707887649536133, + "rewards/margins": 8.194353103637695, + "rewards/rejected": -17.902240753173828, + "step": 14624 + }, + { + "epoch": 2.27, + "learning_rate": 3.4207666372737178e-06, + "logits/chosen": -1.2909822463989258, + "logits/rejected": -2.2636971473693848, + "logps/chosen": -153.0404052734375, + "logps/rejected": -460.52777099609375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.496320724487305, + "rewards/margins": 8.886001586914062, + "rewards/rejected": -16.382322311401367, + "step": 14625 + }, + { + "epoch": 2.27, + "learning_rate": 3.4200331967425697e-06, + "logits/chosen": -2.0280802249908447, + "logits/rejected": -2.5962584018707275, + "logps/chosen": -96.55796813964844, + "logps/rejected": -264.34271240234375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.415463447570801, + "rewards/margins": 10.947452545166016, + "rewards/rejected": -17.362916946411133, + "step": 14626 + }, + { + "epoch": 2.27, + "learning_rate": 3.419299756211422e-06, + "logits/chosen": -1.339829921722412, + "logits/rejected": -2.286618232727051, + "logps/chosen": -156.91311645507812, + "logps/rejected": -246.0365753173828, + "loss": 0.0417, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.156241416931152, + "rewards/margins": 4.6854658126831055, + "rewards/rejected": -15.841707229614258, + "step": 14627 + }, + { + "epoch": 2.27, + "learning_rate": 3.418566315680274e-06, + "logits/chosen": -2.561981678009033, + "logits/rejected": -1.9107152223587036, + "logps/chosen": -359.1197204589844, + "logps/rejected": -407.3306579589844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.185197830200195, + "rewards/margins": 11.5653076171875, + "rewards/rejected": -19.750505447387695, + "step": 14628 + }, + { + "epoch": 2.28, + "learning_rate": 3.417832875149126e-06, + "logits/chosen": -2.9837801456451416, + "logits/rejected": -1.6106102466583252, + "logps/chosen": -910.278076171875, + "logps/rejected": -495.6419372558594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.921493053436279, + "rewards/margins": 12.076733589172363, + "rewards/rejected": -17.998226165771484, + "step": 14629 + }, + { + "epoch": 2.28, + "learning_rate": 3.417099434617978e-06, + "logits/chosen": -1.9525634050369263, + "logits/rejected": -2.409287452697754, + "logps/chosen": -163.31185913085938, + "logps/rejected": -403.49066162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.602252960205078, + "rewards/margins": 11.923322677612305, + "rewards/rejected": -18.525575637817383, + "step": 14630 + }, + { + "epoch": 2.28, + "learning_rate": 3.4163659940868303e-06, + "logits/chosen": -2.571492910385132, + "logits/rejected": -1.8031196594238281, + "logps/chosen": -443.30877685546875, + "logps/rejected": -431.38726806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.735256195068359, + "rewards/margins": 10.621030807495117, + "rewards/rejected": -17.356287002563477, + "step": 14631 + }, + { + "epoch": 2.28, + "learning_rate": 3.415632553555682e-06, + "logits/chosen": -2.712766170501709, + "logits/rejected": -2.3378067016601562, + "logps/chosen": -811.015625, + "logps/rejected": -759.2340087890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.508227348327637, + "rewards/margins": 10.54955768585205, + "rewards/rejected": -19.057785034179688, + "step": 14632 + }, + { + "epoch": 2.28, + "learning_rate": 3.4148991130245345e-06, + "logits/chosen": -2.730623960494995, + "logits/rejected": -2.6803369522094727, + "logps/chosen": -304.5539245605469, + "logps/rejected": -369.83892822265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.677396297454834, + "rewards/margins": 13.00391960144043, + "rewards/rejected": -16.681316375732422, + "step": 14633 + }, + { + "epoch": 2.28, + "learning_rate": 3.414165672493387e-06, + "logits/chosen": -2.752063512802124, + "logits/rejected": -1.3386359214782715, + "logps/chosen": -608.3903198242188, + "logps/rejected": -338.7225646972656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.077394485473633, + "rewards/margins": 10.24887466430664, + "rewards/rejected": -18.32626724243164, + "step": 14634 + }, + { + "epoch": 2.28, + "learning_rate": 3.4134322319622387e-06, + "logits/chosen": -2.055879831314087, + "logits/rejected": -2.558522939682007, + "logps/chosen": -175.7833251953125, + "logps/rejected": -411.50701904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.914312362670898, + "rewards/margins": 11.518135070800781, + "rewards/rejected": -20.43244743347168, + "step": 14635 + }, + { + "epoch": 2.28, + "learning_rate": 3.412698791431091e-06, + "logits/chosen": -2.034318447113037, + "logits/rejected": -2.559549570083618, + "logps/chosen": -283.6514892578125, + "logps/rejected": -514.7478637695312, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.418481826782227, + "rewards/margins": 9.824254035949707, + "rewards/rejected": -17.242734909057617, + "step": 14636 + }, + { + "epoch": 2.28, + "learning_rate": 3.411965350899943e-06, + "logits/chosen": -2.801098585128784, + "logits/rejected": -1.7490826845169067, + "logps/chosen": -771.5115966796875, + "logps/rejected": -499.96771240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.35528564453125, + "rewards/margins": 10.681243896484375, + "rewards/rejected": -18.036529541015625, + "step": 14637 + }, + { + "epoch": 2.28, + "learning_rate": 3.411231910368795e-06, + "logits/chosen": -2.127443313598633, + "logits/rejected": -2.6337039470672607, + "logps/chosen": -263.4349060058594, + "logps/rejected": -292.07470703125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.645561218261719, + "rewards/margins": 6.379859447479248, + "rewards/rejected": -16.025421142578125, + "step": 14638 + }, + { + "epoch": 2.28, + "learning_rate": 3.410498469837647e-06, + "logits/chosen": -2.040189266204834, + "logits/rejected": -2.420121192932129, + "logps/chosen": -181.53628540039062, + "logps/rejected": -428.90252685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.724682331085205, + "rewards/margins": 12.711231231689453, + "rewards/rejected": -19.4359130859375, + "step": 14639 + }, + { + "epoch": 2.28, + "learning_rate": 3.4097650293064993e-06, + "logits/chosen": -2.1283016204833984, + "logits/rejected": -2.339263439178467, + "logps/chosen": -144.47389221191406, + "logps/rejected": -241.89590454101562, + "loss": 2.403, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.848440170288086, + "rewards/margins": 1.9489359855651855, + "rewards/rejected": -14.79737663269043, + "step": 14640 + }, + { + "epoch": 2.28, + "learning_rate": 3.4090315887753512e-06, + "logits/chosen": -2.826230764389038, + "logits/rejected": -2.3833980560302734, + "logps/chosen": -608.9263305664062, + "logps/rejected": -566.4703369140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.265210151672363, + "rewards/margins": 8.191320419311523, + "rewards/rejected": -15.456531524658203, + "step": 14641 + }, + { + "epoch": 2.28, + "learning_rate": 3.408298148244204e-06, + "logits/chosen": -2.223890781402588, + "logits/rejected": -2.2540717124938965, + "logps/chosen": -551.19091796875, + "logps/rejected": -439.60430908203125, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.298097610473633, + "rewards/margins": 7.581673622131348, + "rewards/rejected": -13.87977123260498, + "step": 14642 + }, + { + "epoch": 2.28, + "learning_rate": 3.407564707713056e-06, + "logits/chosen": -2.4425926208496094, + "logits/rejected": -2.6703202724456787, + "logps/chosen": -293.94775390625, + "logps/rejected": -450.00537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.651512145996094, + "rewards/margins": 10.007946014404297, + "rewards/rejected": -17.65945816040039, + "step": 14643 + }, + { + "epoch": 2.28, + "learning_rate": 3.4068312671819077e-06, + "logits/chosen": -2.492107391357422, + "logits/rejected": -2.934253215789795, + "logps/chosen": -116.05224609375, + "logps/rejected": -220.9552001953125, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.043724060058594, + "rewards/margins": 6.303091049194336, + "rewards/rejected": -16.34681510925293, + "step": 14644 + }, + { + "epoch": 2.28, + "learning_rate": 3.40609782665076e-06, + "logits/chosen": -2.600090265274048, + "logits/rejected": -2.7956418991088867, + "logps/chosen": -353.40496826171875, + "logps/rejected": -550.6629638671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.555685520172119, + "rewards/margins": 8.163212776184082, + "rewards/rejected": -13.71889877319336, + "step": 14645 + }, + { + "epoch": 2.28, + "learning_rate": 3.405364386119612e-06, + "logits/chosen": -3.0095653533935547, + "logits/rejected": -3.0794453620910645, + "logps/chosen": -690.1234130859375, + "logps/rejected": -592.775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.2636079788208, + "rewards/margins": 10.833529472351074, + "rewards/rejected": -20.097137451171875, + "step": 14646 + }, + { + "epoch": 2.28, + "learning_rate": 3.404630945588464e-06, + "logits/chosen": -1.503533124923706, + "logits/rejected": -2.310520887374878, + "logps/chosen": -104.14386749267578, + "logps/rejected": -273.00390625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8478293418884277, + "rewards/margins": 7.950477123260498, + "rewards/rejected": -10.798306465148926, + "step": 14647 + }, + { + "epoch": 2.28, + "learning_rate": 3.403897505057316e-06, + "logits/chosen": -2.281036615371704, + "logits/rejected": -2.773092746734619, + "logps/chosen": -397.6323547363281, + "logps/rejected": -491.6358642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.852896690368652, + "rewards/margins": 13.480278015136719, + "rewards/rejected": -22.333175659179688, + "step": 14648 + }, + { + "epoch": 2.28, + "learning_rate": 3.4031640645261684e-06, + "logits/chosen": -2.0241973400115967, + "logits/rejected": -2.5286810398101807, + "logps/chosen": -124.01851654052734, + "logps/rejected": -367.98974609375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.205592155456543, + "rewards/margins": 8.646758079528809, + "rewards/rejected": -15.852350234985352, + "step": 14649 + }, + { + "epoch": 2.28, + "learning_rate": 3.4024306239950207e-06, + "logits/chosen": -1.6895705461502075, + "logits/rejected": -2.2260258197784424, + "logps/chosen": -213.80526733398438, + "logps/rejected": -489.502685546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.900557518005371, + "rewards/margins": 11.941442489624023, + "rewards/rejected": -20.84200096130371, + "step": 14650 + }, + { + "epoch": 2.28, + "learning_rate": 3.401697183463873e-06, + "logits/chosen": -1.9954980611801147, + "logits/rejected": -2.7873942852020264, + "logps/chosen": -184.56463623046875, + "logps/rejected": -509.79388427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.614850997924805, + "rewards/margins": 9.959419250488281, + "rewards/rejected": -19.57427215576172, + "step": 14651 + }, + { + "epoch": 2.28, + "learning_rate": 3.400963742932725e-06, + "logits/chosen": -2.928225040435791, + "logits/rejected": -2.9757235050201416, + "logps/chosen": -143.59561157226562, + "logps/rejected": -359.7464599609375, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.060131549835205, + "rewards/margins": 8.453496932983398, + "rewards/rejected": -14.513628005981445, + "step": 14652 + }, + { + "epoch": 2.28, + "learning_rate": 3.400230302401577e-06, + "logits/chosen": -2.64642333984375, + "logits/rejected": -1.4718918800354004, + "logps/chosen": -206.6466064453125, + "logps/rejected": -199.28094482421875, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.326066970825195, + "rewards/margins": 6.138935089111328, + "rewards/rejected": -16.465002059936523, + "step": 14653 + }, + { + "epoch": 2.28, + "learning_rate": 3.399496861870429e-06, + "logits/chosen": -2.7299551963806152, + "logits/rejected": -2.324582576751709, + "logps/chosen": -731.2842407226562, + "logps/rejected": -616.9291381835938, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.088170051574707, + "rewards/margins": 8.14083480834961, + "rewards/rejected": -16.229005813598633, + "step": 14654 + }, + { + "epoch": 2.28, + "learning_rate": 3.398763421339281e-06, + "logits/chosen": -1.594905138015747, + "logits/rejected": -2.6894614696502686, + "logps/chosen": -234.61427307128906, + "logps/rejected": -553.5643920898438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.289280891418457, + "rewards/margins": 9.880746841430664, + "rewards/rejected": -18.170028686523438, + "step": 14655 + }, + { + "epoch": 2.28, + "learning_rate": 3.398029980808133e-06, + "logits/chosen": -0.7058491110801697, + "logits/rejected": -2.3411052227020264, + "logps/chosen": -141.6041259765625, + "logps/rejected": -480.0766906738281, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.855881690979004, + "rewards/margins": 13.297977447509766, + "rewards/rejected": -21.153858184814453, + "step": 14656 + }, + { + "epoch": 2.28, + "learning_rate": 3.397296540276985e-06, + "logits/chosen": -2.038479804992676, + "logits/rejected": -2.4496407508850098, + "logps/chosen": -124.12014770507812, + "logps/rejected": -622.66845703125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.228313446044922, + "rewards/margins": 11.20051383972168, + "rewards/rejected": -19.4288272857666, + "step": 14657 + }, + { + "epoch": 2.28, + "learning_rate": 3.3965630997458374e-06, + "logits/chosen": -2.1948156356811523, + "logits/rejected": -2.979649543762207, + "logps/chosen": -107.62258911132812, + "logps/rejected": -324.0589599609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.810898780822754, + "rewards/margins": 8.416864395141602, + "rewards/rejected": -13.227764129638672, + "step": 14658 + }, + { + "epoch": 2.28, + "learning_rate": 3.3958296592146897e-06, + "logits/chosen": -2.783493995666504, + "logits/rejected": -3.011782169342041, + "logps/chosen": -140.86875915527344, + "logps/rejected": -196.08653259277344, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.55051040649414, + "rewards/margins": 6.695824146270752, + "rewards/rejected": -16.246334075927734, + "step": 14659 + }, + { + "epoch": 2.28, + "learning_rate": 3.395096218683542e-06, + "logits/chosen": -2.8190526962280273, + "logits/rejected": -2.0862274169921875, + "logps/chosen": -517.862060546875, + "logps/rejected": -532.6470947265625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.517389297485352, + "rewards/margins": 11.025453567504883, + "rewards/rejected": -17.542842864990234, + "step": 14660 + }, + { + "epoch": 2.28, + "learning_rate": 3.394362778152394e-06, + "logits/chosen": -3.0786025524139404, + "logits/rejected": -2.5940518379211426, + "logps/chosen": -233.83251953125, + "logps/rejected": -351.9710388183594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.646819114685059, + "rewards/margins": 11.39647102355957, + "rewards/rejected": -16.043289184570312, + "step": 14661 + }, + { + "epoch": 2.28, + "learning_rate": 3.393629337621246e-06, + "logits/chosen": -2.368403673171997, + "logits/rejected": -2.5673611164093018, + "logps/chosen": -170.9151611328125, + "logps/rejected": -368.6018981933594, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.58742904663086, + "rewards/margins": 7.501475811004639, + "rewards/rejected": -16.088905334472656, + "step": 14662 + }, + { + "epoch": 2.28, + "learning_rate": 3.392895897090098e-06, + "logits/chosen": -2.3590004444122314, + "logits/rejected": -2.255424976348877, + "logps/chosen": -224.1416015625, + "logps/rejected": -350.2320861816406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.346565246582031, + "rewards/margins": 11.629936218261719, + "rewards/rejected": -19.97650146484375, + "step": 14663 + }, + { + "epoch": 2.28, + "learning_rate": 3.39216245655895e-06, + "logits/chosen": -1.4427721500396729, + "logits/rejected": -2.1927990913391113, + "logps/chosen": -226.00387573242188, + "logps/rejected": -459.74920654296875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.165031433105469, + "rewards/margins": 10.924459457397461, + "rewards/rejected": -22.08949089050293, + "step": 14664 + }, + { + "epoch": 2.28, + "learning_rate": 3.3914290160278022e-06, + "logits/chosen": -2.0692572593688965, + "logits/rejected": -2.3985774517059326, + "logps/chosen": -143.1696319580078, + "logps/rejected": -194.40463256835938, + "loss": 0.0616, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.127872467041016, + "rewards/margins": 5.219165325164795, + "rewards/rejected": -13.347038269042969, + "step": 14665 + }, + { + "epoch": 2.28, + "learning_rate": 3.390695575496654e-06, + "logits/chosen": -2.174438238143921, + "logits/rejected": -2.781989336013794, + "logps/chosen": -345.4265441894531, + "logps/rejected": -473.4477233886719, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.74119758605957, + "rewards/margins": 4.669240474700928, + "rewards/rejected": -18.410438537597656, + "step": 14666 + }, + { + "epoch": 2.28, + "learning_rate": 3.3899621349655064e-06, + "logits/chosen": -2.8787426948547363, + "logits/rejected": -2.7080399990081787, + "logps/chosen": -412.7769775390625, + "logps/rejected": -243.8993377685547, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.177729845046997, + "rewards/margins": 7.1178154945373535, + "rewards/rejected": -9.29554557800293, + "step": 14667 + }, + { + "epoch": 2.28, + "learning_rate": 3.3892286944343587e-06, + "logits/chosen": -2.7483043670654297, + "logits/rejected": -2.5800845623016357, + "logps/chosen": -248.60321044921875, + "logps/rejected": -470.8562316894531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.869139671325684, + "rewards/margins": 14.006964683532715, + "rewards/rejected": -18.8761043548584, + "step": 14668 + }, + { + "epoch": 2.28, + "learning_rate": 3.388495253903211e-06, + "logits/chosen": -2.353681802749634, + "logits/rejected": -2.973942518234253, + "logps/chosen": -144.1055908203125, + "logps/rejected": -414.2466125488281, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.111915588378906, + "rewards/margins": 9.32863712310791, + "rewards/rejected": -15.440553665161133, + "step": 14669 + }, + { + "epoch": 2.28, + "learning_rate": 3.387761813372063e-06, + "logits/chosen": -1.6368868350982666, + "logits/rejected": -2.465092897415161, + "logps/chosen": -251.6868896484375, + "logps/rejected": -284.9752197265625, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.84988784790039, + "rewards/margins": 5.493706226348877, + "rewards/rejected": -17.34359359741211, + "step": 14670 + }, + { + "epoch": 2.28, + "learning_rate": 3.387028372840915e-06, + "logits/chosen": -1.6116775274276733, + "logits/rejected": -2.312694787979126, + "logps/chosen": -260.40509033203125, + "logps/rejected": -303.37371826171875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.711214065551758, + "rewards/margins": 6.135272979736328, + "rewards/rejected": -15.846487045288086, + "step": 14671 + }, + { + "epoch": 2.28, + "learning_rate": 3.386294932309767e-06, + "logits/chosen": -1.4596730470657349, + "logits/rejected": -2.3621392250061035, + "logps/chosen": -190.95343017578125, + "logps/rejected": -574.70947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.443757057189941, + "rewards/margins": 13.583745956420898, + "rewards/rejected": -21.027502059936523, + "step": 14672 + }, + { + "epoch": 2.28, + "learning_rate": 3.3855614917786194e-06, + "logits/chosen": -2.6461122035980225, + "logits/rejected": -2.8224706649780273, + "logps/chosen": -113.1144790649414, + "logps/rejected": -286.9572448730469, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.027996063232422, + "rewards/margins": 8.224386215209961, + "rewards/rejected": -15.252382278442383, + "step": 14673 + }, + { + "epoch": 2.28, + "learning_rate": 3.3848280512474712e-06, + "logits/chosen": -2.571763038635254, + "logits/rejected": -1.4319578409194946, + "logps/chosen": -242.13558959960938, + "logps/rejected": -239.94833374023438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.229400157928467, + "rewards/margins": 8.560625076293945, + "rewards/rejected": -14.790024757385254, + "step": 14674 + }, + { + "epoch": 2.28, + "learning_rate": 3.384094610716323e-06, + "logits/chosen": -2.658219814300537, + "logits/rejected": -2.3929216861724854, + "logps/chosen": -496.19158935546875, + "logps/rejected": -535.4729614257812, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.735357761383057, + "rewards/margins": 12.691667556762695, + "rewards/rejected": -20.427024841308594, + "step": 14675 + }, + { + "epoch": 2.28, + "learning_rate": 3.383361170185176e-06, + "logits/chosen": -2.6678974628448486, + "logits/rejected": -1.6288793087005615, + "logps/chosen": -627.5999755859375, + "logps/rejected": -510.59326171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0019402503967285, + "rewards/margins": 14.769943237304688, + "rewards/rejected": -20.771883010864258, + "step": 14676 + }, + { + "epoch": 2.28, + "learning_rate": 3.3826277296540277e-06, + "logits/chosen": -2.661334753036499, + "logits/rejected": -2.354912042617798, + "logps/chosen": -666.7499389648438, + "logps/rejected": -586.6898193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.820379257202148, + "rewards/margins": 11.14305305480957, + "rewards/rejected": -20.96343231201172, + "step": 14677 + }, + { + "epoch": 2.28, + "learning_rate": 3.38189428912288e-06, + "logits/chosen": -2.9262287616729736, + "logits/rejected": -2.9977524280548096, + "logps/chosen": -143.753173828125, + "logps/rejected": -175.91537475585938, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6713128089904785, + "rewards/margins": 8.784150123596191, + "rewards/rejected": -15.455463409423828, + "step": 14678 + }, + { + "epoch": 2.28, + "learning_rate": 3.381160848591732e-06, + "logits/chosen": -2.801241874694824, + "logits/rejected": -2.3155555725097656, + "logps/chosen": -238.8917694091797, + "logps/rejected": -357.3397216796875, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.085968971252441, + "rewards/margins": 8.75225830078125, + "rewards/rejected": -16.838228225708008, + "step": 14679 + }, + { + "epoch": 2.28, + "learning_rate": 3.380427408060584e-06, + "logits/chosen": -2.444213390350342, + "logits/rejected": -1.459342122077942, + "logps/chosen": -419.6922912597656, + "logps/rejected": -276.300537109375, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.881853103637695, + "rewards/margins": 6.916897296905518, + "rewards/rejected": -16.798751831054688, + "step": 14680 + }, + { + "epoch": 2.28, + "learning_rate": 3.379693967529436e-06, + "logits/chosen": -2.7263565063476562, + "logits/rejected": -2.2928600311279297, + "logps/chosen": -158.10171508789062, + "logps/rejected": -227.69931030273438, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.155340194702148, + "rewards/margins": 9.03972053527832, + "rewards/rejected": -17.19506072998047, + "step": 14681 + }, + { + "epoch": 2.28, + "learning_rate": 3.3789605269982884e-06, + "logits/chosen": -2.2954020500183105, + "logits/rejected": -2.8643298149108887, + "logps/chosen": -142.75634765625, + "logps/rejected": -247.32984924316406, + "loss": 0.2613, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.834432601928711, + "rewards/margins": 4.777040481567383, + "rewards/rejected": -11.611473083496094, + "step": 14682 + }, + { + "epoch": 2.28, + "learning_rate": 3.3782270864671403e-06, + "logits/chosen": -1.1966546773910522, + "logits/rejected": -2.1421360969543457, + "logps/chosen": -107.27540588378906, + "logps/rejected": -317.3961181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.764875411987305, + "rewards/margins": 10.419347763061523, + "rewards/rejected": -16.184223175048828, + "step": 14683 + }, + { + "epoch": 2.28, + "learning_rate": 3.3774936459359926e-06, + "logits/chosen": -2.5116634368896484, + "logits/rejected": -2.98179030418396, + "logps/chosen": -119.71067810058594, + "logps/rejected": -360.47601318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.289071083068848, + "rewards/margins": 11.379476547241211, + "rewards/rejected": -18.668548583984375, + "step": 14684 + }, + { + "epoch": 2.28, + "learning_rate": 3.376760205404845e-06, + "logits/chosen": -2.167149543762207, + "logits/rejected": -1.7154499292373657, + "logps/chosen": -270.65277099609375, + "logps/rejected": -592.1727905273438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.508281707763672, + "rewards/margins": 12.79819107055664, + "rewards/rejected": -23.306472778320312, + "step": 14685 + }, + { + "epoch": 2.28, + "learning_rate": 3.3760267648736967e-06, + "logits/chosen": -2.5871357917785645, + "logits/rejected": -2.944720983505249, + "logps/chosen": -129.53836059570312, + "logps/rejected": -326.46673583984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.837155342102051, + "rewards/margins": 8.815616607666016, + "rewards/rejected": -14.652772903442383, + "step": 14686 + }, + { + "epoch": 2.28, + "learning_rate": 3.375293324342549e-06, + "logits/chosen": -1.5739072561264038, + "logits/rejected": -2.718513250350952, + "logps/chosen": -152.59288024902344, + "logps/rejected": -377.04449462890625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.625457763671875, + "rewards/margins": 9.138671875, + "rewards/rejected": -17.764129638671875, + "step": 14687 + }, + { + "epoch": 2.28, + "learning_rate": 3.374559883811401e-06, + "logits/chosen": -1.4298981428146362, + "logits/rejected": -2.1447272300720215, + "logps/chosen": -123.42677307128906, + "logps/rejected": -438.54071044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.929526329040527, + "rewards/margins": 14.376798629760742, + "rewards/rejected": -20.306324005126953, + "step": 14688 + }, + { + "epoch": 2.28, + "learning_rate": 3.3738264432802532e-06, + "logits/chosen": -2.7786929607391357, + "logits/rejected": -2.785886287689209, + "logps/chosen": -162.64901733398438, + "logps/rejected": -176.61019897460938, + "loss": 0.4227, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.757763862609863, + "rewards/margins": 3.0257599353790283, + "rewards/rejected": -13.783523559570312, + "step": 14689 + }, + { + "epoch": 2.28, + "learning_rate": 3.373093002749105e-06, + "logits/chosen": -2.860394239425659, + "logits/rejected": -2.1598596572875977, + "logps/chosen": -273.4000549316406, + "logps/rejected": -261.28131103515625, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.040309906005859, + "rewards/margins": 6.030782699584961, + "rewards/rejected": -13.07109260559082, + "step": 14690 + }, + { + "epoch": 2.28, + "learning_rate": 3.3723595622179574e-06, + "logits/chosen": -1.9046059846878052, + "logits/rejected": -3.0818538665771484, + "logps/chosen": -72.3106460571289, + "logps/rejected": -367.22430419921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.356439590454102, + "rewards/margins": 11.313758850097656, + "rewards/rejected": -17.670198440551758, + "step": 14691 + }, + { + "epoch": 2.28, + "learning_rate": 3.3716261216868093e-06, + "logits/chosen": -0.9866809248924255, + "logits/rejected": -2.3635382652282715, + "logps/chosen": -117.59515380859375, + "logps/rejected": -365.950439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.486490249633789, + "rewards/margins": 10.823299407958984, + "rewards/rejected": -19.309789657592773, + "step": 14692 + }, + { + "epoch": 2.29, + "learning_rate": 3.370892681155662e-06, + "logits/chosen": -2.4671006202697754, + "logits/rejected": -2.6264610290527344, + "logps/chosen": -263.4411926269531, + "logps/rejected": -346.4915771484375, + "loss": 0.0584, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.819157600402832, + "rewards/margins": 5.226844787597656, + "rewards/rejected": -18.046001434326172, + "step": 14693 + }, + { + "epoch": 2.29, + "learning_rate": 3.370159240624514e-06, + "logits/chosen": -2.7070720195770264, + "logits/rejected": -2.5375864505767822, + "logps/chosen": -142.3599395751953, + "logps/rejected": -349.1812744140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.779196739196777, + "rewards/margins": 9.044339179992676, + "rewards/rejected": -17.823535919189453, + "step": 14694 + }, + { + "epoch": 2.29, + "learning_rate": 3.3694258000933658e-06, + "logits/chosen": -2.5950767993927, + "logits/rejected": -2.968329668045044, + "logps/chosen": -302.8764343261719, + "logps/rejected": -357.8684387207031, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6290082931518555, + "rewards/margins": 9.466971397399902, + "rewards/rejected": -14.095979690551758, + "step": 14695 + }, + { + "epoch": 2.29, + "learning_rate": 3.368692359562218e-06, + "logits/chosen": -2.1796627044677734, + "logits/rejected": -2.3628456592559814, + "logps/chosen": -235.84593200683594, + "logps/rejected": -282.025390625, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.30461311340332, + "rewards/margins": 4.976365089416504, + "rewards/rejected": -15.28097915649414, + "step": 14696 + }, + { + "epoch": 2.29, + "learning_rate": 3.36795891903107e-06, + "logits/chosen": -2.8395602703094482, + "logits/rejected": -2.2889487743377686, + "logps/chosen": -228.10752868652344, + "logps/rejected": -374.21026611328125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.593482494354248, + "rewards/margins": 8.957929611206055, + "rewards/rejected": -16.55141258239746, + "step": 14697 + }, + { + "epoch": 2.29, + "learning_rate": 3.3672254784999223e-06, + "logits/chosen": -2.920759916305542, + "logits/rejected": -2.8748910427093506, + "logps/chosen": -117.49669647216797, + "logps/rejected": -312.1345520019531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.845767021179199, + "rewards/margins": 10.272161483764648, + "rewards/rejected": -17.11792755126953, + "step": 14698 + }, + { + "epoch": 2.29, + "learning_rate": 3.366492037968774e-06, + "logits/chosen": -2.8366146087646484, + "logits/rejected": -2.29608416557312, + "logps/chosen": -167.4361114501953, + "logps/rejected": -204.04408264160156, + "loss": 0.634, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.647411346435547, + "rewards/margins": 3.119157314300537, + "rewards/rejected": -11.766569137573242, + "step": 14699 + }, + { + "epoch": 2.29, + "learning_rate": 3.3657585974376264e-06, + "logits/chosen": -2.5939931869506836, + "logits/rejected": -2.0594735145568848, + "logps/chosen": -447.077392578125, + "logps/rejected": -400.5177001953125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.384565353393555, + "rewards/margins": 7.724483013153076, + "rewards/rejected": -17.109046936035156, + "step": 14700 + }, + { + "epoch": 2.29, + "learning_rate": 3.3650251569064787e-06, + "logits/chosen": -2.625772476196289, + "logits/rejected": -2.5073211193084717, + "logps/chosen": -211.94314575195312, + "logps/rejected": -331.9144287109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.701446533203125, + "rewards/margins": 10.91585922241211, + "rewards/rejected": -19.617305755615234, + "step": 14701 + }, + { + "epoch": 2.29, + "learning_rate": 3.364291716375331e-06, + "logits/chosen": -2.2112905979156494, + "logits/rejected": -2.8282389640808105, + "logps/chosen": -169.7637939453125, + "logps/rejected": -533.5472412109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.50779914855957, + "rewards/margins": 11.808162689208984, + "rewards/rejected": -19.315961837768555, + "step": 14702 + }, + { + "epoch": 2.29, + "learning_rate": 3.363558275844183e-06, + "logits/chosen": -2.446941375732422, + "logits/rejected": -2.6687004566192627, + "logps/chosen": -178.03109741210938, + "logps/rejected": -449.2030029296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.493422508239746, + "rewards/margins": 9.139357566833496, + "rewards/rejected": -18.632780075073242, + "step": 14703 + }, + { + "epoch": 2.29, + "learning_rate": 3.362824835313035e-06, + "logits/chosen": -1.268009066581726, + "logits/rejected": -2.4125704765319824, + "logps/chosen": -172.4310760498047, + "logps/rejected": -561.916259765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.739856719970703, + "rewards/margins": 10.491373062133789, + "rewards/rejected": -20.231229782104492, + "step": 14704 + }, + { + "epoch": 2.29, + "learning_rate": 3.362091394781887e-06, + "logits/chosen": -1.5606036186218262, + "logits/rejected": -2.4863224029541016, + "logps/chosen": -292.4926452636719, + "logps/rejected": -331.0464782714844, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.464813232421875, + "rewards/margins": 7.904407024383545, + "rewards/rejected": -18.369220733642578, + "step": 14705 + }, + { + "epoch": 2.29, + "learning_rate": 3.361357954250739e-06, + "logits/chosen": -1.5189458131790161, + "logits/rejected": -2.399658203125, + "logps/chosen": -128.30960083007812, + "logps/rejected": -236.78712463378906, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.461851119995117, + "rewards/margins": 3.826554775238037, + "rewards/rejected": -15.288406372070312, + "step": 14706 + }, + { + "epoch": 2.29, + "learning_rate": 3.3606245137195913e-06, + "logits/chosen": -2.0028140544891357, + "logits/rejected": -2.677820920944214, + "logps/chosen": -96.89888763427734, + "logps/rejected": -224.375732421875, + "loss": 0.0835, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.720398902893066, + "rewards/margins": 7.31718635559082, + "rewards/rejected": -15.037586212158203, + "step": 14707 + }, + { + "epoch": 2.29, + "learning_rate": 3.359891073188443e-06, + "logits/chosen": -1.100024938583374, + "logits/rejected": -2.506519317626953, + "logps/chosen": -139.38865661621094, + "logps/rejected": -358.1020202636719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.592804908752441, + "rewards/margins": 8.394769668579102, + "rewards/rejected": -18.98757553100586, + "step": 14708 + }, + { + "epoch": 2.29, + "learning_rate": 3.3591576326572955e-06, + "logits/chosen": -2.998540163040161, + "logits/rejected": -2.7904257774353027, + "logps/chosen": -201.2711181640625, + "logps/rejected": -350.8544921875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9456887245178223, + "rewards/margins": 9.932042121887207, + "rewards/rejected": -13.877731323242188, + "step": 14709 + }, + { + "epoch": 2.29, + "learning_rate": 3.3584241921261478e-06, + "logits/chosen": -2.8485941886901855, + "logits/rejected": -2.291961908340454, + "logps/chosen": -535.4583129882812, + "logps/rejected": -457.74639892578125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.986668109893799, + "rewards/margins": 17.14623260498047, + "rewards/rejected": -24.13290023803711, + "step": 14710 + }, + { + "epoch": 2.29, + "learning_rate": 3.357690751595e-06, + "logits/chosen": -2.555126905441284, + "logits/rejected": -2.9195151329040527, + "logps/chosen": -187.40420532226562, + "logps/rejected": -401.417236328125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.390993118286133, + "rewards/margins": 7.975175857543945, + "rewards/rejected": -18.366168975830078, + "step": 14711 + }, + { + "epoch": 2.29, + "learning_rate": 3.356957311063852e-06, + "logits/chosen": -2.360286235809326, + "logits/rejected": -1.4357688426971436, + "logps/chosen": -205.98541259765625, + "logps/rejected": -414.01416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.350382804870605, + "rewards/margins": 14.719683647155762, + "rewards/rejected": -24.070066452026367, + "step": 14712 + }, + { + "epoch": 2.29, + "learning_rate": 3.356223870532704e-06, + "logits/chosen": -2.9299533367156982, + "logits/rejected": -1.889606237411499, + "logps/chosen": -771.526611328125, + "logps/rejected": -490.3883972167969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.103372573852539, + "rewards/margins": 9.820793151855469, + "rewards/rejected": -16.924165725708008, + "step": 14713 + }, + { + "epoch": 2.29, + "learning_rate": 3.355490430001556e-06, + "logits/chosen": -2.5502796173095703, + "logits/rejected": -2.0934653282165527, + "logps/chosen": -500.1960754394531, + "logps/rejected": -556.5948486328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.060859680175781, + "rewards/margins": 9.402223587036133, + "rewards/rejected": -20.463085174560547, + "step": 14714 + }, + { + "epoch": 2.29, + "learning_rate": 3.354756989470408e-06, + "logits/chosen": -2.586090087890625, + "logits/rejected": -1.3492066860198975, + "logps/chosen": -345.6944274902344, + "logps/rejected": -314.72637939453125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.936561584472656, + "rewards/margins": 7.3046088218688965, + "rewards/rejected": -17.24117088317871, + "step": 14715 + }, + { + "epoch": 2.29, + "learning_rate": 3.3540235489392603e-06, + "logits/chosen": -1.7599856853485107, + "logits/rejected": -1.9056921005249023, + "logps/chosen": -242.52320861816406, + "logps/rejected": -413.53094482421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.135524272918701, + "rewards/margins": 14.578577995300293, + "rewards/rejected": -18.71410369873047, + "step": 14716 + }, + { + "epoch": 2.29, + "learning_rate": 3.353290108408112e-06, + "logits/chosen": -2.825164318084717, + "logits/rejected": -2.913146495819092, + "logps/chosen": -169.26092529296875, + "logps/rejected": -214.62828063964844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.994695663452148, + "rewards/margins": 10.016119003295898, + "rewards/rejected": -16.010814666748047, + "step": 14717 + }, + { + "epoch": 2.29, + "learning_rate": 3.352556667876965e-06, + "logits/chosen": -1.2447892427444458, + "logits/rejected": -2.5681393146514893, + "logps/chosen": -186.81454467773438, + "logps/rejected": -418.31854248046875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.837262153625488, + "rewards/margins": 6.093560695648193, + "rewards/rejected": -16.930822372436523, + "step": 14718 + }, + { + "epoch": 2.29, + "learning_rate": 3.3518232273458168e-06, + "logits/chosen": -2.606678009033203, + "logits/rejected": -3.0056865215301514, + "logps/chosen": -204.0622100830078, + "logps/rejected": -243.65109252929688, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.535459518432617, + "rewards/margins": 7.286102294921875, + "rewards/rejected": -14.821561813354492, + "step": 14719 + }, + { + "epoch": 2.29, + "learning_rate": 3.351089786814669e-06, + "logits/chosen": -2.2470436096191406, + "logits/rejected": -2.371760368347168, + "logps/chosen": -319.381103515625, + "logps/rejected": -562.962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.438632011413574, + "rewards/margins": 13.550830841064453, + "rewards/rejected": -22.989463806152344, + "step": 14720 + }, + { + "epoch": 2.29, + "learning_rate": 3.350356346283521e-06, + "logits/chosen": -2.213205337524414, + "logits/rejected": -2.6185076236724854, + "logps/chosen": -153.2197723388672, + "logps/rejected": -304.2560119628906, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.637218475341797, + "rewards/margins": 9.146442413330078, + "rewards/rejected": -17.783660888671875, + "step": 14721 + }, + { + "epoch": 2.29, + "learning_rate": 3.3496229057523733e-06, + "logits/chosen": -2.779364824295044, + "logits/rejected": -2.9052982330322266, + "logps/chosen": -563.67822265625, + "logps/rejected": -629.092529296875, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.101974487304688, + "rewards/margins": 7.715149402618408, + "rewards/rejected": -17.817123413085938, + "step": 14722 + }, + { + "epoch": 2.29, + "learning_rate": 3.348889465221225e-06, + "logits/chosen": -2.7110397815704346, + "logits/rejected": -2.9704089164733887, + "logps/chosen": -215.39906311035156, + "logps/rejected": -411.92388916015625, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.360213279724121, + "rewards/margins": 6.904885292053223, + "rewards/rejected": -13.265098571777344, + "step": 14723 + }, + { + "epoch": 2.29, + "learning_rate": 3.348156024690077e-06, + "logits/chosen": -2.169494867324829, + "logits/rejected": -2.627758264541626, + "logps/chosen": -180.95852661132812, + "logps/rejected": -233.92408752441406, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.253164291381836, + "rewards/margins": 8.311525344848633, + "rewards/rejected": -14.564689636230469, + "step": 14724 + }, + { + "epoch": 2.29, + "learning_rate": 3.3474225841589293e-06, + "logits/chosen": -1.6612532138824463, + "logits/rejected": -2.7033820152282715, + "logps/chosen": -288.5006103515625, + "logps/rejected": -388.72955322265625, + "loss": 0.045, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.117427825927734, + "rewards/margins": 5.997869968414307, + "rewards/rejected": -17.115297317504883, + "step": 14725 + }, + { + "epoch": 2.29, + "learning_rate": 3.3466891436277816e-06, + "logits/chosen": -2.7779042720794678, + "logits/rejected": -2.8901069164276123, + "logps/chosen": -469.0987243652344, + "logps/rejected": -470.23626708984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.7981538772583, + "rewards/margins": 9.343996047973633, + "rewards/rejected": -20.14215087890625, + "step": 14726 + }, + { + "epoch": 2.29, + "learning_rate": 3.345955703096634e-06, + "logits/chosen": -2.0692107677459717, + "logits/rejected": -2.657797336578369, + "logps/chosen": -334.90728759765625, + "logps/rejected": -498.1492919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.92885684967041, + "rewards/margins": 12.157151222229004, + "rewards/rejected": -18.086008071899414, + "step": 14727 + }, + { + "epoch": 2.29, + "learning_rate": 3.345222262565486e-06, + "logits/chosen": -2.0661377906799316, + "logits/rejected": -2.4895811080932617, + "logps/chosen": -152.8252410888672, + "logps/rejected": -332.7330322265625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.890131950378418, + "rewards/margins": 6.204643249511719, + "rewards/rejected": -15.094776153564453, + "step": 14728 + }, + { + "epoch": 2.29, + "learning_rate": 3.344488822034338e-06, + "logits/chosen": -1.0046309232711792, + "logits/rejected": -2.594583749771118, + "logps/chosen": -304.2353515625, + "logps/rejected": -590.8602294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5955400466918945, + "rewards/margins": 11.888782501220703, + "rewards/rejected": -18.484323501586914, + "step": 14729 + }, + { + "epoch": 2.29, + "learning_rate": 3.34375538150319e-06, + "logits/chosen": -1.821576714515686, + "logits/rejected": -2.2897396087646484, + "logps/chosen": -240.7617950439453, + "logps/rejected": -375.75787353515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.077878952026367, + "rewards/margins": 8.186450004577637, + "rewards/rejected": -14.264328956604004, + "step": 14730 + }, + { + "epoch": 2.29, + "learning_rate": 3.3430219409720423e-06, + "logits/chosen": -2.863184928894043, + "logits/rejected": -2.417750835418701, + "logps/chosen": -363.5371398925781, + "logps/rejected": -303.5378723144531, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6060471534729, + "rewards/margins": 8.920567512512207, + "rewards/rejected": -13.52661418914795, + "step": 14731 + }, + { + "epoch": 2.29, + "learning_rate": 3.342288500440894e-06, + "logits/chosen": -1.5577880144119263, + "logits/rejected": -2.52847957611084, + "logps/chosen": -168.4786376953125, + "logps/rejected": -492.76092529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.914037704467773, + "rewards/margins": 14.125530242919922, + "rewards/rejected": -23.039567947387695, + "step": 14732 + }, + { + "epoch": 2.29, + "learning_rate": 3.341555059909746e-06, + "logits/chosen": -1.7891134023666382, + "logits/rejected": -2.6136717796325684, + "logps/chosen": -293.072021484375, + "logps/rejected": -501.827880859375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.590846061706543, + "rewards/margins": 10.679370880126953, + "rewards/rejected": -21.270217895507812, + "step": 14733 + }, + { + "epoch": 2.29, + "learning_rate": 3.3408216193785983e-06, + "logits/chosen": -2.3560051918029785, + "logits/rejected": -2.6484594345092773, + "logps/chosen": -370.0955810546875, + "logps/rejected": -821.234619140625, + "loss": 0.164, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.596790313720703, + "rewards/margins": 6.540095806121826, + "rewards/rejected": -16.136886596679688, + "step": 14734 + }, + { + "epoch": 2.29, + "learning_rate": 3.3400881788474506e-06, + "logits/chosen": -2.6221892833709717, + "logits/rejected": -1.864048957824707, + "logps/chosen": -239.1683349609375, + "logps/rejected": -308.73748779296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.668261528015137, + "rewards/margins": 9.07430648803711, + "rewards/rejected": -15.742568016052246, + "step": 14735 + }, + { + "epoch": 2.29, + "learning_rate": 3.339354738316303e-06, + "logits/chosen": -2.7086517810821533, + "logits/rejected": -2.779989719390869, + "logps/chosen": -226.71487426757812, + "logps/rejected": -364.0784606933594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.284027576446533, + "rewards/margins": 16.347126007080078, + "rewards/rejected": -22.631153106689453, + "step": 14736 + }, + { + "epoch": 2.29, + "learning_rate": 3.338621297785155e-06, + "logits/chosen": -2.782796859741211, + "logits/rejected": -2.312760829925537, + "logps/chosen": -460.45550537109375, + "logps/rejected": -483.7525634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.073710441589355, + "rewards/margins": 11.304495811462402, + "rewards/rejected": -20.378206253051758, + "step": 14737 + }, + { + "epoch": 2.29, + "learning_rate": 3.337887857254007e-06, + "logits/chosen": -2.4346375465393066, + "logits/rejected": -0.7933722138404846, + "logps/chosen": -248.0792236328125, + "logps/rejected": -155.47264099121094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.416874647140503, + "rewards/margins": 9.716350555419922, + "rewards/rejected": -12.133225440979004, + "step": 14738 + }, + { + "epoch": 2.29, + "learning_rate": 3.337154416722859e-06, + "logits/chosen": -2.9961061477661133, + "logits/rejected": -2.151040554046631, + "logps/chosen": -248.56016540527344, + "logps/rejected": -299.5869140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.033608436584473, + "rewards/margins": 8.519392967224121, + "rewards/rejected": -15.553001403808594, + "step": 14739 + }, + { + "epoch": 2.29, + "learning_rate": 3.3364209761917113e-06, + "logits/chosen": -2.9299168586730957, + "logits/rejected": -2.6651484966278076, + "logps/chosen": -655.1632690429688, + "logps/rejected": -703.42822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.686917304992676, + "rewards/margins": 9.792243957519531, + "rewards/rejected": -16.479162216186523, + "step": 14740 + }, + { + "epoch": 2.29, + "learning_rate": 3.335687535660563e-06, + "logits/chosen": -2.137852430343628, + "logits/rejected": -0.799774706363678, + "logps/chosen": -537.768798828125, + "logps/rejected": -417.7757873535156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.037198066711426, + "rewards/margins": 13.314435958862305, + "rewards/rejected": -21.351634979248047, + "step": 14741 + }, + { + "epoch": 2.29, + "learning_rate": 3.3349540951294155e-06, + "logits/chosen": -2.5949618816375732, + "logits/rejected": -2.6266026496887207, + "logps/chosen": -557.3298950195312, + "logps/rejected": -541.5906982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5185441970825195, + "rewards/margins": 10.104631423950195, + "rewards/rejected": -15.623175621032715, + "step": 14742 + }, + { + "epoch": 2.29, + "learning_rate": 3.3342206545982678e-06, + "logits/chosen": -2.2729873657226562, + "logits/rejected": -2.7149229049682617, + "logps/chosen": -257.3050537109375, + "logps/rejected": -282.13507080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.061522960662842, + "rewards/margins": 14.85830307006836, + "rewards/rejected": -18.91982650756836, + "step": 14743 + }, + { + "epoch": 2.29, + "learning_rate": 3.3334872140671197e-06, + "logits/chosen": -2.8506431579589844, + "logits/rejected": -1.447749137878418, + "logps/chosen": -309.6107482910156, + "logps/rejected": -218.21630859375, + "loss": 0.6873, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.0740966796875, + "rewards/margins": 4.153749942779541, + "rewards/rejected": -14.227846145629883, + "step": 14744 + }, + { + "epoch": 2.29, + "learning_rate": 3.332753773535972e-06, + "logits/chosen": -2.6929354667663574, + "logits/rejected": -2.760430097579956, + "logps/chosen": -105.14273071289062, + "logps/rejected": -284.11419677734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.634916305541992, + "rewards/margins": 9.879403114318848, + "rewards/rejected": -18.514320373535156, + "step": 14745 + }, + { + "epoch": 2.29, + "learning_rate": 3.332020333004824e-06, + "logits/chosen": -2.8663370609283447, + "logits/rejected": -2.3270843029022217, + "logps/chosen": -214.85899353027344, + "logps/rejected": -227.99160766601562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.711066722869873, + "rewards/margins": 12.019749641418457, + "rewards/rejected": -15.730815887451172, + "step": 14746 + }, + { + "epoch": 2.29, + "learning_rate": 3.331286892473676e-06, + "logits/chosen": -2.6805832386016846, + "logits/rejected": -3.0061700344085693, + "logps/chosen": -270.23211669921875, + "logps/rejected": -635.06103515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.103313446044922, + "rewards/margins": 12.646627426147461, + "rewards/rejected": -19.749940872192383, + "step": 14747 + }, + { + "epoch": 2.29, + "learning_rate": 3.330553451942528e-06, + "logits/chosen": -1.3718245029449463, + "logits/rejected": -2.244666814804077, + "logps/chosen": -236.90737915039062, + "logps/rejected": -512.8182983398438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.015871047973633, + "rewards/margins": 12.581748962402344, + "rewards/rejected": -20.597620010375977, + "step": 14748 + }, + { + "epoch": 2.29, + "learning_rate": 3.3298200114113803e-06, + "logits/chosen": -2.5234601497650146, + "logits/rejected": -2.0622012615203857, + "logps/chosen": -277.17755126953125, + "logps/rejected": -416.4546813964844, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.72346305847168, + "rewards/margins": 8.36481761932373, + "rewards/rejected": -18.088279724121094, + "step": 14749 + }, + { + "epoch": 2.29, + "learning_rate": 3.329086570880232e-06, + "logits/chosen": -2.5820398330688477, + "logits/rejected": -1.8994154930114746, + "logps/chosen": -514.9453125, + "logps/rejected": -546.78466796875, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.376572608947754, + "rewards/margins": 11.937305450439453, + "rewards/rejected": -21.31387710571289, + "step": 14750 + }, + { + "epoch": 2.29, + "learning_rate": 3.3283531303490845e-06, + "logits/chosen": -2.747931480407715, + "logits/rejected": -2.435185194015503, + "logps/chosen": -146.85646057128906, + "logps/rejected": -277.5736999511719, + "loss": 0.1282, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.756467819213867, + "rewards/margins": 8.976956367492676, + "rewards/rejected": -16.733423233032227, + "step": 14751 + }, + { + "epoch": 2.29, + "learning_rate": 3.327619689817937e-06, + "logits/chosen": -2.8119659423828125, + "logits/rejected": -2.3781516551971436, + "logps/chosen": -192.46556091308594, + "logps/rejected": -228.41189575195312, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.661223411560059, + "rewards/margins": 5.863779067993164, + "rewards/rejected": -13.525003433227539, + "step": 14752 + }, + { + "epoch": 2.29, + "learning_rate": 3.3268862492867887e-06, + "logits/chosen": -2.679055690765381, + "logits/rejected": -2.7265214920043945, + "logps/chosen": -295.41644287109375, + "logps/rejected": -445.17449951171875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.042322158813477, + "rewards/margins": 7.460307598114014, + "rewards/rejected": -16.50263023376465, + "step": 14753 + }, + { + "epoch": 2.29, + "learning_rate": 3.326152808755641e-06, + "logits/chosen": -1.7457077503204346, + "logits/rejected": -2.4389054775238037, + "logps/chosen": -252.78758239746094, + "logps/rejected": -653.9494018554688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.159791946411133, + "rewards/margins": 17.960956573486328, + "rewards/rejected": -24.120750427246094, + "step": 14754 + }, + { + "epoch": 2.29, + "learning_rate": 3.325419368224493e-06, + "logits/chosen": -2.3100712299346924, + "logits/rejected": -1.7181036472320557, + "logps/chosen": -276.9713134765625, + "logps/rejected": -343.9439697265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.317592620849609, + "rewards/margins": 10.39498233795166, + "rewards/rejected": -17.712574005126953, + "step": 14755 + }, + { + "epoch": 2.29, + "learning_rate": 3.324685927693345e-06, + "logits/chosen": -2.2917463779449463, + "logits/rejected": -2.8875722885131836, + "logps/chosen": -554.75830078125, + "logps/rejected": -846.7750244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.710075378417969, + "rewards/margins": 10.067426681518555, + "rewards/rejected": -21.777502059936523, + "step": 14756 + }, + { + "epoch": 2.3, + "learning_rate": 3.323952487162197e-06, + "logits/chosen": -2.344572067260742, + "logits/rejected": -1.6011794805526733, + "logps/chosen": -496.51629638671875, + "logps/rejected": -427.65008544921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.897367477416992, + "rewards/margins": 9.649456024169922, + "rewards/rejected": -21.546825408935547, + "step": 14757 + }, + { + "epoch": 2.3, + "learning_rate": 3.3232190466310493e-06, + "logits/chosen": -2.2421951293945312, + "logits/rejected": -2.5036966800689697, + "logps/chosen": -377.18829345703125, + "logps/rejected": -601.7435913085938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.27357006072998, + "rewards/margins": 14.083312034606934, + "rewards/rejected": -24.356882095336914, + "step": 14758 + }, + { + "epoch": 2.3, + "learning_rate": 3.3224856060999012e-06, + "logits/chosen": -2.852687358856201, + "logits/rejected": -1.921219825744629, + "logps/chosen": -391.8906555175781, + "logps/rejected": -369.53045654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1863067150115967, + "rewards/margins": 11.146871566772461, + "rewards/rejected": -13.33317756652832, + "step": 14759 + }, + { + "epoch": 2.3, + "learning_rate": 3.321752165568754e-06, + "logits/chosen": -1.2172620296478271, + "logits/rejected": -2.177213191986084, + "logps/chosen": -211.7616424560547, + "logps/rejected": -423.7899169921875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.919314384460449, + "rewards/margins": 8.200139999389648, + "rewards/rejected": -16.11945343017578, + "step": 14760 + }, + { + "epoch": 2.3, + "learning_rate": 3.321018725037606e-06, + "logits/chosen": -2.491471767425537, + "logits/rejected": -3.006294012069702, + "logps/chosen": -188.6151123046875, + "logps/rejected": -417.9820556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.824788093566895, + "rewards/margins": 10.847911834716797, + "rewards/rejected": -19.672698974609375, + "step": 14761 + }, + { + "epoch": 2.3, + "learning_rate": 3.320285284506458e-06, + "logits/chosen": -2.461754322052002, + "logits/rejected": -2.0264034271240234, + "logps/chosen": -202.12298583984375, + "logps/rejected": -321.2689514160156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.60457706451416, + "rewards/margins": 12.266607284545898, + "rewards/rejected": -20.871185302734375, + "step": 14762 + }, + { + "epoch": 2.3, + "learning_rate": 3.31955184397531e-06, + "logits/chosen": -1.7724590301513672, + "logits/rejected": -2.5701119899749756, + "logps/chosen": -112.21466064453125, + "logps/rejected": -304.7516174316406, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.505819320678711, + "rewards/margins": 8.419020652770996, + "rewards/rejected": -13.924839973449707, + "step": 14763 + }, + { + "epoch": 2.3, + "learning_rate": 3.318818403444162e-06, + "logits/chosen": -1.7776893377304077, + "logits/rejected": -2.708758592605591, + "logps/chosen": -131.86883544921875, + "logps/rejected": -350.04388427734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.002169609069824, + "rewards/margins": 9.283170700073242, + "rewards/rejected": -17.28533935546875, + "step": 14764 + }, + { + "epoch": 2.3, + "learning_rate": 3.318084962913014e-06, + "logits/chosen": -2.6719624996185303, + "logits/rejected": -2.561781406402588, + "logps/chosen": -159.5034942626953, + "logps/rejected": -302.8018798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.32534646987915, + "rewards/margins": 10.60836410522461, + "rewards/rejected": -17.933712005615234, + "step": 14765 + }, + { + "epoch": 2.3, + "learning_rate": 3.317351522381866e-06, + "logits/chosen": -1.1998412609100342, + "logits/rejected": -2.6463987827301025, + "logps/chosen": -94.85807800292969, + "logps/rejected": -391.336669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.043878078460693, + "rewards/margins": 14.397802352905273, + "rewards/rejected": -19.441680908203125, + "step": 14766 + }, + { + "epoch": 2.3, + "learning_rate": 3.3166180818507184e-06, + "logits/chosen": -2.7827892303466797, + "logits/rejected": -2.418511152267456, + "logps/chosen": -393.2372131347656, + "logps/rejected": -414.722900390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.444999694824219, + "rewards/margins": 8.369049072265625, + "rewards/rejected": -14.814048767089844, + "step": 14767 + }, + { + "epoch": 2.3, + "learning_rate": 3.3158846413195707e-06, + "logits/chosen": -2.594909429550171, + "logits/rejected": -2.6687047481536865, + "logps/chosen": -402.56964111328125, + "logps/rejected": -530.2028198242188, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.928757667541504, + "rewards/margins": 7.114170074462891, + "rewards/rejected": -16.042926788330078, + "step": 14768 + }, + { + "epoch": 2.3, + "learning_rate": 3.315151200788423e-06, + "logits/chosen": -2.797304391860962, + "logits/rejected": -2.448542833328247, + "logps/chosen": -200.1195831298828, + "logps/rejected": -184.02426147460938, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.898501396179199, + "rewards/margins": 10.662537574768066, + "rewards/rejected": -15.561038970947266, + "step": 14769 + }, + { + "epoch": 2.3, + "learning_rate": 3.314417760257275e-06, + "logits/chosen": -2.438292980194092, + "logits/rejected": -1.8770502805709839, + "logps/chosen": -197.9723358154297, + "logps/rejected": -384.5525207519531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.966525554656982, + "rewards/margins": 15.073307991027832, + "rewards/rejected": -20.039833068847656, + "step": 14770 + }, + { + "epoch": 2.3, + "learning_rate": 3.313684319726127e-06, + "logits/chosen": -2.5867249965667725, + "logits/rejected": -2.047873020172119, + "logps/chosen": -224.13380432128906, + "logps/rejected": -281.13861083984375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.860650062561035, + "rewards/margins": 10.861635208129883, + "rewards/rejected": -17.7222843170166, + "step": 14771 + }, + { + "epoch": 2.3, + "learning_rate": 3.312950879194979e-06, + "logits/chosen": -2.7056188583374023, + "logits/rejected": -2.6441569328308105, + "logps/chosen": -324.27874755859375, + "logps/rejected": -508.5950622558594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.991699695587158, + "rewards/margins": 10.383180618286133, + "rewards/rejected": -18.374879837036133, + "step": 14772 + }, + { + "epoch": 2.3, + "learning_rate": 3.312217438663831e-06, + "logits/chosen": -2.0703442096710205, + "logits/rejected": -2.2041187286376953, + "logps/chosen": -139.70803833007812, + "logps/rejected": -283.353271484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.219724655151367, + "rewards/margins": 10.531055450439453, + "rewards/rejected": -18.750782012939453, + "step": 14773 + }, + { + "epoch": 2.3, + "learning_rate": 3.3114839981326832e-06, + "logits/chosen": -2.365138530731201, + "logits/rejected": -2.6685972213745117, + "logps/chosen": -442.06829833984375, + "logps/rejected": -599.559326171875, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.650775909423828, + "rewards/margins": 5.927262306213379, + "rewards/rejected": -18.57803726196289, + "step": 14774 + }, + { + "epoch": 2.3, + "learning_rate": 3.310750557601535e-06, + "logits/chosen": -2.5995473861694336, + "logits/rejected": -2.670414686203003, + "logps/chosen": -480.0115661621094, + "logps/rejected": -335.48614501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.537574291229248, + "rewards/margins": 13.555181503295898, + "rewards/rejected": -20.092756271362305, + "step": 14775 + }, + { + "epoch": 2.3, + "learning_rate": 3.3100171170703874e-06, + "logits/chosen": -1.4414664506912231, + "logits/rejected": -2.394493341445923, + "logps/chosen": -253.3323516845703, + "logps/rejected": -506.1212158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.164728164672852, + "rewards/margins": 11.683685302734375, + "rewards/rejected": -23.848413467407227, + "step": 14776 + }, + { + "epoch": 2.3, + "learning_rate": 3.3092836765392397e-06, + "logits/chosen": -2.798837184906006, + "logits/rejected": -2.784503698348999, + "logps/chosen": -149.80096435546875, + "logps/rejected": -213.23202514648438, + "loss": 1.4591, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.936091423034668, + "rewards/margins": 1.8014907836914062, + "rewards/rejected": -12.737582206726074, + "step": 14777 + }, + { + "epoch": 2.3, + "learning_rate": 3.308550236008092e-06, + "logits/chosen": -1.9455827474594116, + "logits/rejected": -2.306691884994507, + "logps/chosen": -299.5771789550781, + "logps/rejected": -355.51104736328125, + "loss": 0.3358, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.360795974731445, + "rewards/margins": 4.416427135467529, + "rewards/rejected": -15.777223587036133, + "step": 14778 + }, + { + "epoch": 2.3, + "learning_rate": 3.307816795476944e-06, + "logits/chosen": -1.4980424642562866, + "logits/rejected": -2.499126434326172, + "logps/chosen": -314.22943115234375, + "logps/rejected": -419.5765380859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.838432312011719, + "rewards/margins": 8.79130744934082, + "rewards/rejected": -18.62973976135254, + "step": 14779 + }, + { + "epoch": 2.3, + "learning_rate": 3.307083354945796e-06, + "logits/chosen": -1.6787434816360474, + "logits/rejected": -2.7407045364379883, + "logps/chosen": -127.70217895507812, + "logps/rejected": -483.76422119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.617107391357422, + "rewards/margins": 12.718008995056152, + "rewards/rejected": -17.33511734008789, + "step": 14780 + }, + { + "epoch": 2.3, + "learning_rate": 3.306349914414648e-06, + "logits/chosen": -2.809122085571289, + "logits/rejected": -2.8385977745056152, + "logps/chosen": -296.5383605957031, + "logps/rejected": -290.0412292480469, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.538265228271484, + "rewards/margins": 5.912295341491699, + "rewards/rejected": -13.450560569763184, + "step": 14781 + }, + { + "epoch": 2.3, + "learning_rate": 3.3056164738835e-06, + "logits/chosen": -1.7110637426376343, + "logits/rejected": -2.4240543842315674, + "logps/chosen": -131.10137939453125, + "logps/rejected": -271.0043029785156, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0642290115356445, + "rewards/margins": 8.905763626098633, + "rewards/rejected": -15.969991683959961, + "step": 14782 + }, + { + "epoch": 2.3, + "learning_rate": 3.3048830333523522e-06, + "logits/chosen": -2.707232713699341, + "logits/rejected": -1.7255792617797852, + "logps/chosen": -339.77972412109375, + "logps/rejected": -243.88720703125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.590477466583252, + "rewards/margins": 7.079280853271484, + "rewards/rejected": -13.669757843017578, + "step": 14783 + }, + { + "epoch": 2.3, + "learning_rate": 3.304149592821204e-06, + "logits/chosen": -2.0206243991851807, + "logits/rejected": -2.7488842010498047, + "logps/chosen": -263.80743408203125, + "logps/rejected": -424.80126953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.362150192260742, + "rewards/margins": 13.692710876464844, + "rewards/rejected": -20.054861068725586, + "step": 14784 + }, + { + "epoch": 2.3, + "learning_rate": 3.303416152290057e-06, + "logits/chosen": -2.338897705078125, + "logits/rejected": -2.975703239440918, + "logps/chosen": -101.993408203125, + "logps/rejected": -354.72174072265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.118429660797119, + "rewards/margins": 14.111723899841309, + "rewards/rejected": -19.230154037475586, + "step": 14785 + }, + { + "epoch": 2.3, + "learning_rate": 3.3026827117589087e-06, + "logits/chosen": -2.4218130111694336, + "logits/rejected": -2.8133292198181152, + "logps/chosen": -729.0358276367188, + "logps/rejected": -624.677978515625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.625072479248047, + "rewards/margins": 9.445234298706055, + "rewards/rejected": -16.0703067779541, + "step": 14786 + }, + { + "epoch": 2.3, + "learning_rate": 3.301949271227761e-06, + "logits/chosen": -2.741394281387329, + "logits/rejected": -1.0648727416992188, + "logps/chosen": -304.121826171875, + "logps/rejected": -229.54354858398438, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.538400173187256, + "rewards/margins": 7.897526741027832, + "rewards/rejected": -15.43592643737793, + "step": 14787 + }, + { + "epoch": 2.3, + "learning_rate": 3.301215830696613e-06, + "logits/chosen": -1.7742170095443726, + "logits/rejected": -2.2913501262664795, + "logps/chosen": -190.7916259765625, + "logps/rejected": -502.9840087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.079607963562012, + "rewards/margins": 13.829764366149902, + "rewards/rejected": -23.909372329711914, + "step": 14788 + }, + { + "epoch": 2.3, + "learning_rate": 3.300482390165465e-06, + "logits/chosen": -2.066237688064575, + "logits/rejected": -2.8137478828430176, + "logps/chosen": -172.4986572265625, + "logps/rejected": -458.3844909667969, + "loss": 0.2462, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.477787017822266, + "rewards/margins": 5.1168622970581055, + "rewards/rejected": -16.594648361206055, + "step": 14789 + }, + { + "epoch": 2.3, + "learning_rate": 3.299748949634317e-06, + "logits/chosen": -2.637317419052124, + "logits/rejected": -2.764085292816162, + "logps/chosen": -99.1590576171875, + "logps/rejected": -336.0373840332031, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.448175430297852, + "rewards/margins": 12.430704116821289, + "rewards/rejected": -20.87887954711914, + "step": 14790 + }, + { + "epoch": 2.3, + "learning_rate": 3.2990155091031694e-06, + "logits/chosen": -2.80029296875, + "logits/rejected": -1.964005947113037, + "logps/chosen": -299.15673828125, + "logps/rejected": -289.0507507324219, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.113615989685059, + "rewards/margins": 10.274066925048828, + "rewards/rejected": -16.387683868408203, + "step": 14791 + }, + { + "epoch": 2.3, + "learning_rate": 3.2982820685720213e-06, + "logits/chosen": -2.54586124420166, + "logits/rejected": -2.708707094192505, + "logps/chosen": -119.23967742919922, + "logps/rejected": -319.26031494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.963855743408203, + "rewards/margins": 10.4619140625, + "rewards/rejected": -17.425769805908203, + "step": 14792 + }, + { + "epoch": 2.3, + "learning_rate": 3.2975486280408736e-06, + "logits/chosen": -2.0487656593322754, + "logits/rejected": -2.529982805252075, + "logps/chosen": -224.70803833007812, + "logps/rejected": -420.3891906738281, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.877219200134277, + "rewards/margins": 7.437996864318848, + "rewards/rejected": -17.315216064453125, + "step": 14793 + }, + { + "epoch": 2.3, + "learning_rate": 3.296815187509726e-06, + "logits/chosen": -2.820050001144409, + "logits/rejected": -2.7798237800598145, + "logps/chosen": -151.9190673828125, + "logps/rejected": -264.4246520996094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.641547203063965, + "rewards/margins": 9.667521476745605, + "rewards/rejected": -18.30906867980957, + "step": 14794 + }, + { + "epoch": 2.3, + "learning_rate": 3.2960817469785777e-06, + "logits/chosen": -2.869584798812866, + "logits/rejected": -3.1034884452819824, + "logps/chosen": -88.31046295166016, + "logps/rejected": -220.59829711914062, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3049235343933105, + "rewards/margins": 7.490381717681885, + "rewards/rejected": -14.795305252075195, + "step": 14795 + }, + { + "epoch": 2.3, + "learning_rate": 3.29534830644743e-06, + "logits/chosen": -2.577591896057129, + "logits/rejected": -2.429121255874634, + "logps/chosen": -251.48342895507812, + "logps/rejected": -505.00335693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.531591415405273, + "rewards/margins": 17.357711791992188, + "rewards/rejected": -27.889305114746094, + "step": 14796 + }, + { + "epoch": 2.3, + "learning_rate": 3.294614865916282e-06, + "logits/chosen": -2.4404447078704834, + "logits/rejected": -2.03891658782959, + "logps/chosen": -270.04693603515625, + "logps/rejected": -219.3307342529297, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7133941650390625, + "rewards/margins": 5.715816497802734, + "rewards/rejected": -13.429210662841797, + "step": 14797 + }, + { + "epoch": 2.3, + "learning_rate": 3.2938814253851342e-06, + "logits/chosen": -2.9589102268218994, + "logits/rejected": -2.1616828441619873, + "logps/chosen": -912.8056640625, + "logps/rejected": -667.31396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.052384376525879, + "rewards/margins": 11.936176300048828, + "rewards/rejected": -18.98855972290039, + "step": 14798 + }, + { + "epoch": 2.3, + "learning_rate": 3.293147984853986e-06, + "logits/chosen": -2.750408887863159, + "logits/rejected": -1.6615147590637207, + "logps/chosen": -481.77587890625, + "logps/rejected": -329.88812255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.90687370300293, + "rewards/margins": 9.72825813293457, + "rewards/rejected": -15.6351318359375, + "step": 14799 + }, + { + "epoch": 2.3, + "learning_rate": 3.2924145443228384e-06, + "logits/chosen": -1.0326778888702393, + "logits/rejected": -1.9914360046386719, + "logps/chosen": -263.5684509277344, + "logps/rejected": -481.28472900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.386617660522461, + "rewards/margins": 14.6146879196167, + "rewards/rejected": -24.001304626464844, + "step": 14800 + }, + { + "epoch": 2.3, + "learning_rate": 3.2916811037916903e-06, + "logits/chosen": -2.2442312240600586, + "logits/rejected": -2.1270999908447266, + "logps/chosen": -313.76513671875, + "logps/rejected": -416.251220703125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.019536018371582, + "rewards/margins": 9.528687477111816, + "rewards/rejected": -22.5482234954834, + "step": 14801 + }, + { + "epoch": 2.3, + "learning_rate": 3.2909476632605426e-06, + "logits/chosen": -2.2611255645751953, + "logits/rejected": -2.8650050163269043, + "logps/chosen": -86.12179565429688, + "logps/rejected": -340.0090637207031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.299152374267578, + "rewards/margins": 11.940446853637695, + "rewards/rejected": -18.239599227905273, + "step": 14802 + }, + { + "epoch": 2.3, + "learning_rate": 3.290214222729395e-06, + "logits/chosen": -2.6974308490753174, + "logits/rejected": -2.2124996185302734, + "logps/chosen": -342.53643798828125, + "logps/rejected": -295.0455322265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.636911392211914, + "rewards/margins": 8.35732650756836, + "rewards/rejected": -16.994237899780273, + "step": 14803 + }, + { + "epoch": 2.3, + "learning_rate": 3.2894807821982468e-06, + "logits/chosen": -2.3199517726898193, + "logits/rejected": -2.635756015777588, + "logps/chosen": -219.76470947265625, + "logps/rejected": -264.0009765625, + "loss": 0.9845, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.912484169006348, + "rewards/margins": 6.474319934844971, + "rewards/rejected": -13.386804580688477, + "step": 14804 + }, + { + "epoch": 2.3, + "learning_rate": 3.288747341667099e-06, + "logits/chosen": -2.094142436981201, + "logits/rejected": -2.7639572620391846, + "logps/chosen": -303.84320068359375, + "logps/rejected": -563.7658081054688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.898599624633789, + "rewards/margins": 10.639486312866211, + "rewards/rejected": -19.5380859375, + "step": 14805 + }, + { + "epoch": 2.3, + "learning_rate": 3.288013901135951e-06, + "logits/chosen": -2.6064178943634033, + "logits/rejected": -1.7191983461380005, + "logps/chosen": -496.670654296875, + "logps/rejected": -388.48321533203125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.122127532958984, + "rewards/margins": 8.384336471557617, + "rewards/rejected": -20.5064640045166, + "step": 14806 + }, + { + "epoch": 2.3, + "learning_rate": 3.2872804606048032e-06, + "logits/chosen": -2.12968111038208, + "logits/rejected": -2.493959426879883, + "logps/chosen": -593.5823364257812, + "logps/rejected": -552.0980224609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.202774047851562, + "rewards/margins": 9.943973541259766, + "rewards/rejected": -20.146747589111328, + "step": 14807 + }, + { + "epoch": 2.3, + "learning_rate": 3.286547020073655e-06, + "logits/chosen": -2.122612953186035, + "logits/rejected": -2.684495210647583, + "logps/chosen": -159.47946166992188, + "logps/rejected": -360.5235290527344, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1052141189575195, + "rewards/margins": 10.029600143432617, + "rewards/rejected": -15.134815216064453, + "step": 14808 + }, + { + "epoch": 2.3, + "learning_rate": 3.2858135795425074e-06, + "logits/chosen": -2.5850629806518555, + "logits/rejected": -2.933316230773926, + "logps/chosen": -605.8760986328125, + "logps/rejected": -638.798583984375, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.147382736206055, + "rewards/margins": 7.122896194458008, + "rewards/rejected": -15.270278930664062, + "step": 14809 + }, + { + "epoch": 2.3, + "learning_rate": 3.2850801390113597e-06, + "logits/chosen": -1.9752602577209473, + "logits/rejected": -2.90468168258667, + "logps/chosen": -99.69039154052734, + "logps/rejected": -386.5627746582031, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.142232894897461, + "rewards/margins": 10.308941841125488, + "rewards/rejected": -16.451175689697266, + "step": 14810 + }, + { + "epoch": 2.3, + "learning_rate": 3.284346698480212e-06, + "logits/chosen": -1.5464308261871338, + "logits/rejected": -2.6749746799468994, + "logps/chosen": -216.2045440673828, + "logps/rejected": -400.37359619140625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.400891304016113, + "rewards/margins": 10.44528865814209, + "rewards/rejected": -16.846179962158203, + "step": 14811 + }, + { + "epoch": 2.3, + "learning_rate": 3.283613257949064e-06, + "logits/chosen": -2.3588290214538574, + "logits/rejected": -2.311910629272461, + "logps/chosen": -251.13502502441406, + "logps/rejected": -441.397216796875, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.194108009338379, + "rewards/margins": 7.355510234832764, + "rewards/rejected": -16.549617767333984, + "step": 14812 + }, + { + "epoch": 2.3, + "learning_rate": 3.2828798174179158e-06, + "logits/chosen": -2.272752285003662, + "logits/rejected": -2.452476978302002, + "logps/chosen": -344.346923828125, + "logps/rejected": -388.67181396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8136887550354, + "rewards/margins": 10.078873634338379, + "rewards/rejected": -15.892562866210938, + "step": 14813 + }, + { + "epoch": 2.3, + "learning_rate": 3.282146376886768e-06, + "logits/chosen": -2.2258477210998535, + "logits/rejected": -2.5300614833831787, + "logps/chosen": -151.1194305419922, + "logps/rejected": -347.4543151855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.38723087310791, + "rewards/margins": 12.737166404724121, + "rewards/rejected": -19.12439727783203, + "step": 14814 + }, + { + "epoch": 2.3, + "learning_rate": 3.28141293635562e-06, + "logits/chosen": -2.4100232124328613, + "logits/rejected": -2.9122426509857178, + "logps/chosen": -142.47288513183594, + "logps/rejected": -254.2846221923828, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.339450836181641, + "rewards/margins": 9.014630317687988, + "rewards/rejected": -14.354081153869629, + "step": 14815 + }, + { + "epoch": 2.3, + "learning_rate": 3.2806794958244723e-06, + "logits/chosen": -2.329484701156616, + "logits/rejected": -2.4195542335510254, + "logps/chosen": -165.94808959960938, + "logps/rejected": -223.12973022460938, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.686549186706543, + "rewards/margins": 6.487076759338379, + "rewards/rejected": -16.173625946044922, + "step": 14816 + }, + { + "epoch": 2.3, + "learning_rate": 3.279946055293324e-06, + "logits/chosen": -2.317682981491089, + "logits/rejected": -2.5587196350097656, + "logps/chosen": -172.56129455566406, + "logps/rejected": -483.5174560546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.527385711669922, + "rewards/margins": 10.070199966430664, + "rewards/rejected": -21.597583770751953, + "step": 14817 + }, + { + "epoch": 2.3, + "learning_rate": 3.2792126147621764e-06, + "logits/chosen": -2.384598731994629, + "logits/rejected": -2.413209915161133, + "logps/chosen": -321.38232421875, + "logps/rejected": -466.72784423828125, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.627568244934082, + "rewards/margins": 11.061309814453125, + "rewards/rejected": -21.68887710571289, + "step": 14818 + }, + { + "epoch": 2.3, + "learning_rate": 3.2784791742310287e-06, + "logits/chosen": -3.0107011795043945, + "logits/rejected": -3.0409016609191895, + "logps/chosen": -154.09133911132812, + "logps/rejected": -189.29296875, + "loss": 2.6473, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.629302024841309, + "rewards/margins": 1.2918837070465088, + "rewards/rejected": -11.921185493469238, + "step": 14819 + }, + { + "epoch": 2.3, + "learning_rate": 3.277745733699881e-06, + "logits/chosen": -1.7416402101516724, + "logits/rejected": -2.655795097351074, + "logps/chosen": -121.07307434082031, + "logps/rejected": -535.3065795898438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.66369915008545, + "rewards/margins": 11.669800758361816, + "rewards/rejected": -22.333499908447266, + "step": 14820 + }, + { + "epoch": 2.3, + "learning_rate": 3.277012293168733e-06, + "logits/chosen": -2.9035303592681885, + "logits/rejected": -2.2213685512542725, + "logps/chosen": -207.56304931640625, + "logps/rejected": -283.994140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.360067844390869, + "rewards/margins": 10.379585266113281, + "rewards/rejected": -14.739652633666992, + "step": 14821 + }, + { + "epoch": 2.31, + "learning_rate": 3.276278852637585e-06, + "logits/chosen": -2.2337589263916016, + "logits/rejected": -2.139645576477051, + "logps/chosen": -304.7086486816406, + "logps/rejected": -300.05548095703125, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.320318698883057, + "rewards/margins": 7.305850505828857, + "rewards/rejected": -14.626169204711914, + "step": 14822 + }, + { + "epoch": 2.31, + "learning_rate": 3.275545412106437e-06, + "logits/chosen": -1.9226646423339844, + "logits/rejected": -2.8467047214508057, + "logps/chosen": -724.7162475585938, + "logps/rejected": -850.2332763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.30384349822998, + "rewards/margins": 15.514291763305664, + "rewards/rejected": -24.818134307861328, + "step": 14823 + }, + { + "epoch": 2.31, + "learning_rate": 3.274811971575289e-06, + "logits/chosen": -1.8710954189300537, + "logits/rejected": -2.656743288040161, + "logps/chosen": -205.11721801757812, + "logps/rejected": -525.7989501953125, + "loss": 0.0439, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.678985595703125, + "rewards/margins": 9.04298210144043, + "rewards/rejected": -18.721969604492188, + "step": 14824 + }, + { + "epoch": 2.31, + "learning_rate": 3.2740785310441413e-06, + "logits/chosen": -2.7350285053253174, + "logits/rejected": -2.0082662105560303, + "logps/chosen": -741.0656127929688, + "logps/rejected": -481.58465576171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.097112655639648, + "rewards/margins": 8.422046661376953, + "rewards/rejected": -18.5191593170166, + "step": 14825 + }, + { + "epoch": 2.31, + "learning_rate": 3.273345090512993e-06, + "logits/chosen": -2.7496910095214844, + "logits/rejected": -2.88046932220459, + "logps/chosen": -222.22061157226562, + "logps/rejected": -486.2666320800781, + "loss": 1.5974, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.230862617492676, + "rewards/margins": 6.207404613494873, + "rewards/rejected": -18.43826675415039, + "step": 14826 + }, + { + "epoch": 2.31, + "learning_rate": 3.272611649981846e-06, + "logits/chosen": -2.698455810546875, + "logits/rejected": -2.805614709854126, + "logps/chosen": -122.44114685058594, + "logps/rejected": -267.853515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.809426307678223, + "rewards/margins": 11.009297370910645, + "rewards/rejected": -16.818723678588867, + "step": 14827 + }, + { + "epoch": 2.31, + "learning_rate": 3.2718782094506978e-06, + "logits/chosen": -2.661879777908325, + "logits/rejected": -2.8850674629211426, + "logps/chosen": -140.244384765625, + "logps/rejected": -263.7476501464844, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.213526725769043, + "rewards/margins": 7.67311954498291, + "rewards/rejected": -13.886646270751953, + "step": 14828 + }, + { + "epoch": 2.31, + "learning_rate": 3.27114476891955e-06, + "logits/chosen": -2.1420466899871826, + "logits/rejected": -2.5188729763031006, + "logps/chosen": -284.7159423828125, + "logps/rejected": -277.9501647949219, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.766519546508789, + "rewards/margins": 5.64349365234375, + "rewards/rejected": -11.410013198852539, + "step": 14829 + }, + { + "epoch": 2.31, + "learning_rate": 3.270411328388402e-06, + "logits/chosen": -1.3618812561035156, + "logits/rejected": -2.865408420562744, + "logps/chosen": -115.25691986083984, + "logps/rejected": -345.01934814453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5353078842163086, + "rewards/margins": 11.231575965881348, + "rewards/rejected": -14.766883850097656, + "step": 14830 + }, + { + "epoch": 2.31, + "learning_rate": 3.269677887857254e-06, + "logits/chosen": -2.4253530502319336, + "logits/rejected": -2.8856003284454346, + "logps/chosen": -175.70318603515625, + "logps/rejected": -420.59521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.920328617095947, + "rewards/margins": 12.199569702148438, + "rewards/rejected": -19.119897842407227, + "step": 14831 + }, + { + "epoch": 2.31, + "learning_rate": 3.268944447326106e-06, + "logits/chosen": -2.644554376602173, + "logits/rejected": -2.1859254837036133, + "logps/chosen": -464.08056640625, + "logps/rejected": -636.2763061523438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.153560161590576, + "rewards/margins": 18.627094268798828, + "rewards/rejected": -22.780654907226562, + "step": 14832 + }, + { + "epoch": 2.31, + "learning_rate": 3.268211006794958e-06, + "logits/chosen": -2.740093231201172, + "logits/rejected": -2.8218600749969482, + "logps/chosen": -785.6102294921875, + "logps/rejected": -849.9228515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.045671463012695, + "rewards/margins": 11.31815242767334, + "rewards/rejected": -22.36382293701172, + "step": 14833 + }, + { + "epoch": 2.31, + "learning_rate": 3.2674775662638103e-06, + "logits/chosen": -1.9180262088775635, + "logits/rejected": -2.2970120906829834, + "logps/chosen": -419.90966796875, + "logps/rejected": -680.8407592773438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.398470878601074, + "rewards/margins": 11.619770050048828, + "rewards/rejected": -23.01824188232422, + "step": 14834 + }, + { + "epoch": 2.31, + "learning_rate": 3.2667441257326626e-06, + "logits/chosen": -2.0268962383270264, + "logits/rejected": -2.28348970413208, + "logps/chosen": -233.34523010253906, + "logps/rejected": -431.16851806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.992347717285156, + "rewards/margins": 12.5220308303833, + "rewards/rejected": -18.51437759399414, + "step": 14835 + }, + { + "epoch": 2.31, + "learning_rate": 3.266010685201515e-06, + "logits/chosen": -1.8187336921691895, + "logits/rejected": -2.5780391693115234, + "logps/chosen": -98.08821868896484, + "logps/rejected": -295.0726623535156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.999509811401367, + "rewards/margins": 9.45113754272461, + "rewards/rejected": -17.450647354125977, + "step": 14836 + }, + { + "epoch": 2.31, + "learning_rate": 3.265277244670367e-06, + "logits/chosen": -1.728511929512024, + "logits/rejected": -2.5640149116516113, + "logps/chosen": -310.50408935546875, + "logps/rejected": -551.4818115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.051901340484619, + "rewards/margins": 17.93520736694336, + "rewards/rejected": -24.98710823059082, + "step": 14837 + }, + { + "epoch": 2.31, + "learning_rate": 3.264543804139219e-06, + "logits/chosen": -2.2908477783203125, + "logits/rejected": -2.6943328380584717, + "logps/chosen": -188.79135131835938, + "logps/rejected": -341.32000732421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.979347229003906, + "rewards/margins": 11.82009506225586, + "rewards/rejected": -19.799442291259766, + "step": 14838 + }, + { + "epoch": 2.31, + "learning_rate": 3.263810363608071e-06, + "logits/chosen": -2.559501886367798, + "logits/rejected": -2.889096975326538, + "logps/chosen": -137.44232177734375, + "logps/rejected": -269.7257995605469, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.99101448059082, + "rewards/margins": 8.666625022888184, + "rewards/rejected": -14.657638549804688, + "step": 14839 + }, + { + "epoch": 2.31, + "learning_rate": 3.2630769230769233e-06, + "logits/chosen": -1.8922698497772217, + "logits/rejected": -2.549290418624878, + "logps/chosen": -161.07278442382812, + "logps/rejected": -303.67486572265625, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.74785041809082, + "rewards/margins": 9.377148628234863, + "rewards/rejected": -17.124998092651367, + "step": 14840 + }, + { + "epoch": 2.31, + "learning_rate": 3.262343482545775e-06, + "logits/chosen": -2.588914394378662, + "logits/rejected": -2.661958694458008, + "logps/chosen": -437.32769775390625, + "logps/rejected": -553.1483154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.838530540466309, + "rewards/margins": 16.28396987915039, + "rewards/rejected": -23.122501373291016, + "step": 14841 + }, + { + "epoch": 2.31, + "learning_rate": 3.261610042014627e-06, + "logits/chosen": -2.22544002532959, + "logits/rejected": -2.691955089569092, + "logps/chosen": -291.6406555175781, + "logps/rejected": -527.5415649414062, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.560072898864746, + "rewards/margins": 11.779975891113281, + "rewards/rejected": -24.340049743652344, + "step": 14842 + }, + { + "epoch": 2.31, + "learning_rate": 3.2608766014834793e-06, + "logits/chosen": -2.7429566383361816, + "logits/rejected": -1.7694529294967651, + "logps/chosen": -669.9609985351562, + "logps/rejected": -609.258056640625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.507095336914062, + "rewards/margins": 7.625728607177734, + "rewards/rejected": -17.132823944091797, + "step": 14843 + }, + { + "epoch": 2.31, + "learning_rate": 3.2601431609523316e-06, + "logits/chosen": -2.569033622741699, + "logits/rejected": -2.81469988822937, + "logps/chosen": -60.20403289794922, + "logps/rejected": -307.9407043457031, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.560523986816406, + "rewards/margins": 11.842851638793945, + "rewards/rejected": -16.40337562561035, + "step": 14844 + }, + { + "epoch": 2.31, + "learning_rate": 3.259409720421184e-06, + "logits/chosen": -2.6596171855926514, + "logits/rejected": -2.8012263774871826, + "logps/chosen": -343.18402099609375, + "logps/rejected": -644.5921020507812, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.351678848266602, + "rewards/margins": 11.529537200927734, + "rewards/rejected": -19.881216049194336, + "step": 14845 + }, + { + "epoch": 2.31, + "learning_rate": 3.258676279890036e-06, + "logits/chosen": -2.425424337387085, + "logits/rejected": -2.214714527130127, + "logps/chosen": -292.27679443359375, + "logps/rejected": -271.9139404296875, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.237613677978516, + "rewards/margins": 6.7366485595703125, + "rewards/rejected": -11.974262237548828, + "step": 14846 + }, + { + "epoch": 2.31, + "learning_rate": 3.257942839358888e-06, + "logits/chosen": -1.0511443614959717, + "logits/rejected": -1.2910161018371582, + "logps/chosen": -540.8140258789062, + "logps/rejected": -654.2588500976562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3542938232421875, + "rewards/margins": 10.895284652709961, + "rewards/rejected": -16.24957847595215, + "step": 14847 + }, + { + "epoch": 2.31, + "learning_rate": 3.25720939882774e-06, + "logits/chosen": -2.5289435386657715, + "logits/rejected": -2.8549416065216064, + "logps/chosen": -63.11438751220703, + "logps/rejected": -270.69268798828125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.342531681060791, + "rewards/margins": 11.725896835327148, + "rewards/rejected": -17.06842803955078, + "step": 14848 + }, + { + "epoch": 2.31, + "learning_rate": 3.2564759582965923e-06, + "logits/chosen": -1.6521668434143066, + "logits/rejected": -2.5677595138549805, + "logps/chosen": -484.60894775390625, + "logps/rejected": -557.068115234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.802254676818848, + "rewards/margins": 9.30265998840332, + "rewards/rejected": -18.10491371154785, + "step": 14849 + }, + { + "epoch": 2.31, + "learning_rate": 3.255742517765444e-06, + "logits/chosen": -2.067615032196045, + "logits/rejected": -2.8141658306121826, + "logps/chosen": -223.04200744628906, + "logps/rejected": -429.89117431640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.324080467224121, + "rewards/margins": 10.378108978271484, + "rewards/rejected": -17.702190399169922, + "step": 14850 + }, + { + "epoch": 2.31, + "learning_rate": 3.255009077234296e-06, + "logits/chosen": -2.6643307209014893, + "logits/rejected": -2.6320431232452393, + "logps/chosen": -194.67318725585938, + "logps/rejected": -335.25579833984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.137453079223633, + "rewards/margins": 8.499996185302734, + "rewards/rejected": -18.637449264526367, + "step": 14851 + }, + { + "epoch": 2.31, + "learning_rate": 3.2542756367031484e-06, + "logits/chosen": -2.8628528118133545, + "logits/rejected": -2.815830707550049, + "logps/chosen": -163.919921875, + "logps/rejected": -428.20330810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.901397705078125, + "rewards/margins": 11.761043548583984, + "rewards/rejected": -20.66244125366211, + "step": 14852 + }, + { + "epoch": 2.31, + "learning_rate": 3.2535421961720007e-06, + "logits/chosen": -2.679676055908203, + "logits/rejected": -2.758854627609253, + "logps/chosen": -136.3048095703125, + "logps/rejected": -345.5668029785156, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.158432960510254, + "rewards/margins": 7.74882698059082, + "rewards/rejected": -14.907258987426758, + "step": 14853 + }, + { + "epoch": 2.31, + "learning_rate": 3.252808755640853e-06, + "logits/chosen": -2.0622920989990234, + "logits/rejected": -1.3809936046600342, + "logps/chosen": -235.00779724121094, + "logps/rejected": -217.54522705078125, + "loss": 0.0686, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.588807106018066, + "rewards/margins": 5.392956256866455, + "rewards/rejected": -13.98176383972168, + "step": 14854 + }, + { + "epoch": 2.31, + "learning_rate": 3.252075315109705e-06, + "logits/chosen": -2.7626912593841553, + "logits/rejected": -1.8094208240509033, + "logps/chosen": -419.3117980957031, + "logps/rejected": -376.2811279296875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.40443229675293, + "rewards/margins": 6.136834144592285, + "rewards/rejected": -14.541265487670898, + "step": 14855 + }, + { + "epoch": 2.31, + "learning_rate": 3.251341874578557e-06, + "logits/chosen": -2.6459615230560303, + "logits/rejected": -2.4425032138824463, + "logps/chosen": -134.54403686523438, + "logps/rejected": -162.741943359375, + "loss": 2.3518, + "rewards/accuracies": 0.5, + "rewards/chosen": -5.409022331237793, + "rewards/margins": 4.50875186920166, + "rewards/rejected": -9.917774200439453, + "step": 14856 + }, + { + "epoch": 2.31, + "learning_rate": 3.250608434047409e-06, + "logits/chosen": -1.9389656782150269, + "logits/rejected": -2.828134298324585, + "logps/chosen": -145.64674377441406, + "logps/rejected": -402.534912109375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.839292526245117, + "rewards/margins": 7.559381484985352, + "rewards/rejected": -14.398674011230469, + "step": 14857 + }, + { + "epoch": 2.31, + "learning_rate": 3.2498749935162613e-06, + "logits/chosen": -1.880281925201416, + "logits/rejected": -2.758920669555664, + "logps/chosen": -262.6483154296875, + "logps/rejected": -415.33575439453125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.636817932128906, + "rewards/margins": 13.417089462280273, + "rewards/rejected": -23.053909301757812, + "step": 14858 + }, + { + "epoch": 2.31, + "learning_rate": 3.249141552985113e-06, + "logits/chosen": -2.811365842819214, + "logits/rejected": -2.4926488399505615, + "logps/chosen": -508.8445739746094, + "logps/rejected": -430.6235656738281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.845999240875244, + "rewards/margins": 12.626490592956543, + "rewards/rejected": -19.472490310668945, + "step": 14859 + }, + { + "epoch": 2.31, + "learning_rate": 3.2484081124539655e-06, + "logits/chosen": -2.301891326904297, + "logits/rejected": -2.4034225940704346, + "logps/chosen": -188.1506805419922, + "logps/rejected": -425.16644287109375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.366623878479004, + "rewards/margins": 11.02486515045166, + "rewards/rejected": -17.391489028930664, + "step": 14860 + }, + { + "epoch": 2.31, + "learning_rate": 3.247674671922818e-06, + "logits/chosen": -2.7357757091522217, + "logits/rejected": -2.521975040435791, + "logps/chosen": -619.6898803710938, + "logps/rejected": -913.39453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.55372428894043, + "rewards/margins": 14.871574401855469, + "rewards/rejected": -24.42530059814453, + "step": 14861 + }, + { + "epoch": 2.31, + "learning_rate": 3.2469412313916697e-06, + "logits/chosen": -2.648160457611084, + "logits/rejected": -2.690228223800659, + "logps/chosen": -176.8031768798828, + "logps/rejected": -283.97296142578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3282976150512695, + "rewards/margins": 8.901609420776367, + "rewards/rejected": -14.229907989501953, + "step": 14862 + }, + { + "epoch": 2.31, + "learning_rate": 3.246207790860522e-06, + "logits/chosen": -2.6475682258605957, + "logits/rejected": -2.723588466644287, + "logps/chosen": -223.42752075195312, + "logps/rejected": -276.1455383300781, + "loss": 0.4283, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.570798873901367, + "rewards/margins": 5.251105785369873, + "rewards/rejected": -14.821905136108398, + "step": 14863 + }, + { + "epoch": 2.31, + "learning_rate": 3.245474350329374e-06, + "logits/chosen": -2.6391351222991943, + "logits/rejected": -1.8376518487930298, + "logps/chosen": -348.96160888671875, + "logps/rejected": -287.8247985839844, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.559950828552246, + "rewards/margins": 8.224852561950684, + "rewards/rejected": -16.78480339050293, + "step": 14864 + }, + { + "epoch": 2.31, + "learning_rate": 3.244740909798226e-06, + "logits/chosen": -2.6630303859710693, + "logits/rejected": -1.9666900634765625, + "logps/chosen": -497.94281005859375, + "logps/rejected": -478.2427062988281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.843316555023193, + "rewards/margins": 9.085453033447266, + "rewards/rejected": -15.9287691116333, + "step": 14865 + }, + { + "epoch": 2.31, + "learning_rate": 3.244007469267078e-06, + "logits/chosen": -2.7570879459381104, + "logits/rejected": -1.2679940462112427, + "logps/chosen": -932.6841430664062, + "logps/rejected": -551.9088134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.898755073547363, + "rewards/margins": 12.31663990020752, + "rewards/rejected": -20.215394973754883, + "step": 14866 + }, + { + "epoch": 2.31, + "learning_rate": 3.2432740287359303e-06, + "logits/chosen": -2.6052260398864746, + "logits/rejected": -2.144512414932251, + "logps/chosen": -571.8101806640625, + "logps/rejected": -507.3277587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.040544509887695, + "rewards/margins": 10.673135757446289, + "rewards/rejected": -18.713680267333984, + "step": 14867 + }, + { + "epoch": 2.31, + "learning_rate": 3.2425405882047822e-06, + "logits/chosen": -1.8192967176437378, + "logits/rejected": -2.4478182792663574, + "logps/chosen": -239.74285888671875, + "logps/rejected": -388.79730224609375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.960417747497559, + "rewards/margins": 10.388434410095215, + "rewards/rejected": -19.348852157592773, + "step": 14868 + }, + { + "epoch": 2.31, + "learning_rate": 3.2418071476736345e-06, + "logits/chosen": -2.2191050052642822, + "logits/rejected": -2.8456575870513916, + "logps/chosen": -107.4560546875, + "logps/rejected": -294.9969482421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.302244186401367, + "rewards/margins": 8.8047513961792, + "rewards/rejected": -16.106996536254883, + "step": 14869 + }, + { + "epoch": 2.31, + "learning_rate": 3.241073707142487e-06, + "logits/chosen": -2.8479437828063965, + "logits/rejected": -2.8472042083740234, + "logps/chosen": -119.4344253540039, + "logps/rejected": -282.1033935546875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.779729843139648, + "rewards/margins": 6.971526145935059, + "rewards/rejected": -13.751255989074707, + "step": 14870 + }, + { + "epoch": 2.31, + "learning_rate": 3.2403402666113387e-06, + "logits/chosen": -1.2981679439544678, + "logits/rejected": -2.792043447494507, + "logps/chosen": -105.29925537109375, + "logps/rejected": -387.22064208984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.12938404083252, + "rewards/margins": 11.438875198364258, + "rewards/rejected": -19.568260192871094, + "step": 14871 + }, + { + "epoch": 2.31, + "learning_rate": 3.239606826080191e-06, + "logits/chosen": -1.2971611022949219, + "logits/rejected": -2.4306912422180176, + "logps/chosen": -224.2854766845703, + "logps/rejected": -493.8447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.278433799743652, + "rewards/margins": 13.141103744506836, + "rewards/rejected": -21.419536590576172, + "step": 14872 + }, + { + "epoch": 2.31, + "learning_rate": 3.238873385549043e-06, + "logits/chosen": -2.4974114894866943, + "logits/rejected": -1.5601588487625122, + "logps/chosen": -230.6783447265625, + "logps/rejected": -267.776611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7664878368377686, + "rewards/margins": 10.795160293579102, + "rewards/rejected": -13.56164836883545, + "step": 14873 + }, + { + "epoch": 2.31, + "learning_rate": 3.238139945017895e-06, + "logits/chosen": -1.029667615890503, + "logits/rejected": -2.3293204307556152, + "logps/chosen": -130.9060516357422, + "logps/rejected": -388.81536865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.262079238891602, + "rewards/margins": 12.175954818725586, + "rewards/rejected": -18.438034057617188, + "step": 14874 + }, + { + "epoch": 2.31, + "learning_rate": 3.237406504486747e-06, + "logits/chosen": -2.172056198120117, + "logits/rejected": -2.4318535327911377, + "logps/chosen": -192.61639404296875, + "logps/rejected": -297.9212646484375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.45989990234375, + "rewards/margins": 8.154422760009766, + "rewards/rejected": -15.614322662353516, + "step": 14875 + }, + { + "epoch": 2.31, + "learning_rate": 3.2366730639555994e-06, + "logits/chosen": -2.639775037765503, + "logits/rejected": -2.5914711952209473, + "logps/chosen": -296.50775146484375, + "logps/rejected": -268.40338134765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.90456485748291, + "rewards/margins": 7.854612350463867, + "rewards/rejected": -14.759178161621094, + "step": 14876 + }, + { + "epoch": 2.31, + "learning_rate": 3.2359396234244512e-06, + "logits/chosen": -1.3352631330490112, + "logits/rejected": -2.2266416549682617, + "logps/chosen": -270.454833984375, + "logps/rejected": -447.86865234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.137081146240234, + "rewards/margins": 9.614269256591797, + "rewards/rejected": -18.75135040283203, + "step": 14877 + }, + { + "epoch": 2.31, + "learning_rate": 3.235206182893304e-06, + "logits/chosen": -2.811830520629883, + "logits/rejected": -1.9276931285858154, + "logps/chosen": -351.1319580078125, + "logps/rejected": -344.912841796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.058427810668945, + "rewards/margins": 8.789375305175781, + "rewards/rejected": -15.847803115844727, + "step": 14878 + }, + { + "epoch": 2.31, + "learning_rate": 3.234472742362156e-06, + "logits/chosen": -2.5752103328704834, + "logits/rejected": -1.6264270544052124, + "logps/chosen": -425.2379150390625, + "logps/rejected": -393.08404541015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.459768295288086, + "rewards/margins": 10.157563209533691, + "rewards/rejected": -17.617332458496094, + "step": 14879 + }, + { + "epoch": 2.31, + "learning_rate": 3.233739301831008e-06, + "logits/chosen": -1.2474960088729858, + "logits/rejected": -2.543792486190796, + "logps/chosen": -290.6357116699219, + "logps/rejected": -513.712158203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.8760986328125, + "rewards/margins": 9.34358024597168, + "rewards/rejected": -19.21967887878418, + "step": 14880 + }, + { + "epoch": 2.31, + "learning_rate": 3.23300586129986e-06, + "logits/chosen": -2.8840384483337402, + "logits/rejected": -2.7374842166900635, + "logps/chosen": -195.13986206054688, + "logps/rejected": -195.73941040039062, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.337461948394775, + "rewards/margins": 9.044698715209961, + "rewards/rejected": -15.382160186767578, + "step": 14881 + }, + { + "epoch": 2.31, + "learning_rate": 3.232272420768712e-06, + "logits/chosen": -2.365508556365967, + "logits/rejected": -2.5821545124053955, + "logps/chosen": -116.52595520019531, + "logps/rejected": -312.7554931640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.007462501525879, + "rewards/margins": 11.144262313842773, + "rewards/rejected": -17.151723861694336, + "step": 14882 + }, + { + "epoch": 2.31, + "learning_rate": 3.231538980237564e-06, + "logits/chosen": -1.028977394104004, + "logits/rejected": -2.8092763423919678, + "logps/chosen": -188.79031372070312, + "logps/rejected": -734.0247192382812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.017996788024902, + "rewards/margins": 13.206710815429688, + "rewards/rejected": -20.224708557128906, + "step": 14883 + }, + { + "epoch": 2.31, + "learning_rate": 3.230805539706416e-06, + "logits/chosen": -2.7634787559509277, + "logits/rejected": -2.006244659423828, + "logps/chosen": -208.64425659179688, + "logps/rejected": -294.2821044921875, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.954325675964355, + "rewards/margins": 6.616205215454102, + "rewards/rejected": -15.570530891418457, + "step": 14884 + }, + { + "epoch": 2.31, + "learning_rate": 3.2300720991752684e-06, + "logits/chosen": -2.4913909435272217, + "logits/rejected": -1.6595319509506226, + "logps/chosen": -263.99114990234375, + "logps/rejected": -307.6014709472656, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.843118667602539, + "rewards/margins": 5.70750617980957, + "rewards/rejected": -13.55062484741211, + "step": 14885 + }, + { + "epoch": 2.32, + "learning_rate": 3.2293386586441207e-06, + "logits/chosen": -2.7943835258483887, + "logits/rejected": -2.7394511699676514, + "logps/chosen": -199.8022003173828, + "logps/rejected": -238.48899841308594, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.696395397186279, + "rewards/margins": 5.8184051513671875, + "rewards/rejected": -13.514801025390625, + "step": 14886 + }, + { + "epoch": 2.32, + "learning_rate": 3.228605218112973e-06, + "logits/chosen": -1.642211675643921, + "logits/rejected": -3.0045409202575684, + "logps/chosen": -221.80906677246094, + "logps/rejected": -495.017822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.018535614013672, + "rewards/margins": 10.339221954345703, + "rewards/rejected": -19.357757568359375, + "step": 14887 + }, + { + "epoch": 2.32, + "learning_rate": 3.227871777581825e-06, + "logits/chosen": -1.4203139543533325, + "logits/rejected": -2.552438974380493, + "logps/chosen": -223.13015747070312, + "logps/rejected": -427.9706726074219, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.478717803955078, + "rewards/margins": 8.92391300201416, + "rewards/rejected": -19.402629852294922, + "step": 14888 + }, + { + "epoch": 2.32, + "learning_rate": 3.227138337050677e-06, + "logits/chosen": -2.827491521835327, + "logits/rejected": -2.7736456394195557, + "logps/chosen": -188.87698364257812, + "logps/rejected": -296.7410583496094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.120953559875488, + "rewards/margins": 11.055803298950195, + "rewards/rejected": -18.1767578125, + "step": 14889 + }, + { + "epoch": 2.32, + "learning_rate": 3.226404896519529e-06, + "logits/chosen": -1.106170415878296, + "logits/rejected": -2.6491942405700684, + "logps/chosen": -133.334228515625, + "logps/rejected": -370.393798828125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.084059715270996, + "rewards/margins": 6.972644329071045, + "rewards/rejected": -18.056703567504883, + "step": 14890 + }, + { + "epoch": 2.32, + "learning_rate": 3.225671455988381e-06, + "logits/chosen": -2.417935609817505, + "logits/rejected": -2.4910573959350586, + "logps/chosen": -247.89376831054688, + "logps/rejected": -231.7577362060547, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.265388011932373, + "rewards/margins": 5.893411159515381, + "rewards/rejected": -13.158799171447754, + "step": 14891 + }, + { + "epoch": 2.32, + "learning_rate": 3.2249380154572332e-06, + "logits/chosen": -3.021296501159668, + "logits/rejected": -2.971740484237671, + "logps/chosen": -348.1366271972656, + "logps/rejected": -545.5167236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.540733337402344, + "rewards/margins": 13.975889205932617, + "rewards/rejected": -21.51662254333496, + "step": 14892 + }, + { + "epoch": 2.32, + "learning_rate": 3.224204574926085e-06, + "logits/chosen": -2.760883331298828, + "logits/rejected": -2.4827325344085693, + "logps/chosen": -293.54290771484375, + "logps/rejected": -351.0233154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.531375885009766, + "rewards/margins": 10.697614669799805, + "rewards/rejected": -17.22899055480957, + "step": 14893 + }, + { + "epoch": 2.32, + "learning_rate": 3.2234711343949374e-06, + "logits/chosen": -2.7963902950286865, + "logits/rejected": -2.483525514602661, + "logps/chosen": -165.04837036132812, + "logps/rejected": -269.48358154296875, + "loss": 1.1505, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.619467735290527, + "rewards/margins": 4.998582363128662, + "rewards/rejected": -16.61804962158203, + "step": 14894 + }, + { + "epoch": 2.32, + "learning_rate": 3.2227376938637897e-06, + "logits/chosen": -2.9361660480499268, + "logits/rejected": -1.7393040657043457, + "logps/chosen": -580.044921875, + "logps/rejected": -354.5166320800781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1993727684021, + "rewards/margins": 8.82006549835205, + "rewards/rejected": -14.019437789916992, + "step": 14895 + }, + { + "epoch": 2.32, + "learning_rate": 3.222004253332642e-06, + "logits/chosen": -2.6291072368621826, + "logits/rejected": -2.609828472137451, + "logps/chosen": -443.4103698730469, + "logps/rejected": -561.1273803710938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.913321018218994, + "rewards/margins": 12.116710662841797, + "rewards/rejected": -19.030031204223633, + "step": 14896 + }, + { + "epoch": 2.32, + "learning_rate": 3.221270812801494e-06, + "logits/chosen": -2.4804420471191406, + "logits/rejected": -2.920311450958252, + "logps/chosen": -539.4228515625, + "logps/rejected": -647.691650390625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.751640319824219, + "rewards/margins": 9.011512756347656, + "rewards/rejected": -21.763153076171875, + "step": 14897 + }, + { + "epoch": 2.32, + "learning_rate": 3.220537372270346e-06, + "logits/chosen": -1.8987905979156494, + "logits/rejected": -2.7307803630828857, + "logps/chosen": -297.5816650390625, + "logps/rejected": -480.2976379394531, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.803894996643066, + "rewards/margins": 6.591999053955078, + "rewards/rejected": -17.395893096923828, + "step": 14898 + }, + { + "epoch": 2.32, + "learning_rate": 3.219803931739198e-06, + "logits/chosen": -2.067213296890259, + "logits/rejected": -2.9306640625, + "logps/chosen": -281.100341796875, + "logps/rejected": -394.5078125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.652036666870117, + "rewards/margins": 7.397436141967773, + "rewards/rejected": -15.04947280883789, + "step": 14899 + }, + { + "epoch": 2.32, + "learning_rate": 3.21907049120805e-06, + "logits/chosen": -2.703559637069702, + "logits/rejected": -2.8816256523132324, + "logps/chosen": -89.66462707519531, + "logps/rejected": -241.23562622070312, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.945162773132324, + "rewards/margins": 6.328756332397461, + "rewards/rejected": -14.273918151855469, + "step": 14900 + }, + { + "epoch": 2.32, + "learning_rate": 3.2183370506769022e-06, + "logits/chosen": -2.7948803901672363, + "logits/rejected": -2.738988161087036, + "logps/chosen": -325.2218017578125, + "logps/rejected": -490.64996337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.90684700012207, + "rewards/margins": 14.337104797363281, + "rewards/rejected": -22.24395179748535, + "step": 14901 + }, + { + "epoch": 2.32, + "learning_rate": 3.217603610145754e-06, + "logits/chosen": -2.6998157501220703, + "logits/rejected": -3.164513111114502, + "logps/chosen": -94.34466552734375, + "logps/rejected": -252.71435546875, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.423796653747559, + "rewards/margins": 4.55037784576416, + "rewards/rejected": -12.974174499511719, + "step": 14902 + }, + { + "epoch": 2.32, + "learning_rate": 3.216870169614607e-06, + "logits/chosen": -2.4415555000305176, + "logits/rejected": -2.818319320678711, + "logps/chosen": -282.9145812988281, + "logps/rejected": -431.6951599121094, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.06826400756836, + "rewards/margins": 6.847647190093994, + "rewards/rejected": -14.915910720825195, + "step": 14903 + }, + { + "epoch": 2.32, + "learning_rate": 3.2161367290834587e-06, + "logits/chosen": -2.9104411602020264, + "logits/rejected": -2.6045854091644287, + "logps/chosen": -128.69923400878906, + "logps/rejected": -305.7939758300781, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.319877624511719, + "rewards/margins": 10.514942169189453, + "rewards/rejected": -18.834819793701172, + "step": 14904 + }, + { + "epoch": 2.32, + "learning_rate": 3.215403288552311e-06, + "logits/chosen": -1.7956751585006714, + "logits/rejected": -2.7395994663238525, + "logps/chosen": -333.98284912109375, + "logps/rejected": -471.6971435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2506279945373535, + "rewards/margins": 13.167703628540039, + "rewards/rejected": -20.418331146240234, + "step": 14905 + }, + { + "epoch": 2.32, + "learning_rate": 3.214669848021163e-06, + "logits/chosen": -1.5519963502883911, + "logits/rejected": -2.2841155529022217, + "logps/chosen": -251.07049560546875, + "logps/rejected": -537.6546630859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9727582931518555, + "rewards/margins": 12.081920623779297, + "rewards/rejected": -20.05467987060547, + "step": 14906 + }, + { + "epoch": 2.32, + "learning_rate": 3.2139364074900152e-06, + "logits/chosen": -2.899256467819214, + "logits/rejected": -2.847674608230591, + "logps/chosen": -271.4527282714844, + "logps/rejected": -344.2803955078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.306821823120117, + "rewards/margins": 8.793464660644531, + "rewards/rejected": -16.10028648376465, + "step": 14907 + }, + { + "epoch": 2.32, + "learning_rate": 3.213202966958867e-06, + "logits/chosen": -1.8748314380645752, + "logits/rejected": -2.6192398071289062, + "logps/chosen": -152.64114379882812, + "logps/rejected": -406.7655029296875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3232421875, + "rewards/margins": 8.910128593444824, + "rewards/rejected": -14.23337173461914, + "step": 14908 + }, + { + "epoch": 2.32, + "learning_rate": 3.2124695264277194e-06, + "logits/chosen": -2.609179973602295, + "logits/rejected": -1.8325968980789185, + "logps/chosen": -291.0114440917969, + "logps/rejected": -472.09600830078125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.426109313964844, + "rewards/margins": 11.109512329101562, + "rewards/rejected": -18.535621643066406, + "step": 14909 + }, + { + "epoch": 2.32, + "learning_rate": 3.2117360858965713e-06, + "logits/chosen": -2.595663070678711, + "logits/rejected": -2.8705294132232666, + "logps/chosen": -794.7576904296875, + "logps/rejected": -851.310302734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.235598564147949, + "rewards/margins": 12.510882377624512, + "rewards/rejected": -17.74648094177246, + "step": 14910 + }, + { + "epoch": 2.32, + "learning_rate": 3.2110026453654236e-06, + "logits/chosen": -2.2936911582946777, + "logits/rejected": -1.926231861114502, + "logps/chosen": -454.0149841308594, + "logps/rejected": -588.6455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.081207275390625, + "rewards/margins": 14.058563232421875, + "rewards/rejected": -19.1397705078125, + "step": 14911 + }, + { + "epoch": 2.32, + "learning_rate": 3.210269204834276e-06, + "logits/chosen": -1.632802963256836, + "logits/rejected": -2.233147382736206, + "logps/chosen": -173.390625, + "logps/rejected": -426.5281982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.696994304656982, + "rewards/margins": 11.020257949829102, + "rewards/rejected": -18.717252731323242, + "step": 14912 + }, + { + "epoch": 2.32, + "learning_rate": 3.2095357643031278e-06, + "logits/chosen": -1.2307294607162476, + "logits/rejected": -2.5864357948303223, + "logps/chosen": -243.2811279296875, + "logps/rejected": -640.3093872070312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.391782760620117, + "rewards/margins": 12.391674041748047, + "rewards/rejected": -22.783458709716797, + "step": 14913 + }, + { + "epoch": 2.32, + "learning_rate": 3.20880232377198e-06, + "logits/chosen": -2.885082960128784, + "logits/rejected": -2.749361753463745, + "logps/chosen": -125.50581359863281, + "logps/rejected": -181.1252899169922, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.015589714050293, + "rewards/margins": 6.589170455932617, + "rewards/rejected": -15.60476016998291, + "step": 14914 + }, + { + "epoch": 2.32, + "learning_rate": 3.208068883240832e-06, + "logits/chosen": -2.5129380226135254, + "logits/rejected": -2.691943645477295, + "logps/chosen": -89.68028259277344, + "logps/rejected": -332.9388427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.132205963134766, + "rewards/margins": 11.528955459594727, + "rewards/rejected": -17.661161422729492, + "step": 14915 + }, + { + "epoch": 2.32, + "learning_rate": 3.2073354427096842e-06, + "logits/chosen": -2.645230770111084, + "logits/rejected": -1.9811081886291504, + "logps/chosen": -248.52734375, + "logps/rejected": -435.998779296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.618996620178223, + "rewards/margins": 9.604463577270508, + "rewards/rejected": -17.223461151123047, + "step": 14916 + }, + { + "epoch": 2.32, + "learning_rate": 3.206602002178536e-06, + "logits/chosen": -2.2893245220184326, + "logits/rejected": -2.329946517944336, + "logps/chosen": -156.08108520507812, + "logps/rejected": -303.9547119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.875807762145996, + "rewards/margins": 11.458515167236328, + "rewards/rejected": -17.33432388305664, + "step": 14917 + }, + { + "epoch": 2.32, + "learning_rate": 3.2058685616473884e-06, + "logits/chosen": -1.7980225086212158, + "logits/rejected": -2.6593260765075684, + "logps/chosen": -240.63897705078125, + "logps/rejected": -336.793701171875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1241326332092285, + "rewards/margins": 7.210062026977539, + "rewards/rejected": -11.33419418334961, + "step": 14918 + }, + { + "epoch": 2.32, + "learning_rate": 3.2051351211162403e-06, + "logits/chosen": -2.675398826599121, + "logits/rejected": -2.841245412826538, + "logps/chosen": -603.7652587890625, + "logps/rejected": -531.2348022460938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1890950202941895, + "rewards/margins": 12.528799057006836, + "rewards/rejected": -19.717893600463867, + "step": 14919 + }, + { + "epoch": 2.32, + "learning_rate": 3.2044016805850926e-06, + "logits/chosen": -1.8327704668045044, + "logits/rejected": -2.281817674636841, + "logps/chosen": -156.43582153320312, + "logps/rejected": -566.119384765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.845144271850586, + "rewards/margins": 17.085025787353516, + "rewards/rejected": -23.930171966552734, + "step": 14920 + }, + { + "epoch": 2.32, + "learning_rate": 3.203668240053945e-06, + "logits/chosen": -2.1478121280670166, + "logits/rejected": -2.844712018966675, + "logps/chosen": -167.16427612304688, + "logps/rejected": -421.90362548828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.133211612701416, + "rewards/margins": 10.507562637329102, + "rewards/rejected": -16.64077377319336, + "step": 14921 + }, + { + "epoch": 2.32, + "learning_rate": 3.2029347995227968e-06, + "logits/chosen": -1.5560088157653809, + "logits/rejected": -2.676175355911255, + "logps/chosen": -143.10183715820312, + "logps/rejected": -385.8052978515625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.257787704467773, + "rewards/margins": 8.204402923583984, + "rewards/rejected": -17.462190628051758, + "step": 14922 + }, + { + "epoch": 2.32, + "learning_rate": 3.202201358991649e-06, + "logits/chosen": -2.5619966983795166, + "logits/rejected": -2.633104085922241, + "logps/chosen": -309.99029541015625, + "logps/rejected": -581.3577270507812, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.164305686950684, + "rewards/margins": 13.72395133972168, + "rewards/rejected": -21.88825798034668, + "step": 14923 + }, + { + "epoch": 2.32, + "learning_rate": 3.201467918460501e-06, + "logits/chosen": -2.768747568130493, + "logits/rejected": -2.601768732070923, + "logps/chosen": -563.1738891601562, + "logps/rejected": -801.7615966796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.645297050476074, + "rewards/margins": 9.353588104248047, + "rewards/rejected": -20.998886108398438, + "step": 14924 + }, + { + "epoch": 2.32, + "learning_rate": 3.2007344779293533e-06, + "logits/chosen": -1.7312476634979248, + "logits/rejected": -2.3413612842559814, + "logps/chosen": -328.59808349609375, + "logps/rejected": -354.90704345703125, + "loss": 0.4982, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.542435646057129, + "rewards/margins": 6.154652118682861, + "rewards/rejected": -15.697088241577148, + "step": 14925 + }, + { + "epoch": 2.32, + "learning_rate": 3.200001037398205e-06, + "logits/chosen": -1.9215425252914429, + "logits/rejected": -2.643888235092163, + "logps/chosen": -423.3939514160156, + "logps/rejected": -516.6524047851562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.127490997314453, + "rewards/margins": 11.717697143554688, + "rewards/rejected": -22.84518814086914, + "step": 14926 + }, + { + "epoch": 2.32, + "learning_rate": 3.1992675968670574e-06, + "logits/chosen": -2.4175405502319336, + "logits/rejected": -2.5080296993255615, + "logps/chosen": -554.3262939453125, + "logps/rejected": -615.5923461914062, + "loss": 0.1001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.898722648620605, + "rewards/margins": 7.651912689208984, + "rewards/rejected": -16.550636291503906, + "step": 14927 + }, + { + "epoch": 2.32, + "learning_rate": 3.1985341563359097e-06, + "logits/chosen": -1.7075750827789307, + "logits/rejected": -2.533968925476074, + "logps/chosen": -170.2383575439453, + "logps/rejected": -330.78802490234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.432893753051758, + "rewards/margins": 9.946781158447266, + "rewards/rejected": -19.37967300415039, + "step": 14928 + }, + { + "epoch": 2.32, + "learning_rate": 3.197800715804762e-06, + "logits/chosen": -2.6911377906799316, + "logits/rejected": -1.822221040725708, + "logps/chosen": -1022.0027465820312, + "logps/rejected": -471.44781494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.306200981140137, + "rewards/margins": 13.758421897888184, + "rewards/rejected": -21.06462287902832, + "step": 14929 + }, + { + "epoch": 2.32, + "learning_rate": 3.197067275273614e-06, + "logits/chosen": -2.1708922386169434, + "logits/rejected": -2.711090326309204, + "logps/chosen": -427.8702392578125, + "logps/rejected": -498.42401123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.476077079772949, + "rewards/margins": 13.37015438079834, + "rewards/rejected": -18.84623146057129, + "step": 14930 + }, + { + "epoch": 2.32, + "learning_rate": 3.196333834742466e-06, + "logits/chosen": -2.4661865234375, + "logits/rejected": -2.8039629459381104, + "logps/chosen": -373.24090576171875, + "logps/rejected": -482.5301818847656, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.502509117126465, + "rewards/margins": 9.973942756652832, + "rewards/rejected": -15.476451873779297, + "step": 14931 + }, + { + "epoch": 2.32, + "learning_rate": 3.195600394211318e-06, + "logits/chosen": -2.7741713523864746, + "logits/rejected": -2.6265618801116943, + "logps/chosen": -486.55865478515625, + "logps/rejected": -415.4951171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.251568794250488, + "rewards/margins": 11.120881080627441, + "rewards/rejected": -20.37244987487793, + "step": 14932 + }, + { + "epoch": 2.32, + "learning_rate": 3.19486695368017e-06, + "logits/chosen": -1.0861620903015137, + "logits/rejected": -2.600968360900879, + "logps/chosen": -216.396240234375, + "logps/rejected": -743.686279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.614766120910645, + "rewards/margins": 10.60810661315918, + "rewards/rejected": -19.22287368774414, + "step": 14933 + }, + { + "epoch": 2.32, + "learning_rate": 3.1941335131490223e-06, + "logits/chosen": -2.300179958343506, + "logits/rejected": -2.428469181060791, + "logps/chosen": -602.3333740234375, + "logps/rejected": -508.2273254394531, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.188318252563477, + "rewards/margins": 8.4188871383667, + "rewards/rejected": -20.60720443725586, + "step": 14934 + }, + { + "epoch": 2.32, + "learning_rate": 3.193400072617874e-06, + "logits/chosen": -2.3648273944854736, + "logits/rejected": -2.561073064804077, + "logps/chosen": -374.69866943359375, + "logps/rejected": -705.8235473632812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.582586288452148, + "rewards/margins": 10.082871437072754, + "rewards/rejected": -18.66545867919922, + "step": 14935 + }, + { + "epoch": 2.32, + "learning_rate": 3.1926666320867265e-06, + "logits/chosen": -2.6348729133605957, + "logits/rejected": -2.253934621810913, + "logps/chosen": -236.62420654296875, + "logps/rejected": -411.62030029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.510810852050781, + "rewards/margins": 10.816736221313477, + "rewards/rejected": -20.327547073364258, + "step": 14936 + }, + { + "epoch": 2.32, + "learning_rate": 3.1919331915555788e-06, + "logits/chosen": -2.6743335723876953, + "logits/rejected": -1.8120019435882568, + "logps/chosen": -531.7911987304688, + "logps/rejected": -506.40789794921875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.168627738952637, + "rewards/margins": 9.138032913208008, + "rewards/rejected": -20.306659698486328, + "step": 14937 + }, + { + "epoch": 2.32, + "learning_rate": 3.191199751024431e-06, + "logits/chosen": -1.32040536403656, + "logits/rejected": -2.2990314960479736, + "logps/chosen": -238.6589813232422, + "logps/rejected": -438.40252685546875, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.537740707397461, + "rewards/margins": 6.174918174743652, + "rewards/rejected": -14.71265983581543, + "step": 14938 + }, + { + "epoch": 2.32, + "learning_rate": 3.190466310493283e-06, + "logits/chosen": -2.797720432281494, + "logits/rejected": -1.762790322303772, + "logps/chosen": -261.9690856933594, + "logps/rejected": -275.9139709472656, + "loss": 0.1119, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.84291934967041, + "rewards/margins": 7.49602746963501, + "rewards/rejected": -17.338947296142578, + "step": 14939 + }, + { + "epoch": 2.32, + "learning_rate": 3.189732869962135e-06, + "logits/chosen": -2.8493990898132324, + "logits/rejected": -2.5806660652160645, + "logps/chosen": -663.6848754882812, + "logps/rejected": -583.1473999023438, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.487486839294434, + "rewards/margins": 7.932097434997559, + "rewards/rejected": -16.419584274291992, + "step": 14940 + }, + { + "epoch": 2.32, + "learning_rate": 3.188999429430987e-06, + "logits/chosen": -2.428426504135132, + "logits/rejected": -2.5570156574249268, + "logps/chosen": -224.73336791992188, + "logps/rejected": -447.7412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.189373970031738, + "rewards/margins": 13.06765365600586, + "rewards/rejected": -20.25702667236328, + "step": 14941 + }, + { + "epoch": 2.32, + "learning_rate": 3.188265988899839e-06, + "logits/chosen": -2.6078579425811768, + "logits/rejected": -2.4003140926361084, + "logps/chosen": -374.743896484375, + "logps/rejected": -496.686767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.415494918823242, + "rewards/margins": 13.913373947143555, + "rewards/rejected": -23.328868865966797, + "step": 14942 + }, + { + "epoch": 2.32, + "learning_rate": 3.1875325483686913e-06, + "logits/chosen": -2.074918031692505, + "logits/rejected": -2.9751853942871094, + "logps/chosen": -354.47442626953125, + "logps/rejected": -417.579345703125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.570192337036133, + "rewards/margins": 8.55131721496582, + "rewards/rejected": -16.121509552001953, + "step": 14943 + }, + { + "epoch": 2.32, + "learning_rate": 3.186799107837543e-06, + "logits/chosen": -2.113372325897217, + "logits/rejected": -2.73017954826355, + "logps/chosen": -269.45855712890625, + "logps/rejected": -354.056396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.810258865356445, + "rewards/margins": 9.253058433532715, + "rewards/rejected": -17.063316345214844, + "step": 14944 + }, + { + "epoch": 2.32, + "learning_rate": 3.186065667306396e-06, + "logits/chosen": -1.3677706718444824, + "logits/rejected": -2.3687305450439453, + "logps/chosen": -218.70399475097656, + "logps/rejected": -839.6005249023438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.676668167114258, + "rewards/margins": 24.913286209106445, + "rewards/rejected": -33.5899543762207, + "step": 14945 + }, + { + "epoch": 2.32, + "learning_rate": 3.1853322267752478e-06, + "logits/chosen": -2.4516677856445312, + "logits/rejected": -2.573010206222534, + "logps/chosen": -366.2606506347656, + "logps/rejected": -486.0367126464844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.135171890258789, + "rewards/margins": 12.61104965209961, + "rewards/rejected": -21.74622344970703, + "step": 14946 + }, + { + "epoch": 2.32, + "learning_rate": 3.1845987862441e-06, + "logits/chosen": -2.242683172225952, + "logits/rejected": -2.4591288566589355, + "logps/chosen": -206.932861328125, + "logps/rejected": -417.2781982421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.716281890869141, + "rewards/margins": 11.863302230834961, + "rewards/rejected": -19.5795841217041, + "step": 14947 + }, + { + "epoch": 2.32, + "learning_rate": 3.183865345712952e-06, + "logits/chosen": -1.0543988943099976, + "logits/rejected": -2.80033540725708, + "logps/chosen": -296.7614440917969, + "logps/rejected": -646.0017700195312, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.932489395141602, + "rewards/margins": 10.692325592041016, + "rewards/rejected": -18.624814987182617, + "step": 14948 + }, + { + "epoch": 2.32, + "learning_rate": 3.1831319051818043e-06, + "logits/chosen": -1.6358369588851929, + "logits/rejected": -2.1662843227386475, + "logps/chosen": -140.6298065185547, + "logps/rejected": -407.1495056152344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.061274528503418, + "rewards/margins": 14.5103178024292, + "rewards/rejected": -22.571592330932617, + "step": 14949 + }, + { + "epoch": 2.33, + "learning_rate": 3.182398464650656e-06, + "logits/chosen": -0.662373960018158, + "logits/rejected": -2.8528850078582764, + "logps/chosen": -106.47441101074219, + "logps/rejected": -783.974853515625, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2703471183776855, + "rewards/margins": 15.863450050354004, + "rewards/rejected": -21.13379669189453, + "step": 14950 + }, + { + "epoch": 2.33, + "learning_rate": 3.181665024119508e-06, + "logits/chosen": -2.232839584350586, + "logits/rejected": -2.312366485595703, + "logps/chosen": -196.5828094482422, + "logps/rejected": -464.8000183105469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.175395965576172, + "rewards/margins": 14.43346118927002, + "rewards/rejected": -22.608856201171875, + "step": 14951 + }, + { + "epoch": 2.33, + "learning_rate": 3.1809315835883603e-06, + "logits/chosen": -2.4375643730163574, + "logits/rejected": -2.669196367263794, + "logps/chosen": -636.7737426757812, + "logps/rejected": -751.4692993164062, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.05777359008789, + "rewards/margins": 8.80128288269043, + "rewards/rejected": -19.859054565429688, + "step": 14952 + }, + { + "epoch": 2.33, + "learning_rate": 3.1801981430572126e-06, + "logits/chosen": -3.002147912979126, + "logits/rejected": -2.9066264629364014, + "logps/chosen": -234.35580444335938, + "logps/rejected": -469.92218017578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.215938091278076, + "rewards/margins": 9.129305839538574, + "rewards/rejected": -16.345243453979492, + "step": 14953 + }, + { + "epoch": 2.33, + "learning_rate": 3.179464702526065e-06, + "logits/chosen": -1.534664511680603, + "logits/rejected": -2.8410565853118896, + "logps/chosen": -267.8603820800781, + "logps/rejected": -579.22607421875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.401901245117188, + "rewards/margins": 7.389535427093506, + "rewards/rejected": -18.79143714904785, + "step": 14954 + }, + { + "epoch": 2.33, + "learning_rate": 3.178731261994917e-06, + "logits/chosen": -2.762979507446289, + "logits/rejected": -2.357759475708008, + "logps/chosen": -185.02993774414062, + "logps/rejected": -275.3064270019531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.954479217529297, + "rewards/margins": 12.154786109924316, + "rewards/rejected": -17.109264373779297, + "step": 14955 + }, + { + "epoch": 2.33, + "learning_rate": 3.177997821463769e-06, + "logits/chosen": -1.8613542318344116, + "logits/rejected": -2.163236379623413, + "logps/chosen": -502.06573486328125, + "logps/rejected": -542.024658203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8019700050354, + "rewards/margins": 9.196980476379395, + "rewards/rejected": -16.998950958251953, + "step": 14956 + }, + { + "epoch": 2.33, + "learning_rate": 3.177264380932621e-06, + "logits/chosen": -2.1687262058258057, + "logits/rejected": -2.583824634552002, + "logps/chosen": -144.04095458984375, + "logps/rejected": -287.06365966796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.878836631774902, + "rewards/margins": 9.754246711730957, + "rewards/rejected": -19.63308334350586, + "step": 14957 + }, + { + "epoch": 2.33, + "learning_rate": 3.1765309404014733e-06, + "logits/chosen": -2.287764072418213, + "logits/rejected": -2.784684181213379, + "logps/chosen": -187.44851684570312, + "logps/rejected": -263.02325439453125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.499942779541016, + "rewards/margins": 8.594039916992188, + "rewards/rejected": -16.093982696533203, + "step": 14958 + }, + { + "epoch": 2.33, + "learning_rate": 3.175797499870325e-06, + "logits/chosen": -1.768904685974121, + "logits/rejected": -2.662813186645508, + "logps/chosen": -226.97003173828125, + "logps/rejected": -415.5689392089844, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.147826194763184, + "rewards/margins": 7.808798313140869, + "rewards/rejected": -14.956624984741211, + "step": 14959 + }, + { + "epoch": 2.33, + "learning_rate": 3.175064059339177e-06, + "logits/chosen": -1.6740024089813232, + "logits/rejected": -2.8550055027008057, + "logps/chosen": -117.04718017578125, + "logps/rejected": -514.9542236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.539351463317871, + "rewards/margins": 15.1759614944458, + "rewards/rejected": -22.715312957763672, + "step": 14960 + }, + { + "epoch": 2.33, + "learning_rate": 3.1743306188080293e-06, + "logits/chosen": -1.0380350351333618, + "logits/rejected": -2.89404034614563, + "logps/chosen": -304.08660888671875, + "logps/rejected": -381.026611328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.363105773925781, + "rewards/margins": 9.811924934387207, + "rewards/rejected": -16.175031661987305, + "step": 14961 + }, + { + "epoch": 2.33, + "learning_rate": 3.1735971782768816e-06, + "logits/chosen": -2.855058193206787, + "logits/rejected": -2.82879900932312, + "logps/chosen": -163.18124389648438, + "logps/rejected": -426.40606689453125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.48508358001709, + "rewards/margins": 7.802648067474365, + "rewards/rejected": -16.287731170654297, + "step": 14962 + }, + { + "epoch": 2.33, + "learning_rate": 3.172863737745734e-06, + "logits/chosen": -2.780890464782715, + "logits/rejected": -2.751106023788452, + "logps/chosen": -391.3626708984375, + "logps/rejected": -411.91705322265625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.694785118103027, + "rewards/margins": 9.697225570678711, + "rewards/rejected": -18.392009735107422, + "step": 14963 + }, + { + "epoch": 2.33, + "learning_rate": 3.172130297214586e-06, + "logits/chosen": -2.140500068664551, + "logits/rejected": -2.3748292922973633, + "logps/chosen": -148.828857421875, + "logps/rejected": -387.03662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.813566207885742, + "rewards/margins": 11.068914413452148, + "rewards/rejected": -17.88248062133789, + "step": 14964 + }, + { + "epoch": 2.33, + "learning_rate": 3.171396856683438e-06, + "logits/chosen": -1.8146473169326782, + "logits/rejected": -2.5348076820373535, + "logps/chosen": -243.67898559570312, + "logps/rejected": -340.33685302734375, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.204015731811523, + "rewards/margins": 6.442084312438965, + "rewards/rejected": -14.646100997924805, + "step": 14965 + }, + { + "epoch": 2.33, + "learning_rate": 3.17066341615229e-06, + "logits/chosen": -2.10006046295166, + "logits/rejected": -2.3926546573638916, + "logps/chosen": -998.8230590820312, + "logps/rejected": -947.014404296875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.29437255859375, + "rewards/margins": 7.957721710205078, + "rewards/rejected": -17.252094268798828, + "step": 14966 + }, + { + "epoch": 2.33, + "learning_rate": 3.1699299756211423e-06, + "logits/chosen": -2.416919708251953, + "logits/rejected": -2.7335402965545654, + "logps/chosen": -368.08453369140625, + "logps/rejected": -476.4638671875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.318398952484131, + "rewards/margins": 11.012744903564453, + "rewards/rejected": -17.331144332885742, + "step": 14967 + }, + { + "epoch": 2.33, + "learning_rate": 3.169196535089994e-06, + "logits/chosen": -1.7089757919311523, + "logits/rejected": -2.2443714141845703, + "logps/chosen": -192.95452880859375, + "logps/rejected": -364.2105712890625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.822511672973633, + "rewards/margins": 10.559852600097656, + "rewards/rejected": -20.38236427307129, + "step": 14968 + }, + { + "epoch": 2.33, + "learning_rate": 3.168463094558846e-06, + "logits/chosen": -2.6946067810058594, + "logits/rejected": -2.206143617630005, + "logps/chosen": -255.04595947265625, + "logps/rejected": -459.7847900390625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.589134216308594, + "rewards/margins": 11.833545684814453, + "rewards/rejected": -25.422679901123047, + "step": 14969 + }, + { + "epoch": 2.33, + "learning_rate": 3.167729654027699e-06, + "logits/chosen": -2.3498709201812744, + "logits/rejected": -2.739718437194824, + "logps/chosen": -147.1739501953125, + "logps/rejected": -217.94813537597656, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.616567611694336, + "rewards/margins": 7.411904811859131, + "rewards/rejected": -15.028472900390625, + "step": 14970 + }, + { + "epoch": 2.33, + "learning_rate": 3.1669962134965507e-06, + "logits/chosen": -2.0086007118225098, + "logits/rejected": -2.6911914348602295, + "logps/chosen": -441.6661682128906, + "logps/rejected": -488.4326171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.179847717285156, + "rewards/margins": 9.747272491455078, + "rewards/rejected": -18.927120208740234, + "step": 14971 + }, + { + "epoch": 2.33, + "learning_rate": 3.166262772965403e-06, + "logits/chosen": -1.2579524517059326, + "logits/rejected": -2.3767478466033936, + "logps/chosen": -156.70849609375, + "logps/rejected": -344.45916748046875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.441157341003418, + "rewards/margins": 8.406370162963867, + "rewards/rejected": -17.84752655029297, + "step": 14972 + }, + { + "epoch": 2.33, + "learning_rate": 3.165529332434255e-06, + "logits/chosen": -2.8860561847686768, + "logits/rejected": -2.623840093612671, + "logps/chosen": -475.89599609375, + "logps/rejected": -449.59051513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.490013122558594, + "rewards/margins": 10.386547088623047, + "rewards/rejected": -19.87656021118164, + "step": 14973 + }, + { + "epoch": 2.33, + "learning_rate": 3.164795891903107e-06, + "logits/chosen": -2.2291088104248047, + "logits/rejected": -2.921217679977417, + "logps/chosen": -146.8556365966797, + "logps/rejected": -391.2557373046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.029163360595703, + "rewards/margins": 9.433649063110352, + "rewards/rejected": -16.462812423706055, + "step": 14974 + }, + { + "epoch": 2.33, + "learning_rate": 3.164062451371959e-06, + "logits/chosen": -2.334035634994507, + "logits/rejected": -2.707515239715576, + "logps/chosen": -633.5084228515625, + "logps/rejected": -507.86810302734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.676856994628906, + "rewards/margins": 8.90246295928955, + "rewards/rejected": -16.57931900024414, + "step": 14975 + }, + { + "epoch": 2.33, + "learning_rate": 3.1633290108408113e-06, + "logits/chosen": -0.504515528678894, + "logits/rejected": -2.631551504135132, + "logps/chosen": -201.29037475585938, + "logps/rejected": -692.60693359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.835476875305176, + "rewards/margins": 8.913250923156738, + "rewards/rejected": -20.748727798461914, + "step": 14976 + }, + { + "epoch": 2.33, + "learning_rate": 3.162595570309663e-06, + "logits/chosen": -2.7603068351745605, + "logits/rejected": -2.627748489379883, + "logps/chosen": -298.9131774902344, + "logps/rejected": -508.8084716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9118242263793945, + "rewards/margins": 11.326895713806152, + "rewards/rejected": -18.238719940185547, + "step": 14977 + }, + { + "epoch": 2.33, + "learning_rate": 3.1618621297785155e-06, + "logits/chosen": -2.7910890579223633, + "logits/rejected": -2.976414442062378, + "logps/chosen": -173.76364135742188, + "logps/rejected": -341.6248474121094, + "loss": 0.0451, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.078227996826172, + "rewards/margins": 5.281101226806641, + "rewards/rejected": -13.359329223632812, + "step": 14978 + }, + { + "epoch": 2.33, + "learning_rate": 3.161128689247368e-06, + "logits/chosen": -1.8947737216949463, + "logits/rejected": -2.3454480171203613, + "logps/chosen": -150.62841796875, + "logps/rejected": -315.0412292480469, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.655965805053711, + "rewards/margins": 10.292251586914062, + "rewards/rejected": -20.948219299316406, + "step": 14979 + }, + { + "epoch": 2.33, + "learning_rate": 3.1603952487162197e-06, + "logits/chosen": -1.7719762325286865, + "logits/rejected": -2.3934481143951416, + "logps/chosen": -166.7953338623047, + "logps/rejected": -293.5095520019531, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.474288940429688, + "rewards/margins": 8.660417556762695, + "rewards/rejected": -17.134706497192383, + "step": 14980 + }, + { + "epoch": 2.33, + "learning_rate": 3.159661808185072e-06, + "logits/chosen": -2.1537766456604004, + "logits/rejected": -2.2614147663116455, + "logps/chosen": -236.18667602539062, + "logps/rejected": -497.41009521484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.253423690795898, + "rewards/margins": 13.285126686096191, + "rewards/rejected": -26.538551330566406, + "step": 14981 + }, + { + "epoch": 2.33, + "learning_rate": 3.158928367653924e-06, + "logits/chosen": -2.561723232269287, + "logits/rejected": -2.5160000324249268, + "logps/chosen": -537.6614990234375, + "logps/rejected": -650.0345458984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.239046096801758, + "rewards/margins": 9.534770011901855, + "rewards/rejected": -18.773815155029297, + "step": 14982 + }, + { + "epoch": 2.33, + "learning_rate": 3.158194927122776e-06, + "logits/chosen": -0.9850117564201355, + "logits/rejected": -2.861001491546631, + "logps/chosen": -225.17913818359375, + "logps/rejected": -449.33563232421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.430853843688965, + "rewards/margins": 11.233963012695312, + "rewards/rejected": -18.664817810058594, + "step": 14983 + }, + { + "epoch": 2.33, + "learning_rate": 3.157461486591628e-06, + "logits/chosen": -2.528475761413574, + "logits/rejected": -2.884877920150757, + "logps/chosen": -160.92971801757812, + "logps/rejected": -387.43121337890625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.034285545349121, + "rewards/margins": 9.595995903015137, + "rewards/rejected": -15.630281448364258, + "step": 14984 + }, + { + "epoch": 2.33, + "learning_rate": 3.1567280460604804e-06, + "logits/chosen": -2.4652090072631836, + "logits/rejected": -2.8381872177124023, + "logps/chosen": -644.9437866210938, + "logps/rejected": -580.8028564453125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3173933029174805, + "rewards/margins": 7.887630462646484, + "rewards/rejected": -14.205023765563965, + "step": 14985 + }, + { + "epoch": 2.33, + "learning_rate": 3.1559946055293322e-06, + "logits/chosen": -2.5981333255767822, + "logits/rejected": -2.706247329711914, + "logps/chosen": -283.31256103515625, + "logps/rejected": -331.0138244628906, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.797883033752441, + "rewards/margins": 6.5317230224609375, + "rewards/rejected": -14.329606056213379, + "step": 14986 + }, + { + "epoch": 2.33, + "learning_rate": 3.155261164998185e-06, + "logits/chosen": -2.0343220233917236, + "logits/rejected": -2.820207118988037, + "logps/chosen": -446.5971984863281, + "logps/rejected": -599.6973266601562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.529586791992188, + "rewards/margins": 10.329814910888672, + "rewards/rejected": -18.85940170288086, + "step": 14987 + }, + { + "epoch": 2.33, + "learning_rate": 3.154527724467037e-06, + "logits/chosen": -2.514975070953369, + "logits/rejected": -2.3226683139801025, + "logps/chosen": -758.3926391601562, + "logps/rejected": -286.7073974609375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.548791885375977, + "rewards/margins": 9.4735107421875, + "rewards/rejected": -14.022302627563477, + "step": 14988 + }, + { + "epoch": 2.33, + "learning_rate": 3.1537942839358887e-06, + "logits/chosen": -2.955202341079712, + "logits/rejected": -3.133139133453369, + "logps/chosen": -410.5638427734375, + "logps/rejected": -504.52874755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.691930770874023, + "rewards/margins": 13.70379638671875, + "rewards/rejected": -21.395727157592773, + "step": 14989 + }, + { + "epoch": 2.33, + "learning_rate": 3.153060843404741e-06, + "logits/chosen": -2.233839750289917, + "logits/rejected": -2.683220863342285, + "logps/chosen": -597.9228515625, + "logps/rejected": -754.6248168945312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.860812187194824, + "rewards/margins": 13.162520408630371, + "rewards/rejected": -22.023332595825195, + "step": 14990 + }, + { + "epoch": 2.33, + "learning_rate": 3.152327402873593e-06, + "logits/chosen": -2.8781800270080566, + "logits/rejected": -2.76863694190979, + "logps/chosen": -606.2920532226562, + "logps/rejected": -555.1673583984375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.490501403808594, + "rewards/margins": 10.081884384155273, + "rewards/rejected": -16.572385787963867, + "step": 14991 + }, + { + "epoch": 2.33, + "learning_rate": 3.151593962342445e-06, + "logits/chosen": -1.8567036390304565, + "logits/rejected": -2.919978618621826, + "logps/chosen": -189.4588623046875, + "logps/rejected": -466.5103454589844, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.333196640014648, + "rewards/margins": 6.519026756286621, + "rewards/rejected": -14.85222339630127, + "step": 14992 + }, + { + "epoch": 2.33, + "learning_rate": 3.150860521811297e-06, + "logits/chosen": -2.682102680206299, + "logits/rejected": -2.67889142036438, + "logps/chosen": -402.8117980957031, + "logps/rejected": -319.1324462890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.90615463256836, + "rewards/margins": 8.847251892089844, + "rewards/rejected": -17.753406524658203, + "step": 14993 + }, + { + "epoch": 2.33, + "learning_rate": 3.1501270812801494e-06, + "logits/chosen": -1.5603471994400024, + "logits/rejected": -2.710771322250366, + "logps/chosen": -309.30230712890625, + "logps/rejected": -460.51824951171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.137775421142578, + "rewards/margins": 9.402746200561523, + "rewards/rejected": -19.5405216217041, + "step": 14994 + }, + { + "epoch": 2.33, + "learning_rate": 3.1493936407490017e-06, + "logits/chosen": -2.693666696548462, + "logits/rejected": -1.8303217887878418, + "logps/chosen": -1077.39892578125, + "logps/rejected": -661.94677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.947308540344238, + "rewards/margins": 11.710773468017578, + "rewards/rejected": -18.6580810546875, + "step": 14995 + }, + { + "epoch": 2.33, + "learning_rate": 3.148660200217854e-06, + "logits/chosen": -1.8366047143936157, + "logits/rejected": -2.558882474899292, + "logps/chosen": -342.36199951171875, + "logps/rejected": -515.441162109375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.342936515808105, + "rewards/margins": 12.495829582214355, + "rewards/rejected": -22.83876609802246, + "step": 14996 + }, + { + "epoch": 2.33, + "learning_rate": 3.147926759686706e-06, + "logits/chosen": -1.80851411819458, + "logits/rejected": -2.693396806716919, + "logps/chosen": -108.01868438720703, + "logps/rejected": -273.50555419921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.411850929260254, + "rewards/margins": 8.602962493896484, + "rewards/rejected": -16.014814376831055, + "step": 14997 + }, + { + "epoch": 2.33, + "learning_rate": 3.147193319155558e-06, + "logits/chosen": -2.47040057182312, + "logits/rejected": -2.6472301483154297, + "logps/chosen": -366.588134765625, + "logps/rejected": -465.7727355957031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.33298397064209, + "rewards/margins": 10.015113830566406, + "rewards/rejected": -21.348098754882812, + "step": 14998 + }, + { + "epoch": 2.33, + "learning_rate": 3.14645987862441e-06, + "logits/chosen": -1.211634635925293, + "logits/rejected": -2.5884134769439697, + "logps/chosen": -283.91864013671875, + "logps/rejected": -701.9763793945312, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.990944862365723, + "rewards/margins": 7.474448204040527, + "rewards/rejected": -21.46539306640625, + "step": 14999 + }, + { + "epoch": 2.33, + "learning_rate": 3.145726438093262e-06, + "logits/chosen": -1.7341557741165161, + "logits/rejected": -2.5720865726470947, + "logps/chosen": -161.16513061523438, + "logps/rejected": -357.0016174316406, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.515615463256836, + "rewards/margins": 8.699073791503906, + "rewards/rejected": -18.214689254760742, + "step": 15000 + }, + { + "epoch": 2.33, + "learning_rate": 3.1449929975621142e-06, + "logits/chosen": -2.4969611167907715, + "logits/rejected": -1.6513843536376953, + "logps/chosen": -460.5055847167969, + "logps/rejected": -462.228271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.942132949829102, + "rewards/margins": 11.201177597045898, + "rewards/rejected": -20.143310546875, + "step": 15001 + }, + { + "epoch": 2.33, + "learning_rate": 3.144259557030966e-06, + "logits/chosen": -1.3683255910873413, + "logits/rejected": -1.7515243291854858, + "logps/chosen": -217.5354461669922, + "logps/rejected": -439.2337341308594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.548073768615723, + "rewards/margins": 8.98782730102539, + "rewards/rejected": -18.535900115966797, + "step": 15002 + }, + { + "epoch": 2.33, + "learning_rate": 3.1435261164998184e-06, + "logits/chosen": -2.428131580352783, + "logits/rejected": -2.4801576137542725, + "logps/chosen": -361.985107421875, + "logps/rejected": -419.54766845703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.805243492126465, + "rewards/margins": 9.848095893859863, + "rewards/rejected": -21.653339385986328, + "step": 15003 + }, + { + "epoch": 2.33, + "learning_rate": 3.1427926759686707e-06, + "logits/chosen": -0.8162854313850403, + "logits/rejected": -2.641099214553833, + "logps/chosen": -183.75491333007812, + "logps/rejected": -658.0286254882812, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.530770301818848, + "rewards/margins": 8.085718154907227, + "rewards/rejected": -16.61648941040039, + "step": 15004 + }, + { + "epoch": 2.33, + "learning_rate": 3.142059235437523e-06, + "logits/chosen": -2.888056516647339, + "logits/rejected": -2.9640538692474365, + "logps/chosen": -138.24769592285156, + "logps/rejected": -389.1436767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.706982612609863, + "rewards/margins": 15.875946998596191, + "rewards/rejected": -21.582929611206055, + "step": 15005 + }, + { + "epoch": 2.33, + "learning_rate": 3.141325794906375e-06, + "logits/chosen": -2.573204755783081, + "logits/rejected": -1.1237608194351196, + "logps/chosen": -600.0300903320312, + "logps/rejected": -311.64202880859375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.339803695678711, + "rewards/margins": 7.011880874633789, + "rewards/rejected": -19.3516845703125, + "step": 15006 + }, + { + "epoch": 2.33, + "learning_rate": 3.140592354375227e-06, + "logits/chosen": -0.5176928639411926, + "logits/rejected": -2.2102036476135254, + "logps/chosen": -175.39334106445312, + "logps/rejected": -609.7901611328125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.237168312072754, + "rewards/margins": 14.44381332397461, + "rewards/rejected": -25.680980682373047, + "step": 15007 + }, + { + "epoch": 2.33, + "learning_rate": 3.139858913844079e-06, + "logits/chosen": -1.0654429197311401, + "logits/rejected": -2.6660079956054688, + "logps/chosen": -169.33587646484375, + "logps/rejected": -506.2376403808594, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.136470794677734, + "rewards/margins": 8.044402122497559, + "rewards/rejected": -20.18087387084961, + "step": 15008 + }, + { + "epoch": 2.33, + "learning_rate": 3.139125473312931e-06, + "logits/chosen": -1.374788761138916, + "logits/rejected": -2.5308711528778076, + "logps/chosen": -240.01461791992188, + "logps/rejected": -491.2986145019531, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.28648853302002, + "rewards/margins": 10.903932571411133, + "rewards/rejected": -19.19042205810547, + "step": 15009 + }, + { + "epoch": 2.33, + "learning_rate": 3.1383920327817832e-06, + "logits/chosen": -1.7859408855438232, + "logits/rejected": -2.61964750289917, + "logps/chosen": -271.32940673828125, + "logps/rejected": -240.77200317382812, + "loss": 0.1135, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.900748252868652, + "rewards/margins": 3.8496384620666504, + "rewards/rejected": -11.750387191772461, + "step": 15010 + }, + { + "epoch": 2.33, + "learning_rate": 3.137658592250635e-06, + "logits/chosen": -1.0977579355239868, + "logits/rejected": -2.4256460666656494, + "logps/chosen": -251.73648071289062, + "logps/rejected": -534.069580078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.894609451293945, + "rewards/margins": 11.513587951660156, + "rewards/rejected": -24.408199310302734, + "step": 15011 + }, + { + "epoch": 2.33, + "learning_rate": 3.136925151719488e-06, + "logits/chosen": -1.971592903137207, + "logits/rejected": -2.3035953044891357, + "logps/chosen": -224.74436950683594, + "logps/rejected": -365.5801086425781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.860270023345947, + "rewards/margins": 9.425285339355469, + "rewards/rejected": -17.285554885864258, + "step": 15012 + }, + { + "epoch": 2.33, + "learning_rate": 3.1361917111883397e-06, + "logits/chosen": -2.689232110977173, + "logits/rejected": -2.8442165851593018, + "logps/chosen": -725.9340209960938, + "logps/rejected": -770.7950439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5804057121276855, + "rewards/margins": 12.182584762573242, + "rewards/rejected": -18.762990951538086, + "step": 15013 + }, + { + "epoch": 2.33, + "learning_rate": 3.135458270657192e-06, + "logits/chosen": -2.28279972076416, + "logits/rejected": -2.5470130443573, + "logps/chosen": -145.9197998046875, + "logps/rejected": -275.479248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.602860450744629, + "rewards/margins": 12.395462989807129, + "rewards/rejected": -19.998323440551758, + "step": 15014 + }, + { + "epoch": 2.34, + "learning_rate": 3.134724830126044e-06, + "logits/chosen": -1.7918810844421387, + "logits/rejected": -2.430380344390869, + "logps/chosen": -307.8538818359375, + "logps/rejected": -477.7314453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.81160306930542, + "rewards/margins": 8.657320022583008, + "rewards/rejected": -15.46892261505127, + "step": 15015 + }, + { + "epoch": 2.34, + "learning_rate": 3.133991389594896e-06, + "logits/chosen": -2.015050172805786, + "logits/rejected": -2.8516483306884766, + "logps/chosen": -121.92897033691406, + "logps/rejected": -492.02203369140625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.175782203674316, + "rewards/margins": 7.254813194274902, + "rewards/rejected": -17.43059539794922, + "step": 15016 + }, + { + "epoch": 2.34, + "learning_rate": 3.133257949063748e-06, + "logits/chosen": -2.934915065765381, + "logits/rejected": -2.9866418838500977, + "logps/chosen": -526.9395751953125, + "logps/rejected": -524.066650390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.797429084777832, + "rewards/margins": 12.194038391113281, + "rewards/rejected": -21.991466522216797, + "step": 15017 + }, + { + "epoch": 2.34, + "learning_rate": 3.1325245085326e-06, + "logits/chosen": -0.7842480540275574, + "logits/rejected": -2.5905401706695557, + "logps/chosen": -111.10786437988281, + "logps/rejected": -840.7877197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.195511341094971, + "rewards/margins": 21.67892074584961, + "rewards/rejected": -28.874431610107422, + "step": 15018 + }, + { + "epoch": 2.34, + "learning_rate": 3.1317910680014523e-06, + "logits/chosen": -1.9502795934677124, + "logits/rejected": -2.5434021949768066, + "logps/chosen": -421.10882568359375, + "logps/rejected": -420.57733154296875, + "loss": 0.1384, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.56341552734375, + "rewards/margins": 7.166796684265137, + "rewards/rejected": -17.73021125793457, + "step": 15019 + }, + { + "epoch": 2.34, + "learning_rate": 3.1310576274703046e-06, + "logits/chosen": -2.8360164165496826, + "logits/rejected": -2.3200461864471436, + "logps/chosen": -291.1750793457031, + "logps/rejected": -170.76556396484375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.80103874206543, + "rewards/margins": 7.221630096435547, + "rewards/rejected": -14.022668838500977, + "step": 15020 + }, + { + "epoch": 2.34, + "learning_rate": 3.130324186939157e-06, + "logits/chosen": -2.4529762268066406, + "logits/rejected": -2.421445369720459, + "logps/chosen": -621.5482788085938, + "logps/rejected": -466.2705078125, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.261764526367188, + "rewards/margins": 6.193695068359375, + "rewards/rejected": -15.455459594726562, + "step": 15021 + }, + { + "epoch": 2.34, + "learning_rate": 3.1295907464080087e-06, + "logits/chosen": -2.4993743896484375, + "logits/rejected": -2.200124740600586, + "logps/chosen": -370.6792297363281, + "logps/rejected": -351.605712890625, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.335372924804688, + "rewards/margins": 6.127612590789795, + "rewards/rejected": -16.46298599243164, + "step": 15022 + }, + { + "epoch": 2.34, + "learning_rate": 3.128857305876861e-06, + "logits/chosen": -2.258277177810669, + "logits/rejected": -2.747514009475708, + "logps/chosen": -680.5850219726562, + "logps/rejected": -740.1527099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.52071762084961, + "rewards/margins": 10.167537689208984, + "rewards/rejected": -21.688255310058594, + "step": 15023 + }, + { + "epoch": 2.34, + "learning_rate": 3.128123865345713e-06, + "logits/chosen": -1.7647579908370972, + "logits/rejected": -2.4877278804779053, + "logps/chosen": -439.63592529296875, + "logps/rejected": -596.8424072265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.95396614074707, + "rewards/margins": 9.930418968200684, + "rewards/rejected": -23.884384155273438, + "step": 15024 + }, + { + "epoch": 2.34, + "learning_rate": 3.1273904248145652e-06, + "logits/chosen": -2.533388376235962, + "logits/rejected": -0.9929943084716797, + "logps/chosen": -267.3072509765625, + "logps/rejected": -307.2290344238281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.983111381530762, + "rewards/margins": 13.338994979858398, + "rewards/rejected": -21.322105407714844, + "step": 15025 + }, + { + "epoch": 2.34, + "learning_rate": 3.126656984283417e-06, + "logits/chosen": -2.71946382522583, + "logits/rejected": -2.8932926654815674, + "logps/chosen": -148.56661987304688, + "logps/rejected": -494.8214111328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.533100128173828, + "rewards/margins": 12.187284469604492, + "rewards/rejected": -23.72038459777832, + "step": 15026 + }, + { + "epoch": 2.34, + "learning_rate": 3.1259235437522694e-06, + "logits/chosen": -2.43819522857666, + "logits/rejected": -2.1794004440307617, + "logps/chosen": -277.52520751953125, + "logps/rejected": -359.793701171875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.898853302001953, + "rewards/margins": 6.85727071762085, + "rewards/rejected": -18.756122589111328, + "step": 15027 + }, + { + "epoch": 2.34, + "learning_rate": 3.1251901032211213e-06, + "logits/chosen": -2.9655532836914062, + "logits/rejected": -2.7731502056121826, + "logps/chosen": -359.59674072265625, + "logps/rejected": -428.6702880859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.293706893920898, + "rewards/margins": 10.3803071975708, + "rewards/rejected": -19.674015045166016, + "step": 15028 + }, + { + "epoch": 2.34, + "learning_rate": 3.1244566626899736e-06, + "logits/chosen": -0.7865871787071228, + "logits/rejected": -2.3957138061523438, + "logps/chosen": -157.74639892578125, + "logps/rejected": -683.961669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.035472869873047, + "rewards/margins": 15.943843841552734, + "rewards/rejected": -25.97931671142578, + "step": 15029 + }, + { + "epoch": 2.34, + "learning_rate": 3.123723222158826e-06, + "logits/chosen": -2.5983927249908447, + "logits/rejected": -1.8896815776824951, + "logps/chosen": -392.16754150390625, + "logps/rejected": -340.7082824707031, + "loss": 0.0864, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.824068069458008, + "rewards/margins": 5.501399040222168, + "rewards/rejected": -14.325468063354492, + "step": 15030 + }, + { + "epoch": 2.34, + "learning_rate": 3.1229897816276778e-06, + "logits/chosen": -2.6094939708709717, + "logits/rejected": -1.7918225526809692, + "logps/chosen": -317.241943359375, + "logps/rejected": -225.4541473388672, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.679596900939941, + "rewards/margins": 6.7361531257629395, + "rewards/rejected": -14.415750503540039, + "step": 15031 + }, + { + "epoch": 2.34, + "learning_rate": 3.12225634109653e-06, + "logits/chosen": -2.750699043273926, + "logits/rejected": -2.3942999839782715, + "logps/chosen": -747.3358764648438, + "logps/rejected": -712.7066650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.547683715820312, + "rewards/margins": 11.964736938476562, + "rewards/rejected": -23.512420654296875, + "step": 15032 + }, + { + "epoch": 2.34, + "learning_rate": 3.121522900565382e-06, + "logits/chosen": -1.1648051738739014, + "logits/rejected": -2.6431450843811035, + "logps/chosen": -208.56118774414062, + "logps/rejected": -438.06689453125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.596506118774414, + "rewards/margins": 8.405838966369629, + "rewards/rejected": -18.00234603881836, + "step": 15033 + }, + { + "epoch": 2.34, + "learning_rate": 3.1207894600342342e-06, + "logits/chosen": -2.819502353668213, + "logits/rejected": -1.908272385597229, + "logps/chosen": -647.2843627929688, + "logps/rejected": -340.002685546875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.606093406677246, + "rewards/margins": 6.922247409820557, + "rewards/rejected": -17.52834129333496, + "step": 15034 + }, + { + "epoch": 2.34, + "learning_rate": 3.120056019503086e-06, + "logits/chosen": -2.9000327587127686, + "logits/rejected": -2.3377325534820557, + "logps/chosen": -571.1148071289062, + "logps/rejected": -508.04730224609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.517879486083984, + "rewards/margins": 11.612982749938965, + "rewards/rejected": -17.130863189697266, + "step": 15035 + }, + { + "epoch": 2.34, + "learning_rate": 3.1193225789719384e-06, + "logits/chosen": -2.0511958599090576, + "logits/rejected": -2.2062606811523438, + "logps/chosen": -229.1789093017578, + "logps/rejected": -243.5721435546875, + "loss": 0.1384, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.872809410095215, + "rewards/margins": 3.6555399894714355, + "rewards/rejected": -15.528348922729492, + "step": 15036 + }, + { + "epoch": 2.34, + "learning_rate": 3.1185891384407903e-06, + "logits/chosen": -2.926243782043457, + "logits/rejected": -2.2752227783203125, + "logps/chosen": -453.1655578613281, + "logps/rejected": -289.26812744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6155471801757812, + "rewards/margins": 10.979888916015625, + "rewards/rejected": -14.595436096191406, + "step": 15037 + }, + { + "epoch": 2.34, + "learning_rate": 3.1178556979096426e-06, + "logits/chosen": -1.6669526100158691, + "logits/rejected": -2.772810697555542, + "logps/chosen": -487.1976318359375, + "logps/rejected": -677.8038330078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.674593925476074, + "rewards/margins": 11.227426528930664, + "rewards/rejected": -22.902019500732422, + "step": 15038 + }, + { + "epoch": 2.34, + "learning_rate": 3.117122257378495e-06, + "logits/chosen": -1.9883469343185425, + "logits/rejected": -2.7069590091705322, + "logps/chosen": -509.9583740234375, + "logps/rejected": -1015.821044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.501609802246094, + "rewards/margins": 12.251745223999023, + "rewards/rejected": -22.753353118896484, + "step": 15039 + }, + { + "epoch": 2.34, + "learning_rate": 3.1163888168473468e-06, + "logits/chosen": -2.5093679428100586, + "logits/rejected": -2.567978620529175, + "logps/chosen": -341.9788513183594, + "logps/rejected": -478.2480773925781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.578485488891602, + "rewards/margins": 12.22784423828125, + "rewards/rejected": -20.806331634521484, + "step": 15040 + }, + { + "epoch": 2.34, + "learning_rate": 3.115655376316199e-06, + "logits/chosen": -1.8794838190078735, + "logits/rejected": -2.4606757164001465, + "logps/chosen": -166.1442108154297, + "logps/rejected": -280.0106506347656, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.554218292236328, + "rewards/margins": 8.264034271240234, + "rewards/rejected": -18.818252563476562, + "step": 15041 + }, + { + "epoch": 2.34, + "learning_rate": 3.114921935785051e-06, + "logits/chosen": -1.7216707468032837, + "logits/rejected": -2.4704127311706543, + "logps/chosen": -220.4911651611328, + "logps/rejected": -428.94635009765625, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.307458877563477, + "rewards/margins": 6.381833553314209, + "rewards/rejected": -15.689292907714844, + "step": 15042 + }, + { + "epoch": 2.34, + "learning_rate": 3.1141884952539033e-06, + "logits/chosen": -2.359712600708008, + "logits/rejected": -1.9223772287368774, + "logps/chosen": -460.77911376953125, + "logps/rejected": -539.1447143554688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.975214958190918, + "rewards/margins": 8.956490516662598, + "rewards/rejected": -24.931705474853516, + "step": 15043 + }, + { + "epoch": 2.34, + "learning_rate": 3.113455054722755e-06, + "logits/chosen": -2.5716655254364014, + "logits/rejected": -2.7663393020629883, + "logps/chosen": -134.70098876953125, + "logps/rejected": -183.2318878173828, + "loss": 1.0001, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.58132266998291, + "rewards/margins": 2.9093832969665527, + "rewards/rejected": -12.490705490112305, + "step": 15044 + }, + { + "epoch": 2.34, + "learning_rate": 3.1127216141916074e-06, + "logits/chosen": -2.5979230403900146, + "logits/rejected": -1.4619609117507935, + "logps/chosen": -522.484130859375, + "logps/rejected": -405.85577392578125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -16.247739791870117, + "rewards/margins": 6.0136566162109375, + "rewards/rejected": -22.261396408081055, + "step": 15045 + }, + { + "epoch": 2.34, + "learning_rate": 3.1119881736604598e-06, + "logits/chosen": -2.098605155944824, + "logits/rejected": -2.8989078998565674, + "logps/chosen": -197.02914428710938, + "logps/rejected": -269.4744873046875, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.947980880737305, + "rewards/margins": 4.444334506988525, + "rewards/rejected": -13.392314910888672, + "step": 15046 + }, + { + "epoch": 2.34, + "learning_rate": 3.111254733129312e-06, + "logits/chosen": -2.526000738143921, + "logits/rejected": -2.6107308864593506, + "logps/chosen": -162.25653076171875, + "logps/rejected": -319.8985290527344, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.2622652053833, + "rewards/margins": 8.315946578979492, + "rewards/rejected": -16.57821273803711, + "step": 15047 + }, + { + "epoch": 2.34, + "learning_rate": 3.110521292598164e-06, + "logits/chosen": -1.667110800743103, + "logits/rejected": -2.569384813308716, + "logps/chosen": -148.6888427734375, + "logps/rejected": -518.7454833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.841300964355469, + "rewards/margins": 15.270957946777344, + "rewards/rejected": -26.112258911132812, + "step": 15048 + }, + { + "epoch": 2.34, + "learning_rate": 3.109787852067016e-06, + "logits/chosen": -2.8011324405670166, + "logits/rejected": -2.425126791000366, + "logps/chosen": -297.44036865234375, + "logps/rejected": -293.8669738769531, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3073601722717285, + "rewards/margins": 6.412972450256348, + "rewards/rejected": -10.720333099365234, + "step": 15049 + }, + { + "epoch": 2.34, + "learning_rate": 3.109054411535868e-06, + "logits/chosen": -2.6984710693359375, + "logits/rejected": -2.92073655128479, + "logps/chosen": -160.47012329101562, + "logps/rejected": -400.2024841308594, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.276652336120605, + "rewards/margins": 6.91471004486084, + "rewards/rejected": -17.191362380981445, + "step": 15050 + }, + { + "epoch": 2.34, + "learning_rate": 3.10832097100472e-06, + "logits/chosen": -1.3537544012069702, + "logits/rejected": -2.5745339393615723, + "logps/chosen": -364.124267578125, + "logps/rejected": -542.7631225585938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.039403915405273, + "rewards/margins": 10.742504119873047, + "rewards/rejected": -20.78190803527832, + "step": 15051 + }, + { + "epoch": 2.34, + "learning_rate": 3.1075875304735723e-06, + "logits/chosen": -2.7967376708984375, + "logits/rejected": -2.9544098377227783, + "logps/chosen": -464.7419128417969, + "logps/rejected": -595.3959350585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.724115371704102, + "rewards/margins": 12.679490089416504, + "rewards/rejected": -17.403606414794922, + "step": 15052 + }, + { + "epoch": 2.34, + "learning_rate": 3.106854089942424e-06, + "logits/chosen": -2.2576169967651367, + "logits/rejected": -2.299651861190796, + "logps/chosen": -364.6292724609375, + "logps/rejected": -394.58343505859375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4371538162231445, + "rewards/margins": 7.70994234085083, + "rewards/rejected": -15.147096633911133, + "step": 15053 + }, + { + "epoch": 2.34, + "learning_rate": 3.1061206494112765e-06, + "logits/chosen": -2.3216941356658936, + "logits/rejected": -2.8152825832366943, + "logps/chosen": -206.57130432128906, + "logps/rejected": -277.405517578125, + "loss": 0.1439, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.343070983886719, + "rewards/margins": 7.368125915527344, + "rewards/rejected": -16.711196899414062, + "step": 15054 + }, + { + "epoch": 2.34, + "learning_rate": 3.1053872088801288e-06, + "logits/chosen": -2.4317502975463867, + "logits/rejected": -2.2496042251586914, + "logps/chosen": -186.23985290527344, + "logps/rejected": -191.60179138183594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1314167976379395, + "rewards/margins": 10.502613067626953, + "rewards/rejected": -14.634029388427734, + "step": 15055 + }, + { + "epoch": 2.34, + "learning_rate": 3.104653768348981e-06, + "logits/chosen": -2.6857354640960693, + "logits/rejected": -2.6962192058563232, + "logps/chosen": -185.88072204589844, + "logps/rejected": -210.52664184570312, + "loss": 0.578, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.701629638671875, + "rewards/margins": 3.452263593673706, + "rewards/rejected": -11.15389347076416, + "step": 15056 + }, + { + "epoch": 2.34, + "learning_rate": 3.103920327817833e-06, + "logits/chosen": -2.4876418113708496, + "logits/rejected": -2.7137746810913086, + "logps/chosen": -486.8012390136719, + "logps/rejected": -456.11572265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.149321556091309, + "rewards/margins": 8.785381317138672, + "rewards/rejected": -17.934703826904297, + "step": 15057 + }, + { + "epoch": 2.34, + "learning_rate": 3.103186887286685e-06, + "logits/chosen": -1.654069185256958, + "logits/rejected": -2.868844747543335, + "logps/chosen": -312.71563720703125, + "logps/rejected": -280.3287658691406, + "loss": 0.0807, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.250497341156006, + "rewards/margins": 8.423738479614258, + "rewards/rejected": -13.674236297607422, + "step": 15058 + }, + { + "epoch": 2.34, + "learning_rate": 3.102453446755537e-06, + "logits/chosen": -1.8245625495910645, + "logits/rejected": -2.6543726921081543, + "logps/chosen": -170.50772094726562, + "logps/rejected": -415.8871765136719, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.146584510803223, + "rewards/margins": 8.632048606872559, + "rewards/rejected": -19.77863311767578, + "step": 15059 + }, + { + "epoch": 2.34, + "learning_rate": 3.101720006224389e-06, + "logits/chosen": -1.7176791429519653, + "logits/rejected": -2.6299219131469727, + "logps/chosen": -168.1090087890625, + "logps/rejected": -378.13079833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.448264122009277, + "rewards/margins": 11.816559791564941, + "rewards/rejected": -19.26482391357422, + "step": 15060 + }, + { + "epoch": 2.34, + "learning_rate": 3.1009865656932413e-06, + "logits/chosen": -2.8878018856048584, + "logits/rejected": -2.4309868812561035, + "logps/chosen": -322.1536865234375, + "logps/rejected": -265.2740173339844, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.599268913269043, + "rewards/margins": 6.415774822235107, + "rewards/rejected": -13.015044212341309, + "step": 15061 + }, + { + "epoch": 2.34, + "learning_rate": 3.100253125162093e-06, + "logits/chosen": -1.9887632131576538, + "logits/rejected": -2.3490493297576904, + "logps/chosen": -164.51138305664062, + "logps/rejected": -318.744873046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.036951065063477, + "rewards/margins": 11.519222259521484, + "rewards/rejected": -19.55617332458496, + "step": 15062 + }, + { + "epoch": 2.34, + "learning_rate": 3.099519684630946e-06, + "logits/chosen": -2.591163396835327, + "logits/rejected": -2.501008987426758, + "logps/chosen": -647.9423828125, + "logps/rejected": -746.55029296875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.90011215209961, + "rewards/margins": 8.122018814086914, + "rewards/rejected": -20.022130966186523, + "step": 15063 + }, + { + "epoch": 2.34, + "learning_rate": 3.098786244099798e-06, + "logits/chosen": -2.196648120880127, + "logits/rejected": -1.9590785503387451, + "logps/chosen": -279.3297424316406, + "logps/rejected": -190.4075927734375, + "loss": 0.572, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.682548522949219, + "rewards/margins": 0.7965240478515625, + "rewards/rejected": -12.479072570800781, + "step": 15064 + }, + { + "epoch": 2.34, + "learning_rate": 3.09805280356865e-06, + "logits/chosen": -2.944239854812622, + "logits/rejected": -2.9898226261138916, + "logps/chosen": -280.02239990234375, + "logps/rejected": -319.24871826171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.523143768310547, + "rewards/margins": 8.64194393157959, + "rewards/rejected": -17.165088653564453, + "step": 15065 + }, + { + "epoch": 2.34, + "learning_rate": 3.097319363037502e-06, + "logits/chosen": -2.8719050884246826, + "logits/rejected": -2.9066333770751953, + "logps/chosen": -853.3795776367188, + "logps/rejected": -773.6636962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.757901191711426, + "rewards/margins": 11.905052185058594, + "rewards/rejected": -18.662952423095703, + "step": 15066 + }, + { + "epoch": 2.34, + "learning_rate": 3.0965859225063543e-06, + "logits/chosen": -1.9403576850891113, + "logits/rejected": -2.589123487472534, + "logps/chosen": -302.636474609375, + "logps/rejected": -398.4742736816406, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.792104721069336, + "rewards/margins": 7.058514595031738, + "rewards/rejected": -14.850619316101074, + "step": 15067 + }, + { + "epoch": 2.34, + "learning_rate": 3.095852481975206e-06, + "logits/chosen": -1.8437470197677612, + "logits/rejected": -2.4801533222198486, + "logps/chosen": -201.57969665527344, + "logps/rejected": -419.5300598144531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.931029319763184, + "rewards/margins": 11.20598030090332, + "rewards/rejected": -22.137008666992188, + "step": 15068 + }, + { + "epoch": 2.34, + "learning_rate": 3.095119041444058e-06, + "logits/chosen": -2.801278829574585, + "logits/rejected": -1.8187611103057861, + "logps/chosen": -815.4420166015625, + "logps/rejected": -500.4727478027344, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.083780288696289, + "rewards/margins": 6.6827616691589355, + "rewards/rejected": -16.766542434692383, + "step": 15069 + }, + { + "epoch": 2.34, + "learning_rate": 3.0943856009129103e-06, + "logits/chosen": -1.786996603012085, + "logits/rejected": -2.623586893081665, + "logps/chosen": -316.99664306640625, + "logps/rejected": -489.2584228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.468561172485352, + "rewards/margins": 12.56474494934082, + "rewards/rejected": -21.033306121826172, + "step": 15070 + }, + { + "epoch": 2.34, + "learning_rate": 3.0936521603817626e-06, + "logits/chosen": -2.8469934463500977, + "logits/rejected": -2.7596113681793213, + "logps/chosen": -238.83535766601562, + "logps/rejected": -305.83349609375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.313060760498047, + "rewards/margins": 7.1961989402771, + "rewards/rejected": -16.509260177612305, + "step": 15071 + }, + { + "epoch": 2.34, + "learning_rate": 3.092918719850615e-06, + "logits/chosen": -2.5606348514556885, + "logits/rejected": -2.742703437805176, + "logps/chosen": -96.05829620361328, + "logps/rejected": -330.33807373046875, + "loss": 0.0293, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.288246154785156, + "rewards/margins": 6.5460309982299805, + "rewards/rejected": -13.834277153015137, + "step": 15072 + }, + { + "epoch": 2.34, + "learning_rate": 3.092185279319467e-06, + "logits/chosen": -2.3478336334228516, + "logits/rejected": -2.6542344093322754, + "logps/chosen": -414.134033203125, + "logps/rejected": -512.2952880859375, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.140567779541016, + "rewards/margins": 5.478758335113525, + "rewards/rejected": -18.619325637817383, + "step": 15073 + }, + { + "epoch": 2.34, + "learning_rate": 3.091451838788319e-06, + "logits/chosen": -2.5423853397369385, + "logits/rejected": -2.3434462547302246, + "logps/chosen": -176.90489196777344, + "logps/rejected": -563.0794067382812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.977614879608154, + "rewards/margins": 15.599088668823242, + "rewards/rejected": -23.576702117919922, + "step": 15074 + }, + { + "epoch": 2.34, + "learning_rate": 3.090718398257171e-06, + "logits/chosen": -1.7794150114059448, + "logits/rejected": -2.5328967571258545, + "logps/chosen": -250.60902404785156, + "logps/rejected": -531.2257080078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.584770202636719, + "rewards/margins": 11.190718650817871, + "rewards/rejected": -23.775489807128906, + "step": 15075 + }, + { + "epoch": 2.34, + "learning_rate": 3.0899849577260233e-06, + "logits/chosen": -1.5570847988128662, + "logits/rejected": -2.6833913326263428, + "logps/chosen": -184.42135620117188, + "logps/rejected": -406.0879821777344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.08635139465332, + "rewards/margins": 8.777583122253418, + "rewards/rejected": -18.863933563232422, + "step": 15076 + }, + { + "epoch": 2.34, + "learning_rate": 3.089251517194875e-06, + "logits/chosen": -0.43706485629081726, + "logits/rejected": -2.4527382850646973, + "logps/chosen": -224.6688995361328, + "logps/rejected": -516.6831665039062, + "loss": 3.7304, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.9976167678833, + "rewards/margins": 7.214267253875732, + "rewards/rejected": -21.211883544921875, + "step": 15077 + }, + { + "epoch": 2.34, + "learning_rate": 3.088518076663727e-06, + "logits/chosen": -2.4690186977386475, + "logits/rejected": -2.8804986476898193, + "logps/chosen": -181.19908142089844, + "logps/rejected": -408.56988525390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.781898021697998, + "rewards/margins": 9.965530395507812, + "rewards/rejected": -17.74742889404297, + "step": 15078 + }, + { + "epoch": 2.35, + "learning_rate": 3.0877846361325794e-06, + "logits/chosen": -2.561429023742676, + "logits/rejected": -2.3672027587890625, + "logps/chosen": -253.3529052734375, + "logps/rejected": -347.69647216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.922561168670654, + "rewards/margins": 12.691484451293945, + "rewards/rejected": -17.614046096801758, + "step": 15079 + }, + { + "epoch": 2.35, + "learning_rate": 3.0870511956014317e-06, + "logits/chosen": -2.742802381515503, + "logits/rejected": -2.938176155090332, + "logps/chosen": -415.6925964355469, + "logps/rejected": -519.3712158203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.538211822509766, + "rewards/margins": 10.709444999694824, + "rewards/rejected": -20.247655868530273, + "step": 15080 + }, + { + "epoch": 2.35, + "learning_rate": 3.086317755070284e-06, + "logits/chosen": -2.847277879714966, + "logits/rejected": -2.7599174976348877, + "logps/chosen": -246.94888305664062, + "logps/rejected": -439.9327392578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.145196914672852, + "rewards/margins": 11.823347091674805, + "rewards/rejected": -18.968544006347656, + "step": 15081 + }, + { + "epoch": 2.35, + "learning_rate": 3.085584314539136e-06, + "logits/chosen": -2.642031192779541, + "logits/rejected": -2.554786205291748, + "logps/chosen": -429.7070617675781, + "logps/rejected": -444.7656555175781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4248833656311035, + "rewards/margins": 9.013800621032715, + "rewards/rejected": -14.438684463500977, + "step": 15082 + }, + { + "epoch": 2.35, + "learning_rate": 3.084850874007988e-06, + "logits/chosen": -2.7673606872558594, + "logits/rejected": -2.818366050720215, + "logps/chosen": -229.41439819335938, + "logps/rejected": -230.267578125, + "loss": 0.0764, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.533820152282715, + "rewards/margins": 3.0097200870513916, + "rewards/rejected": -13.543540954589844, + "step": 15083 + }, + { + "epoch": 2.35, + "learning_rate": 3.08411743347684e-06, + "logits/chosen": -2.8946917057037354, + "logits/rejected": -2.9449262619018555, + "logps/chosen": -64.27415466308594, + "logps/rejected": -238.12271118164062, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.369094371795654, + "rewards/margins": 9.843647003173828, + "rewards/rejected": -14.21274185180664, + "step": 15084 + }, + { + "epoch": 2.35, + "learning_rate": 3.0833839929456923e-06, + "logits/chosen": -2.7565505504608154, + "logits/rejected": -2.8665361404418945, + "logps/chosen": -112.15779876708984, + "logps/rejected": -184.46234130859375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.904178142547607, + "rewards/margins": 7.765481472015381, + "rewards/rejected": -14.669659614562988, + "step": 15085 + }, + { + "epoch": 2.35, + "learning_rate": 3.082650552414544e-06, + "logits/chosen": -2.996338367462158, + "logits/rejected": -1.8550409078598022, + "logps/chosen": -286.37725830078125, + "logps/rejected": -366.1355285644531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.885407447814941, + "rewards/margins": 10.39704418182373, + "rewards/rejected": -16.282451629638672, + "step": 15086 + }, + { + "epoch": 2.35, + "learning_rate": 3.081917111883396e-06, + "logits/chosen": -1.4211747646331787, + "logits/rejected": -2.9769914150238037, + "logps/chosen": -168.9524688720703, + "logps/rejected": -582.473388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.952202796936035, + "rewards/margins": 10.015012741088867, + "rewards/rejected": -20.96721649169922, + "step": 15087 + }, + { + "epoch": 2.35, + "learning_rate": 3.081183671352249e-06, + "logits/chosen": -2.435877561569214, + "logits/rejected": -2.7474851608276367, + "logps/chosen": -625.1651611328125, + "logps/rejected": -648.79296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.350255012512207, + "rewards/margins": 14.289216995239258, + "rewards/rejected": -21.63947105407715, + "step": 15088 + }, + { + "epoch": 2.35, + "learning_rate": 3.0804502308211007e-06, + "logits/chosen": -1.6124868392944336, + "logits/rejected": -2.8048033714294434, + "logps/chosen": -417.7994384765625, + "logps/rejected": -601.6017456054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.685389518737793, + "rewards/margins": 11.354979515075684, + "rewards/rejected": -19.040369033813477, + "step": 15089 + }, + { + "epoch": 2.35, + "learning_rate": 3.079716790289953e-06, + "logits/chosen": -2.918531894683838, + "logits/rejected": -2.9801409244537354, + "logps/chosen": -134.84671020507812, + "logps/rejected": -248.40908813476562, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.111382484436035, + "rewards/margins": 5.4448137283325195, + "rewards/rejected": -16.556196212768555, + "step": 15090 + }, + { + "epoch": 2.35, + "learning_rate": 3.078983349758805e-06, + "logits/chosen": -1.5428881645202637, + "logits/rejected": -2.434647560119629, + "logps/chosen": -249.94635009765625, + "logps/rejected": -482.91082763671875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.281723976135254, + "rewards/margins": 10.31700325012207, + "rewards/rejected": -20.59872817993164, + "step": 15091 + }, + { + "epoch": 2.35, + "learning_rate": 3.078249909227657e-06, + "logits/chosen": -2.4813320636749268, + "logits/rejected": -1.9880332946777344, + "logps/chosen": -397.7044372558594, + "logps/rejected": -322.0279541015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.518523216247559, + "rewards/margins": 9.617103576660156, + "rewards/rejected": -16.13562774658203, + "step": 15092 + }, + { + "epoch": 2.35, + "learning_rate": 3.077516468696509e-06, + "logits/chosen": -1.9315789937973022, + "logits/rejected": -2.4275600910186768, + "logps/chosen": -302.35009765625, + "logps/rejected": -515.0228881835938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.639833450317383, + "rewards/margins": 9.817138671875, + "rewards/rejected": -18.456972122192383, + "step": 15093 + }, + { + "epoch": 2.35, + "learning_rate": 3.0767830281653613e-06, + "logits/chosen": -2.1261239051818848, + "logits/rejected": -2.6359426975250244, + "logps/chosen": -378.6773986816406, + "logps/rejected": -401.4876708984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.416712284088135, + "rewards/margins": 8.753002166748047, + "rewards/rejected": -14.169713973999023, + "step": 15094 + }, + { + "epoch": 2.35, + "learning_rate": 3.0760495876342132e-06, + "logits/chosen": -1.4967299699783325, + "logits/rejected": -2.9077556133270264, + "logps/chosen": -140.47315979003906, + "logps/rejected": -445.8270263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.268593788146973, + "rewards/margins": 10.431387901306152, + "rewards/rejected": -20.699981689453125, + "step": 15095 + }, + { + "epoch": 2.35, + "learning_rate": 3.0753161471030655e-06, + "logits/chosen": -2.876518487930298, + "logits/rejected": -2.95052170753479, + "logps/chosen": -158.89932250976562, + "logps/rejected": -250.8330078125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.126093864440918, + "rewards/margins": 6.813271522521973, + "rewards/rejected": -14.93936538696289, + "step": 15096 + }, + { + "epoch": 2.35, + "learning_rate": 3.074582706571918e-06, + "logits/chosen": -2.7793233394622803, + "logits/rejected": -2.682616949081421, + "logps/chosen": -148.07907104492188, + "logps/rejected": -205.67864990234375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.684427261352539, + "rewards/margins": 7.161551475524902, + "rewards/rejected": -15.845977783203125, + "step": 15097 + }, + { + "epoch": 2.35, + "learning_rate": 3.0738492660407697e-06, + "logits/chosen": -1.000123381614685, + "logits/rejected": -2.342421293258667, + "logps/chosen": -301.8289794921875, + "logps/rejected": -810.375732421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.46649694442749, + "rewards/margins": 17.34279441833496, + "rewards/rejected": -22.80929183959961, + "step": 15098 + }, + { + "epoch": 2.35, + "learning_rate": 3.073115825509622e-06, + "logits/chosen": -1.576565146446228, + "logits/rejected": -2.4726359844207764, + "logps/chosen": -131.1412811279297, + "logps/rejected": -281.6993713378906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.377311706542969, + "rewards/margins": 8.626509666442871, + "rewards/rejected": -18.003820419311523, + "step": 15099 + }, + { + "epoch": 2.35, + "learning_rate": 3.072382384978474e-06, + "logits/chosen": -2.5036137104034424, + "logits/rejected": -2.178767204284668, + "logps/chosen": -466.0151672363281, + "logps/rejected": -536.55810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.009964942932129, + "rewards/margins": 11.37918758392334, + "rewards/rejected": -20.38915252685547, + "step": 15100 + }, + { + "epoch": 2.35, + "learning_rate": 3.071648944447326e-06, + "logits/chosen": -2.3200743198394775, + "logits/rejected": -1.5985461473464966, + "logps/chosen": -226.18060302734375, + "logps/rejected": -216.921630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.714202880859375, + "rewards/margins": 9.603015899658203, + "rewards/rejected": -16.317218780517578, + "step": 15101 + }, + { + "epoch": 2.35, + "learning_rate": 3.070915503916178e-06, + "logits/chosen": -0.7098712921142578, + "logits/rejected": -2.648409366607666, + "logps/chosen": -232.084716796875, + "logps/rejected": -584.8468017578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.78964614868164, + "rewards/margins": 9.031496047973633, + "rewards/rejected": -19.821142196655273, + "step": 15102 + }, + { + "epoch": 2.35, + "learning_rate": 3.0701820633850304e-06, + "logits/chosen": -2.436734914779663, + "logits/rejected": -1.9862004518508911, + "logps/chosen": -317.1197509765625, + "logps/rejected": -423.98388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.009967803955078, + "rewards/margins": 10.716361999511719, + "rewards/rejected": -19.726329803466797, + "step": 15103 + }, + { + "epoch": 2.35, + "learning_rate": 3.0694486228538822e-06, + "logits/chosen": -1.303379774093628, + "logits/rejected": -2.666447401046753, + "logps/chosen": -172.49484252929688, + "logps/rejected": -296.09954833984375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.2697114944458, + "rewards/margins": 5.923912048339844, + "rewards/rejected": -17.193622589111328, + "step": 15104 + }, + { + "epoch": 2.35, + "learning_rate": 3.068715182322735e-06, + "logits/chosen": -1.965502142906189, + "logits/rejected": -2.6718902587890625, + "logps/chosen": -153.42005920410156, + "logps/rejected": -377.67010498046875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.517780303955078, + "rewards/margins": 9.56704330444336, + "rewards/rejected": -19.084823608398438, + "step": 15105 + }, + { + "epoch": 2.35, + "learning_rate": 3.067981741791587e-06, + "logits/chosen": -1.8219128847122192, + "logits/rejected": -2.5412557125091553, + "logps/chosen": -152.22621154785156, + "logps/rejected": -268.5096435546875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.551746845245361, + "rewards/margins": 9.493932723999023, + "rewards/rejected": -16.045679092407227, + "step": 15106 + }, + { + "epoch": 2.35, + "learning_rate": 3.0672483012604387e-06, + "logits/chosen": -1.5518287420272827, + "logits/rejected": -2.7420973777770996, + "logps/chosen": -311.419189453125, + "logps/rejected": -540.0413208007812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.366377830505371, + "rewards/margins": 9.463457107543945, + "rewards/rejected": -16.829833984375, + "step": 15107 + }, + { + "epoch": 2.35, + "learning_rate": 3.066514860729291e-06, + "logits/chosen": -1.4988768100738525, + "logits/rejected": -2.755763530731201, + "logps/chosen": -183.6750946044922, + "logps/rejected": -239.76751708984375, + "loss": 0.825, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.648073196411133, + "rewards/margins": 1.463191032409668, + "rewards/rejected": -13.111265182495117, + "step": 15108 + }, + { + "epoch": 2.35, + "learning_rate": 3.065781420198143e-06, + "logits/chosen": -0.7329335808753967, + "logits/rejected": -2.565206527709961, + "logps/chosen": -169.31695556640625, + "logps/rejected": -451.6679382324219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.838353157043457, + "rewards/margins": 12.508562088012695, + "rewards/rejected": -22.34691619873047, + "step": 15109 + }, + { + "epoch": 2.35, + "learning_rate": 3.065047979666995e-06, + "logits/chosen": -0.950337290763855, + "logits/rejected": -2.4094223976135254, + "logps/chosen": -172.56494140625, + "logps/rejected": -607.327880859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.280744552612305, + "rewards/margins": 11.267707824707031, + "rewards/rejected": -23.548450469970703, + "step": 15110 + }, + { + "epoch": 2.35, + "learning_rate": 3.064314539135847e-06, + "logits/chosen": -2.333726167678833, + "logits/rejected": -2.3728489875793457, + "logps/chosen": -153.01744079589844, + "logps/rejected": -307.7214050292969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.098468780517578, + "rewards/margins": 8.894196510314941, + "rewards/rejected": -17.992664337158203, + "step": 15111 + }, + { + "epoch": 2.35, + "learning_rate": 3.0635810986046994e-06, + "logits/chosen": -1.719527244567871, + "logits/rejected": -2.580716371536255, + "logps/chosen": -170.08145141601562, + "logps/rejected": -530.724609375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.788785934448242, + "rewards/margins": 9.38789176940918, + "rewards/rejected": -17.176677703857422, + "step": 15112 + }, + { + "epoch": 2.35, + "learning_rate": 3.0628476580735517e-06, + "logits/chosen": -1.3857471942901611, + "logits/rejected": -2.5866167545318604, + "logps/chosen": -189.71438598632812, + "logps/rejected": -423.87554931640625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.767171859741211, + "rewards/margins": 8.295059204101562, + "rewards/rejected": -17.062231063842773, + "step": 15113 + }, + { + "epoch": 2.35, + "learning_rate": 3.062114217542404e-06, + "logits/chosen": -1.4093832969665527, + "logits/rejected": -2.642441987991333, + "logps/chosen": -325.4934387207031, + "logps/rejected": -455.699462890625, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.69348430633545, + "rewards/margins": 7.249000549316406, + "rewards/rejected": -15.942485809326172, + "step": 15114 + }, + { + "epoch": 2.35, + "learning_rate": 3.061380777011256e-06, + "logits/chosen": -2.3671255111694336, + "logits/rejected": -2.5622987747192383, + "logps/chosen": -117.55799865722656, + "logps/rejected": -458.598388671875, + "loss": 0.2047, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.757436752319336, + "rewards/margins": 10.620316505432129, + "rewards/rejected": -18.37775230407715, + "step": 15115 + }, + { + "epoch": 2.35, + "learning_rate": 3.060647336480108e-06, + "logits/chosen": -2.0551512241363525, + "logits/rejected": -2.7354695796966553, + "logps/chosen": -246.17913818359375, + "logps/rejected": -432.9582214355469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.547680854797363, + "rewards/margins": 10.743146896362305, + "rewards/rejected": -18.29082679748535, + "step": 15116 + }, + { + "epoch": 2.35, + "learning_rate": 3.05991389594896e-06, + "logits/chosen": -2.7870922088623047, + "logits/rejected": -2.154989242553711, + "logps/chosen": -226.67515563964844, + "logps/rejected": -195.23902893066406, + "loss": 1.073, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.352577209472656, + "rewards/margins": 2.606295585632324, + "rewards/rejected": -10.958871841430664, + "step": 15117 + }, + { + "epoch": 2.35, + "learning_rate": 3.059180455417812e-06, + "logits/chosen": -2.710995674133301, + "logits/rejected": -2.0855557918548584, + "logps/chosen": -257.09332275390625, + "logps/rejected": -415.2216491699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.359391212463379, + "rewards/margins": 14.879783630371094, + "rewards/rejected": -22.23917579650879, + "step": 15118 + }, + { + "epoch": 2.35, + "learning_rate": 3.0584470148866642e-06, + "logits/chosen": -2.43617844581604, + "logits/rejected": -2.7428336143493652, + "logps/chosen": -296.6373291015625, + "logps/rejected": -564.1251220703125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.262118339538574, + "rewards/margins": 15.50544548034668, + "rewards/rejected": -25.76756477355957, + "step": 15119 + }, + { + "epoch": 2.35, + "learning_rate": 3.057713574355516e-06, + "logits/chosen": -2.826206684112549, + "logits/rejected": -2.262781858444214, + "logps/chosen": -276.68182373046875, + "logps/rejected": -340.5664978027344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8647308349609375, + "rewards/margins": 8.364818572998047, + "rewards/rejected": -16.229549407958984, + "step": 15120 + }, + { + "epoch": 2.35, + "learning_rate": 3.0569801338243684e-06, + "logits/chosen": -1.9587650299072266, + "logits/rejected": -2.2752108573913574, + "logps/chosen": -206.916259765625, + "logps/rejected": -407.59063720703125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.485361099243164, + "rewards/margins": 8.369621276855469, + "rewards/rejected": -19.854982376098633, + "step": 15121 + }, + { + "epoch": 2.35, + "learning_rate": 3.0562466932932207e-06, + "logits/chosen": -1.6502940654754639, + "logits/rejected": -2.6921377182006836, + "logps/chosen": -352.0311279296875, + "logps/rejected": -499.34405517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.7716064453125, + "rewards/margins": 13.14585018157959, + "rewards/rejected": -21.917457580566406, + "step": 15122 + }, + { + "epoch": 2.35, + "learning_rate": 3.055513252762073e-06, + "logits/chosen": -2.528712034225464, + "logits/rejected": -1.8039231300354004, + "logps/chosen": -374.872314453125, + "logps/rejected": -356.42230224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.480931282043457, + "rewards/margins": 13.249883651733398, + "rewards/rejected": -21.730815887451172, + "step": 15123 + }, + { + "epoch": 2.35, + "learning_rate": 3.054779812230925e-06, + "logits/chosen": -2.573479413986206, + "logits/rejected": -2.2190587520599365, + "logps/chosen": -404.18682861328125, + "logps/rejected": -330.60858154296875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.903031349182129, + "rewards/margins": 6.812883377075195, + "rewards/rejected": -17.71591567993164, + "step": 15124 + }, + { + "epoch": 2.35, + "learning_rate": 3.054046371699777e-06, + "logits/chosen": -2.111114025115967, + "logits/rejected": -2.8328282833099365, + "logps/chosen": -239.86099243164062, + "logps/rejected": -561.285888671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.199640274047852, + "rewards/margins": 10.030353546142578, + "rewards/rejected": -19.22999382019043, + "step": 15125 + }, + { + "epoch": 2.35, + "learning_rate": 3.053312931168629e-06, + "logits/chosen": -2.513106346130371, + "logits/rejected": -2.193931818008423, + "logps/chosen": -510.9682922363281, + "logps/rejected": -437.47930908203125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.250436782836914, + "rewards/margins": 8.799498558044434, + "rewards/rejected": -22.04993438720703, + "step": 15126 + }, + { + "epoch": 2.35, + "learning_rate": 3.052579490637481e-06, + "logits/chosen": -2.7367331981658936, + "logits/rejected": -1.9872642755508423, + "logps/chosen": -454.90289306640625, + "logps/rejected": -353.9122009277344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.550675392150879, + "rewards/margins": 8.90731430053711, + "rewards/rejected": -17.457988739013672, + "step": 15127 + }, + { + "epoch": 2.35, + "learning_rate": 3.0518460501063333e-06, + "logits/chosen": -1.9334460496902466, + "logits/rejected": -2.9539384841918945, + "logps/chosen": -84.65361022949219, + "logps/rejected": -301.93084716796875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.063189506530762, + "rewards/margins": 7.018592834472656, + "rewards/rejected": -13.081782341003418, + "step": 15128 + }, + { + "epoch": 2.35, + "learning_rate": 3.051112609575185e-06, + "logits/chosen": -1.4183497428894043, + "logits/rejected": -2.6767983436584473, + "logps/chosen": -328.1971435546875, + "logps/rejected": -577.4307861328125, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.408929824829102, + "rewards/margins": 4.626809120178223, + "rewards/rejected": -15.035738945007324, + "step": 15129 + }, + { + "epoch": 2.35, + "learning_rate": 3.050379169044038e-06, + "logits/chosen": -1.7043594121932983, + "logits/rejected": -2.466944456100464, + "logps/chosen": -286.2996520996094, + "logps/rejected": -350.3829345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.381089210510254, + "rewards/margins": 10.515953063964844, + "rewards/rejected": -20.89704132080078, + "step": 15130 + }, + { + "epoch": 2.35, + "learning_rate": 3.0496457285128897e-06, + "logits/chosen": -1.7272932529449463, + "logits/rejected": -2.5624022483825684, + "logps/chosen": -219.1837158203125, + "logps/rejected": -452.48388671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.552654266357422, + "rewards/margins": 9.530284881591797, + "rewards/rejected": -19.08293914794922, + "step": 15131 + }, + { + "epoch": 2.35, + "learning_rate": 3.048912287981742e-06, + "logits/chosen": -1.8029887676239014, + "logits/rejected": -0.9658842086791992, + "logps/chosen": -415.740234375, + "logps/rejected": -219.4065704345703, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.809308052062988, + "rewards/margins": 6.498906135559082, + "rewards/rejected": -14.30821418762207, + "step": 15132 + }, + { + "epoch": 2.35, + "learning_rate": 3.048178847450594e-06, + "logits/chosen": -1.808830738067627, + "logits/rejected": -2.859853982925415, + "logps/chosen": -279.9283142089844, + "logps/rejected": -723.5164794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.059799194335938, + "rewards/margins": 9.05921459197998, + "rewards/rejected": -22.119014739990234, + "step": 15133 + }, + { + "epoch": 2.35, + "learning_rate": 3.0474454069194462e-06, + "logits/chosen": -2.1629340648651123, + "logits/rejected": -2.598464012145996, + "logps/chosen": -200.26498413085938, + "logps/rejected": -358.8940734863281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.010167121887207, + "rewards/margins": 12.090229034423828, + "rewards/rejected": -21.10039520263672, + "step": 15134 + }, + { + "epoch": 2.35, + "learning_rate": 3.046711966388298e-06, + "logits/chosen": -1.7922475337982178, + "logits/rejected": -2.586561918258667, + "logps/chosen": -515.3222045898438, + "logps/rejected": -645.922119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.682315826416016, + "rewards/margins": 12.36111831665039, + "rewards/rejected": -25.043434143066406, + "step": 15135 + }, + { + "epoch": 2.35, + "learning_rate": 3.04597852585715e-06, + "logits/chosen": -2.3384501934051514, + "logits/rejected": -2.618060350418091, + "logps/chosen": -166.39300537109375, + "logps/rejected": -236.18597412109375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.306853294372559, + "rewards/margins": 8.54156494140625, + "rewards/rejected": -16.848417282104492, + "step": 15136 + }, + { + "epoch": 2.35, + "learning_rate": 3.0452450853260023e-06, + "logits/chosen": -2.8259778022766113, + "logits/rejected": -2.5148398876190186, + "logps/chosen": -691.932861328125, + "logps/rejected": -782.4591064453125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -16.385211944580078, + "rewards/margins": 7.02329683303833, + "rewards/rejected": -23.40850830078125, + "step": 15137 + }, + { + "epoch": 2.35, + "learning_rate": 3.0445116447948546e-06, + "logits/chosen": -1.7867190837860107, + "logits/rejected": -2.705467700958252, + "logps/chosen": -168.95364379882812, + "logps/rejected": -528.4185180664062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.267443656921387, + "rewards/margins": 10.195596694946289, + "rewards/rejected": -18.46303939819336, + "step": 15138 + }, + { + "epoch": 2.35, + "learning_rate": 3.043778204263707e-06, + "logits/chosen": -3.0045366287231445, + "logits/rejected": -2.793027400970459, + "logps/chosen": -401.2642822265625, + "logps/rejected": -554.1775512695312, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.098723888397217, + "rewards/margins": 8.696758270263672, + "rewards/rejected": -15.795482635498047, + "step": 15139 + }, + { + "epoch": 2.35, + "learning_rate": 3.0430447637325588e-06, + "logits/chosen": -2.9126551151275635, + "logits/rejected": -2.9473836421966553, + "logps/chosen": -221.8199462890625, + "logps/rejected": -199.70986938476562, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.973803520202637, + "rewards/margins": 8.285717010498047, + "rewards/rejected": -16.259519577026367, + "step": 15140 + }, + { + "epoch": 2.35, + "learning_rate": 3.042311323201411e-06, + "logits/chosen": -2.7464780807495117, + "logits/rejected": -1.8475228548049927, + "logps/chosen": -422.93280029296875, + "logps/rejected": -570.587890625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.569990158081055, + "rewards/margins": 11.151321411132812, + "rewards/rejected": -21.721311569213867, + "step": 15141 + }, + { + "epoch": 2.35, + "learning_rate": 3.041577882670263e-06, + "logits/chosen": -0.947871744632721, + "logits/rejected": -2.2074625492095947, + "logps/chosen": -194.4783935546875, + "logps/rejected": -542.65869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.802029609680176, + "rewards/margins": 10.280996322631836, + "rewards/rejected": -20.083026885986328, + "step": 15142 + }, + { + "epoch": 2.36, + "learning_rate": 3.0408444421391152e-06, + "logits/chosen": -2.724053382873535, + "logits/rejected": -2.836772918701172, + "logps/chosen": -382.4733581542969, + "logps/rejected": -514.9222412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.154470443725586, + "rewards/margins": 11.674690246582031, + "rewards/rejected": -20.829160690307617, + "step": 15143 + }, + { + "epoch": 2.36, + "learning_rate": 3.040111001607967e-06, + "logits/chosen": -1.7116764783859253, + "logits/rejected": -2.6174135208129883, + "logps/chosen": -205.90988159179688, + "logps/rejected": -510.01092529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.10006046295166, + "rewards/margins": 10.40175724029541, + "rewards/rejected": -20.50181770324707, + "step": 15144 + }, + { + "epoch": 2.36, + "learning_rate": 3.0393775610768194e-06, + "logits/chosen": -2.3888001441955566, + "logits/rejected": -2.5516984462738037, + "logps/chosen": -347.15057373046875, + "logps/rejected": -440.37939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.31351375579834, + "rewards/margins": 13.94741439819336, + "rewards/rejected": -24.260929107666016, + "step": 15145 + }, + { + "epoch": 2.36, + "learning_rate": 3.0386441205456713e-06, + "logits/chosen": -1.344866394996643, + "logits/rejected": -2.7765660285949707, + "logps/chosen": -156.13546752929688, + "logps/rejected": -600.79541015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.548669815063477, + "rewards/margins": 11.438352584838867, + "rewards/rejected": -22.987022399902344, + "step": 15146 + }, + { + "epoch": 2.36, + "learning_rate": 3.0379106800145236e-06, + "logits/chosen": -1.180113434791565, + "logits/rejected": -2.706547975540161, + "logps/chosen": -213.36941528320312, + "logps/rejected": -595.0924682617188, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.425234794616699, + "rewards/margins": 10.3342924118042, + "rewards/rejected": -17.7595272064209, + "step": 15147 + }, + { + "epoch": 2.36, + "learning_rate": 3.037177239483376e-06, + "logits/chosen": -2.025832176208496, + "logits/rejected": -2.762558698654175, + "logps/chosen": -235.84799194335938, + "logps/rejected": -322.32586669921875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.134286880493164, + "rewards/margins": 10.17842960357666, + "rewards/rejected": -19.31271743774414, + "step": 15148 + }, + { + "epoch": 2.36, + "learning_rate": 3.0364437989522278e-06, + "logits/chosen": -2.4056761264801025, + "logits/rejected": -1.8166565895080566, + "logps/chosen": -310.37835693359375, + "logps/rejected": -403.56781005859375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.597996711730957, + "rewards/margins": 9.013269424438477, + "rewards/rejected": -17.61126708984375, + "step": 15149 + }, + { + "epoch": 2.36, + "learning_rate": 3.03571035842108e-06, + "logits/chosen": -2.0816125869750977, + "logits/rejected": -2.26865291595459, + "logps/chosen": -206.0147705078125, + "logps/rejected": -512.3074340820312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.227066040039062, + "rewards/margins": 16.307571411132812, + "rewards/rejected": -27.534637451171875, + "step": 15150 + }, + { + "epoch": 2.36, + "learning_rate": 3.034976917889932e-06, + "logits/chosen": -2.626445770263672, + "logits/rejected": -2.722341537475586, + "logps/chosen": -139.28573608398438, + "logps/rejected": -256.0269775390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.551473617553711, + "rewards/margins": 8.581774711608887, + "rewards/rejected": -17.13324737548828, + "step": 15151 + }, + { + "epoch": 2.36, + "learning_rate": 3.0342434773587843e-06, + "logits/chosen": -2.3268940448760986, + "logits/rejected": -2.8582191467285156, + "logps/chosen": -226.2611846923828, + "logps/rejected": -269.0303955078125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.981521129608154, + "rewards/margins": 6.8936004638671875, + "rewards/rejected": -12.8751220703125, + "step": 15152 + }, + { + "epoch": 2.36, + "learning_rate": 3.033510036827636e-06, + "logits/chosen": -1.3863824605941772, + "logits/rejected": -2.6091971397399902, + "logps/chosen": -234.29833984375, + "logps/rejected": -606.5673217773438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.559165000915527, + "rewards/margins": 14.48604965209961, + "rewards/rejected": -27.045215606689453, + "step": 15153 + }, + { + "epoch": 2.36, + "learning_rate": 3.0327765962964884e-06, + "logits/chosen": -2.7497265338897705, + "logits/rejected": -2.6117591857910156, + "logps/chosen": -369.181640625, + "logps/rejected": -362.2552490234375, + "loss": 0.074, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.204900741577148, + "rewards/margins": 5.481927871704102, + "rewards/rejected": -15.68682861328125, + "step": 15154 + }, + { + "epoch": 2.36, + "learning_rate": 3.0320431557653407e-06, + "logits/chosen": -2.8556506633758545, + "logits/rejected": -2.842848062515259, + "logps/chosen": -398.84686279296875, + "logps/rejected": -456.6665954589844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.277132987976074, + "rewards/margins": 10.626221656799316, + "rewards/rejected": -14.90335464477539, + "step": 15155 + }, + { + "epoch": 2.36, + "learning_rate": 3.0313097152341926e-06, + "logits/chosen": -2.412931442260742, + "logits/rejected": -1.058643102645874, + "logps/chosen": -209.05999755859375, + "logps/rejected": -211.9188232421875, + "loss": 0.6934, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.661758422851562, + "rewards/margins": 4.06622314453125, + "rewards/rejected": -14.727981567382812, + "step": 15156 + }, + { + "epoch": 2.36, + "learning_rate": 3.030576274703045e-06, + "logits/chosen": -2.5250182151794434, + "logits/rejected": -2.6767215728759766, + "logps/chosen": -196.60256958007812, + "logps/rejected": -443.00091552734375, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.959657669067383, + "rewards/margins": 10.569937705993652, + "rewards/rejected": -22.52959442138672, + "step": 15157 + }, + { + "epoch": 2.36, + "learning_rate": 3.029842834171897e-06, + "logits/chosen": -2.550128221511841, + "logits/rejected": -2.4144883155822754, + "logps/chosen": -506.40350341796875, + "logps/rejected": -593.5947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.05459976196289, + "rewards/margins": 11.757278442382812, + "rewards/rejected": -20.811878204345703, + "step": 15158 + }, + { + "epoch": 2.36, + "learning_rate": 3.029109393640749e-06, + "logits/chosen": -2.5389328002929688, + "logits/rejected": -2.488495111465454, + "logps/chosen": -173.947021484375, + "logps/rejected": -278.22039794921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.328238487243652, + "rewards/margins": 8.76589298248291, + "rewards/rejected": -19.094131469726562, + "step": 15159 + }, + { + "epoch": 2.36, + "learning_rate": 3.028375953109601e-06, + "logits/chosen": -2.034900426864624, + "logits/rejected": -2.318204641342163, + "logps/chosen": -284.578125, + "logps/rejected": -472.7237548828125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.807891845703125, + "rewards/margins": 10.829582214355469, + "rewards/rejected": -20.637474060058594, + "step": 15160 + }, + { + "epoch": 2.36, + "learning_rate": 3.0276425125784533e-06, + "logits/chosen": -2.6600749492645264, + "logits/rejected": -1.69768488407135, + "logps/chosen": -242.04052734375, + "logps/rejected": -406.1944580078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.199915885925293, + "rewards/margins": 9.700146675109863, + "rewards/rejected": -18.900062561035156, + "step": 15161 + }, + { + "epoch": 2.36, + "learning_rate": 3.026909072047305e-06, + "logits/chosen": -2.5854005813598633, + "logits/rejected": -2.198559522628784, + "logps/chosen": -275.06317138671875, + "logps/rejected": -350.8477783203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.173685073852539, + "rewards/margins": 7.752573490142822, + "rewards/rejected": -18.926258087158203, + "step": 15162 + }, + { + "epoch": 2.36, + "learning_rate": 3.0261756315161575e-06, + "logits/chosen": -0.701725959777832, + "logits/rejected": -2.2177538871765137, + "logps/chosen": -116.87821960449219, + "logps/rejected": -416.7205810546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3582353591918945, + "rewards/margins": 13.65880012512207, + "rewards/rejected": -21.01703643798828, + "step": 15163 + }, + { + "epoch": 2.36, + "learning_rate": 3.0254421909850098e-06, + "logits/chosen": -2.03155779838562, + "logits/rejected": -2.5982730388641357, + "logps/chosen": -741.9771118164062, + "logps/rejected": -690.51513671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.236011505126953, + "rewards/margins": 9.566427230834961, + "rewards/rejected": -20.80243682861328, + "step": 15164 + }, + { + "epoch": 2.36, + "learning_rate": 3.024708750453862e-06, + "logits/chosen": -0.46282562613487244, + "logits/rejected": -2.633469343185425, + "logps/chosen": -120.1456298828125, + "logps/rejected": -432.5016784667969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.782224655151367, + "rewards/margins": 12.91454792022705, + "rewards/rejected": -21.696773529052734, + "step": 15165 + }, + { + "epoch": 2.36, + "learning_rate": 3.023975309922714e-06, + "logits/chosen": -2.7114312648773193, + "logits/rejected": -1.9338433742523193, + "logps/chosen": -538.0052490234375, + "logps/rejected": -523.1610107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.156599044799805, + "rewards/margins": 10.83864974975586, + "rewards/rejected": -20.995248794555664, + "step": 15166 + }, + { + "epoch": 2.36, + "learning_rate": 3.023241869391566e-06, + "logits/chosen": -2.587415933609009, + "logits/rejected": -2.0511972904205322, + "logps/chosen": -243.6275634765625, + "logps/rejected": -244.31326293945312, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.177528381347656, + "rewards/margins": 6.42279052734375, + "rewards/rejected": -15.600318908691406, + "step": 15167 + }, + { + "epoch": 2.36, + "learning_rate": 3.022508428860418e-06, + "logits/chosen": -2.745542287826538, + "logits/rejected": -1.5091840028762817, + "logps/chosen": -303.92340087890625, + "logps/rejected": -275.3836975097656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.981420516967773, + "rewards/margins": 9.813636779785156, + "rewards/rejected": -17.79505729675293, + "step": 15168 + }, + { + "epoch": 2.36, + "learning_rate": 3.02177498832927e-06, + "logits/chosen": -2.306286096572876, + "logits/rejected": -2.6334388256073, + "logps/chosen": -375.35357666015625, + "logps/rejected": -630.80224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.417144775390625, + "rewards/margins": 12.664116859436035, + "rewards/rejected": -24.081260681152344, + "step": 15169 + }, + { + "epoch": 2.36, + "learning_rate": 3.0210415477981223e-06, + "logits/chosen": -1.2645665407180786, + "logits/rejected": -2.4542598724365234, + "logps/chosen": -170.2702178955078, + "logps/rejected": -485.5855712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.421831130981445, + "rewards/margins": 11.594416618347168, + "rewards/rejected": -24.016246795654297, + "step": 15170 + }, + { + "epoch": 2.36, + "learning_rate": 3.020308107266974e-06, + "logits/chosen": -1.9782196283340454, + "logits/rejected": -2.0620036125183105, + "logps/chosen": -188.58999633789062, + "logps/rejected": -340.1422119140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.681633949279785, + "rewards/margins": 9.279916763305664, + "rewards/rejected": -17.961551666259766, + "step": 15171 + }, + { + "epoch": 2.36, + "learning_rate": 3.019574666735827e-06, + "logits/chosen": -1.4461910724639893, + "logits/rejected": -2.1268653869628906, + "logps/chosen": -363.5806884765625, + "logps/rejected": -319.2410583496094, + "loss": 0.0554, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.130271911621094, + "rewards/margins": 6.3200764656066895, + "rewards/rejected": -19.450347900390625, + "step": 15172 + }, + { + "epoch": 2.36, + "learning_rate": 3.0188412262046788e-06, + "logits/chosen": -2.0193846225738525, + "logits/rejected": -2.9699745178222656, + "logps/chosen": -219.81744384765625, + "logps/rejected": -525.5077514648438, + "loss": 0.0498, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.445744514465332, + "rewards/margins": 7.080139636993408, + "rewards/rejected": -17.5258846282959, + "step": 15173 + }, + { + "epoch": 2.36, + "learning_rate": 3.018107785673531e-06, + "logits/chosen": -2.588353157043457, + "logits/rejected": -2.567225456237793, + "logps/chosen": -474.45166015625, + "logps/rejected": -482.82598876953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.280081748962402, + "rewards/margins": 7.8842267990112305, + "rewards/rejected": -18.164308547973633, + "step": 15174 + }, + { + "epoch": 2.36, + "learning_rate": 3.017374345142383e-06, + "logits/chosen": -2.7590432167053223, + "logits/rejected": -2.3810086250305176, + "logps/chosen": -235.1037139892578, + "logps/rejected": -219.27085876464844, + "loss": 1.0247, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.84132194519043, + "rewards/margins": 5.370369911193848, + "rewards/rejected": -15.211691856384277, + "step": 15175 + }, + { + "epoch": 2.36, + "learning_rate": 3.016640904611235e-06, + "logits/chosen": -2.19637393951416, + "logits/rejected": -2.648583173751831, + "logps/chosen": -216.5125274658203, + "logps/rejected": -494.66436767578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.405265808105469, + "rewards/margins": 9.313499450683594, + "rewards/rejected": -18.718765258789062, + "step": 15176 + }, + { + "epoch": 2.36, + "learning_rate": 3.015907464080087e-06, + "logits/chosen": -2.69435715675354, + "logits/rejected": -0.7385967373847961, + "logps/chosen": -550.167724609375, + "logps/rejected": -260.1417541503906, + "loss": 0.0882, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.29007339477539, + "rewards/margins": 5.516700267791748, + "rewards/rejected": -16.806774139404297, + "step": 15177 + }, + { + "epoch": 2.36, + "learning_rate": 3.015174023548939e-06, + "logits/chosen": -2.93033766746521, + "logits/rejected": -2.6881468296051025, + "logps/chosen": -637.57373046875, + "logps/rejected": -504.65301513671875, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.043970108032227, + "rewards/margins": 9.34251880645752, + "rewards/rejected": -19.386489868164062, + "step": 15178 + }, + { + "epoch": 2.36, + "learning_rate": 3.0144405830177913e-06, + "logits/chosen": -1.3628793954849243, + "logits/rejected": -2.352863311767578, + "logps/chosen": -284.9579162597656, + "logps/rejected": -463.0892028808594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.750334739685059, + "rewards/margins": 13.393214225769043, + "rewards/rejected": -26.1435489654541, + "step": 15179 + }, + { + "epoch": 2.36, + "learning_rate": 3.0137071424866436e-06, + "logits/chosen": -2.760660409927368, + "logits/rejected": -2.2649192810058594, + "logps/chosen": -590.0765380859375, + "logps/rejected": -776.22216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.193292617797852, + "rewards/margins": 15.34677791595459, + "rewards/rejected": -24.540071487426758, + "step": 15180 + }, + { + "epoch": 2.36, + "learning_rate": 3.012973701955496e-06, + "logits/chosen": -2.230426788330078, + "logits/rejected": -1.818680763244629, + "logps/chosen": -654.8839111328125, + "logps/rejected": -465.734375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.277044296264648, + "rewards/margins": 11.333649635314941, + "rewards/rejected": -20.610694885253906, + "step": 15181 + }, + { + "epoch": 2.36, + "learning_rate": 3.012240261424348e-06, + "logits/chosen": -2.863208770751953, + "logits/rejected": -2.7340216636657715, + "logps/chosen": -163.76036071777344, + "logps/rejected": -244.719482421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.70493745803833, + "rewards/margins": 9.053815841674805, + "rewards/rejected": -15.758752822875977, + "step": 15182 + }, + { + "epoch": 2.36, + "learning_rate": 3.0115068208932e-06, + "logits/chosen": -2.24117112159729, + "logits/rejected": -2.5892040729522705, + "logps/chosen": -320.91998291015625, + "logps/rejected": -454.69110107421875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.138652801513672, + "rewards/margins": 8.319826126098633, + "rewards/rejected": -16.458478927612305, + "step": 15183 + }, + { + "epoch": 2.36, + "learning_rate": 3.010773380362052e-06, + "logits/chosen": -2.693969488143921, + "logits/rejected": -2.853525400161743, + "logps/chosen": -293.3976135253906, + "logps/rejected": -342.0273132324219, + "loss": 1.0251, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.782705307006836, + "rewards/margins": 6.677822589874268, + "rewards/rejected": -18.460529327392578, + "step": 15184 + }, + { + "epoch": 2.36, + "learning_rate": 3.0100399398309043e-06, + "logits/chosen": -2.726783037185669, + "logits/rejected": -2.9944543838500977, + "logps/chosen": -266.38189697265625, + "logps/rejected": -487.38116455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.378199100494385, + "rewards/margins": 12.337686538696289, + "rewards/rejected": -19.715885162353516, + "step": 15185 + }, + { + "epoch": 2.36, + "learning_rate": 3.009306499299756e-06, + "logits/chosen": -2.560717821121216, + "logits/rejected": -2.88394832611084, + "logps/chosen": -196.87594604492188, + "logps/rejected": -242.21865844726562, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.815668106079102, + "rewards/margins": 7.54073429107666, + "rewards/rejected": -15.356402397155762, + "step": 15186 + }, + { + "epoch": 2.36, + "learning_rate": 3.008573058768608e-06, + "logits/chosen": -2.800201892852783, + "logits/rejected": -1.0327199697494507, + "logps/chosen": -395.47576904296875, + "logps/rejected": -414.5296630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.549558639526367, + "rewards/margins": 13.207045555114746, + "rewards/rejected": -22.756603240966797, + "step": 15187 + }, + { + "epoch": 2.36, + "learning_rate": 3.0078396182374603e-06, + "logits/chosen": -2.7706804275512695, + "logits/rejected": -2.861539840698242, + "logps/chosen": -131.8212127685547, + "logps/rejected": -334.92681884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.704002380371094, + "rewards/margins": 14.588000297546387, + "rewards/rejected": -24.292003631591797, + "step": 15188 + }, + { + "epoch": 2.36, + "learning_rate": 3.0071061777063127e-06, + "logits/chosen": -2.390486717224121, + "logits/rejected": -2.673659086227417, + "logps/chosen": -320.643310546875, + "logps/rejected": -515.1669311523438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.150136947631836, + "rewards/margins": 12.302844047546387, + "rewards/rejected": -20.452980041503906, + "step": 15189 + }, + { + "epoch": 2.36, + "learning_rate": 3.006372737175165e-06, + "logits/chosen": -1.283867597579956, + "logits/rejected": -2.4739813804626465, + "logps/chosen": -224.84422302246094, + "logps/rejected": -418.41162109375, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.07425308227539, + "rewards/margins": 8.242341995239258, + "rewards/rejected": -17.31659507751465, + "step": 15190 + }, + { + "epoch": 2.36, + "learning_rate": 3.005639296644017e-06, + "logits/chosen": -2.043832778930664, + "logits/rejected": -2.4698078632354736, + "logps/chosen": -241.07107543945312, + "logps/rejected": -437.83416748046875, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.75757122039795, + "rewards/margins": 6.773438453674316, + "rewards/rejected": -17.531009674072266, + "step": 15191 + }, + { + "epoch": 2.36, + "learning_rate": 3.004905856112869e-06, + "logits/chosen": -2.49383807182312, + "logits/rejected": -2.9708611965179443, + "logps/chosen": -154.92291259765625, + "logps/rejected": -439.2991943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.91485595703125, + "rewards/margins": 10.314212799072266, + "rewards/rejected": -14.229068756103516, + "step": 15192 + }, + { + "epoch": 2.36, + "learning_rate": 3.004172415581721e-06, + "logits/chosen": -2.2234771251678467, + "logits/rejected": -2.2808663845062256, + "logps/chosen": -244.54913330078125, + "logps/rejected": -495.1071472167969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.990606307983398, + "rewards/margins": 13.537298202514648, + "rewards/rejected": -24.527904510498047, + "step": 15193 + }, + { + "epoch": 2.36, + "learning_rate": 3.0034389750505733e-06, + "logits/chosen": -2.037341356277466, + "logits/rejected": -2.5147643089294434, + "logps/chosen": -247.94036865234375, + "logps/rejected": -335.1661071777344, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.603334426879883, + "rewards/margins": 9.509007453918457, + "rewards/rejected": -21.112342834472656, + "step": 15194 + }, + { + "epoch": 2.36, + "learning_rate": 3.002705534519425e-06, + "logits/chosen": -2.5938093662261963, + "logits/rejected": -2.86195969581604, + "logps/chosen": -99.98828125, + "logps/rejected": -256.6265563964844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.214973449707031, + "rewards/margins": 9.976203918457031, + "rewards/rejected": -16.191177368164062, + "step": 15195 + }, + { + "epoch": 2.36, + "learning_rate": 3.001972093988277e-06, + "logits/chosen": -2.3309485912323, + "logits/rejected": -2.073796033859253, + "logps/chosen": -215.71917724609375, + "logps/rejected": -453.6936340332031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.521849632263184, + "rewards/margins": 11.934036254882812, + "rewards/rejected": -21.455886840820312, + "step": 15196 + }, + { + "epoch": 2.36, + "learning_rate": 3.00123865345713e-06, + "logits/chosen": -2.285607099533081, + "logits/rejected": -2.8090035915374756, + "logps/chosen": -109.11178588867188, + "logps/rejected": -233.52685546875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.027539253234863, + "rewards/margins": 7.892539024353027, + "rewards/rejected": -15.92007827758789, + "step": 15197 + }, + { + "epoch": 2.36, + "learning_rate": 3.0005052129259817e-06, + "logits/chosen": -2.1638083457946777, + "logits/rejected": -2.350487232208252, + "logps/chosen": -239.7165069580078, + "logps/rejected": -447.009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.628463745117188, + "rewards/margins": 10.108327865600586, + "rewards/rejected": -20.73678970336914, + "step": 15198 + }, + { + "epoch": 2.36, + "learning_rate": 2.999771772394834e-06, + "logits/chosen": -2.54333233833313, + "logits/rejected": -1.475412368774414, + "logps/chosen": -509.7355041503906, + "logps/rejected": -386.8428955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.055158615112305, + "rewards/margins": 10.291309356689453, + "rewards/rejected": -20.346467971801758, + "step": 15199 + }, + { + "epoch": 2.36, + "learning_rate": 2.999038331863686e-06, + "logits/chosen": -2.912287473678589, + "logits/rejected": -1.8076421022415161, + "logps/chosen": -323.3525390625, + "logps/rejected": -288.12847900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0348944664001465, + "rewards/margins": 12.288780212402344, + "rewards/rejected": -19.32367515563965, + "step": 15200 + }, + { + "epoch": 2.36, + "learning_rate": 2.998304891332538e-06, + "logits/chosen": -2.627013683319092, + "logits/rejected": -2.3946659564971924, + "logps/chosen": -594.4332885742188, + "logps/rejected": -648.20849609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.859870910644531, + "rewards/margins": 9.56615161895752, + "rewards/rejected": -20.426021575927734, + "step": 15201 + }, + { + "epoch": 2.36, + "learning_rate": 2.99757145080139e-06, + "logits/chosen": -2.5374975204467773, + "logits/rejected": -2.521256446838379, + "logps/chosen": -395.33990478515625, + "logps/rejected": -619.5439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.896417617797852, + "rewards/margins": 12.650009155273438, + "rewards/rejected": -22.54642677307129, + "step": 15202 + }, + { + "epoch": 2.36, + "learning_rate": 2.9968380102702423e-06, + "logits/chosen": -2.563405990600586, + "logits/rejected": -2.9594368934631348, + "logps/chosen": -249.82260131835938, + "logps/rejected": -657.4317626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.4359130859375, + "rewards/margins": 12.42690372467041, + "rewards/rejected": -21.862815856933594, + "step": 15203 + }, + { + "epoch": 2.36, + "learning_rate": 2.9961045697390942e-06, + "logits/chosen": -2.6538805961608887, + "logits/rejected": -2.7156982421875, + "logps/chosen": -223.03622436523438, + "logps/rejected": -440.23797607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.761496543884277, + "rewards/margins": 11.169214248657227, + "rewards/rejected": -22.930709838867188, + "step": 15204 + }, + { + "epoch": 2.36, + "learning_rate": 2.9953711292079465e-06, + "logits/chosen": -2.7391574382781982, + "logits/rejected": -2.4047276973724365, + "logps/chosen": -296.72637939453125, + "logps/rejected": -486.3040771484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.213953971862793, + "rewards/margins": 12.059616088867188, + "rewards/rejected": -22.273571014404297, + "step": 15205 + }, + { + "epoch": 2.36, + "learning_rate": 2.994637688676799e-06, + "logits/chosen": -1.606506109237671, + "logits/rejected": -2.6812708377838135, + "logps/chosen": -109.9145278930664, + "logps/rejected": -418.55828857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.485551834106445, + "rewards/margins": 14.664907455444336, + "rewards/rejected": -23.15045928955078, + "step": 15206 + }, + { + "epoch": 2.37, + "learning_rate": 2.9939042481456507e-06, + "logits/chosen": -1.125609278678894, + "logits/rejected": -2.217099189758301, + "logps/chosen": -148.24478149414062, + "logps/rejected": -367.165283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.91085147857666, + "rewards/margins": 17.756399154663086, + "rewards/rejected": -25.667251586914062, + "step": 15207 + }, + { + "epoch": 2.37, + "learning_rate": 2.993170807614503e-06, + "logits/chosen": -1.8322125673294067, + "logits/rejected": -2.7549550533294678, + "logps/chosen": -185.6573486328125, + "logps/rejected": -423.619873046875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.147275924682617, + "rewards/margins": 7.56095027923584, + "rewards/rejected": -16.708227157592773, + "step": 15208 + }, + { + "epoch": 2.37, + "learning_rate": 2.992437367083355e-06, + "logits/chosen": -2.1216235160827637, + "logits/rejected": -2.782853364944458, + "logps/chosen": -262.8505554199219, + "logps/rejected": -565.6751098632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.406612396240234, + "rewards/margins": 13.641523361206055, + "rewards/rejected": -24.04813575744629, + "step": 15209 + }, + { + "epoch": 2.37, + "learning_rate": 2.991703926552207e-06, + "logits/chosen": -1.2999545335769653, + "logits/rejected": -2.554441452026367, + "logps/chosen": -270.7377014160156, + "logps/rejected": -313.81341552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.189506530761719, + "rewards/margins": 11.711859703063965, + "rewards/rejected": -16.9013671875, + "step": 15210 + }, + { + "epoch": 2.37, + "learning_rate": 2.990970486021059e-06, + "logits/chosen": -2.955498456954956, + "logits/rejected": -2.7692277431488037, + "logps/chosen": -762.2745361328125, + "logps/rejected": -632.934326171875, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.993878364562988, + "rewards/margins": 4.844710350036621, + "rewards/rejected": -11.83858871459961, + "step": 15211 + }, + { + "epoch": 2.37, + "learning_rate": 2.9902370454899114e-06, + "logits/chosen": -2.6634397506713867, + "logits/rejected": -3.0482168197631836, + "logps/chosen": -153.91525268554688, + "logps/rejected": -434.0233459472656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.175498008728027, + "rewards/margins": 8.914020538330078, + "rewards/rejected": -16.089519500732422, + "step": 15212 + }, + { + "epoch": 2.37, + "learning_rate": 2.9895036049587632e-06, + "logits/chosen": -2.7191386222839355, + "logits/rejected": -1.9436761140823364, + "logps/chosen": -572.3168334960938, + "logps/rejected": -724.1005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.094505310058594, + "rewards/margins": 14.364389419555664, + "rewards/rejected": -24.458894729614258, + "step": 15213 + }, + { + "epoch": 2.37, + "learning_rate": 2.988770164427616e-06, + "logits/chosen": -2.6692936420440674, + "logits/rejected": -2.7562990188598633, + "logps/chosen": -422.6214904785156, + "logps/rejected": -641.4579467773438, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.575417518615723, + "rewards/margins": 7.616264343261719, + "rewards/rejected": -18.191680908203125, + "step": 15214 + }, + { + "epoch": 2.37, + "learning_rate": 2.988036723896468e-06, + "logits/chosen": -2.5993099212646484, + "logits/rejected": -2.6917366981506348, + "logps/chosen": -292.53546142578125, + "logps/rejected": -533.8346557617188, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.551233291625977, + "rewards/margins": 7.2952494621276855, + "rewards/rejected": -19.846481323242188, + "step": 15215 + }, + { + "epoch": 2.37, + "learning_rate": 2.9873032833653197e-06, + "logits/chosen": -2.8177742958068848, + "logits/rejected": -2.441654920578003, + "logps/chosen": -451.8904113769531, + "logps/rejected": -652.8887939453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.257072448730469, + "rewards/margins": 16.495288848876953, + "rewards/rejected": -23.752361297607422, + "step": 15216 + }, + { + "epoch": 2.37, + "learning_rate": 2.986569842834172e-06, + "logits/chosen": -1.7272368669509888, + "logits/rejected": -2.6548099517822266, + "logps/chosen": -316.3748779296875, + "logps/rejected": -542.3358154296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.629536628723145, + "rewards/margins": 12.316606521606445, + "rewards/rejected": -20.946144104003906, + "step": 15217 + }, + { + "epoch": 2.37, + "learning_rate": 2.985836402303024e-06, + "logits/chosen": -1.309664011001587, + "logits/rejected": -2.24633526802063, + "logps/chosen": -175.28707885742188, + "logps/rejected": -432.33782958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.919239044189453, + "rewards/margins": 11.731597900390625, + "rewards/rejected": -19.650836944580078, + "step": 15218 + }, + { + "epoch": 2.37, + "learning_rate": 2.985102961771876e-06, + "logits/chosen": -1.6543879508972168, + "logits/rejected": -2.71854829788208, + "logps/chosen": -248.451416015625, + "logps/rejected": -640.5965576171875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.261884689331055, + "rewards/margins": 8.953094482421875, + "rewards/rejected": -20.214977264404297, + "step": 15219 + }, + { + "epoch": 2.37, + "learning_rate": 2.984369521240728e-06, + "logits/chosen": -2.7600269317626953, + "logits/rejected": -2.88615345954895, + "logps/chosen": -250.38546752929688, + "logps/rejected": -482.2513122558594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.506811141967773, + "rewards/margins": 13.605472564697266, + "rewards/rejected": -19.11228370666504, + "step": 15220 + }, + { + "epoch": 2.37, + "learning_rate": 2.9836360807095804e-06, + "logits/chosen": -2.8251287937164307, + "logits/rejected": -2.6966538429260254, + "logps/chosen": -314.8102722167969, + "logps/rejected": -228.68698120117188, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.747579097747803, + "rewards/margins": 7.160582542419434, + "rewards/rejected": -14.908161163330078, + "step": 15221 + }, + { + "epoch": 2.37, + "learning_rate": 2.9829026401784327e-06, + "logits/chosen": -2.251115083694458, + "logits/rejected": -2.715142250061035, + "logps/chosen": -290.9393005371094, + "logps/rejected": -499.3541564941406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.281363487243652, + "rewards/margins": 12.920942306518555, + "rewards/rejected": -20.20230484008789, + "step": 15222 + }, + { + "epoch": 2.37, + "learning_rate": 2.982169199647285e-06, + "logits/chosen": -2.0521864891052246, + "logits/rejected": -2.3487792015075684, + "logps/chosen": -321.43865966796875, + "logps/rejected": -634.8856201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.01093053817749, + "rewards/margins": 13.553251266479492, + "rewards/rejected": -20.56418228149414, + "step": 15223 + }, + { + "epoch": 2.37, + "learning_rate": 2.981435759116137e-06, + "logits/chosen": -1.8638651371002197, + "logits/rejected": -2.53920316696167, + "logps/chosen": -165.36245727539062, + "logps/rejected": -486.59442138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.941506385803223, + "rewards/margins": 12.873703956604004, + "rewards/rejected": -22.815210342407227, + "step": 15224 + }, + { + "epoch": 2.37, + "learning_rate": 2.9807023185849887e-06, + "logits/chosen": -2.5389981269836426, + "logits/rejected": -2.8098549842834473, + "logps/chosen": -114.64332580566406, + "logps/rejected": -292.85308837890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.556877136230469, + "rewards/margins": 9.784585952758789, + "rewards/rejected": -19.341463088989258, + "step": 15225 + }, + { + "epoch": 2.37, + "learning_rate": 2.979968878053841e-06, + "logits/chosen": -2.212693452835083, + "logits/rejected": -2.90938138961792, + "logps/chosen": -220.1430206298828, + "logps/rejected": -500.94879150390625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.006867408752441, + "rewards/margins": 8.098318099975586, + "rewards/rejected": -18.105186462402344, + "step": 15226 + }, + { + "epoch": 2.37, + "learning_rate": 2.979235437522693e-06, + "logits/chosen": -2.749781370162964, + "logits/rejected": -2.6350479125976562, + "logps/chosen": -362.15631103515625, + "logps/rejected": -523.97802734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.615724563598633, + "rewards/margins": 13.244787216186523, + "rewards/rejected": -22.860511779785156, + "step": 15227 + }, + { + "epoch": 2.37, + "learning_rate": 2.9785019969915452e-06, + "logits/chosen": -2.7776477336883545, + "logits/rejected": -1.9451768398284912, + "logps/chosen": -753.2996826171875, + "logps/rejected": -771.2154541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.413978576660156, + "rewards/margins": 15.848688125610352, + "rewards/rejected": -29.262666702270508, + "step": 15228 + }, + { + "epoch": 2.37, + "learning_rate": 2.977768556460397e-06, + "logits/chosen": -2.000953435897827, + "logits/rejected": -2.7757604122161865, + "logps/chosen": -290.7080078125, + "logps/rejected": -549.1443481445312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.287665367126465, + "rewards/margins": 13.455174446105957, + "rewards/rejected": -19.742839813232422, + "step": 15229 + }, + { + "epoch": 2.37, + "learning_rate": 2.9770351159292494e-06, + "logits/chosen": -2.6047117710113525, + "logits/rejected": -2.7656753063201904, + "logps/chosen": -1294.4114990234375, + "logps/rejected": -809.222412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.785358428955078, + "rewards/margins": 10.695001602172852, + "rewards/rejected": -21.480358123779297, + "step": 15230 + }, + { + "epoch": 2.37, + "learning_rate": 2.9763016753981017e-06, + "logits/chosen": -2.9095115661621094, + "logits/rejected": -2.6851186752319336, + "logps/chosen": -285.0449523925781, + "logps/rejected": -427.89385986328125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.12889575958252, + "rewards/margins": 7.5862274169921875, + "rewards/rejected": -18.71512222290039, + "step": 15231 + }, + { + "epoch": 2.37, + "learning_rate": 2.975568234866954e-06, + "logits/chosen": -2.4014790058135986, + "logits/rejected": -1.9485623836517334, + "logps/chosen": -301.199951171875, + "logps/rejected": -252.9709014892578, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.927780151367188, + "rewards/margins": 7.001184463500977, + "rewards/rejected": -16.928964614868164, + "step": 15232 + }, + { + "epoch": 2.37, + "learning_rate": 2.974834794335806e-06, + "logits/chosen": -2.4145994186401367, + "logits/rejected": -2.8682897090911865, + "logps/chosen": -1072.966064453125, + "logps/rejected": -1079.510986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.322027206420898, + "rewards/margins": 14.424614906311035, + "rewards/rejected": -24.74664306640625, + "step": 15233 + }, + { + "epoch": 2.37, + "learning_rate": 2.974101353804658e-06, + "logits/chosen": -1.8846834897994995, + "logits/rejected": -2.666948080062866, + "logps/chosen": -497.8630676269531, + "logps/rejected": -582.9066772460938, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.035945892333984, + "rewards/margins": 9.703268051147461, + "rewards/rejected": -15.739213943481445, + "step": 15234 + }, + { + "epoch": 2.37, + "learning_rate": 2.97336791327351e-06, + "logits/chosen": -2.666942834854126, + "logits/rejected": -0.726324737071991, + "logps/chosen": -378.4344177246094, + "logps/rejected": -290.99072265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.46681022644043, + "rewards/margins": 8.766813278198242, + "rewards/rejected": -19.233623504638672, + "step": 15235 + }, + { + "epoch": 2.37, + "learning_rate": 2.972634472742362e-06, + "logits/chosen": -2.590569257736206, + "logits/rejected": -2.946270227432251, + "logps/chosen": -380.3152770996094, + "logps/rejected": -502.75830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.634920120239258, + "rewards/margins": 10.163021087646484, + "rewards/rejected": -18.797941207885742, + "step": 15236 + }, + { + "epoch": 2.37, + "learning_rate": 2.9719010322112142e-06, + "logits/chosen": -2.2194483280181885, + "logits/rejected": -2.7115252017974854, + "logps/chosen": -309.2896728515625, + "logps/rejected": -489.0787048339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.797983646392822, + "rewards/margins": 11.299866676330566, + "rewards/rejected": -18.097850799560547, + "step": 15237 + }, + { + "epoch": 2.37, + "learning_rate": 2.971167591680066e-06, + "logits/chosen": -2.790992259979248, + "logits/rejected": -1.8471969366073608, + "logps/chosen": -281.0435791015625, + "logps/rejected": -353.82086181640625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.526180267333984, + "rewards/margins": 8.85062026977539, + "rewards/rejected": -18.376800537109375, + "step": 15238 + }, + { + "epoch": 2.37, + "learning_rate": 2.9704341511489184e-06, + "logits/chosen": -2.7896621227264404, + "logits/rejected": -1.810608148574829, + "logps/chosen": -204.21434020996094, + "logps/rejected": -318.24774169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.121337890625, + "rewards/margins": 13.405769348144531, + "rewards/rejected": -21.52710723876953, + "step": 15239 + }, + { + "epoch": 2.37, + "learning_rate": 2.9697007106177707e-06, + "logits/chosen": -0.6744115352630615, + "logits/rejected": -2.0382065773010254, + "logps/chosen": -374.9189453125, + "logps/rejected": -754.5289306640625, + "loss": 0.124, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.443134307861328, + "rewards/margins": 13.98132610321045, + "rewards/rejected": -26.424461364746094, + "step": 15240 + }, + { + "epoch": 2.37, + "learning_rate": 2.968967270086623e-06, + "logits/chosen": -2.6643779277801514, + "logits/rejected": -1.295056700706482, + "logps/chosen": -659.2020263671875, + "logps/rejected": -489.9697265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.824244499206543, + "rewards/margins": 15.10849380493164, + "rewards/rejected": -18.9327392578125, + "step": 15241 + }, + { + "epoch": 2.37, + "learning_rate": 2.968233829555475e-06, + "logits/chosen": -2.5973639488220215, + "logits/rejected": -1.4991120100021362, + "logps/chosen": -613.2733154296875, + "logps/rejected": -389.9299011230469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.954266548156738, + "rewards/margins": 13.455506324768066, + "rewards/rejected": -20.409772872924805, + "step": 15242 + }, + { + "epoch": 2.37, + "learning_rate": 2.967500389024327e-06, + "logits/chosen": -2.7928051948547363, + "logits/rejected": -2.3461456298828125, + "logps/chosen": -596.2593383789062, + "logps/rejected": -621.4002685546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.735422134399414, + "rewards/margins": 9.523151397705078, + "rewards/rejected": -25.258575439453125, + "step": 15243 + }, + { + "epoch": 2.37, + "learning_rate": 2.966766948493179e-06, + "logits/chosen": -1.5719279050827026, + "logits/rejected": -2.836483955383301, + "logps/chosen": -147.53565979003906, + "logps/rejected": -529.1041259765625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.214069366455078, + "rewards/margins": 11.052907943725586, + "rewards/rejected": -20.26697540283203, + "step": 15244 + }, + { + "epoch": 2.37, + "learning_rate": 2.966033507962031e-06, + "logits/chosen": -2.412022829055786, + "logits/rejected": -2.452746868133545, + "logps/chosen": -325.3438720703125, + "logps/rejected": -481.724365234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.218923568725586, + "rewards/margins": 8.687091827392578, + "rewards/rejected": -20.906015396118164, + "step": 15245 + }, + { + "epoch": 2.37, + "learning_rate": 2.9653000674308833e-06, + "logits/chosen": -2.5263736248016357, + "logits/rejected": -2.858950614929199, + "logps/chosen": -195.13633728027344, + "logps/rejected": -450.9886474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.675935745239258, + "rewards/margins": 11.754396438598633, + "rewards/rejected": -21.43033218383789, + "step": 15246 + }, + { + "epoch": 2.37, + "learning_rate": 2.964566626899735e-06, + "logits/chosen": -3.0676422119140625, + "logits/rejected": -2.99149751663208, + "logps/chosen": -114.67684936523438, + "logps/rejected": -188.36080932617188, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.121755599975586, + "rewards/margins": 6.1109395027160645, + "rewards/rejected": -14.232694625854492, + "step": 15247 + }, + { + "epoch": 2.37, + "learning_rate": 2.963833186368588e-06, + "logits/chosen": -1.7879769802093506, + "logits/rejected": -2.388356924057007, + "logps/chosen": -191.1366424560547, + "logps/rejected": -367.863525390625, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.003527641296387, + "rewards/margins": 6.6410417556762695, + "rewards/rejected": -19.644569396972656, + "step": 15248 + }, + { + "epoch": 2.37, + "learning_rate": 2.9630997458374397e-06, + "logits/chosen": -2.761507272720337, + "logits/rejected": -2.774480104446411, + "logps/chosen": -145.5606231689453, + "logps/rejected": -322.10064697265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.325006484985352, + "rewards/margins": 8.577095031738281, + "rewards/rejected": -17.902099609375, + "step": 15249 + }, + { + "epoch": 2.37, + "learning_rate": 2.962366305306292e-06, + "logits/chosen": -2.389151096343994, + "logits/rejected": -2.5166878700256348, + "logps/chosen": -463.58770751953125, + "logps/rejected": -518.1859130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.99772834777832, + "rewards/margins": 10.11888313293457, + "rewards/rejected": -22.11661148071289, + "step": 15250 + }, + { + "epoch": 2.37, + "learning_rate": 2.961632864775144e-06, + "logits/chosen": -1.5580886602401733, + "logits/rejected": -2.9499528408050537, + "logps/chosen": -219.1112518310547, + "logps/rejected": -554.9848022460938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.936952590942383, + "rewards/margins": 13.211870193481445, + "rewards/rejected": -22.148822784423828, + "step": 15251 + }, + { + "epoch": 2.37, + "learning_rate": 2.9608994242439962e-06, + "logits/chosen": -1.156585931777954, + "logits/rejected": -2.587150812149048, + "logps/chosen": -306.85943603515625, + "logps/rejected": -571.765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.467281341552734, + "rewards/margins": 11.391342163085938, + "rewards/rejected": -20.858623504638672, + "step": 15252 + }, + { + "epoch": 2.37, + "learning_rate": 2.960165983712848e-06, + "logits/chosen": -1.839565634727478, + "logits/rejected": -2.6471469402313232, + "logps/chosen": -89.79731750488281, + "logps/rejected": -412.3639831542969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.658825874328613, + "rewards/margins": 10.808712005615234, + "rewards/rejected": -18.46753692626953, + "step": 15253 + }, + { + "epoch": 2.37, + "learning_rate": 2.9594325431817004e-06, + "logits/chosen": -2.6865365505218506, + "logits/rejected": -1.9158878326416016, + "logps/chosen": -298.6815185546875, + "logps/rejected": -246.34091186523438, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.637879371643066, + "rewards/margins": 4.870792388916016, + "rewards/rejected": -14.508671760559082, + "step": 15254 + }, + { + "epoch": 2.37, + "learning_rate": 2.9586991026505523e-06, + "logits/chosen": -2.766969680786133, + "logits/rejected": -2.428386926651001, + "logps/chosen": -257.7467346191406, + "logps/rejected": -483.6947021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.527231216430664, + "rewards/margins": 14.761468887329102, + "rewards/rejected": -24.288700103759766, + "step": 15255 + }, + { + "epoch": 2.37, + "learning_rate": 2.9579656621194046e-06, + "logits/chosen": -1.077547311782837, + "logits/rejected": -2.4319846630096436, + "logps/chosen": -227.2506561279297, + "logps/rejected": -624.97265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.109809875488281, + "rewards/margins": 13.63239860534668, + "rewards/rejected": -24.74220848083496, + "step": 15256 + }, + { + "epoch": 2.37, + "learning_rate": 2.957232221588257e-06, + "logits/chosen": -3.063462495803833, + "logits/rejected": -3.0467684268951416, + "logps/chosen": -484.8717956542969, + "logps/rejected": -590.0098266601562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.391592025756836, + "rewards/margins": 13.423429489135742, + "rewards/rejected": -20.815021514892578, + "step": 15257 + }, + { + "epoch": 2.37, + "learning_rate": 2.9564987810571088e-06, + "logits/chosen": -1.8543092012405396, + "logits/rejected": -2.5825376510620117, + "logps/chosen": -485.69146728515625, + "logps/rejected": -727.3104248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.47287654876709, + "rewards/margins": 12.632609367370605, + "rewards/rejected": -22.105485916137695, + "step": 15258 + }, + { + "epoch": 2.37, + "learning_rate": 2.955765340525961e-06, + "logits/chosen": -2.4119670391082764, + "logits/rejected": -2.6407971382141113, + "logps/chosen": -265.59930419921875, + "logps/rejected": -473.44720458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.90755558013916, + "rewards/margins": 16.227346420288086, + "rewards/rejected": -23.134902954101562, + "step": 15259 + }, + { + "epoch": 2.37, + "learning_rate": 2.955031899994813e-06, + "logits/chosen": -2.454160213470459, + "logits/rejected": -2.9031286239624023, + "logps/chosen": -530.076171875, + "logps/rejected": -663.197998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.571451187133789, + "rewards/margins": 10.711792945861816, + "rewards/rejected": -18.283245086669922, + "step": 15260 + }, + { + "epoch": 2.37, + "learning_rate": 2.9542984594636653e-06, + "logits/chosen": -1.571890950202942, + "logits/rejected": -1.6579604148864746, + "logps/chosen": -360.84210205078125, + "logps/rejected": -367.9083251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.673833847045898, + "rewards/margins": 13.329137802124023, + "rewards/rejected": -21.002971649169922, + "step": 15261 + }, + { + "epoch": 2.37, + "learning_rate": 2.953565018932517e-06, + "logits/chosen": -0.958594799041748, + "logits/rejected": -2.737334966659546, + "logps/chosen": -123.3073501586914, + "logps/rejected": -593.0862426757812, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.48359489440918, + "rewards/margins": 7.909782886505127, + "rewards/rejected": -18.39337730407715, + "step": 15262 + }, + { + "epoch": 2.37, + "learning_rate": 2.9528315784013694e-06, + "logits/chosen": -2.6716628074645996, + "logits/rejected": -2.4595329761505127, + "logps/chosen": -447.14898681640625, + "logps/rejected": -453.14892578125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.772600173950195, + "rewards/margins": 9.723240852355957, + "rewards/rejected": -21.495840072631836, + "step": 15263 + }, + { + "epoch": 2.37, + "learning_rate": 2.9520981378702213e-06, + "logits/chosen": -2.5732879638671875, + "logits/rejected": -2.6576290130615234, + "logps/chosen": -197.20330810546875, + "logps/rejected": -438.6415100097656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.483503341674805, + "rewards/margins": 12.197052001953125, + "rewards/rejected": -22.680553436279297, + "step": 15264 + }, + { + "epoch": 2.37, + "learning_rate": 2.9513646973390736e-06, + "logits/chosen": -2.4642531871795654, + "logits/rejected": -2.971400260925293, + "logps/chosen": -211.46881103515625, + "logps/rejected": -406.9095153808594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.794332504272461, + "rewards/margins": 12.77686595916748, + "rewards/rejected": -23.571197509765625, + "step": 15265 + }, + { + "epoch": 2.37, + "learning_rate": 2.950631256807926e-06, + "logits/chosen": -1.79392671585083, + "logits/rejected": -2.3733105659484863, + "logps/chosen": -204.7100372314453, + "logps/rejected": -345.7734680175781, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.424863815307617, + "rewards/margins": 7.065250873565674, + "rewards/rejected": -18.490116119384766, + "step": 15266 + }, + { + "epoch": 2.37, + "learning_rate": 2.949897816276778e-06, + "logits/chosen": -2.046680212020874, + "logits/rejected": -2.595566987991333, + "logps/chosen": -100.14981079101562, + "logps/rejected": -326.705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.234417915344238, + "rewards/margins": 10.449663162231445, + "rewards/rejected": -18.68408203125, + "step": 15267 + }, + { + "epoch": 2.37, + "learning_rate": 2.94916437574563e-06, + "logits/chosen": -2.5433883666992188, + "logits/rejected": -2.0489370822906494, + "logps/chosen": -416.40155029296875, + "logps/rejected": -376.741455078125, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.48259162902832, + "rewards/margins": 8.336517333984375, + "rewards/rejected": -17.819108963012695, + "step": 15268 + }, + { + "epoch": 2.37, + "learning_rate": 2.948430935214482e-06, + "logits/chosen": -1.8278028964996338, + "logits/rejected": -2.6809616088867188, + "logps/chosen": -130.11541748046875, + "logps/rejected": -330.49713134765625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.40150260925293, + "rewards/margins": 10.73155403137207, + "rewards/rejected": -20.133056640625, + "step": 15269 + }, + { + "epoch": 2.37, + "learning_rate": 2.9476974946833343e-06, + "logits/chosen": -2.4808297157287598, + "logits/rejected": -2.557950973510742, + "logps/chosen": -396.9266357421875, + "logps/rejected": -367.1002502441406, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.155851364135742, + "rewards/margins": 4.70199728012085, + "rewards/rejected": -14.85784912109375, + "step": 15270 + }, + { + "epoch": 2.37, + "learning_rate": 2.946964054152186e-06, + "logits/chosen": -2.8049850463867188, + "logits/rejected": -2.7676122188568115, + "logps/chosen": -148.740234375, + "logps/rejected": -418.3842468261719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.346017837524414, + "rewards/margins": 13.751660346984863, + "rewards/rejected": -23.097679138183594, + "step": 15271 + }, + { + "epoch": 2.38, + "learning_rate": 2.9462306136210385e-06, + "logits/chosen": -2.640868902206421, + "logits/rejected": -2.777923107147217, + "logps/chosen": -609.7384643554688, + "logps/rejected": -593.2716674804688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.687047004699707, + "rewards/margins": 13.582070350646973, + "rewards/rejected": -21.26911735534668, + "step": 15272 + }, + { + "epoch": 2.38, + "learning_rate": 2.9454971730898908e-06, + "logits/chosen": -2.688148260116577, + "logits/rejected": -2.8432741165161133, + "logps/chosen": -220.02374267578125, + "logps/rejected": -299.5315856933594, + "loss": 0.1125, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.752714157104492, + "rewards/margins": 4.824790954589844, + "rewards/rejected": -15.577505111694336, + "step": 15273 + }, + { + "epoch": 2.38, + "learning_rate": 2.9447637325587426e-06, + "logits/chosen": -1.6932082176208496, + "logits/rejected": -2.741534471511841, + "logps/chosen": -496.6861572265625, + "logps/rejected": -701.015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.17989730834961, + "rewards/margins": 10.402029037475586, + "rewards/rejected": -19.581924438476562, + "step": 15274 + }, + { + "epoch": 2.38, + "learning_rate": 2.944030292027595e-06, + "logits/chosen": -2.2163147926330566, + "logits/rejected": -2.554623603820801, + "logps/chosen": -805.226318359375, + "logps/rejected": -832.57421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.4736909866333, + "rewards/margins": 14.736638069152832, + "rewards/rejected": -24.210329055786133, + "step": 15275 + }, + { + "epoch": 2.38, + "learning_rate": 2.943296851496447e-06, + "logits/chosen": -2.8630690574645996, + "logits/rejected": -2.917189836502075, + "logps/chosen": -164.01144409179688, + "logps/rejected": -230.47463989257812, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.100923538208008, + "rewards/margins": 6.240440368652344, + "rewards/rejected": -18.34136390686035, + "step": 15276 + }, + { + "epoch": 2.38, + "learning_rate": 2.942563410965299e-06, + "logits/chosen": -1.9603341817855835, + "logits/rejected": -2.7875468730926514, + "logps/chosen": -179.79483032226562, + "logps/rejected": -423.28021240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.069154739379883, + "rewards/margins": 11.676596641540527, + "rewards/rejected": -20.745750427246094, + "step": 15277 + }, + { + "epoch": 2.38, + "learning_rate": 2.941829970434151e-06, + "logits/chosen": -2.365164041519165, + "logits/rejected": -1.8790439367294312, + "logps/chosen": -314.9574279785156, + "logps/rejected": -399.56634521484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.107046127319336, + "rewards/margins": 9.53937816619873, + "rewards/rejected": -19.64642333984375, + "step": 15278 + }, + { + "epoch": 2.38, + "learning_rate": 2.9410965299030033e-06, + "logits/chosen": -2.0992441177368164, + "logits/rejected": -2.7186098098754883, + "logps/chosen": -237.62982177734375, + "logps/rejected": -602.5419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.863895416259766, + "rewards/margins": 12.966379165649414, + "rewards/rejected": -22.830276489257812, + "step": 15279 + }, + { + "epoch": 2.38, + "learning_rate": 2.940363089371855e-06, + "logits/chosen": -2.4854519367218018, + "logits/rejected": -2.4767215251922607, + "logps/chosen": -420.826416015625, + "logps/rejected": -304.8001403808594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.000537872314453, + "rewards/margins": 8.468084335327148, + "rewards/rejected": -16.4686222076416, + "step": 15280 + }, + { + "epoch": 2.38, + "learning_rate": 2.9396296488407075e-06, + "logits/chosen": -2.7630746364593506, + "logits/rejected": -1.6848747730255127, + "logps/chosen": -904.3046875, + "logps/rejected": -844.5614624023438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.262472152709961, + "rewards/margins": 17.984745025634766, + "rewards/rejected": -29.247215270996094, + "step": 15281 + }, + { + "epoch": 2.38, + "learning_rate": 2.9388962083095598e-06, + "logits/chosen": -2.811048984527588, + "logits/rejected": -2.8471732139587402, + "logps/chosen": -153.349365234375, + "logps/rejected": -308.8130187988281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.039777755737305, + "rewards/margins": 10.587573051452637, + "rewards/rejected": -17.627351760864258, + "step": 15282 + }, + { + "epoch": 2.38, + "learning_rate": 2.938162767778412e-06, + "logits/chosen": -2.7604458332061768, + "logits/rejected": -2.4740195274353027, + "logps/chosen": -383.1649169921875, + "logps/rejected": -467.26910400390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.406641006469727, + "rewards/margins": 8.884491920471191, + "rewards/rejected": -17.2911319732666, + "step": 15283 + }, + { + "epoch": 2.38, + "learning_rate": 2.937429327247264e-06, + "logits/chosen": -1.9319109916687012, + "logits/rejected": -2.3914594650268555, + "logps/chosen": -275.61297607421875, + "logps/rejected": -460.92724609375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.124058723449707, + "rewards/margins": 12.405855178833008, + "rewards/rejected": -20.52991485595703, + "step": 15284 + }, + { + "epoch": 2.38, + "learning_rate": 2.936695886716116e-06, + "logits/chosen": -2.672696590423584, + "logits/rejected": -1.9646496772766113, + "logps/chosen": -534.420166015625, + "logps/rejected": -445.6835021972656, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.859811782836914, + "rewards/margins": 8.06899642944336, + "rewards/rejected": -20.928810119628906, + "step": 15285 + }, + { + "epoch": 2.38, + "learning_rate": 2.935962446184968e-06, + "logits/chosen": -1.8487393856048584, + "logits/rejected": -1.7135024070739746, + "logps/chosen": -452.33843994140625, + "logps/rejected": -495.59552001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.678033828735352, + "rewards/margins": 13.386805534362793, + "rewards/rejected": -24.064838409423828, + "step": 15286 + }, + { + "epoch": 2.38, + "learning_rate": 2.93522900565382e-06, + "logits/chosen": -2.819870710372925, + "logits/rejected": -2.509032726287842, + "logps/chosen": -158.5780487060547, + "logps/rejected": -243.77239990234375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.682682037353516, + "rewards/margins": 7.375729560852051, + "rewards/rejected": -19.05841064453125, + "step": 15287 + }, + { + "epoch": 2.38, + "learning_rate": 2.9344955651226723e-06, + "logits/chosen": -1.839568018913269, + "logits/rejected": -2.6157710552215576, + "logps/chosen": -236.38677978515625, + "logps/rejected": -444.56195068359375, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.143450736999512, + "rewards/margins": 7.851786136627197, + "rewards/rejected": -18.995237350463867, + "step": 15288 + }, + { + "epoch": 2.38, + "learning_rate": 2.933762124591524e-06, + "logits/chosen": -0.8798742294311523, + "logits/rejected": -1.211572527885437, + "logps/chosen": -226.02655029296875, + "logps/rejected": -452.1310119628906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.533491134643555, + "rewards/margins": 11.791051864624023, + "rewards/rejected": -17.324542999267578, + "step": 15289 + }, + { + "epoch": 2.38, + "learning_rate": 2.933028684060377e-06, + "logits/chosen": -2.7304039001464844, + "logits/rejected": -2.0993423461914062, + "logps/chosen": -217.60260009765625, + "logps/rejected": -186.68203735351562, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.108930587768555, + "rewards/margins": 4.417669296264648, + "rewards/rejected": -16.526599884033203, + "step": 15290 + }, + { + "epoch": 2.38, + "learning_rate": 2.932295243529229e-06, + "logits/chosen": -1.4940953254699707, + "logits/rejected": -2.425570487976074, + "logps/chosen": -170.90554809570312, + "logps/rejected": -320.14422607421875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.820931434631348, + "rewards/margins": 8.172294616699219, + "rewards/rejected": -16.993227005004883, + "step": 15291 + }, + { + "epoch": 2.38, + "learning_rate": 2.931561802998081e-06, + "logits/chosen": -2.68821382522583, + "logits/rejected": -1.6589454412460327, + "logps/chosen": -325.1933898925781, + "logps/rejected": -369.87359619140625, + "loss": 0.0761, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.52366828918457, + "rewards/margins": 9.599061965942383, + "rewards/rejected": -18.122730255126953, + "step": 15292 + }, + { + "epoch": 2.38, + "learning_rate": 2.930828362466933e-06, + "logits/chosen": -2.8420822620391846, + "logits/rejected": -3.026474714279175, + "logps/chosen": -157.95693969726562, + "logps/rejected": -452.51885986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.628104209899902, + "rewards/margins": 14.55579948425293, + "rewards/rejected": -23.183902740478516, + "step": 15293 + }, + { + "epoch": 2.38, + "learning_rate": 2.930094921935785e-06, + "logits/chosen": -2.624117612838745, + "logits/rejected": -2.7820796966552734, + "logps/chosen": -156.65109252929688, + "logps/rejected": -282.8867492675781, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.025918960571289, + "rewards/margins": 7.433319091796875, + "rewards/rejected": -17.459238052368164, + "step": 15294 + }, + { + "epoch": 2.38, + "learning_rate": 2.929361481404637e-06, + "logits/chosen": -2.8825137615203857, + "logits/rejected": -2.7815804481506348, + "logps/chosen": -654.9502563476562, + "logps/rejected": -300.8531494140625, + "loss": 0.1126, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.888029098510742, + "rewards/margins": 3.9044103622436523, + "rewards/rejected": -15.792438507080078, + "step": 15295 + }, + { + "epoch": 2.38, + "learning_rate": 2.928628040873489e-06, + "logits/chosen": -1.666334629058838, + "logits/rejected": -2.2731425762176514, + "logps/chosen": -488.9114990234375, + "logps/rejected": -469.4168701171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.941594123840332, + "rewards/margins": 13.021125793457031, + "rewards/rejected": -20.962718963623047, + "step": 15296 + }, + { + "epoch": 2.38, + "learning_rate": 2.9278946003423413e-06, + "logits/chosen": -1.2549936771392822, + "logits/rejected": -2.3643805980682373, + "logps/chosen": -195.25531005859375, + "logps/rejected": -467.06097412109375, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.256216049194336, + "rewards/margins": 6.976577281951904, + "rewards/rejected": -19.232791900634766, + "step": 15297 + }, + { + "epoch": 2.38, + "learning_rate": 2.9271611598111936e-06, + "logits/chosen": -1.7854676246643066, + "logits/rejected": -2.485880136489868, + "logps/chosen": -154.0373077392578, + "logps/rejected": -488.890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.957231521606445, + "rewards/margins": 14.175226211547852, + "rewards/rejected": -25.132457733154297, + "step": 15298 + }, + { + "epoch": 2.38, + "learning_rate": 2.926427719280046e-06, + "logits/chosen": -2.000375747680664, + "logits/rejected": -2.483823537826538, + "logps/chosen": -297.87933349609375, + "logps/rejected": -380.0677795410156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.177152633666992, + "rewards/margins": 9.03654670715332, + "rewards/rejected": -21.213699340820312, + "step": 15299 + }, + { + "epoch": 2.38, + "learning_rate": 2.925694278748898e-06, + "logits/chosen": -2.6435186862945557, + "logits/rejected": -2.857938766479492, + "logps/chosen": -288.4136657714844, + "logps/rejected": -613.9620361328125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.39299488067627, + "rewards/margins": 11.72291088104248, + "rewards/rejected": -20.11590576171875, + "step": 15300 + }, + { + "epoch": 2.38, + "learning_rate": 2.92496083821775e-06, + "logits/chosen": -3.0931384563446045, + "logits/rejected": -3.0276923179626465, + "logps/chosen": -256.8345642089844, + "logps/rejected": -214.62611389160156, + "loss": 0.3872, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.690546035766602, + "rewards/margins": 3.02988600730896, + "rewards/rejected": -9.72043228149414, + "step": 15301 + }, + { + "epoch": 2.38, + "learning_rate": 2.924227397686602e-06, + "logits/chosen": -2.611189603805542, + "logits/rejected": -1.9954825639724731, + "logps/chosen": -170.0367431640625, + "logps/rejected": -245.80027770996094, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.047059059143066, + "rewards/margins": 8.652563095092773, + "rewards/rejected": -16.699623107910156, + "step": 15302 + }, + { + "epoch": 2.38, + "learning_rate": 2.9234939571554543e-06, + "logits/chosen": -2.059979200363159, + "logits/rejected": -2.1343905925750732, + "logps/chosen": -334.5283203125, + "logps/rejected": -550.1466674804688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.86889362335205, + "rewards/margins": 13.106218338012695, + "rewards/rejected": -22.975112915039062, + "step": 15303 + }, + { + "epoch": 2.38, + "learning_rate": 2.922760516624306e-06, + "logits/chosen": -2.723545789718628, + "logits/rejected": -1.327510118484497, + "logps/chosen": -290.88079833984375, + "logps/rejected": -455.2982177734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.20040225982666, + "rewards/margins": 9.68943977355957, + "rewards/rejected": -17.889842987060547, + "step": 15304 + }, + { + "epoch": 2.38, + "learning_rate": 2.922027076093158e-06, + "logits/chosen": -2.4187490940093994, + "logits/rejected": -2.634178400039673, + "logps/chosen": -93.43797302246094, + "logps/rejected": -259.5716552734375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.466334819793701, + "rewards/margins": 10.16635799407959, + "rewards/rejected": -16.632692337036133, + "step": 15305 + }, + { + "epoch": 2.38, + "learning_rate": 2.9212936355620104e-06, + "logits/chosen": -2.3405938148498535, + "logits/rejected": -2.6194751262664795, + "logps/chosen": -174.73948669433594, + "logps/rejected": -435.1762390136719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.791171073913574, + "rewards/margins": 11.178079605102539, + "rewards/rejected": -19.969249725341797, + "step": 15306 + }, + { + "epoch": 2.38, + "learning_rate": 2.9205601950308627e-06, + "logits/chosen": -2.3648741245269775, + "logits/rejected": -2.632728338241577, + "logps/chosen": -192.76559448242188, + "logps/rejected": -313.6041564941406, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.686786651611328, + "rewards/margins": 7.570868968963623, + "rewards/rejected": -19.25765609741211, + "step": 15307 + }, + { + "epoch": 2.38, + "learning_rate": 2.919826754499715e-06, + "logits/chosen": -2.7825980186462402, + "logits/rejected": -1.5952403545379639, + "logps/chosen": -448.28216552734375, + "logps/rejected": -324.4098815917969, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.072230339050293, + "rewards/margins": 5.833337783813477, + "rewards/rejected": -13.90556812286377, + "step": 15308 + }, + { + "epoch": 2.38, + "learning_rate": 2.919093313968567e-06, + "logits/chosen": -1.6225359439849854, + "logits/rejected": -2.6641042232513428, + "logps/chosen": -321.3382263183594, + "logps/rejected": -378.0861511230469, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.657800674438477, + "rewards/margins": 8.057249069213867, + "rewards/rejected": -19.715049743652344, + "step": 15309 + }, + { + "epoch": 2.38, + "learning_rate": 2.918359873437419e-06, + "logits/chosen": -1.869043231010437, + "logits/rejected": -2.8242387771606445, + "logps/chosen": -284.87109375, + "logps/rejected": -466.4978942871094, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.781065940856934, + "rewards/margins": 8.249465942382812, + "rewards/rejected": -19.030532836914062, + "step": 15310 + }, + { + "epoch": 2.38, + "learning_rate": 2.917626432906271e-06, + "logits/chosen": -2.6792449951171875, + "logits/rejected": -2.896873712539673, + "logps/chosen": -265.4285888671875, + "logps/rejected": -486.001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.318565368652344, + "rewards/margins": 14.103742599487305, + "rewards/rejected": -27.422306060791016, + "step": 15311 + }, + { + "epoch": 2.38, + "learning_rate": 2.9168929923751233e-06, + "logits/chosen": -2.8557698726654053, + "logits/rejected": -2.3468260765075684, + "logps/chosen": -664.2755737304688, + "logps/rejected": -556.1115112304688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.632107734680176, + "rewards/margins": 12.420570373535156, + "rewards/rejected": -16.05267906188965, + "step": 15312 + }, + { + "epoch": 2.38, + "learning_rate": 2.916159551843975e-06, + "logits/chosen": -1.18813157081604, + "logits/rejected": -2.2726659774780273, + "logps/chosen": -232.61041259765625, + "logps/rejected": -539.2667846679688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.396657943725586, + "rewards/margins": 13.010490417480469, + "rewards/rejected": -22.407150268554688, + "step": 15313 + }, + { + "epoch": 2.38, + "learning_rate": 2.915426111312827e-06, + "logits/chosen": -1.6835594177246094, + "logits/rejected": -2.4706249237060547, + "logps/chosen": -267.84710693359375, + "logps/rejected": -514.7262573242188, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.159968376159668, + "rewards/margins": 12.733810424804688, + "rewards/rejected": -21.893779754638672, + "step": 15314 + }, + { + "epoch": 2.38, + "learning_rate": 2.91469267078168e-06, + "logits/chosen": -2.638232469558716, + "logits/rejected": -1.0929255485534668, + "logps/chosen": -270.21405029296875, + "logps/rejected": -404.5595703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.139409065246582, + "rewards/margins": 10.865209579467773, + "rewards/rejected": -19.004619598388672, + "step": 15315 + }, + { + "epoch": 2.38, + "learning_rate": 2.9139592302505317e-06, + "logits/chosen": -2.261216163635254, + "logits/rejected": -2.8063905239105225, + "logps/chosen": -185.75271606445312, + "logps/rejected": -418.9375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.46529483795166, + "rewards/margins": 8.653326034545898, + "rewards/rejected": -18.118621826171875, + "step": 15316 + }, + { + "epoch": 2.38, + "learning_rate": 2.913225789719384e-06, + "logits/chosen": -0.9949287176132202, + "logits/rejected": -2.3012449741363525, + "logps/chosen": -165.25540161132812, + "logps/rejected": -585.765380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.50700855255127, + "rewards/margins": 16.597150802612305, + "rewards/rejected": -25.10416030883789, + "step": 15317 + }, + { + "epoch": 2.38, + "learning_rate": 2.912492349188236e-06, + "logits/chosen": -2.6190125942230225, + "logits/rejected": -2.804224729537964, + "logps/chosen": -158.64053344726562, + "logps/rejected": -184.8336639404297, + "loss": 1.5047, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.143966674804688, + "rewards/margins": 5.536839008331299, + "rewards/rejected": -15.680805206298828, + "step": 15318 + }, + { + "epoch": 2.38, + "learning_rate": 2.911758908657088e-06, + "logits/chosen": -1.2681078910827637, + "logits/rejected": -2.421844959259033, + "logps/chosen": -158.43753051757812, + "logps/rejected": -504.9926452636719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.549677848815918, + "rewards/margins": 14.400609970092773, + "rewards/rejected": -23.950288772583008, + "step": 15319 + }, + { + "epoch": 2.38, + "learning_rate": 2.91102546812594e-06, + "logits/chosen": -2.683408498764038, + "logits/rejected": -2.9663467407226562, + "logps/chosen": -645.767333984375, + "logps/rejected": -712.568115234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.914410591125488, + "rewards/margins": 10.039785385131836, + "rewards/rejected": -22.95419692993164, + "step": 15320 + }, + { + "epoch": 2.38, + "learning_rate": 2.9102920275947923e-06, + "logits/chosen": -2.486720561981201, + "logits/rejected": -2.2414605617523193, + "logps/chosen": -493.01617431640625, + "logps/rejected": -764.8972778320312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.863243103027344, + "rewards/margins": 14.284242630004883, + "rewards/rejected": -27.147485733032227, + "step": 15321 + }, + { + "epoch": 2.38, + "learning_rate": 2.9095585870636442e-06, + "logits/chosen": -2.890961170196533, + "logits/rejected": -2.601811408996582, + "logps/chosen": -566.5155029296875, + "logps/rejected": -604.087158203125, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.770343780517578, + "rewards/margins": 5.654389381408691, + "rewards/rejected": -18.424732208251953, + "step": 15322 + }, + { + "epoch": 2.38, + "learning_rate": 2.9088251465324965e-06, + "logits/chosen": -2.565094232559204, + "logits/rejected": -2.1067123413085938, + "logps/chosen": -459.93853759765625, + "logps/rejected": -606.9794921875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.660972595214844, + "rewards/margins": 11.653850555419922, + "rewards/rejected": -23.314823150634766, + "step": 15323 + }, + { + "epoch": 2.38, + "learning_rate": 2.908091706001349e-06, + "logits/chosen": -2.963366985321045, + "logits/rejected": -2.803266763687134, + "logps/chosen": -234.8304443359375, + "logps/rejected": -343.722412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.962278366088867, + "rewards/margins": 12.9972562789917, + "rewards/rejected": -18.95953369140625, + "step": 15324 + }, + { + "epoch": 2.38, + "learning_rate": 2.9073582654702007e-06, + "logits/chosen": -2.854205369949341, + "logits/rejected": -2.104215383529663, + "logps/chosen": -271.85125732421875, + "logps/rejected": -364.1092834472656, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.16458511352539, + "rewards/margins": 7.398530006408691, + "rewards/rejected": -15.563115119934082, + "step": 15325 + }, + { + "epoch": 2.38, + "learning_rate": 2.906624824939053e-06, + "logits/chosen": -1.561407208442688, + "logits/rejected": -2.596967935562134, + "logps/chosen": -559.8584594726562, + "logps/rejected": -521.201171875, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.311442375183105, + "rewards/margins": 7.9397783279418945, + "rewards/rejected": -20.251220703125, + "step": 15326 + }, + { + "epoch": 2.38, + "learning_rate": 2.905891384407905e-06, + "logits/chosen": -2.5162689685821533, + "logits/rejected": -2.8237357139587402, + "logps/chosen": -237.3549346923828, + "logps/rejected": -352.79180908203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.894477844238281, + "rewards/margins": 8.364767074584961, + "rewards/rejected": -16.259244918823242, + "step": 15327 + }, + { + "epoch": 2.38, + "learning_rate": 2.905157943876757e-06, + "logits/chosen": -2.4793918132781982, + "logits/rejected": -2.8129818439483643, + "logps/chosen": -130.61636352539062, + "logps/rejected": -406.710205078125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.113279342651367, + "rewards/margins": 7.655744552612305, + "rewards/rejected": -17.769023895263672, + "step": 15328 + }, + { + "epoch": 2.38, + "learning_rate": 2.904424503345609e-06, + "logits/chosen": -2.0023155212402344, + "logits/rejected": -2.4869980812072754, + "logps/chosen": -387.7289123535156, + "logps/rejected": -526.422119140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.64645767211914, + "rewards/margins": 9.483261108398438, + "rewards/rejected": -19.129718780517578, + "step": 15329 + }, + { + "epoch": 2.38, + "learning_rate": 2.9036910628144614e-06, + "logits/chosen": -2.5170578956604004, + "logits/rejected": -2.9511566162109375, + "logps/chosen": -134.10342407226562, + "logps/rejected": -293.1474609375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.486122131347656, + "rewards/margins": 9.49278450012207, + "rewards/rejected": -15.978906631469727, + "step": 15330 + }, + { + "epoch": 2.38, + "learning_rate": 2.9029576222833132e-06, + "logits/chosen": -2.52565860748291, + "logits/rejected": -2.612995147705078, + "logps/chosen": -170.99349975585938, + "logps/rejected": -302.6039733886719, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.569867134094238, + "rewards/margins": 7.147892951965332, + "rewards/rejected": -17.71776008605957, + "step": 15331 + }, + { + "epoch": 2.38, + "learning_rate": 2.902224181752166e-06, + "logits/chosen": -2.420170307159424, + "logits/rejected": -1.844161033630371, + "logps/chosen": -294.8796691894531, + "logps/rejected": -242.13975524902344, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.977825164794922, + "rewards/margins": 8.988729476928711, + "rewards/rejected": -17.966552734375, + "step": 15332 + }, + { + "epoch": 2.38, + "learning_rate": 2.901490741221018e-06, + "logits/chosen": -2.8084280490875244, + "logits/rejected": -2.739999294281006, + "logps/chosen": -466.3006591796875, + "logps/rejected": -471.7798767089844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.03118896484375, + "rewards/margins": 10.516617774963379, + "rewards/rejected": -18.547805786132812, + "step": 15333 + }, + { + "epoch": 2.38, + "learning_rate": 2.9007573006898697e-06, + "logits/chosen": -2.438771963119507, + "logits/rejected": -2.5044162273406982, + "logps/chosen": -155.43051147460938, + "logps/rejected": -215.33535766601562, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.168962478637695, + "rewards/margins": 6.554542541503906, + "rewards/rejected": -13.723505020141602, + "step": 15334 + }, + { + "epoch": 2.38, + "learning_rate": 2.900023860158722e-06, + "logits/chosen": -2.850954055786133, + "logits/rejected": -2.9488718509674072, + "logps/chosen": -104.44392395019531, + "logps/rejected": -328.9775085449219, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.055477142333984, + "rewards/margins": 11.769441604614258, + "rewards/rejected": -20.824918746948242, + "step": 15335 + }, + { + "epoch": 2.39, + "learning_rate": 2.899290419627574e-06, + "logits/chosen": -2.7538464069366455, + "logits/rejected": -2.4562888145446777, + "logps/chosen": -678.2830200195312, + "logps/rejected": -585.8953247070312, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.223574638366699, + "rewards/margins": 7.411006927490234, + "rewards/rejected": -14.634581565856934, + "step": 15336 + }, + { + "epoch": 2.39, + "learning_rate": 2.8985569790964262e-06, + "logits/chosen": -2.7393791675567627, + "logits/rejected": -2.0336549282073975, + "logps/chosen": -365.9148254394531, + "logps/rejected": -388.6481628417969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.448460578918457, + "rewards/margins": 10.27017593383789, + "rewards/rejected": -17.71863555908203, + "step": 15337 + }, + { + "epoch": 2.39, + "learning_rate": 2.897823538565278e-06, + "logits/chosen": -1.8664554357528687, + "logits/rejected": -2.476813793182373, + "logps/chosen": -380.7379150390625, + "logps/rejected": -339.97900390625, + "loss": 0.1172, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.375299453735352, + "rewards/margins": 4.248325347900391, + "rewards/rejected": -16.623624801635742, + "step": 15338 + }, + { + "epoch": 2.39, + "learning_rate": 2.8970900980341304e-06, + "logits/chosen": -2.5095374584198, + "logits/rejected": -1.719791293144226, + "logps/chosen": -291.40582275390625, + "logps/rejected": -384.327392578125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.89783763885498, + "rewards/margins": 8.107240676879883, + "rewards/rejected": -19.005077362060547, + "step": 15339 + }, + { + "epoch": 2.39, + "learning_rate": 2.8963566575029827e-06, + "logits/chosen": -2.812913417816162, + "logits/rejected": -2.4109227657318115, + "logps/chosen": -354.9145202636719, + "logps/rejected": -291.1011962890625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.506467819213867, + "rewards/margins": 6.291015148162842, + "rewards/rejected": -18.797483444213867, + "step": 15340 + }, + { + "epoch": 2.39, + "learning_rate": 2.895623216971835e-06, + "logits/chosen": -1.238857388496399, + "logits/rejected": -1.5046718120574951, + "logps/chosen": -354.1701354980469, + "logps/rejected": -424.0313720703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.615704536437988, + "rewards/margins": 9.516923904418945, + "rewards/rejected": -18.13262939453125, + "step": 15341 + }, + { + "epoch": 2.39, + "learning_rate": 2.894889776440687e-06, + "logits/chosen": -1.5864105224609375, + "logits/rejected": -2.196469306945801, + "logps/chosen": -234.5463409423828, + "logps/rejected": -269.1296691894531, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.612264633178711, + "rewards/margins": 5.722705364227295, + "rewards/rejected": -14.334970474243164, + "step": 15342 + }, + { + "epoch": 2.39, + "learning_rate": 2.8941563359095388e-06, + "logits/chosen": -2.5051932334899902, + "logits/rejected": -2.794337272644043, + "logps/chosen": -116.66600036621094, + "logps/rejected": -299.94464111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.616329193115234, + "rewards/margins": 10.086463928222656, + "rewards/rejected": -17.70279312133789, + "step": 15343 + }, + { + "epoch": 2.39, + "learning_rate": 2.893422895378391e-06, + "logits/chosen": -2.9181079864501953, + "logits/rejected": -2.804546594619751, + "logps/chosen": -231.5380859375, + "logps/rejected": -464.13519287109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.967973709106445, + "rewards/margins": 11.833356857299805, + "rewards/rejected": -22.80133056640625, + "step": 15344 + }, + { + "epoch": 2.39, + "learning_rate": 2.892689454847243e-06, + "logits/chosen": -2.6334171295166016, + "logits/rejected": -2.6905150413513184, + "logps/chosen": -428.7908630371094, + "logps/rejected": -505.2409973144531, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.920906066894531, + "rewards/margins": 8.830976486206055, + "rewards/rejected": -20.751880645751953, + "step": 15345 + }, + { + "epoch": 2.39, + "learning_rate": 2.8919560143160952e-06, + "logits/chosen": -2.1910152435302734, + "logits/rejected": -2.7260608673095703, + "logps/chosen": -167.14788818359375, + "logps/rejected": -327.23822021484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9932684898376465, + "rewards/margins": 13.22683334350586, + "rewards/rejected": -19.220102310180664, + "step": 15346 + }, + { + "epoch": 2.39, + "learning_rate": 2.891222573784947e-06, + "logits/chosen": -2.8761332035064697, + "logits/rejected": -3.1777398586273193, + "logps/chosen": -86.24203491210938, + "logps/rejected": -216.62892150878906, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.564760208129883, + "rewards/margins": 5.331221580505371, + "rewards/rejected": -12.895981788635254, + "step": 15347 + }, + { + "epoch": 2.39, + "learning_rate": 2.8904891332537994e-06, + "logits/chosen": -2.814948797225952, + "logits/rejected": -2.836088180541992, + "logps/chosen": -145.57199096679688, + "logps/rejected": -329.975341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.515351295471191, + "rewards/margins": 10.356093406677246, + "rewards/rejected": -21.871444702148438, + "step": 15348 + }, + { + "epoch": 2.39, + "learning_rate": 2.8897556927226517e-06, + "logits/chosen": -2.3105878829956055, + "logits/rejected": -2.6748673915863037, + "logps/chosen": -197.08517456054688, + "logps/rejected": -477.32147216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.481478214263916, + "rewards/margins": 10.959693908691406, + "rewards/rejected": -17.441171646118164, + "step": 15349 + }, + { + "epoch": 2.39, + "learning_rate": 2.889022252191504e-06, + "logits/chosen": -2.4937238693237305, + "logits/rejected": -2.556759834289551, + "logps/chosen": -563.1739501953125, + "logps/rejected": -603.589111328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.805208206176758, + "rewards/margins": 10.257277488708496, + "rewards/rejected": -22.062484741210938, + "step": 15350 + }, + { + "epoch": 2.39, + "learning_rate": 2.888288811660356e-06, + "logits/chosen": -1.2175320386886597, + "logits/rejected": -2.6761977672576904, + "logps/chosen": -416.6422119140625, + "logps/rejected": -874.0001220703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.808664321899414, + "rewards/margins": 14.393597602844238, + "rewards/rejected": -22.20226287841797, + "step": 15351 + }, + { + "epoch": 2.39, + "learning_rate": 2.887555371129208e-06, + "logits/chosen": -2.6268136501312256, + "logits/rejected": -2.035372018814087, + "logps/chosen": -244.62989807128906, + "logps/rejected": -207.65887451171875, + "loss": 0.1189, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.464804172515869, + "rewards/margins": 7.612494468688965, + "rewards/rejected": -13.077299118041992, + "step": 15352 + }, + { + "epoch": 2.39, + "learning_rate": 2.88682193059806e-06, + "logits/chosen": -2.1369104385375977, + "logits/rejected": -2.978123903274536, + "logps/chosen": -287.8960266113281, + "logps/rejected": -570.47705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.859701156616211, + "rewards/margins": 9.352073669433594, + "rewards/rejected": -20.211776733398438, + "step": 15353 + }, + { + "epoch": 2.39, + "learning_rate": 2.886088490066912e-06, + "logits/chosen": -2.4850733280181885, + "logits/rejected": -2.4552698135375977, + "logps/chosen": -494.20965576171875, + "logps/rejected": -511.0979919433594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.390324592590332, + "rewards/margins": 11.888860702514648, + "rewards/rejected": -19.279186248779297, + "step": 15354 + }, + { + "epoch": 2.39, + "learning_rate": 2.8853550495357643e-06, + "logits/chosen": -1.654526948928833, + "logits/rejected": -2.369814872741699, + "logps/chosen": -274.23382568359375, + "logps/rejected": -451.607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.800898551940918, + "rewards/margins": 12.437249183654785, + "rewards/rejected": -20.238147735595703, + "step": 15355 + }, + { + "epoch": 2.39, + "learning_rate": 2.884621609004616e-06, + "logits/chosen": -2.876155376434326, + "logits/rejected": -1.7950503826141357, + "logps/chosen": -343.5643615722656, + "logps/rejected": -329.7509765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.992650985717773, + "rewards/margins": 9.083006858825684, + "rewards/rejected": -18.07565689086914, + "step": 15356 + }, + { + "epoch": 2.39, + "learning_rate": 2.883888168473469e-06, + "logits/chosen": -1.950608253479004, + "logits/rejected": -2.6768059730529785, + "logps/chosen": -248.76593017578125, + "logps/rejected": -586.52490234375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.817980766296387, + "rewards/margins": 13.3634614944458, + "rewards/rejected": -23.181442260742188, + "step": 15357 + }, + { + "epoch": 2.39, + "learning_rate": 2.8831547279423207e-06, + "logits/chosen": -2.706249475479126, + "logits/rejected": -2.800611972808838, + "logps/chosen": -201.08636474609375, + "logps/rejected": -260.2351989746094, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.18006706237793, + "rewards/margins": 8.560492515563965, + "rewards/rejected": -17.74056053161621, + "step": 15358 + }, + { + "epoch": 2.39, + "learning_rate": 2.882421287411173e-06, + "logits/chosen": -2.8531782627105713, + "logits/rejected": -2.7624123096466064, + "logps/chosen": -282.47052001953125, + "logps/rejected": -446.8553466796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.716898918151855, + "rewards/margins": 8.22634220123291, + "rewards/rejected": -16.943241119384766, + "step": 15359 + }, + { + "epoch": 2.39, + "learning_rate": 2.881687846880025e-06, + "logits/chosen": -2.010082960128784, + "logits/rejected": -2.8381972312927246, + "logps/chosen": -162.6288604736328, + "logps/rejected": -425.34332275390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.299996376037598, + "rewards/margins": 12.232925415039062, + "rewards/rejected": -20.532920837402344, + "step": 15360 + }, + { + "epoch": 2.39, + "learning_rate": 2.8809544063488772e-06, + "logits/chosen": -2.6804518699645996, + "logits/rejected": -2.7957894802093506, + "logps/chosen": -193.82693481445312, + "logps/rejected": -317.3505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5379638671875, + "rewards/margins": 10.41125202178955, + "rewards/rejected": -15.949214935302734, + "step": 15361 + }, + { + "epoch": 2.39, + "learning_rate": 2.880220965817729e-06, + "logits/chosen": -2.703960657119751, + "logits/rejected": -2.6155200004577637, + "logps/chosen": -440.7563171386719, + "logps/rejected": -416.5744323730469, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.061370849609375, + "rewards/margins": 9.114830017089844, + "rewards/rejected": -17.17620086669922, + "step": 15362 + }, + { + "epoch": 2.39, + "learning_rate": 2.879487525286581e-06, + "logits/chosen": -1.1597199440002441, + "logits/rejected": -2.6892597675323486, + "logps/chosen": -180.1578369140625, + "logps/rejected": -371.6881103515625, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.29426383972168, + "rewards/margins": 6.62290620803833, + "rewards/rejected": -16.917171478271484, + "step": 15363 + }, + { + "epoch": 2.39, + "learning_rate": 2.8787540847554333e-06, + "logits/chosen": -1.5186818838119507, + "logits/rejected": -2.324282169342041, + "logps/chosen": -290.5372314453125, + "logps/rejected": -616.8324584960938, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.359599113464355, + "rewards/margins": 12.856526374816895, + "rewards/rejected": -21.21612548828125, + "step": 15364 + }, + { + "epoch": 2.39, + "learning_rate": 2.8780206442242856e-06, + "logits/chosen": -2.8825767040252686, + "logits/rejected": -1.7956780195236206, + "logps/chosen": -493.430908203125, + "logps/rejected": -500.36346435546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.768792152404785, + "rewards/margins": 9.957915306091309, + "rewards/rejected": -17.726707458496094, + "step": 15365 + }, + { + "epoch": 2.39, + "learning_rate": 2.877287203693138e-06, + "logits/chosen": -2.499056816101074, + "logits/rejected": -1.5547828674316406, + "logps/chosen": -423.03759765625, + "logps/rejected": -415.98394775390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.843692779541016, + "rewards/margins": 10.747819900512695, + "rewards/rejected": -23.59151268005371, + "step": 15366 + }, + { + "epoch": 2.39, + "learning_rate": 2.8765537631619898e-06, + "logits/chosen": -1.682472825050354, + "logits/rejected": -2.5565435886383057, + "logps/chosen": -147.80824279785156, + "logps/rejected": -443.0304260253906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.075929641723633, + "rewards/margins": 13.255889892578125, + "rewards/rejected": -23.331819534301758, + "step": 15367 + }, + { + "epoch": 2.39, + "learning_rate": 2.875820322630842e-06, + "logits/chosen": -1.8204890489578247, + "logits/rejected": -3.0857889652252197, + "logps/chosen": -145.8367919921875, + "logps/rejected": -619.2211303710938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.453598022460938, + "rewards/margins": 12.431020736694336, + "rewards/rejected": -22.884618759155273, + "step": 15368 + }, + { + "epoch": 2.39, + "learning_rate": 2.875086882099694e-06, + "logits/chosen": -2.859467029571533, + "logits/rejected": -1.7420796155929565, + "logps/chosen": -326.9321594238281, + "logps/rejected": -278.545166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.740583896636963, + "rewards/margins": 11.478103637695312, + "rewards/rejected": -18.218687057495117, + "step": 15369 + }, + { + "epoch": 2.39, + "learning_rate": 2.8743534415685462e-06, + "logits/chosen": -2.984853982925415, + "logits/rejected": -2.3794360160827637, + "logps/chosen": -839.1455078125, + "logps/rejected": -565.9663696289062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8510589599609375, + "rewards/margins": 14.657447814941406, + "rewards/rejected": -19.508506774902344, + "step": 15370 + }, + { + "epoch": 2.39, + "learning_rate": 2.873620001037398e-06, + "logits/chosen": -2.225977897644043, + "logits/rejected": -2.7671597003936768, + "logps/chosen": -303.3724060058594, + "logps/rejected": -528.12939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.491291999816895, + "rewards/margins": 12.740411758422852, + "rewards/rejected": -21.231704711914062, + "step": 15371 + }, + { + "epoch": 2.39, + "learning_rate": 2.8728865605062504e-06, + "logits/chosen": -2.445401668548584, + "logits/rejected": -2.7898175716400146, + "logps/chosen": -299.452392578125, + "logps/rejected": -436.41912841796875, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.242485046386719, + "rewards/margins": 7.0323381423950195, + "rewards/rejected": -19.274824142456055, + "step": 15372 + }, + { + "epoch": 2.39, + "learning_rate": 2.8721531199751023e-06, + "logits/chosen": -2.5438880920410156, + "logits/rejected": -1.957283616065979, + "logps/chosen": -321.9906005859375, + "logps/rejected": -435.16473388671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.315241813659668, + "rewards/margins": 12.727032661437988, + "rewards/rejected": -22.042274475097656, + "step": 15373 + }, + { + "epoch": 2.39, + "learning_rate": 2.8714196794439546e-06, + "logits/chosen": -0.8741502165794373, + "logits/rejected": -2.16464900970459, + "logps/chosen": -196.51547241210938, + "logps/rejected": -522.63330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.148021697998047, + "rewards/margins": 10.083755493164062, + "rewards/rejected": -24.23177719116211, + "step": 15374 + }, + { + "epoch": 2.39, + "learning_rate": 2.870686238912807e-06, + "logits/chosen": -2.639970302581787, + "logits/rejected": -1.0777391195297241, + "logps/chosen": -266.0345458984375, + "logps/rejected": -207.65911865234375, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.132676124572754, + "rewards/margins": 7.207467555999756, + "rewards/rejected": -15.340143203735352, + "step": 15375 + }, + { + "epoch": 2.39, + "learning_rate": 2.8699527983816588e-06, + "logits/chosen": -0.9429093599319458, + "logits/rejected": -2.7975316047668457, + "logps/chosen": -165.64645385742188, + "logps/rejected": -595.170166015625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.406618118286133, + "rewards/margins": 11.350074768066406, + "rewards/rejected": -20.756690979003906, + "step": 15376 + }, + { + "epoch": 2.39, + "learning_rate": 2.869219357850511e-06, + "logits/chosen": -2.6144051551818848, + "logits/rejected": -1.1506705284118652, + "logps/chosen": -275.4566650390625, + "logps/rejected": -285.892578125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.614681243896484, + "rewards/margins": 6.517210960388184, + "rewards/rejected": -20.13189125061035, + "step": 15377 + }, + { + "epoch": 2.39, + "learning_rate": 2.868485917319363e-06, + "logits/chosen": -2.6934049129486084, + "logits/rejected": -3.0803613662719727, + "logps/chosen": -135.47911071777344, + "logps/rejected": -335.98065185546875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.707179069519043, + "rewards/margins": 7.49530029296875, + "rewards/rejected": -16.20248031616211, + "step": 15378 + }, + { + "epoch": 2.39, + "learning_rate": 2.8677524767882153e-06, + "logits/chosen": -2.368131160736084, + "logits/rejected": -2.832350730895996, + "logps/chosen": -179.9466552734375, + "logps/rejected": -481.34075927734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.56534481048584, + "rewards/margins": 9.759275436401367, + "rewards/rejected": -18.32461929321289, + "step": 15379 + }, + { + "epoch": 2.39, + "learning_rate": 2.867019036257067e-06, + "logits/chosen": -2.754990816116333, + "logits/rejected": -1.7664951086044312, + "logps/chosen": -711.7056884765625, + "logps/rejected": -399.53118896484375, + "loss": 0.1411, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.640015602111816, + "rewards/margins": 7.153722763061523, + "rewards/rejected": -13.79373836517334, + "step": 15380 + }, + { + "epoch": 2.39, + "learning_rate": 2.8662855957259194e-06, + "logits/chosen": -2.7286691665649414, + "logits/rejected": -1.6603093147277832, + "logps/chosen": -546.724365234375, + "logps/rejected": -519.750732421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.223376274108887, + "rewards/margins": 11.239080429077148, + "rewards/rejected": -19.46245765686035, + "step": 15381 + }, + { + "epoch": 2.39, + "learning_rate": 2.8655521551947717e-06, + "logits/chosen": -2.726701259613037, + "logits/rejected": -2.9875452518463135, + "logps/chosen": -428.4862060546875, + "logps/rejected": -421.87249755859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.662284851074219, + "rewards/margins": 9.080118179321289, + "rewards/rejected": -19.742403030395508, + "step": 15382 + }, + { + "epoch": 2.39, + "learning_rate": 2.8648187146636236e-06, + "logits/chosen": -1.5139189958572388, + "logits/rejected": -2.358379602432251, + "logps/chosen": -192.76333618164062, + "logps/rejected": -496.68701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.230496883392334, + "rewards/margins": 20.805065155029297, + "rewards/rejected": -28.035560607910156, + "step": 15383 + }, + { + "epoch": 2.39, + "learning_rate": 2.864085274132476e-06, + "logits/chosen": -2.0989837646484375, + "logits/rejected": -2.588578701019287, + "logps/chosen": -132.25604248046875, + "logps/rejected": -392.51959228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.226405143737793, + "rewards/margins": 11.86740493774414, + "rewards/rejected": -21.09381103515625, + "step": 15384 + }, + { + "epoch": 2.39, + "learning_rate": 2.863351833601328e-06, + "logits/chosen": -2.04522705078125, + "logits/rejected": -2.400251865386963, + "logps/chosen": -369.3746643066406, + "logps/rejected": -613.3636474609375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.744544982910156, + "rewards/margins": 16.56646728515625, + "rewards/rejected": -30.311012268066406, + "step": 15385 + }, + { + "epoch": 2.39, + "learning_rate": 2.86261839307018e-06, + "logits/chosen": -2.3660812377929688, + "logits/rejected": -2.760930061340332, + "logps/chosen": -131.20108032226562, + "logps/rejected": -288.6978759765625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.968605995178223, + "rewards/margins": 7.619482517242432, + "rewards/rejected": -16.588088989257812, + "step": 15386 + }, + { + "epoch": 2.39, + "learning_rate": 2.861884952539032e-06, + "logits/chosen": -1.5023852586746216, + "logits/rejected": -2.711564779281616, + "logps/chosen": -189.62347412109375, + "logps/rejected": -534.4950561523438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.405256271362305, + "rewards/margins": 13.333986282348633, + "rewards/rejected": -19.739242553710938, + "step": 15387 + }, + { + "epoch": 2.39, + "learning_rate": 2.8611515120078843e-06, + "logits/chosen": -3.05391263961792, + "logits/rejected": -2.799241542816162, + "logps/chosen": -617.7078857421875, + "logps/rejected": -439.50653076171875, + "loss": 0.0602, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.441789150238037, + "rewards/margins": 5.787780284881592, + "rewards/rejected": -13.229569435119629, + "step": 15388 + }, + { + "epoch": 2.39, + "learning_rate": 2.860418071476736e-06, + "logits/chosen": -2.245797872543335, + "logits/rejected": -1.9620022773742676, + "logps/chosen": -292.27581787109375, + "logps/rejected": -407.78082275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.705975532531738, + "rewards/margins": 10.682501792907715, + "rewards/rejected": -22.388477325439453, + "step": 15389 + }, + { + "epoch": 2.39, + "learning_rate": 2.8596846309455885e-06, + "logits/chosen": -2.6048035621643066, + "logits/rejected": -2.896479368209839, + "logps/chosen": -172.97000122070312, + "logps/rejected": -296.7740173339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.920121192932129, + "rewards/margins": 11.2564115524292, + "rewards/rejected": -18.176532745361328, + "step": 15390 + }, + { + "epoch": 2.39, + "learning_rate": 2.8589511904144408e-06, + "logits/chosen": -2.0482356548309326, + "logits/rejected": -2.4666547775268555, + "logps/chosen": -380.5775451660156, + "logps/rejected": -602.0865478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.587400436401367, + "rewards/margins": 11.524429321289062, + "rewards/rejected": -27.11182975769043, + "step": 15391 + }, + { + "epoch": 2.39, + "learning_rate": 2.8582177498832926e-06, + "logits/chosen": -2.62954044342041, + "logits/rejected": -2.942199230194092, + "logps/chosen": -178.6448516845703, + "logps/rejected": -268.5802001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.904965400695801, + "rewards/margins": 10.51982307434082, + "rewards/rejected": -17.424789428710938, + "step": 15392 + }, + { + "epoch": 2.39, + "learning_rate": 2.857484309352145e-06, + "logits/chosen": -1.71920907497406, + "logits/rejected": -2.480612277984619, + "logps/chosen": -424.75213623046875, + "logps/rejected": -762.9320068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.179369926452637, + "rewards/margins": 14.193289756774902, + "rewards/rejected": -29.37265968322754, + "step": 15393 + }, + { + "epoch": 2.39, + "learning_rate": 2.856750868820997e-06, + "logits/chosen": -2.9188945293426514, + "logits/rejected": -1.8620301485061646, + "logps/chosen": -322.6283264160156, + "logps/rejected": -176.41082763671875, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.824810028076172, + "rewards/margins": 3.175995349884033, + "rewards/rejected": -13.000805854797363, + "step": 15394 + }, + { + "epoch": 2.39, + "learning_rate": 2.856017428289849e-06, + "logits/chosen": -2.2225465774536133, + "logits/rejected": -2.9289143085479736, + "logps/chosen": -298.22601318359375, + "logps/rejected": -423.38848876953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.032176971435547, + "rewards/margins": 9.605396270751953, + "rewards/rejected": -18.6375732421875, + "step": 15395 + }, + { + "epoch": 2.39, + "learning_rate": 2.855283987758701e-06, + "logits/chosen": -2.341951847076416, + "logits/rejected": -1.999405860900879, + "logps/chosen": -174.7583770751953, + "logps/rejected": -203.5718231201172, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.194158554077148, + "rewards/margins": 7.406062126159668, + "rewards/rejected": -17.6002197265625, + "step": 15396 + }, + { + "epoch": 2.39, + "learning_rate": 2.8545505472275533e-06, + "logits/chosen": -2.8293328285217285, + "logits/rejected": -2.1763808727264404, + "logps/chosen": -385.0812072753906, + "logps/rejected": -387.3863525390625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.85236644744873, + "rewards/margins": 10.733972549438477, + "rewards/rejected": -21.586339950561523, + "step": 15397 + }, + { + "epoch": 2.39, + "learning_rate": 2.853817106696405e-06, + "logits/chosen": -2.021369457244873, + "logits/rejected": -2.4044106006622314, + "logps/chosen": -360.8426513671875, + "logps/rejected": -549.9822998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.83226203918457, + "rewards/margins": 10.497394561767578, + "rewards/rejected": -24.32965850830078, + "step": 15398 + }, + { + "epoch": 2.39, + "learning_rate": 2.853083666165258e-06, + "logits/chosen": -2.6069958209991455, + "logits/rejected": -2.1691200733184814, + "logps/chosen": -220.01564025878906, + "logps/rejected": -335.8251037597656, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.716774940490723, + "rewards/margins": 6.826844692230225, + "rewards/rejected": -16.54361915588379, + "step": 15399 + }, + { + "epoch": 2.4, + "learning_rate": 2.85235022563411e-06, + "logits/chosen": -1.8892031908035278, + "logits/rejected": -2.6901090145111084, + "logps/chosen": -207.01071166992188, + "logps/rejected": -329.9181213378906, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.952327728271484, + "rewards/margins": 7.005934238433838, + "rewards/rejected": -18.958261489868164, + "step": 15400 + }, + { + "epoch": 2.4, + "learning_rate": 2.851616785102962e-06, + "logits/chosen": -2.093109369277954, + "logits/rejected": -2.8013362884521484, + "logps/chosen": -129.2899932861328, + "logps/rejected": -488.1400451660156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.469151973724365, + "rewards/margins": 17.261280059814453, + "rewards/rejected": -24.730432510375977, + "step": 15401 + }, + { + "epoch": 2.4, + "learning_rate": 2.850883344571814e-06, + "logits/chosen": -2.595658540725708, + "logits/rejected": -2.8504674434661865, + "logps/chosen": -216.73069763183594, + "logps/rejected": -336.72393798828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.183465003967285, + "rewards/margins": 10.475626945495605, + "rewards/rejected": -18.65909194946289, + "step": 15402 + }, + { + "epoch": 2.4, + "learning_rate": 2.850149904040666e-06, + "logits/chosen": -2.4585626125335693, + "logits/rejected": -2.895547389984131, + "logps/chosen": -209.46810913085938, + "logps/rejected": -416.2982177734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.871172904968262, + "rewards/margins": 9.010675430297852, + "rewards/rejected": -23.881847381591797, + "step": 15403 + }, + { + "epoch": 2.4, + "learning_rate": 2.849416463509518e-06, + "logits/chosen": -2.2416017055511475, + "logits/rejected": -2.4998891353607178, + "logps/chosen": -219.2462921142578, + "logps/rejected": -447.2611083984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.797353744506836, + "rewards/margins": 11.156986236572266, + "rewards/rejected": -21.95433807373047, + "step": 15404 + }, + { + "epoch": 2.4, + "learning_rate": 2.84868302297837e-06, + "logits/chosen": -2.68572735786438, + "logits/rejected": -2.7809762954711914, + "logps/chosen": -268.6934814453125, + "logps/rejected": -630.2019653320312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.913047790527344, + "rewards/margins": 13.650732040405273, + "rewards/rejected": -22.563777923583984, + "step": 15405 + }, + { + "epoch": 2.4, + "learning_rate": 2.8479495824472223e-06, + "logits/chosen": -2.626936197280884, + "logits/rejected": -2.8587992191314697, + "logps/chosen": -151.23480224609375, + "logps/rejected": -311.9634094238281, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.873000144958496, + "rewards/margins": 8.678512573242188, + "rewards/rejected": -18.551511764526367, + "step": 15406 + }, + { + "epoch": 2.4, + "learning_rate": 2.8472161419160746e-06, + "logits/chosen": -1.9875984191894531, + "logits/rejected": -2.8388051986694336, + "logps/chosen": -261.97882080078125, + "logps/rejected": -425.44024658203125, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.225293159484863, + "rewards/margins": 5.415266990661621, + "rewards/rejected": -17.640560150146484, + "step": 15407 + }, + { + "epoch": 2.4, + "learning_rate": 2.846482701384927e-06, + "logits/chosen": -2.7581167221069336, + "logits/rejected": -2.78094482421875, + "logps/chosen": -238.70645141601562, + "logps/rejected": -273.99456787109375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.63802719116211, + "rewards/margins": 7.568384647369385, + "rewards/rejected": -16.20641326904297, + "step": 15408 + }, + { + "epoch": 2.4, + "learning_rate": 2.845749260853779e-06, + "logits/chosen": -1.1348005533218384, + "logits/rejected": -1.342071771621704, + "logps/chosen": -173.35214233398438, + "logps/rejected": -464.8078308105469, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.80645751953125, + "rewards/margins": 11.581557273864746, + "rewards/rejected": -18.388015747070312, + "step": 15409 + }, + { + "epoch": 2.4, + "learning_rate": 2.845015820322631e-06, + "logits/chosen": -1.8591301441192627, + "logits/rejected": -2.6206023693084717, + "logps/chosen": -119.81378173828125, + "logps/rejected": -388.5099792480469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9264421463012695, + "rewards/margins": 16.147720336914062, + "rewards/rejected": -24.07416343688965, + "step": 15410 + }, + { + "epoch": 2.4, + "learning_rate": 2.844282379791483e-06, + "logits/chosen": -0.9254839420318604, + "logits/rejected": -1.9638057947158813, + "logps/chosen": -222.34695434570312, + "logps/rejected": -504.421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.072568893432617, + "rewards/margins": 11.994075775146484, + "rewards/rejected": -26.0666446685791, + "step": 15411 + }, + { + "epoch": 2.4, + "learning_rate": 2.843548939260335e-06, + "logits/chosen": -2.8652262687683105, + "logits/rejected": -1.382096767425537, + "logps/chosen": -176.00814819335938, + "logps/rejected": -317.0098876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.157830715179443, + "rewards/margins": 12.361321449279785, + "rewards/rejected": -17.51915168762207, + "step": 15412 + }, + { + "epoch": 2.4, + "learning_rate": 2.842815498729187e-06, + "logits/chosen": -1.8321658372879028, + "logits/rejected": -2.6492109298706055, + "logps/chosen": -171.71881103515625, + "logps/rejected": -545.9284057617188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.746329307556152, + "rewards/margins": 12.196673393249512, + "rewards/rejected": -24.943002700805664, + "step": 15413 + }, + { + "epoch": 2.4, + "learning_rate": 2.842082058198039e-06, + "logits/chosen": -2.714491128921509, + "logits/rejected": -2.650454044342041, + "logps/chosen": -830.4465942382812, + "logps/rejected": -751.6016235351562, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.39416217803955, + "rewards/margins": 7.7130126953125, + "rewards/rejected": -18.107173919677734, + "step": 15414 + }, + { + "epoch": 2.4, + "learning_rate": 2.8413486176668914e-06, + "logits/chosen": -2.1238670349121094, + "logits/rejected": -2.5991265773773193, + "logps/chosen": -228.75180053710938, + "logps/rejected": -419.66119384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.256863594055176, + "rewards/margins": 16.532978057861328, + "rewards/rejected": -27.78984260559082, + "step": 15415 + }, + { + "epoch": 2.4, + "learning_rate": 2.8406151771357437e-06, + "logits/chosen": -2.420518398284912, + "logits/rejected": -2.5564258098602295, + "logps/chosen": -259.03546142578125, + "logps/rejected": -375.29241943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.34847354888916, + "rewards/margins": 10.178455352783203, + "rewards/rejected": -18.526927947998047, + "step": 15416 + }, + { + "epoch": 2.4, + "learning_rate": 2.839881736604596e-06, + "logits/chosen": -2.806929111480713, + "logits/rejected": -2.7905526161193848, + "logps/chosen": -167.336669921875, + "logps/rejected": -298.6776428222656, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.982853889465332, + "rewards/margins": 8.91069507598877, + "rewards/rejected": -18.8935489654541, + "step": 15417 + }, + { + "epoch": 2.4, + "learning_rate": 2.839148296073448e-06, + "logits/chosen": -2.68123459815979, + "logits/rejected": -2.541895866394043, + "logps/chosen": -371.46405029296875, + "logps/rejected": -541.8194580078125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.654611587524414, + "rewards/margins": 15.093975067138672, + "rewards/rejected": -26.748586654663086, + "step": 15418 + }, + { + "epoch": 2.4, + "learning_rate": 2.8384148555423e-06, + "logits/chosen": -0.8415553569793701, + "logits/rejected": -2.5066487789154053, + "logps/chosen": -281.0653381347656, + "logps/rejected": -359.4891052246094, + "loss": 0.0737, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.731990814208984, + "rewards/margins": 7.111204624176025, + "rewards/rejected": -16.843196868896484, + "step": 15419 + }, + { + "epoch": 2.4, + "learning_rate": 2.837681415011152e-06, + "logits/chosen": -1.807258129119873, + "logits/rejected": -2.49639630317688, + "logps/chosen": -143.3457794189453, + "logps/rejected": -468.4457092285156, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.231976509094238, + "rewards/margins": 11.512147903442383, + "rewards/rejected": -20.744125366210938, + "step": 15420 + }, + { + "epoch": 2.4, + "learning_rate": 2.8369479744800043e-06, + "logits/chosen": -2.449333667755127, + "logits/rejected": -1.7775689363479614, + "logps/chosen": -248.1800079345703, + "logps/rejected": -224.90231323242188, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.665521621704102, + "rewards/margins": 7.963719367980957, + "rewards/rejected": -16.629241943359375, + "step": 15421 + }, + { + "epoch": 2.4, + "learning_rate": 2.836214533948856e-06, + "logits/chosen": -1.99526047706604, + "logits/rejected": -2.8268885612487793, + "logps/chosen": -542.8917846679688, + "logps/rejected": -662.3147583007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.89917278289795, + "rewards/margins": 11.741939544677734, + "rewards/rejected": -21.64111328125, + "step": 15422 + }, + { + "epoch": 2.4, + "learning_rate": 2.835481093417708e-06, + "logits/chosen": -2.655106544494629, + "logits/rejected": -2.7896056175231934, + "logps/chosen": -701.710205078125, + "logps/rejected": -981.300048828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.118820190429688, + "rewards/margins": 11.29654312133789, + "rewards/rejected": -19.415363311767578, + "step": 15423 + }, + { + "epoch": 2.4, + "learning_rate": 2.8347476528865604e-06, + "logits/chosen": -2.7723276615142822, + "logits/rejected": -2.9402899742126465, + "logps/chosen": -356.667724609375, + "logps/rejected": -495.96551513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.407459259033203, + "rewards/margins": 10.90451431274414, + "rewards/rejected": -25.311973571777344, + "step": 15424 + }, + { + "epoch": 2.4, + "learning_rate": 2.8340142123554127e-06, + "logits/chosen": -2.1940741539001465, + "logits/rejected": -2.5923831462860107, + "logps/chosen": -490.276611328125, + "logps/rejected": -492.1354064941406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.847264289855957, + "rewards/margins": 9.333930969238281, + "rewards/rejected": -22.181194305419922, + "step": 15425 + }, + { + "epoch": 2.4, + "learning_rate": 2.833280771824265e-06, + "logits/chosen": -1.1425635814666748, + "logits/rejected": -2.635373115539551, + "logps/chosen": -186.03274536132812, + "logps/rejected": -488.792724609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.033526420593262, + "rewards/margins": 9.468268394470215, + "rewards/rejected": -19.501794815063477, + "step": 15426 + }, + { + "epoch": 2.4, + "learning_rate": 2.832547331293117e-06, + "logits/chosen": -2.625742197036743, + "logits/rejected": -2.7290234565734863, + "logps/chosen": -205.63095092773438, + "logps/rejected": -252.56027221679688, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.46162223815918, + "rewards/margins": 4.992807388305664, + "rewards/rejected": -13.454429626464844, + "step": 15427 + }, + { + "epoch": 2.4, + "learning_rate": 2.831813890761969e-06, + "logits/chosen": -2.7562954425811768, + "logits/rejected": -1.7870876789093018, + "logps/chosen": -969.056884765625, + "logps/rejected": -647.9252319335938, + "loss": 0.1236, + "rewards/accuracies": 1.0, + "rewards/chosen": -16.082176208496094, + "rewards/margins": 6.416913032531738, + "rewards/rejected": -22.499088287353516, + "step": 15428 + }, + { + "epoch": 2.4, + "learning_rate": 2.831080450230821e-06, + "logits/chosen": -2.6612257957458496, + "logits/rejected": -2.087110996246338, + "logps/chosen": -377.80670166015625, + "logps/rejected": -325.3787536621094, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.556464195251465, + "rewards/margins": 9.057723045349121, + "rewards/rejected": -19.614187240600586, + "step": 15429 + }, + { + "epoch": 2.4, + "learning_rate": 2.8303470096996733e-06, + "logits/chosen": -2.96850323677063, + "logits/rejected": -2.7886159420013428, + "logps/chosen": -224.77792358398438, + "logps/rejected": -211.03628540039062, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.139601230621338, + "rewards/margins": 7.266573429107666, + "rewards/rejected": -14.406174659729004, + "step": 15430 + }, + { + "epoch": 2.4, + "learning_rate": 2.8296135691685252e-06, + "logits/chosen": -2.011997699737549, + "logits/rejected": -2.5110549926757812, + "logps/chosen": -300.2899169921875, + "logps/rejected": -577.3038330078125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.944856643676758, + "rewards/margins": 8.435314178466797, + "rewards/rejected": -21.380170822143555, + "step": 15431 + }, + { + "epoch": 2.4, + "learning_rate": 2.828880128637377e-06, + "logits/chosen": -1.477763295173645, + "logits/rejected": -2.467200994491577, + "logps/chosen": -418.7534484863281, + "logps/rejected": -582.4017333984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.047429084777832, + "rewards/margins": 9.355437278747559, + "rewards/rejected": -20.40286636352539, + "step": 15432 + }, + { + "epoch": 2.4, + "learning_rate": 2.82814668810623e-06, + "logits/chosen": -1.5423825979232788, + "logits/rejected": -2.349764823913574, + "logps/chosen": -250.07688903808594, + "logps/rejected": -583.3775024414062, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.40834903717041, + "rewards/margins": 15.650449752807617, + "rewards/rejected": -24.058799743652344, + "step": 15433 + }, + { + "epoch": 2.4, + "learning_rate": 2.8274132475750817e-06, + "logits/chosen": -2.0905940532684326, + "logits/rejected": -2.6718637943267822, + "logps/chosen": -199.06068420410156, + "logps/rejected": -370.85333251953125, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.179603576660156, + "rewards/margins": 9.148241996765137, + "rewards/rejected": -17.327844619750977, + "step": 15434 + }, + { + "epoch": 2.4, + "learning_rate": 2.826679807043934e-06, + "logits/chosen": -2.632797956466675, + "logits/rejected": -2.85341739654541, + "logps/chosen": -366.6615295410156, + "logps/rejected": -440.7077941894531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.296743392944336, + "rewards/margins": 15.67863941192627, + "rewards/rejected": -27.975383758544922, + "step": 15435 + }, + { + "epoch": 2.4, + "learning_rate": 2.825946366512786e-06, + "logits/chosen": -1.7844973802566528, + "logits/rejected": -2.528785228729248, + "logps/chosen": -282.239990234375, + "logps/rejected": -424.779052734375, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.809322357177734, + "rewards/margins": 6.266073703765869, + "rewards/rejected": -20.075395584106445, + "step": 15436 + }, + { + "epoch": 2.4, + "learning_rate": 2.825212925981638e-06, + "logits/chosen": -2.265303134918213, + "logits/rejected": -2.9116485118865967, + "logps/chosen": -321.7825927734375, + "logps/rejected": -823.5123291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.795602798461914, + "rewards/margins": 12.040763854980469, + "rewards/rejected": -26.836366653442383, + "step": 15437 + }, + { + "epoch": 2.4, + "learning_rate": 2.82447948545049e-06, + "logits/chosen": -1.6698390245437622, + "logits/rejected": -2.7999377250671387, + "logps/chosen": -264.3244323730469, + "logps/rejected": -535.2423706054688, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.148924827575684, + "rewards/margins": 10.301916122436523, + "rewards/rejected": -22.45083999633789, + "step": 15438 + }, + { + "epoch": 2.4, + "learning_rate": 2.8237460449193424e-06, + "logits/chosen": -2.707768201828003, + "logits/rejected": -3.018118381500244, + "logps/chosen": -328.6294250488281, + "logps/rejected": -624.1701049804688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.850114822387695, + "rewards/margins": 13.119367599487305, + "rewards/rejected": -21.969482421875, + "step": 15439 + }, + { + "epoch": 2.4, + "learning_rate": 2.8230126043881942e-06, + "logits/chosen": -2.602100133895874, + "logits/rejected": -2.9022839069366455, + "logps/chosen": -792.9644165039062, + "logps/rejected": -917.0802001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.614630699157715, + "rewards/margins": 14.29848861694336, + "rewards/rejected": -25.91312026977539, + "step": 15440 + }, + { + "epoch": 2.4, + "learning_rate": 2.8222791638570465e-06, + "logits/chosen": -2.607659101486206, + "logits/rejected": -2.8153226375579834, + "logps/chosen": -517.9925537109375, + "logps/rejected": -545.7803955078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.162336349487305, + "rewards/margins": 12.499256134033203, + "rewards/rejected": -25.661592483520508, + "step": 15441 + }, + { + "epoch": 2.4, + "learning_rate": 2.821545723325899e-06, + "logits/chosen": -2.881650924682617, + "logits/rejected": -2.966719388961792, + "logps/chosen": -892.1807250976562, + "logps/rejected": -576.5925903320312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.041201591491699, + "rewards/margins": 9.834249496459961, + "rewards/rejected": -16.875452041625977, + "step": 15442 + }, + { + "epoch": 2.4, + "learning_rate": 2.8208122827947507e-06, + "logits/chosen": -2.551506757736206, + "logits/rejected": -1.7219717502593994, + "logps/chosen": -522.1839599609375, + "logps/rejected": -438.3305358886719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.133489608764648, + "rewards/margins": 10.527000427246094, + "rewards/rejected": -21.660490036010742, + "step": 15443 + }, + { + "epoch": 2.4, + "learning_rate": 2.820078842263603e-06, + "logits/chosen": -2.541363000869751, + "logits/rejected": -2.5115807056427, + "logps/chosen": -299.209228515625, + "logps/rejected": -394.24029541015625, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.274526596069336, + "rewards/margins": 7.631139755249023, + "rewards/rejected": -20.90566635131836, + "step": 15444 + }, + { + "epoch": 2.4, + "learning_rate": 2.819345401732455e-06, + "logits/chosen": -2.608995199203491, + "logits/rejected": -2.6058568954467773, + "logps/chosen": -643.8106689453125, + "logps/rejected": -687.64208984375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.372404098510742, + "rewards/margins": 8.453529357910156, + "rewards/rejected": -20.82593536376953, + "step": 15445 + }, + { + "epoch": 2.4, + "learning_rate": 2.818611961201307e-06, + "logits/chosen": -1.9436522722244263, + "logits/rejected": -2.942195415496826, + "logps/chosen": -114.15560913085938, + "logps/rejected": -355.08331298828125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.324706077575684, + "rewards/margins": 9.274209976196289, + "rewards/rejected": -19.598915100097656, + "step": 15446 + }, + { + "epoch": 2.4, + "learning_rate": 2.817878520670159e-06, + "logits/chosen": -1.81931734085083, + "logits/rejected": -2.3100671768188477, + "logps/chosen": -207.1787567138672, + "logps/rejected": -431.6238098144531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.73634147644043, + "rewards/margins": 9.554824829101562, + "rewards/rejected": -19.291166305541992, + "step": 15447 + }, + { + "epoch": 2.4, + "learning_rate": 2.8171450801390114e-06, + "logits/chosen": -2.6221110820770264, + "logits/rejected": -1.9857162237167358, + "logps/chosen": -575.1309814453125, + "logps/rejected": -482.78887939453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.266310691833496, + "rewards/margins": 8.152973175048828, + "rewards/rejected": -19.419282913208008, + "step": 15448 + }, + { + "epoch": 2.4, + "learning_rate": 2.8164116396078633e-06, + "logits/chosen": -2.7713170051574707, + "logits/rejected": -2.220461368560791, + "logps/chosen": -791.0743408203125, + "logps/rejected": -714.300048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.285712242126465, + "rewards/margins": 13.763564109802246, + "rewards/rejected": -21.04927635192871, + "step": 15449 + }, + { + "epoch": 2.4, + "learning_rate": 2.815678199076716e-06, + "logits/chosen": -2.668539047241211, + "logits/rejected": -2.3534138202667236, + "logps/chosen": -416.8368225097656, + "logps/rejected": -771.1700439453125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.058610916137695, + "rewards/margins": 11.597508430480957, + "rewards/rejected": -25.65612030029297, + "step": 15450 + }, + { + "epoch": 2.4, + "learning_rate": 2.814944758545568e-06, + "logits/chosen": -2.889657735824585, + "logits/rejected": -2.1601827144622803, + "logps/chosen": -264.2457275390625, + "logps/rejected": -285.8409423828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.380380630493164, + "rewards/margins": 9.983125686645508, + "rewards/rejected": -18.363506317138672, + "step": 15451 + }, + { + "epoch": 2.4, + "learning_rate": 2.8142113180144197e-06, + "logits/chosen": -2.600538730621338, + "logits/rejected": -2.834322690963745, + "logps/chosen": -361.08038330078125, + "logps/rejected": -467.7445068359375, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.409337997436523, + "rewards/margins": 10.117605209350586, + "rewards/rejected": -18.52694320678711, + "step": 15452 + }, + { + "epoch": 2.4, + "learning_rate": 2.813477877483272e-06, + "logits/chosen": -3.0313761234283447, + "logits/rejected": -2.296190023422241, + "logps/chosen": -404.2909240722656, + "logps/rejected": -224.51002502441406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.923210144042969, + "rewards/margins": 9.390667915344238, + "rewards/rejected": -14.313878059387207, + "step": 15453 + }, + { + "epoch": 2.4, + "learning_rate": 2.812744436952124e-06, + "logits/chosen": -1.7508915662765503, + "logits/rejected": -2.0627262592315674, + "logps/chosen": -569.8270263671875, + "logps/rejected": -694.5831298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.432989120483398, + "rewards/margins": 12.238567352294922, + "rewards/rejected": -22.671558380126953, + "step": 15454 + }, + { + "epoch": 2.4, + "learning_rate": 2.8120109964209762e-06, + "logits/chosen": -1.7522529363632202, + "logits/rejected": -2.7703583240509033, + "logps/chosen": -168.94427490234375, + "logps/rejected": -378.91461181640625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.783482551574707, + "rewards/margins": 7.467926025390625, + "rewards/rejected": -18.251407623291016, + "step": 15455 + }, + { + "epoch": 2.4, + "learning_rate": 2.811277555889828e-06, + "logits/chosen": -1.1640372276306152, + "logits/rejected": -2.3202855587005615, + "logps/chosen": -185.77801513671875, + "logps/rejected": -569.762939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.192176818847656, + "rewards/margins": 14.735170364379883, + "rewards/rejected": -26.92734718322754, + "step": 15456 + }, + { + "epoch": 2.4, + "learning_rate": 2.8105441153586804e-06, + "logits/chosen": -2.0227606296539307, + "logits/rejected": -2.221144914627075, + "logps/chosen": -345.5906982421875, + "logps/rejected": -526.3238525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.331186294555664, + "rewards/margins": 12.261466979980469, + "rewards/rejected": -25.592655181884766, + "step": 15457 + }, + { + "epoch": 2.4, + "learning_rate": 2.8098106748275327e-06, + "logits/chosen": -2.8288075923919678, + "logits/rejected": -2.18204665184021, + "logps/chosen": -263.3126525878906, + "logps/rejected": -360.20428466796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.714753150939941, + "rewards/margins": 10.098489761352539, + "rewards/rejected": -17.813243865966797, + "step": 15458 + }, + { + "epoch": 2.4, + "learning_rate": 2.809077234296385e-06, + "logits/chosen": -2.8544702529907227, + "logits/rejected": -2.2675576210021973, + "logps/chosen": -302.958251953125, + "logps/rejected": -191.87533569335938, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.434349060058594, + "rewards/margins": 4.7753496170043945, + "rewards/rejected": -12.209698677062988, + "step": 15459 + }, + { + "epoch": 2.4, + "learning_rate": 2.808343793765237e-06, + "logits/chosen": -1.6433727741241455, + "logits/rejected": -1.880448341369629, + "logps/chosen": -128.61962890625, + "logps/rejected": -383.02642822265625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.872039794921875, + "rewards/margins": 9.895153999328613, + "rewards/rejected": -16.767192840576172, + "step": 15460 + }, + { + "epoch": 2.4, + "learning_rate": 2.8076103532340888e-06, + "logits/chosen": -1.8313277959823608, + "logits/rejected": -1.4523863792419434, + "logps/chosen": -525.3529052734375, + "logps/rejected": -636.080322265625, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.563336372375488, + "rewards/margins": 10.931447982788086, + "rewards/rejected": -19.49478530883789, + "step": 15461 + }, + { + "epoch": 2.4, + "learning_rate": 2.806876912702941e-06, + "logits/chosen": -2.8580639362335205, + "logits/rejected": -2.556698799133301, + "logps/chosen": -445.81195068359375, + "logps/rejected": -575.768798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.949668884277344, + "rewards/margins": 12.792512893676758, + "rewards/rejected": -22.742183685302734, + "step": 15462 + }, + { + "epoch": 2.4, + "learning_rate": 2.806143472171793e-06, + "logits/chosen": -2.3250908851623535, + "logits/rejected": -2.568208932876587, + "logps/chosen": -179.47628784179688, + "logps/rejected": -455.05157470703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.545719146728516, + "rewards/margins": 9.142309188842773, + "rewards/rejected": -19.68802833557129, + "step": 15463 + }, + { + "epoch": 2.4, + "learning_rate": 2.8054100316406452e-06, + "logits/chosen": -2.6650338172912598, + "logits/rejected": -2.75118088722229, + "logps/chosen": -264.8034973144531, + "logps/rejected": -379.3713073730469, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.717048645019531, + "rewards/margins": 7.710565090179443, + "rewards/rejected": -19.427614212036133, + "step": 15464 + }, + { + "epoch": 2.41, + "learning_rate": 2.804676591109497e-06, + "logits/chosen": -2.5768020153045654, + "logits/rejected": -2.844146490097046, + "logps/chosen": -892.4066162109375, + "logps/rejected": -923.1580200195312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.637946128845215, + "rewards/margins": 10.7345552444458, + "rewards/rejected": -20.372501373291016, + "step": 15465 + }, + { + "epoch": 2.41, + "learning_rate": 2.8039431505783494e-06, + "logits/chosen": -2.5552361011505127, + "logits/rejected": -3.0211386680603027, + "logps/chosen": -225.9009552001953, + "logps/rejected": -502.90386962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7289958000183105, + "rewards/margins": 12.804122924804688, + "rewards/rejected": -18.533119201660156, + "step": 15466 + }, + { + "epoch": 2.41, + "learning_rate": 2.8032097100472017e-06, + "logits/chosen": -2.420109748840332, + "logits/rejected": -2.6239519119262695, + "logps/chosen": -242.52517700195312, + "logps/rejected": -354.5364074707031, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.336840629577637, + "rewards/margins": 6.866682529449463, + "rewards/rejected": -14.203523635864258, + "step": 15467 + }, + { + "epoch": 2.41, + "learning_rate": 2.802476269516054e-06, + "logits/chosen": -2.9125993251800537, + "logits/rejected": -2.9345507621765137, + "logps/chosen": -175.42666625976562, + "logps/rejected": -362.65802001953125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.011774063110352, + "rewards/margins": 7.368607997894287, + "rewards/rejected": -18.380382537841797, + "step": 15468 + }, + { + "epoch": 2.41, + "learning_rate": 2.801742828984906e-06, + "logits/chosen": -1.9846782684326172, + "logits/rejected": -2.6056320667266846, + "logps/chosen": -194.6308135986328, + "logps/rejected": -311.5843811035156, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.568849563598633, + "rewards/margins": 7.117645263671875, + "rewards/rejected": -15.686494827270508, + "step": 15469 + }, + { + "epoch": 2.41, + "learning_rate": 2.8010093884537582e-06, + "logits/chosen": -2.734988212585449, + "logits/rejected": -2.6999974250793457, + "logps/chosen": -175.69146728515625, + "logps/rejected": -361.6838073730469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.06539535522461, + "rewards/margins": 13.671087265014648, + "rewards/rejected": -22.736482620239258, + "step": 15470 + }, + { + "epoch": 2.41, + "learning_rate": 2.80027594792261e-06, + "logits/chosen": -1.999008059501648, + "logits/rejected": -1.1084489822387695, + "logps/chosen": -410.1391296386719, + "logps/rejected": -355.396484375, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.555822372436523, + "rewards/margins": 5.670045375823975, + "rewards/rejected": -14.225868225097656, + "step": 15471 + }, + { + "epoch": 2.41, + "learning_rate": 2.799542507391462e-06, + "logits/chosen": -2.405655860900879, + "logits/rejected": -2.6118381023406982, + "logps/chosen": -706.5717163085938, + "logps/rejected": -783.0716552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5883636474609375, + "rewards/margins": 14.593757629394531, + "rewards/rejected": -22.18212127685547, + "step": 15472 + }, + { + "epoch": 2.41, + "learning_rate": 2.7988090668603143e-06, + "logits/chosen": -2.4483730792999268, + "logits/rejected": -2.8802361488342285, + "logps/chosen": -160.32386779785156, + "logps/rejected": -328.85821533203125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.797017097473145, + "rewards/margins": 8.30599308013916, + "rewards/rejected": -18.103010177612305, + "step": 15473 + }, + { + "epoch": 2.41, + "learning_rate": 2.798075626329166e-06, + "logits/chosen": -1.7269798517227173, + "logits/rejected": -2.4484524726867676, + "logps/chosen": -328.2933349609375, + "logps/rejected": -690.9326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.244097709655762, + "rewards/margins": 13.282014846801758, + "rewards/rejected": -22.526111602783203, + "step": 15474 + }, + { + "epoch": 2.41, + "learning_rate": 2.797342185798019e-06, + "logits/chosen": -1.0115348100662231, + "logits/rejected": -2.203275680541992, + "logps/chosen": -224.79574584960938, + "logps/rejected": -513.2745361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.317293167114258, + "rewards/margins": 13.233051300048828, + "rewards/rejected": -24.550342559814453, + "step": 15475 + }, + { + "epoch": 2.41, + "learning_rate": 2.7966087452668708e-06, + "logits/chosen": -2.3846147060394287, + "logits/rejected": -2.866550922393799, + "logps/chosen": -163.8163299560547, + "logps/rejected": -385.0020446777344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.598294258117676, + "rewards/margins": 12.995495796203613, + "rewards/rejected": -20.59379005432129, + "step": 15476 + }, + { + "epoch": 2.41, + "learning_rate": 2.795875304735723e-06, + "logits/chosen": -2.760329246520996, + "logits/rejected": -2.6564581394195557, + "logps/chosen": -228.96771240234375, + "logps/rejected": -371.7243347167969, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.67759895324707, + "rewards/margins": 9.314302444458008, + "rewards/rejected": -21.991901397705078, + "step": 15477 + }, + { + "epoch": 2.41, + "learning_rate": 2.795141864204575e-06, + "logits/chosen": -2.8665106296539307, + "logits/rejected": -2.9650537967681885, + "logps/chosen": -135.38185119628906, + "logps/rejected": -233.6175537109375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.261239051818848, + "rewards/margins": 7.304961204528809, + "rewards/rejected": -15.566200256347656, + "step": 15478 + }, + { + "epoch": 2.41, + "learning_rate": 2.7944084236734272e-06, + "logits/chosen": -1.517035722732544, + "logits/rejected": -2.710841178894043, + "logps/chosen": -196.9417724609375, + "logps/rejected": -283.77655029296875, + "loss": 3.7949, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.096748352050781, + "rewards/margins": 2.8567287921905518, + "rewards/rejected": -15.953476905822754, + "step": 15479 + }, + { + "epoch": 2.41, + "learning_rate": 2.793674983142279e-06, + "logits/chosen": -1.8769397735595703, + "logits/rejected": -2.7034809589385986, + "logps/chosen": -374.2998046875, + "logps/rejected": -651.8221435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.698335647583008, + "rewards/margins": 14.195968627929688, + "rewards/rejected": -24.894304275512695, + "step": 15480 + }, + { + "epoch": 2.41, + "learning_rate": 2.792941542611131e-06, + "logits/chosen": -2.55350661277771, + "logits/rejected": -2.779087543487549, + "logps/chosen": -571.6203002929688, + "logps/rejected": -434.408935546875, + "loss": 0.0858, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.12519645690918, + "rewards/margins": 3.3108997344970703, + "rewards/rejected": -16.43609619140625, + "step": 15481 + }, + { + "epoch": 2.41, + "learning_rate": 2.7922081020799833e-06, + "logits/chosen": -2.412348985671997, + "logits/rejected": -2.375102996826172, + "logps/chosen": -251.79542541503906, + "logps/rejected": -345.3844299316406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.240469932556152, + "rewards/margins": 10.951869010925293, + "rewards/rejected": -21.192338943481445, + "step": 15482 + }, + { + "epoch": 2.41, + "learning_rate": 2.7914746615488356e-06, + "logits/chosen": -2.1695685386657715, + "logits/rejected": -2.8076441287994385, + "logps/chosen": -288.4690856933594, + "logps/rejected": -504.1461181640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.867682456970215, + "rewards/margins": 8.738191604614258, + "rewards/rejected": -18.605873107910156, + "step": 15483 + }, + { + "epoch": 2.41, + "learning_rate": 2.790741221017688e-06, + "logits/chosen": -3.029869556427002, + "logits/rejected": -2.47324538230896, + "logps/chosen": -339.2059326171875, + "logps/rejected": -234.9669647216797, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.867837905883789, + "rewards/margins": 6.47373104095459, + "rewards/rejected": -14.341568946838379, + "step": 15484 + }, + { + "epoch": 2.41, + "learning_rate": 2.7900077804865398e-06, + "logits/chosen": -2.5486509799957275, + "logits/rejected": -3.0701863765716553, + "logps/chosen": -91.70345306396484, + "logps/rejected": -379.6650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.293147087097168, + "rewards/margins": 12.032891273498535, + "rewards/rejected": -18.326038360595703, + "step": 15485 + }, + { + "epoch": 2.41, + "learning_rate": 2.789274339955392e-06, + "logits/chosen": -2.1662955284118652, + "logits/rejected": -2.744886636734009, + "logps/chosen": -218.16542053222656, + "logps/rejected": -436.33087158203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.294197082519531, + "rewards/margins": 9.371011734008789, + "rewards/rejected": -20.66520881652832, + "step": 15486 + }, + { + "epoch": 2.41, + "learning_rate": 2.788540899424244e-06, + "logits/chosen": -1.7728725671768188, + "logits/rejected": -2.085890054702759, + "logps/chosen": -270.38720703125, + "logps/rejected": -345.5601806640625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.1411714553833, + "rewards/margins": 9.731500625610352, + "rewards/rejected": -21.87267303466797, + "step": 15487 + }, + { + "epoch": 2.41, + "learning_rate": 2.7878074588930963e-06, + "logits/chosen": -2.460669755935669, + "logits/rejected": -2.5638718605041504, + "logps/chosen": -402.4749755859375, + "logps/rejected": -643.638427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.314539909362793, + "rewards/margins": 16.156707763671875, + "rewards/rejected": -26.471248626708984, + "step": 15488 + }, + { + "epoch": 2.41, + "learning_rate": 2.787074018361948e-06, + "logits/chosen": -1.9282060861587524, + "logits/rejected": -2.4779105186462402, + "logps/chosen": -388.6321105957031, + "logps/rejected": -491.00042724609375, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -17.64757537841797, + "rewards/margins": 6.629080772399902, + "rewards/rejected": -24.276657104492188, + "step": 15489 + }, + { + "epoch": 2.41, + "learning_rate": 2.7863405778308004e-06, + "logits/chosen": -2.393669843673706, + "logits/rejected": -2.793578863143921, + "logps/chosen": -220.43345642089844, + "logps/rejected": -401.8470458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.954955101013184, + "rewards/margins": 10.27365493774414, + "rewards/rejected": -18.22861099243164, + "step": 15490 + }, + { + "epoch": 2.41, + "learning_rate": 2.7856071372996523e-06, + "logits/chosen": -2.0053699016571045, + "logits/rejected": -2.9373831748962402, + "logps/chosen": -615.0743408203125, + "logps/rejected": -841.893798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.397375106811523, + "rewards/margins": 12.12869930267334, + "rewards/rejected": -19.526073455810547, + "step": 15491 + }, + { + "epoch": 2.41, + "learning_rate": 2.7848736967685046e-06, + "logits/chosen": -1.8521450757980347, + "logits/rejected": -2.9068472385406494, + "logps/chosen": -431.0584411621094, + "logps/rejected": -563.1644897460938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.240703582763672, + "rewards/margins": 9.286029815673828, + "rewards/rejected": -20.5267333984375, + "step": 15492 + }, + { + "epoch": 2.41, + "learning_rate": 2.784140256237357e-06, + "logits/chosen": -2.0613961219787598, + "logits/rejected": -2.730915069580078, + "logps/chosen": -708.499755859375, + "logps/rejected": -729.9891967773438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.289369583129883, + "rewards/margins": 10.308333396911621, + "rewards/rejected": -19.597702026367188, + "step": 15493 + }, + { + "epoch": 2.41, + "learning_rate": 2.783406815706209e-06, + "logits/chosen": -2.7071330547332764, + "logits/rejected": -1.5479322671890259, + "logps/chosen": -288.46240234375, + "logps/rejected": -255.7564697265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9678850173950195, + "rewards/margins": 10.739471435546875, + "rewards/rejected": -18.707355499267578, + "step": 15494 + }, + { + "epoch": 2.41, + "learning_rate": 2.782673375175061e-06, + "logits/chosen": -2.783116579055786, + "logits/rejected": -2.8450422286987305, + "logps/chosen": -153.39369201660156, + "logps/rejected": -173.2597198486328, + "loss": 0.2186, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.456082344055176, + "rewards/margins": 5.098452568054199, + "rewards/rejected": -14.554534912109375, + "step": 15495 + }, + { + "epoch": 2.41, + "learning_rate": 2.781939934643913e-06, + "logits/chosen": -2.7835581302642822, + "logits/rejected": -2.998741388320923, + "logps/chosen": -162.0065155029297, + "logps/rejected": -324.65716552734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.870821952819824, + "rewards/margins": 7.889122009277344, + "rewards/rejected": -19.759944915771484, + "step": 15496 + }, + { + "epoch": 2.41, + "learning_rate": 2.7812064941127653e-06, + "logits/chosen": -2.63783860206604, + "logits/rejected": -2.8315320014953613, + "logps/chosen": -181.72828674316406, + "logps/rejected": -356.1695556640625, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.59708023071289, + "rewards/margins": 8.340145111083984, + "rewards/rejected": -22.937225341796875, + "step": 15497 + }, + { + "epoch": 2.41, + "learning_rate": 2.780473053581617e-06, + "logits/chosen": -2.6823039054870605, + "logits/rejected": -2.4927828311920166, + "logps/chosen": -696.6781005859375, + "logps/rejected": -1043.8486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.136682510375977, + "rewards/margins": 11.70375919342041, + "rewards/rejected": -20.840442657470703, + "step": 15498 + }, + { + "epoch": 2.41, + "learning_rate": 2.7797396130504695e-06, + "logits/chosen": -1.749537467956543, + "logits/rejected": -2.8153293132781982, + "logps/chosen": -636.258544921875, + "logps/rejected": -674.2575073242188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.189984321594238, + "rewards/margins": 10.538311004638672, + "rewards/rejected": -20.728294372558594, + "step": 15499 + }, + { + "epoch": 2.41, + "learning_rate": 2.7790061725193218e-06, + "logits/chosen": -2.7230148315429688, + "logits/rejected": -2.8413639068603516, + "logps/chosen": -534.349365234375, + "logps/rejected": -655.4360961914062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.54002571105957, + "rewards/margins": 11.068915367126465, + "rewards/rejected": -19.60894012451172, + "step": 15500 + }, + { + "epoch": 2.41, + "learning_rate": 2.7782727319881736e-06, + "logits/chosen": -1.440694808959961, + "logits/rejected": -2.4611105918884277, + "logps/chosen": -201.9722900390625, + "logps/rejected": -319.6197509765625, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.869760513305664, + "rewards/margins": 6.637425899505615, + "rewards/rejected": -19.507186889648438, + "step": 15501 + }, + { + "epoch": 2.41, + "learning_rate": 2.777539291457026e-06, + "logits/chosen": -1.9039268493652344, + "logits/rejected": -2.7392401695251465, + "logps/chosen": -249.86282348632812, + "logps/rejected": -567.901611328125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.339889526367188, + "rewards/margins": 10.7351713180542, + "rewards/rejected": -21.075061798095703, + "step": 15502 + }, + { + "epoch": 2.41, + "learning_rate": 2.776805850925878e-06, + "logits/chosen": -2.491826057434082, + "logits/rejected": -2.1392078399658203, + "logps/chosen": -308.0704040527344, + "logps/rejected": -381.5701904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.290910720825195, + "rewards/margins": 12.625282287597656, + "rewards/rejected": -21.91619300842285, + "step": 15503 + }, + { + "epoch": 2.41, + "learning_rate": 2.77607241039473e-06, + "logits/chosen": -2.074117660522461, + "logits/rejected": -2.2301077842712402, + "logps/chosen": -175.510009765625, + "logps/rejected": -302.73138427734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.78902530670166, + "rewards/margins": 9.339942932128906, + "rewards/rejected": -17.128969192504883, + "step": 15504 + }, + { + "epoch": 2.41, + "learning_rate": 2.775338969863582e-06, + "logits/chosen": -1.9864856004714966, + "logits/rejected": -2.479473352432251, + "logps/chosen": -199.58665466308594, + "logps/rejected": -449.1512145996094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.815126419067383, + "rewards/margins": 10.814824104309082, + "rewards/rejected": -20.62995147705078, + "step": 15505 + }, + { + "epoch": 2.41, + "learning_rate": 2.7746055293324343e-06, + "logits/chosen": -2.805508613586426, + "logits/rejected": -2.288424015045166, + "logps/chosen": -586.8837890625, + "logps/rejected": -739.5830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.164556980133057, + "rewards/margins": 13.093650817871094, + "rewards/rejected": -20.258209228515625, + "step": 15506 + }, + { + "epoch": 2.41, + "learning_rate": 2.773872088801286e-06, + "logits/chosen": -2.7591896057128906, + "logits/rejected": -3.0171327590942383, + "logps/chosen": -156.38677978515625, + "logps/rejected": -369.0692138671875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.442419052124023, + "rewards/margins": 9.60538101196289, + "rewards/rejected": -18.047801971435547, + "step": 15507 + }, + { + "epoch": 2.41, + "learning_rate": 2.7731386482701385e-06, + "logits/chosen": -2.4909892082214355, + "logits/rejected": -1.6588016748428345, + "logps/chosen": -227.60157775878906, + "logps/rejected": -264.3349609375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.241813659667969, + "rewards/margins": 7.209402084350586, + "rewards/rejected": -15.451215744018555, + "step": 15508 + }, + { + "epoch": 2.41, + "learning_rate": 2.7724052077389908e-06, + "logits/chosen": -0.9700849056243896, + "logits/rejected": -2.0508830547332764, + "logps/chosen": -279.5771789550781, + "logps/rejected": -487.8952331542969, + "loss": 0.0459, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.083366394042969, + "rewards/margins": 7.634053707122803, + "rewards/rejected": -16.717418670654297, + "step": 15509 + }, + { + "epoch": 2.41, + "learning_rate": 2.7716717672078427e-06, + "logits/chosen": -1.7459471225738525, + "logits/rejected": -2.859407424926758, + "logps/chosen": -136.41897583007812, + "logps/rejected": -701.6448364257812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.143908500671387, + "rewards/margins": 13.885425567626953, + "rewards/rejected": -23.029335021972656, + "step": 15510 + }, + { + "epoch": 2.41, + "learning_rate": 2.770938326676695e-06, + "logits/chosen": -2.3259451389312744, + "logits/rejected": -2.9360854625701904, + "logps/chosen": -538.4927978515625, + "logps/rejected": -432.63134765625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.624872207641602, + "rewards/margins": 7.826016426086426, + "rewards/rejected": -21.450889587402344, + "step": 15511 + }, + { + "epoch": 2.41, + "learning_rate": 2.770204886145547e-06, + "logits/chosen": -2.8419454097747803, + "logits/rejected": -2.8926303386688232, + "logps/chosen": -229.4006805419922, + "logps/rejected": -374.9494323730469, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5559282302856445, + "rewards/margins": 8.452789306640625, + "rewards/rejected": -16.008716583251953, + "step": 15512 + }, + { + "epoch": 2.41, + "learning_rate": 2.769471445614399e-06, + "logits/chosen": -2.1067757606506348, + "logits/rejected": -2.4621694087982178, + "logps/chosen": -743.815673828125, + "logps/rejected": -806.77587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.447415351867676, + "rewards/margins": 14.86646842956543, + "rewards/rejected": -25.31388282775879, + "step": 15513 + }, + { + "epoch": 2.41, + "learning_rate": 2.768738005083251e-06, + "logits/chosen": -2.013500928878784, + "logits/rejected": -2.6258456707000732, + "logps/chosen": -166.45382690429688, + "logps/rejected": -408.10986328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.327550888061523, + "rewards/margins": 8.587118148803711, + "rewards/rejected": -19.914669036865234, + "step": 15514 + }, + { + "epoch": 2.41, + "learning_rate": 2.7680045645521033e-06, + "logits/chosen": -2.471169948577881, + "logits/rejected": -2.5894978046417236, + "logps/chosen": -310.9986572265625, + "logps/rejected": -486.7242431640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.812479019165039, + "rewards/margins": 12.787446975708008, + "rewards/rejected": -23.599925994873047, + "step": 15515 + }, + { + "epoch": 2.41, + "learning_rate": 2.767271124020955e-06, + "logits/chosen": -2.0473177433013916, + "logits/rejected": -1.2604222297668457, + "logps/chosen": -472.1022033691406, + "logps/rejected": -400.319091796875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.957109451293945, + "rewards/margins": 7.058122158050537, + "rewards/rejected": -18.01523208618164, + "step": 15516 + }, + { + "epoch": 2.41, + "learning_rate": 2.766537683489808e-06, + "logits/chosen": -2.412724018096924, + "logits/rejected": -2.3825783729553223, + "logps/chosen": -228.4097442626953, + "logps/rejected": -420.11322021484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.861700057983398, + "rewards/margins": 11.822223663330078, + "rewards/rejected": -21.683923721313477, + "step": 15517 + }, + { + "epoch": 2.41, + "learning_rate": 2.76580424295866e-06, + "logits/chosen": -2.2471046447753906, + "logits/rejected": -2.542929172515869, + "logps/chosen": -515.7684326171875, + "logps/rejected": -395.7703857421875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7025604248046875, + "rewards/margins": 9.365021705627441, + "rewards/rejected": -13.067582130432129, + "step": 15518 + }, + { + "epoch": 2.41, + "learning_rate": 2.765070802427512e-06, + "logits/chosen": -1.9606270790100098, + "logits/rejected": -2.673738718032837, + "logps/chosen": -200.40982055664062, + "logps/rejected": -482.1340026855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.282721519470215, + "rewards/margins": 13.247210502624512, + "rewards/rejected": -23.529932022094727, + "step": 15519 + }, + { + "epoch": 2.41, + "learning_rate": 2.764337361896364e-06, + "logits/chosen": -2.05330491065979, + "logits/rejected": -2.745117425918579, + "logps/chosen": -157.59683227539062, + "logps/rejected": -268.17877197265625, + "loss": 0.5103, + "rewards/accuracies": 0.5, + "rewards/chosen": -14.578672409057617, + "rewards/margins": 1.448385238647461, + "rewards/rejected": -16.027057647705078, + "step": 15520 + }, + { + "epoch": 2.41, + "learning_rate": 2.763603921365216e-06, + "logits/chosen": -2.702897548675537, + "logits/rejected": -1.654606819152832, + "logps/chosen": -467.694091796875, + "logps/rejected": -369.74273681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.708905220031738, + "rewards/margins": 10.402745246887207, + "rewards/rejected": -18.111650466918945, + "step": 15521 + }, + { + "epoch": 2.41, + "learning_rate": 2.762870480834068e-06, + "logits/chosen": -1.6805580854415894, + "logits/rejected": -2.996337413787842, + "logps/chosen": -286.1549377441406, + "logps/rejected": -524.7919311523438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.296424865722656, + "rewards/margins": 12.071025848388672, + "rewards/rejected": -22.367450714111328, + "step": 15522 + }, + { + "epoch": 2.41, + "learning_rate": 2.76213704030292e-06, + "logits/chosen": -2.9512405395507812, + "logits/rejected": -3.0572104454040527, + "logps/chosen": -129.18858337402344, + "logps/rejected": -412.0627136230469, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.080936431884766, + "rewards/margins": 7.232728481292725, + "rewards/rejected": -14.313665390014648, + "step": 15523 + }, + { + "epoch": 2.41, + "learning_rate": 2.7614035997717723e-06, + "logits/chosen": -2.67570161819458, + "logits/rejected": -2.532069683074951, + "logps/chosen": -474.0587463378906, + "logps/rejected": -489.8651123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.077751159667969, + "rewards/margins": 11.935256958007812, + "rewards/rejected": -17.01300811767578, + "step": 15524 + }, + { + "epoch": 2.41, + "learning_rate": 2.7606701592406246e-06, + "logits/chosen": -2.737175703048706, + "logits/rejected": -2.095320463180542, + "logps/chosen": -285.7704162597656, + "logps/rejected": -332.27154541015625, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.919624328613281, + "rewards/margins": 6.647801876068115, + "rewards/rejected": -16.567426681518555, + "step": 15525 + }, + { + "epoch": 2.41, + "learning_rate": 2.759936718709477e-06, + "logits/chosen": -1.617279291152954, + "logits/rejected": -2.752685785293579, + "logps/chosen": -172.326904296875, + "logps/rejected": -542.4039306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4129486083984375, + "rewards/margins": 14.86318302154541, + "rewards/rejected": -22.27613067626953, + "step": 15526 + }, + { + "epoch": 2.41, + "learning_rate": 2.759203278178329e-06, + "logits/chosen": -1.8676371574401855, + "logits/rejected": -2.623673915863037, + "logps/chosen": -275.6090087890625, + "logps/rejected": -433.2303161621094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5907883644104, + "rewards/margins": 11.343683242797852, + "rewards/rejected": -17.934471130371094, + "step": 15527 + }, + { + "epoch": 2.41, + "learning_rate": 2.758469837647181e-06, + "logits/chosen": -1.7627403736114502, + "logits/rejected": -2.430450677871704, + "logps/chosen": -282.5438232421875, + "logps/rejected": -622.073974609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.498761177062988, + "rewards/margins": 12.651483535766602, + "rewards/rejected": -22.150245666503906, + "step": 15528 + }, + { + "epoch": 2.42, + "learning_rate": 2.757736397116033e-06, + "logits/chosen": -2.6948156356811523, + "logits/rejected": -2.9123623371124268, + "logps/chosen": -297.4927978515625, + "logps/rejected": -268.16766357421875, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.803047180175781, + "rewards/margins": 8.20145034790039, + "rewards/rejected": -16.004497528076172, + "step": 15529 + }, + { + "epoch": 2.42, + "learning_rate": 2.757002956584885e-06, + "logits/chosen": -2.581455945968628, + "logits/rejected": -2.271496057510376, + "logps/chosen": -833.139892578125, + "logps/rejected": -620.5822143554688, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.056775093078613, + "rewards/margins": 8.317459106445312, + "rewards/rejected": -18.374235153198242, + "step": 15530 + }, + { + "epoch": 2.42, + "learning_rate": 2.756269516053737e-06, + "logits/chosen": -2.4038374423980713, + "logits/rejected": -2.6070690155029297, + "logps/chosen": -495.4903564453125, + "logps/rejected": -736.7273559570312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.113506317138672, + "rewards/margins": 15.091535568237305, + "rewards/rejected": -23.205041885375977, + "step": 15531 + }, + { + "epoch": 2.42, + "learning_rate": 2.755536075522589e-06, + "logits/chosen": -1.6478475332260132, + "logits/rejected": -2.8450448513031006, + "logps/chosen": -366.2264404296875, + "logps/rejected": -379.50811767578125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.758757591247559, + "rewards/margins": 7.839371681213379, + "rewards/rejected": -16.598129272460938, + "step": 15532 + }, + { + "epoch": 2.42, + "learning_rate": 2.7548026349914414e-06, + "logits/chosen": -2.970118284225464, + "logits/rejected": -3.06445574760437, + "logps/chosen": -203.28201293945312, + "logps/rejected": -362.88677978515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.023422241210938, + "rewards/margins": 10.927488327026367, + "rewards/rejected": -19.950910568237305, + "step": 15533 + }, + { + "epoch": 2.42, + "learning_rate": 2.7540691944602937e-06, + "logits/chosen": -2.8100943565368652, + "logits/rejected": -2.2638492584228516, + "logps/chosen": -608.2059326171875, + "logps/rejected": -493.4863586425781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.06332540512085, + "rewards/margins": 12.221700668334961, + "rewards/rejected": -19.28502655029297, + "step": 15534 + }, + { + "epoch": 2.42, + "learning_rate": 2.753335753929146e-06, + "logits/chosen": -2.738515615463257, + "logits/rejected": -2.9412031173706055, + "logps/chosen": -211.18231201171875, + "logps/rejected": -296.1458740234375, + "loss": 0.0274, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.136007308959961, + "rewards/margins": 4.6408467292785645, + "rewards/rejected": -15.776854515075684, + "step": 15535 + }, + { + "epoch": 2.42, + "learning_rate": 2.752602313397998e-06, + "logits/chosen": -2.881422519683838, + "logits/rejected": -1.5173882246017456, + "logps/chosen": -428.11041259765625, + "logps/rejected": -433.495361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.536896705627441, + "rewards/margins": 16.172353744506836, + "rewards/rejected": -22.709251403808594, + "step": 15536 + }, + { + "epoch": 2.42, + "learning_rate": 2.75186887286685e-06, + "logits/chosen": -2.9316530227661133, + "logits/rejected": -2.989415407180786, + "logps/chosen": -181.67068481445312, + "logps/rejected": -437.5375671386719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.0150728225708, + "rewards/margins": 9.981681823730469, + "rewards/rejected": -17.996753692626953, + "step": 15537 + }, + { + "epoch": 2.42, + "learning_rate": 2.751135432335702e-06, + "logits/chosen": -2.747144937515259, + "logits/rejected": -2.63954758644104, + "logps/chosen": -194.6101531982422, + "logps/rejected": -273.80426025390625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.164169311523438, + "rewards/margins": 9.155014038085938, + "rewards/rejected": -17.319183349609375, + "step": 15538 + }, + { + "epoch": 2.42, + "learning_rate": 2.7504019918045543e-06, + "logits/chosen": -2.1182165145874023, + "logits/rejected": -2.7502501010894775, + "logps/chosen": -222.67176818847656, + "logps/rejected": -445.46697998046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.147754669189453, + "rewards/margins": 12.054744720458984, + "rewards/rejected": -20.202499389648438, + "step": 15539 + }, + { + "epoch": 2.42, + "learning_rate": 2.749668551273406e-06, + "logits/chosen": -2.7497618198394775, + "logits/rejected": -2.9018890857696533, + "logps/chosen": -115.7730712890625, + "logps/rejected": -240.01205444335938, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.711991310119629, + "rewards/margins": 7.88637638092041, + "rewards/rejected": -15.598367691040039, + "step": 15540 + }, + { + "epoch": 2.42, + "learning_rate": 2.748935110742258e-06, + "logits/chosen": -2.9234671592712402, + "logits/rejected": -2.7042949199676514, + "logps/chosen": -254.8042449951172, + "logps/rejected": -211.76220703125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.152395725250244, + "rewards/margins": 7.628498077392578, + "rewards/rejected": -13.780893325805664, + "step": 15541 + }, + { + "epoch": 2.42, + "learning_rate": 2.748201670211111e-06, + "logits/chosen": -1.845237135887146, + "logits/rejected": -2.3889806270599365, + "logps/chosen": -525.503662109375, + "logps/rejected": -1670.70068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.935813903808594, + "rewards/margins": 25.621009826660156, + "rewards/rejected": -38.55682373046875, + "step": 15542 + }, + { + "epoch": 2.42, + "learning_rate": 2.7474682296799627e-06, + "logits/chosen": -2.826010227203369, + "logits/rejected": -2.899169445037842, + "logps/chosen": -135.5499267578125, + "logps/rejected": -311.14013671875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.351786613464355, + "rewards/margins": 11.985179901123047, + "rewards/rejected": -20.33696746826172, + "step": 15543 + }, + { + "epoch": 2.42, + "learning_rate": 2.746734789148815e-06, + "logits/chosen": -2.010047674179077, + "logits/rejected": -2.5956404209136963, + "logps/chosen": -171.6051788330078, + "logps/rejected": -471.2210693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.992166042327881, + "rewards/margins": 17.317991256713867, + "rewards/rejected": -22.310157775878906, + "step": 15544 + }, + { + "epoch": 2.42, + "learning_rate": 2.746001348617667e-06, + "logits/chosen": -2.9078638553619385, + "logits/rejected": -2.4648635387420654, + "logps/chosen": -600.4327392578125, + "logps/rejected": -534.2529907226562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5267860889434814, + "rewards/margins": 11.582627296447754, + "rewards/rejected": -15.109413146972656, + "step": 15545 + }, + { + "epoch": 2.42, + "learning_rate": 2.745267908086519e-06, + "logits/chosen": -2.650733709335327, + "logits/rejected": -2.841613292694092, + "logps/chosen": -262.6027526855469, + "logps/rejected": -445.5547180175781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.795222282409668, + "rewards/margins": 9.448941230773926, + "rewards/rejected": -20.244163513183594, + "step": 15546 + }, + { + "epoch": 2.42, + "learning_rate": 2.744534467555371e-06, + "logits/chosen": -2.8109662532806396, + "logits/rejected": -2.560436248779297, + "logps/chosen": -123.392822265625, + "logps/rejected": -274.883056640625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.497237205505371, + "rewards/margins": 6.096259117126465, + "rewards/rejected": -13.593496322631836, + "step": 15547 + }, + { + "epoch": 2.42, + "learning_rate": 2.7438010270242234e-06, + "logits/chosen": -2.8398633003234863, + "logits/rejected": -1.939630150794983, + "logps/chosen": -688.801513671875, + "logps/rejected": -348.91876220703125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.162456512451172, + "rewards/margins": 6.685858249664307, + "rewards/rejected": -15.848315238952637, + "step": 15548 + }, + { + "epoch": 2.42, + "learning_rate": 2.7430675864930752e-06, + "logits/chosen": -2.162877082824707, + "logits/rejected": -2.8264009952545166, + "logps/chosen": -372.629638671875, + "logps/rejected": -672.1549072265625, + "loss": 0.0559, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.69523811340332, + "rewards/margins": 6.5940260887146, + "rewards/rejected": -16.289264678955078, + "step": 15549 + }, + { + "epoch": 2.42, + "learning_rate": 2.7423341459619275e-06, + "logits/chosen": -2.7088091373443604, + "logits/rejected": -2.8119683265686035, + "logps/chosen": -702.8137817382812, + "logps/rejected": -574.7786865234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.121793746948242, + "rewards/margins": 8.636177062988281, + "rewards/rejected": -17.757970809936523, + "step": 15550 + }, + { + "epoch": 2.42, + "learning_rate": 2.74160070543078e-06, + "logits/chosen": -2.820561170578003, + "logits/rejected": -2.0840680599212646, + "logps/chosen": -463.87579345703125, + "logps/rejected": -456.24053955078125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.910405158996582, + "rewards/margins": 7.80426025390625, + "rewards/rejected": -15.714664459228516, + "step": 15551 + }, + { + "epoch": 2.42, + "learning_rate": 2.7408672648996317e-06, + "logits/chosen": -2.446476936340332, + "logits/rejected": -2.838233232498169, + "logps/chosen": -150.179931640625, + "logps/rejected": -312.8226318359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.181159973144531, + "rewards/margins": 11.196724891662598, + "rewards/rejected": -19.377883911132812, + "step": 15552 + }, + { + "epoch": 2.42, + "learning_rate": 2.740133824368484e-06, + "logits/chosen": -2.68308162689209, + "logits/rejected": -2.7506120204925537, + "logps/chosen": -392.79473876953125, + "logps/rejected": -382.62274169921875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.283936500549316, + "rewards/margins": 8.201187133789062, + "rewards/rejected": -19.485122680664062, + "step": 15553 + }, + { + "epoch": 2.42, + "learning_rate": 2.739400383837336e-06, + "logits/chosen": -1.5134520530700684, + "logits/rejected": -2.364565849304199, + "logps/chosen": -121.99630737304688, + "logps/rejected": -393.2076110839844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.649591445922852, + "rewards/margins": 12.233665466308594, + "rewards/rejected": -19.883256912231445, + "step": 15554 + }, + { + "epoch": 2.42, + "learning_rate": 2.738666943306188e-06, + "logits/chosen": -2.6783528327941895, + "logits/rejected": -2.3630051612854004, + "logps/chosen": -628.9530029296875, + "logps/rejected": -683.857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.440320014953613, + "rewards/margins": 17.56886100769043, + "rewards/rejected": -30.00918197631836, + "step": 15555 + }, + { + "epoch": 2.42, + "learning_rate": 2.73793350277504e-06, + "logits/chosen": -2.1670169830322266, + "logits/rejected": -2.5710418224334717, + "logps/chosen": -152.8576202392578, + "logps/rejected": -386.59326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.502174377441406, + "rewards/margins": 14.796606063842773, + "rewards/rejected": -22.29878044128418, + "step": 15556 + }, + { + "epoch": 2.42, + "learning_rate": 2.7372000622438924e-06, + "logits/chosen": -2.909316062927246, + "logits/rejected": -2.9338371753692627, + "logps/chosen": -128.71990966796875, + "logps/rejected": -391.9176025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.129168510437012, + "rewards/margins": 13.911733627319336, + "rewards/rejected": -21.04090118408203, + "step": 15557 + }, + { + "epoch": 2.42, + "learning_rate": 2.7364666217127443e-06, + "logits/chosen": -2.8897182941436768, + "logits/rejected": -1.9599385261535645, + "logps/chosen": -595.3763427734375, + "logps/rejected": -495.105224609375, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.25230598449707, + "rewards/margins": 9.965850830078125, + "rewards/rejected": -18.218156814575195, + "step": 15558 + }, + { + "epoch": 2.42, + "learning_rate": 2.735733181181597e-06, + "logits/chosen": -2.2012109756469727, + "logits/rejected": -2.5611562728881836, + "logps/chosen": -137.902587890625, + "logps/rejected": -211.47091674804688, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.723990440368652, + "rewards/margins": 6.842551231384277, + "rewards/rejected": -13.56654167175293, + "step": 15559 + }, + { + "epoch": 2.42, + "learning_rate": 2.734999740650449e-06, + "logits/chosen": -2.543978691101074, + "logits/rejected": -2.2814104557037354, + "logps/chosen": -413.9054870605469, + "logps/rejected": -408.9496154785156, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.878975868225098, + "rewards/margins": 7.4250688552856445, + "rewards/rejected": -17.304044723510742, + "step": 15560 + }, + { + "epoch": 2.42, + "learning_rate": 2.7342663001193007e-06, + "logits/chosen": -2.879159927368164, + "logits/rejected": -1.6823915243148804, + "logps/chosen": -890.334228515625, + "logps/rejected": -553.146484375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.045276641845703, + "rewards/margins": 9.39683723449707, + "rewards/rejected": -17.442113876342773, + "step": 15561 + }, + { + "epoch": 2.42, + "learning_rate": 2.733532859588153e-06, + "logits/chosen": -3.0765085220336914, + "logits/rejected": -2.7926554679870605, + "logps/chosen": -197.09251403808594, + "logps/rejected": -339.6871337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.642928123474121, + "rewards/margins": 12.81363296508789, + "rewards/rejected": -17.456562042236328, + "step": 15562 + }, + { + "epoch": 2.42, + "learning_rate": 2.732799419057005e-06, + "logits/chosen": -1.2424914836883545, + "logits/rejected": -2.2563118934631348, + "logps/chosen": -164.80551147460938, + "logps/rejected": -504.1566162109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.318689346313477, + "rewards/margins": 10.222198486328125, + "rewards/rejected": -22.540889739990234, + "step": 15563 + }, + { + "epoch": 2.42, + "learning_rate": 2.7320659785258572e-06, + "logits/chosen": -2.994267702102661, + "logits/rejected": -3.042039632797241, + "logps/chosen": -245.80960083007812, + "logps/rejected": -364.5150451660156, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.71670150756836, + "rewards/margins": 8.412467956542969, + "rewards/rejected": -17.129169464111328, + "step": 15564 + }, + { + "epoch": 2.42, + "learning_rate": 2.731332537994709e-06, + "logits/chosen": -2.9035098552703857, + "logits/rejected": -2.848227024078369, + "logps/chosen": -209.62115478515625, + "logps/rejected": -309.43896484375, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.014744758605957, + "rewards/margins": 5.009206295013428, + "rewards/rejected": -15.023950576782227, + "step": 15565 + }, + { + "epoch": 2.42, + "learning_rate": 2.7305990974635614e-06, + "logits/chosen": -3.0640077590942383, + "logits/rejected": -2.4042160511016846, + "logps/chosen": -297.06829833984375, + "logps/rejected": -294.60205078125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.626381874084473, + "rewards/margins": 7.452676773071289, + "rewards/rejected": -18.079057693481445, + "step": 15566 + }, + { + "epoch": 2.42, + "learning_rate": 2.7298656569324137e-06, + "logits/chosen": -2.3416101932525635, + "logits/rejected": -2.839357376098633, + "logps/chosen": -171.83099365234375, + "logps/rejected": -257.7530822753906, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.448390007019043, + "rewards/margins": 5.880921840667725, + "rewards/rejected": -15.32931137084961, + "step": 15567 + }, + { + "epoch": 2.42, + "learning_rate": 2.729132216401266e-06, + "logits/chosen": -2.1665945053100586, + "logits/rejected": -2.6490414142608643, + "logps/chosen": -133.95465087890625, + "logps/rejected": -460.2536926269531, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.22512149810791, + "rewards/margins": 14.192066192626953, + "rewards/rejected": -22.417186737060547, + "step": 15568 + }, + { + "epoch": 2.42, + "learning_rate": 2.728398775870118e-06, + "logits/chosen": -2.741534948348999, + "logits/rejected": -2.0486669540405273, + "logps/chosen": -262.3382568359375, + "logps/rejected": -375.68963623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.438545227050781, + "rewards/margins": 13.198671340942383, + "rewards/rejected": -20.637218475341797, + "step": 15569 + }, + { + "epoch": 2.42, + "learning_rate": 2.7276653353389698e-06, + "logits/chosen": -2.358475685119629, + "logits/rejected": -2.9082489013671875, + "logps/chosen": -249.6931610107422, + "logps/rejected": -527.6483154296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.515140533447266, + "rewards/margins": 9.223367691040039, + "rewards/rejected": -18.738508224487305, + "step": 15570 + }, + { + "epoch": 2.42, + "learning_rate": 2.726931894807822e-06, + "logits/chosen": -1.9670498371124268, + "logits/rejected": -2.7282514572143555, + "logps/chosen": -234.37193298339844, + "logps/rejected": -538.725341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.042730331420898, + "rewards/margins": 11.277795791625977, + "rewards/rejected": -19.320526123046875, + "step": 15571 + }, + { + "epoch": 2.42, + "learning_rate": 2.726198454276674e-06, + "logits/chosen": -2.618797540664673, + "logits/rejected": -2.794748306274414, + "logps/chosen": -442.0065612792969, + "logps/rejected": -464.0091857910156, + "loss": 0.028, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.168966293334961, + "rewards/margins": 9.972497940063477, + "rewards/rejected": -19.141464233398438, + "step": 15572 + }, + { + "epoch": 2.42, + "learning_rate": 2.7254650137455262e-06, + "logits/chosen": -2.0108768939971924, + "logits/rejected": -2.830245018005371, + "logps/chosen": -172.53662109375, + "logps/rejected": -315.6193542480469, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.044670104980469, + "rewards/margins": 10.793899536132812, + "rewards/rejected": -23.83856964111328, + "step": 15573 + }, + { + "epoch": 2.42, + "learning_rate": 2.724731573214378e-06, + "logits/chosen": -2.388585090637207, + "logits/rejected": -2.872602939605713, + "logps/chosen": -368.8270568847656, + "logps/rejected": -617.93017578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.751314163208008, + "rewards/margins": 10.576507568359375, + "rewards/rejected": -23.327823638916016, + "step": 15574 + }, + { + "epoch": 2.42, + "learning_rate": 2.7239981326832304e-06, + "logits/chosen": -2.399779796600342, + "logits/rejected": -2.6806704998016357, + "logps/chosen": -255.79550170898438, + "logps/rejected": -451.16668701171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.61318588256836, + "rewards/margins": 10.420402526855469, + "rewards/rejected": -21.033588409423828, + "step": 15575 + }, + { + "epoch": 2.42, + "learning_rate": 2.7232646921520827e-06, + "logits/chosen": -2.864640474319458, + "logits/rejected": -2.650557518005371, + "logps/chosen": -336.10565185546875, + "logps/rejected": -406.6363525390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9104790687561035, + "rewards/margins": 10.305120468139648, + "rewards/rejected": -17.215599060058594, + "step": 15576 + }, + { + "epoch": 2.42, + "learning_rate": 2.722531251620935e-06, + "logits/chosen": -2.4839985370635986, + "logits/rejected": -2.7791757583618164, + "logps/chosen": -246.55889892578125, + "logps/rejected": -402.772705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.629867076873779, + "rewards/margins": 11.306957244873047, + "rewards/rejected": -18.936824798583984, + "step": 15577 + }, + { + "epoch": 2.42, + "learning_rate": 2.721797811089787e-06, + "logits/chosen": -2.220273494720459, + "logits/rejected": -0.9864707589149475, + "logps/chosen": -267.5323486328125, + "logps/rejected": -178.54165649414062, + "loss": 0.5957, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.760618209838867, + "rewards/margins": 0.2491769790649414, + "rewards/rejected": -12.009794235229492, + "step": 15578 + }, + { + "epoch": 2.42, + "learning_rate": 2.7210643705586388e-06, + "logits/chosen": -2.745307445526123, + "logits/rejected": -1.2992082834243774, + "logps/chosen": -587.2700805664062, + "logps/rejected": -331.9298095703125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.681354522705078, + "rewards/margins": 6.324453353881836, + "rewards/rejected": -21.005807876586914, + "step": 15579 + }, + { + "epoch": 2.42, + "learning_rate": 2.720330930027491e-06, + "logits/chosen": -2.581352710723877, + "logits/rejected": -2.763801336288452, + "logps/chosen": -423.3976745605469, + "logps/rejected": -362.70263671875, + "loss": 1.1535, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.829103469848633, + "rewards/margins": 4.736809730529785, + "rewards/rejected": -15.565912246704102, + "step": 15580 + }, + { + "epoch": 2.42, + "learning_rate": 2.719597489496343e-06, + "logits/chosen": -0.8549579977989197, + "logits/rejected": -2.467144250869751, + "logps/chosen": -147.53469848632812, + "logps/rejected": -396.8753967285156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.167560577392578, + "rewards/margins": 8.5497407913208, + "rewards/rejected": -15.717300415039062, + "step": 15581 + }, + { + "epoch": 2.42, + "learning_rate": 2.7188640489651953e-06, + "logits/chosen": -1.1706452369689941, + "logits/rejected": -2.4868035316467285, + "logps/chosen": -201.1561279296875, + "logps/rejected": -445.8723449707031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.549880981445312, + "rewards/margins": 10.465362548828125, + "rewards/rejected": -20.015243530273438, + "step": 15582 + }, + { + "epoch": 2.42, + "learning_rate": 2.718130608434047e-06, + "logits/chosen": -2.8368754386901855, + "logits/rejected": -2.426858901977539, + "logps/chosen": -667.1115112304688, + "logps/rejected": -529.3380737304688, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.28083610534668, + "rewards/margins": 9.316768646240234, + "rewards/rejected": -20.597604751586914, + "step": 15583 + }, + { + "epoch": 2.42, + "learning_rate": 2.7173971679029e-06, + "logits/chosen": -3.000629186630249, + "logits/rejected": -2.810805320739746, + "logps/chosen": -296.8233642578125, + "logps/rejected": -454.6491394042969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.906488418579102, + "rewards/margins": 10.828815460205078, + "rewards/rejected": -20.73530387878418, + "step": 15584 + }, + { + "epoch": 2.42, + "learning_rate": 2.7166637273717517e-06, + "logits/chosen": -1.5429131984710693, + "logits/rejected": -2.8069493770599365, + "logps/chosen": -459.0963439941406, + "logps/rejected": -606.696533203125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.945748329162598, + "rewards/margins": 6.049125671386719, + "rewards/rejected": -16.994873046875, + "step": 15585 + }, + { + "epoch": 2.42, + "learning_rate": 2.715930286840604e-06, + "logits/chosen": -2.9884848594665527, + "logits/rejected": -1.9483026266098022, + "logps/chosen": -213.4181671142578, + "logps/rejected": -187.83514404296875, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.399454116821289, + "rewards/margins": 5.125792503356934, + "rewards/rejected": -13.525247573852539, + "step": 15586 + }, + { + "epoch": 2.42, + "learning_rate": 2.715196846309456e-06, + "logits/chosen": -2.209378957748413, + "logits/rejected": -2.7702584266662598, + "logps/chosen": -97.90447998046875, + "logps/rejected": -224.4046630859375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.794936180114746, + "rewards/margins": 6.484755516052246, + "rewards/rejected": -15.279691696166992, + "step": 15587 + }, + { + "epoch": 2.42, + "learning_rate": 2.7144634057783082e-06, + "logits/chosen": -2.1861250400543213, + "logits/rejected": -2.804844379425049, + "logps/chosen": -112.9770736694336, + "logps/rejected": -567.2318115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.362689018249512, + "rewards/margins": 14.09579849243164, + "rewards/rejected": -22.45848846435547, + "step": 15588 + }, + { + "epoch": 2.42, + "learning_rate": 2.71372996524716e-06, + "logits/chosen": -2.7314610481262207, + "logits/rejected": -2.589426279067993, + "logps/chosen": -305.8612365722656, + "logps/rejected": -505.848876953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.494545936584473, + "rewards/margins": 11.794336318969727, + "rewards/rejected": -19.288881301879883, + "step": 15589 + }, + { + "epoch": 2.42, + "learning_rate": 2.712996524716012e-06, + "logits/chosen": -1.7609254121780396, + "logits/rejected": -2.205549955368042, + "logps/chosen": -284.8903503417969, + "logps/rejected": -557.3524169921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.527188301086426, + "rewards/margins": 12.590928077697754, + "rewards/rejected": -21.11811637878418, + "step": 15590 + }, + { + "epoch": 2.42, + "learning_rate": 2.7122630841848643e-06, + "logits/chosen": -2.9083738327026367, + "logits/rejected": -2.5594091415405273, + "logps/chosen": -634.4015502929688, + "logps/rejected": -456.9998474121094, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.864442348480225, + "rewards/margins": 7.644781589508057, + "rewards/rejected": -13.509223937988281, + "step": 15591 + }, + { + "epoch": 2.42, + "learning_rate": 2.7115296436537166e-06, + "logits/chosen": -2.07350492477417, + "logits/rejected": -2.6001579761505127, + "logps/chosen": -195.3498077392578, + "logps/rejected": -371.5863037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.017807960510254, + "rewards/margins": 13.918977737426758, + "rewards/rejected": -21.936786651611328, + "step": 15592 + }, + { + "epoch": 2.43, + "learning_rate": 2.710796203122569e-06, + "logits/chosen": -2.1750681400299072, + "logits/rejected": -2.7176132202148438, + "logps/chosen": -178.67520141601562, + "logps/rejected": -333.372802734375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.126297473907471, + "rewards/margins": 7.428576469421387, + "rewards/rejected": -12.554874420166016, + "step": 15593 + }, + { + "epoch": 2.43, + "learning_rate": 2.7100627625914208e-06, + "logits/chosen": -2.1581218242645264, + "logits/rejected": -2.8320140838623047, + "logps/chosen": -154.87277221679688, + "logps/rejected": -411.9694519042969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.032806396484375, + "rewards/margins": 13.699209213256836, + "rewards/rejected": -21.73201560974121, + "step": 15594 + }, + { + "epoch": 2.43, + "learning_rate": 2.709329322060273e-06, + "logits/chosen": -2.0355312824249268, + "logits/rejected": -2.8211517333984375, + "logps/chosen": -422.45953369140625, + "logps/rejected": -637.3626708984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.565472602844238, + "rewards/margins": 9.614253997802734, + "rewards/rejected": -21.179725646972656, + "step": 15595 + }, + { + "epoch": 2.43, + "learning_rate": 2.708595881529125e-06, + "logits/chosen": -2.486487627029419, + "logits/rejected": -2.8569912910461426, + "logps/chosen": -673.8768310546875, + "logps/rejected": -597.5140380859375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.638639450073242, + "rewards/margins": 7.699885368347168, + "rewards/rejected": -14.338523864746094, + "step": 15596 + }, + { + "epoch": 2.43, + "learning_rate": 2.7078624409979772e-06, + "logits/chosen": -2.665027379989624, + "logits/rejected": -2.7554006576538086, + "logps/chosen": -272.98370361328125, + "logps/rejected": -463.37109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.636837005615234, + "rewards/margins": 9.378400802612305, + "rewards/rejected": -19.01523780822754, + "step": 15597 + }, + { + "epoch": 2.43, + "learning_rate": 2.707129000466829e-06, + "logits/chosen": -2.106982946395874, + "logits/rejected": -2.6004810333251953, + "logps/chosen": -359.3601379394531, + "logps/rejected": -638.2587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.114127159118652, + "rewards/margins": 12.747912406921387, + "rewards/rejected": -21.86203956604004, + "step": 15598 + }, + { + "epoch": 2.43, + "learning_rate": 2.706395559935681e-06, + "logits/chosen": -1.0787383317947388, + "logits/rejected": -2.3836286067962646, + "logps/chosen": -431.5068664550781, + "logps/rejected": -456.841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.92658805847168, + "rewards/margins": 12.67000961303711, + "rewards/rejected": -22.59659767150879, + "step": 15599 + }, + { + "epoch": 2.43, + "learning_rate": 2.7056621194045333e-06, + "logits/chosen": -3.025721311569214, + "logits/rejected": -1.9246840476989746, + "logps/chosen": -417.80352783203125, + "logps/rejected": -365.60723876953125, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.828995704650879, + "rewards/margins": 7.517219066619873, + "rewards/rejected": -16.346214294433594, + "step": 15600 + }, + { + "epoch": 2.43, + "learning_rate": 2.7049286788733856e-06, + "logits/chosen": -2.6655538082122803, + "logits/rejected": -2.3108325004577637, + "logps/chosen": -570.3049926757812, + "logps/rejected": -746.6834716796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.926013946533203, + "rewards/margins": 8.477054595947266, + "rewards/rejected": -19.40306854248047, + "step": 15601 + }, + { + "epoch": 2.43, + "learning_rate": 2.704195238342238e-06, + "logits/chosen": -1.9257941246032715, + "logits/rejected": -2.85412335395813, + "logps/chosen": -244.73565673828125, + "logps/rejected": -367.2005615234375, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.761489868164062, + "rewards/margins": 10.224821090698242, + "rewards/rejected": -20.986310958862305, + "step": 15602 + }, + { + "epoch": 2.43, + "learning_rate": 2.70346179781109e-06, + "logits/chosen": -2.820451259613037, + "logits/rejected": -2.0679233074188232, + "logps/chosen": -291.784912109375, + "logps/rejected": -252.58837890625, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.437435150146484, + "rewards/margins": 4.072954177856445, + "rewards/rejected": -15.51038932800293, + "step": 15603 + }, + { + "epoch": 2.43, + "learning_rate": 2.702728357279942e-06, + "logits/chosen": -0.8373452425003052, + "logits/rejected": -2.6612565517425537, + "logps/chosen": -212.26467895507812, + "logps/rejected": -737.950439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.763230323791504, + "rewards/margins": 15.509138107299805, + "rewards/rejected": -25.272369384765625, + "step": 15604 + }, + { + "epoch": 2.43, + "learning_rate": 2.701994916748794e-06, + "logits/chosen": -2.89268159866333, + "logits/rejected": -2.8944597244262695, + "logps/chosen": -347.44317626953125, + "logps/rejected": -363.384765625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.455777168273926, + "rewards/margins": 8.119207382202148, + "rewards/rejected": -18.57498550415039, + "step": 15605 + }, + { + "epoch": 2.43, + "learning_rate": 2.7012614762176463e-06, + "logits/chosen": -2.1256706714630127, + "logits/rejected": -2.6875219345092773, + "logps/chosen": -366.3238525390625, + "logps/rejected": -371.572265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.033434867858887, + "rewards/margins": 8.456344604492188, + "rewards/rejected": -16.489778518676758, + "step": 15606 + }, + { + "epoch": 2.43, + "learning_rate": 2.700528035686498e-06, + "logits/chosen": -2.700590133666992, + "logits/rejected": -2.755150556564331, + "logps/chosen": -234.43081665039062, + "logps/rejected": -370.34375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.537763595581055, + "rewards/margins": 8.106393814086914, + "rewards/rejected": -16.64415740966797, + "step": 15607 + }, + { + "epoch": 2.43, + "learning_rate": 2.6997945951553505e-06, + "logits/chosen": -2.515638589859009, + "logits/rejected": -2.1817965507507324, + "logps/chosen": -189.8509979248047, + "logps/rejected": -383.667724609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.302074432373047, + "rewards/margins": 10.317715644836426, + "rewards/rejected": -19.619789123535156, + "step": 15608 + }, + { + "epoch": 2.43, + "learning_rate": 2.6990611546242023e-06, + "logits/chosen": -2.611210346221924, + "logits/rejected": -2.1919949054718018, + "logps/chosen": -262.70501708984375, + "logps/rejected": -195.2100830078125, + "loss": 0.0277, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.690124034881592, + "rewards/margins": 6.734560966491699, + "rewards/rejected": -12.424684524536133, + "step": 15609 + }, + { + "epoch": 2.43, + "learning_rate": 2.6983277140930546e-06, + "logits/chosen": -2.771942377090454, + "logits/rejected": -1.7604849338531494, + "logps/chosen": -461.22930908203125, + "logps/rejected": -421.19049072265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.106432914733887, + "rewards/margins": 11.247177124023438, + "rewards/rejected": -21.35361099243164, + "step": 15610 + }, + { + "epoch": 2.43, + "learning_rate": 2.697594273561907e-06, + "logits/chosen": -1.8451178073883057, + "logits/rejected": -2.799954414367676, + "logps/chosen": -200.05288696289062, + "logps/rejected": -351.1666564941406, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.499172210693359, + "rewards/margins": 6.989524841308594, + "rewards/rejected": -14.488697052001953, + "step": 15611 + }, + { + "epoch": 2.43, + "learning_rate": 2.696860833030759e-06, + "logits/chosen": -2.4716784954071045, + "logits/rejected": -2.4882125854492188, + "logps/chosen": -149.24920654296875, + "logps/rejected": -415.3824462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.620887756347656, + "rewards/margins": 13.666797637939453, + "rewards/rejected": -19.28768539428711, + "step": 15612 + }, + { + "epoch": 2.43, + "learning_rate": 2.696127392499611e-06, + "logits/chosen": -2.5935990810394287, + "logits/rejected": -0.8964200615882874, + "logps/chosen": -509.1157531738281, + "logps/rejected": -357.04193115234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.594728469848633, + "rewards/margins": 9.043157577514648, + "rewards/rejected": -18.63788604736328, + "step": 15613 + }, + { + "epoch": 2.43, + "learning_rate": 2.695393951968463e-06, + "logits/chosen": -2.4979920387268066, + "logits/rejected": -1.8813343048095703, + "logps/chosen": -454.82391357421875, + "logps/rejected": -389.61572265625, + "loss": 0.0387, + "rewards/accuracies": 1.0, + "rewards/chosen": -17.678743362426758, + "rewards/margins": 5.618289470672607, + "rewards/rejected": -23.29703140258789, + "step": 15614 + }, + { + "epoch": 2.43, + "learning_rate": 2.6946605114373153e-06, + "logits/chosen": -2.8087518215179443, + "logits/rejected": -2.237964153289795, + "logps/chosen": -507.36962890625, + "logps/rejected": -667.88330078125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.14303207397461, + "rewards/margins": 8.452308654785156, + "rewards/rejected": -18.595340728759766, + "step": 15615 + }, + { + "epoch": 2.43, + "learning_rate": 2.693927070906167e-06, + "logits/chosen": -2.750135898590088, + "logits/rejected": -2.8851711750030518, + "logps/chosen": -151.5249786376953, + "logps/rejected": -262.90380859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.0881929397583, + "rewards/margins": 8.210685729980469, + "rewards/rejected": -16.298877716064453, + "step": 15616 + }, + { + "epoch": 2.43, + "learning_rate": 2.6931936303750195e-06, + "logits/chosen": -0.997821033000946, + "logits/rejected": -2.0543413162231445, + "logps/chosen": -286.0181884765625, + "logps/rejected": -505.76556396484375, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.198869705200195, + "rewards/margins": 9.079781532287598, + "rewards/rejected": -18.27865219116211, + "step": 15617 + }, + { + "epoch": 2.43, + "learning_rate": 2.6924601898438718e-06, + "logits/chosen": -1.9433693885803223, + "logits/rejected": -2.499943494796753, + "logps/chosen": -208.5557098388672, + "logps/rejected": -366.6608581542969, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.15274429321289, + "rewards/margins": 7.998112678527832, + "rewards/rejected": -18.150856018066406, + "step": 15618 + }, + { + "epoch": 2.43, + "learning_rate": 2.6917267493127237e-06, + "logits/chosen": -1.3203169107437134, + "logits/rejected": -2.4823193550109863, + "logps/chosen": -130.5313720703125, + "logps/rejected": -625.550048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.905031204223633, + "rewards/margins": 17.093238830566406, + "rewards/rejected": -26.99827003479004, + "step": 15619 + }, + { + "epoch": 2.43, + "learning_rate": 2.690993308781576e-06, + "logits/chosen": -1.6330654621124268, + "logits/rejected": -2.6534435749053955, + "logps/chosen": -174.3681640625, + "logps/rejected": -368.23388671875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.37263298034668, + "rewards/margins": 10.97808837890625, + "rewards/rejected": -18.35072135925293, + "step": 15620 + }, + { + "epoch": 2.43, + "learning_rate": 2.690259868250428e-06, + "logits/chosen": -2.6654584407806396, + "logits/rejected": -2.314218521118164, + "logps/chosen": -310.1119384765625, + "logps/rejected": -597.61181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.393136978149414, + "rewards/margins": 13.920312881469727, + "rewards/rejected": -25.31344985961914, + "step": 15621 + }, + { + "epoch": 2.43, + "learning_rate": 2.68952642771928e-06, + "logits/chosen": -2.3786497116088867, + "logits/rejected": -2.8074355125427246, + "logps/chosen": -295.84942626953125, + "logps/rejected": -392.7822265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.709839820861816, + "rewards/margins": 12.688936233520508, + "rewards/rejected": -17.39877700805664, + "step": 15622 + }, + { + "epoch": 2.43, + "learning_rate": 2.688792987188132e-06, + "logits/chosen": -1.422928810119629, + "logits/rejected": -2.501816987991333, + "logps/chosen": -97.25062561035156, + "logps/rejected": -326.58026123046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.649504661560059, + "rewards/margins": 13.115110397338867, + "rewards/rejected": -20.764616012573242, + "step": 15623 + }, + { + "epoch": 2.43, + "learning_rate": 2.6880595466569843e-06, + "logits/chosen": -2.9237122535705566, + "logits/rejected": -2.460690498352051, + "logps/chosen": -451.5450439453125, + "logps/rejected": -330.9104309082031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.492274284362793, + "rewards/margins": 11.351914405822754, + "rewards/rejected": -19.844188690185547, + "step": 15624 + }, + { + "epoch": 2.43, + "learning_rate": 2.687326106125836e-06, + "logits/chosen": -1.4911794662475586, + "logits/rejected": -2.422769069671631, + "logps/chosen": -237.04598999023438, + "logps/rejected": -338.14752197265625, + "loss": 0.1573, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.2288179397583, + "rewards/margins": 4.736048698425293, + "rewards/rejected": -17.964866638183594, + "step": 15625 + }, + { + "epoch": 2.43, + "learning_rate": 2.6865926655946885e-06, + "logits/chosen": -2.1024484634399414, + "logits/rejected": -2.9319241046905518, + "logps/chosen": -329.0188903808594, + "logps/rejected": -486.1749267578125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.342243194580078, + "rewards/margins": 10.108165740966797, + "rewards/rejected": -20.450408935546875, + "step": 15626 + }, + { + "epoch": 2.43, + "learning_rate": 2.685859225063541e-06, + "logits/chosen": -2.3563761711120605, + "logits/rejected": -2.5594146251678467, + "logps/chosen": -204.45272827148438, + "logps/rejected": -354.1858825683594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.54317569732666, + "rewards/margins": 8.367908477783203, + "rewards/rejected": -17.911083221435547, + "step": 15627 + }, + { + "epoch": 2.43, + "learning_rate": 2.685125784532393e-06, + "logits/chosen": -2.6682770252227783, + "logits/rejected": -2.7714037895202637, + "logps/chosen": -243.12448120117188, + "logps/rejected": -385.1844482421875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.463654518127441, + "rewards/margins": 9.414397239685059, + "rewards/rejected": -20.8780517578125, + "step": 15628 + }, + { + "epoch": 2.43, + "learning_rate": 2.684392344001245e-06, + "logits/chosen": -1.7535618543624878, + "logits/rejected": -2.2755842208862305, + "logps/chosen": -229.43048095703125, + "logps/rejected": -697.097412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.090734481811523, + "rewards/margins": 16.51894760131836, + "rewards/rejected": -26.609682083129883, + "step": 15629 + }, + { + "epoch": 2.43, + "learning_rate": 2.683658903470097e-06, + "logits/chosen": -2.8273088932037354, + "logits/rejected": -3.0192463397979736, + "logps/chosen": -456.86126708984375, + "logps/rejected": -496.63470458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.57822322845459, + "rewards/margins": 12.114643096923828, + "rewards/rejected": -21.692867279052734, + "step": 15630 + }, + { + "epoch": 2.43, + "learning_rate": 2.682925462938949e-06, + "logits/chosen": -1.3828139305114746, + "logits/rejected": -2.587080240249634, + "logps/chosen": -174.8980712890625, + "logps/rejected": -521.8720092773438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.687187194824219, + "rewards/margins": 14.045953750610352, + "rewards/rejected": -23.73314094543457, + "step": 15631 + }, + { + "epoch": 2.43, + "learning_rate": 2.682192022407801e-06, + "logits/chosen": -2.836275339126587, + "logits/rejected": -2.938277244567871, + "logps/chosen": -185.70394897460938, + "logps/rejected": -228.58950805664062, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.755777835845947, + "rewards/margins": 6.035793304443359, + "rewards/rejected": -12.791570663452148, + "step": 15632 + }, + { + "epoch": 2.43, + "learning_rate": 2.6814585818766533e-06, + "logits/chosen": -1.2572054862976074, + "logits/rejected": -2.2834994792938232, + "logps/chosen": -223.52725219726562, + "logps/rejected": -559.8123168945312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.120155334472656, + "rewards/margins": 11.984176635742188, + "rewards/rejected": -23.104331970214844, + "step": 15633 + }, + { + "epoch": 2.43, + "learning_rate": 2.6807251413455052e-06, + "logits/chosen": -2.9117391109466553, + "logits/rejected": -2.082247734069824, + "logps/chosen": -242.40264892578125, + "logps/rejected": -165.561767578125, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.659015655517578, + "rewards/margins": 6.258383750915527, + "rewards/rejected": -13.917399406433105, + "step": 15634 + }, + { + "epoch": 2.43, + "learning_rate": 2.679991700814358e-06, + "logits/chosen": -2.813645839691162, + "logits/rejected": -2.8571701049804688, + "logps/chosen": -196.4553680419922, + "logps/rejected": -246.4065399169922, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1966447830200195, + "rewards/margins": 6.853958606719971, + "rewards/rejected": -12.050603866577148, + "step": 15635 + }, + { + "epoch": 2.43, + "learning_rate": 2.67925826028321e-06, + "logits/chosen": -0.9911424517631531, + "logits/rejected": -2.2251362800598145, + "logps/chosen": -320.05059814453125, + "logps/rejected": -518.570068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.742170333862305, + "rewards/margins": 13.567991256713867, + "rewards/rejected": -22.310161590576172, + "step": 15636 + }, + { + "epoch": 2.43, + "learning_rate": 2.678524819752062e-06, + "logits/chosen": -1.0075844526290894, + "logits/rejected": -2.5020275115966797, + "logps/chosen": -150.67312622070312, + "logps/rejected": -321.3798828125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.976936340332031, + "rewards/margins": 7.120935440063477, + "rewards/rejected": -18.097871780395508, + "step": 15637 + }, + { + "epoch": 2.43, + "learning_rate": 2.677791379220914e-06, + "logits/chosen": -0.791560173034668, + "logits/rejected": -2.7651028633117676, + "logps/chosen": -272.3424987792969, + "logps/rejected": -776.9693603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.647529602050781, + "rewards/margins": 10.499181747436523, + "rewards/rejected": -21.146711349487305, + "step": 15638 + }, + { + "epoch": 2.43, + "learning_rate": 2.677057938689766e-06, + "logits/chosen": -2.690795660018921, + "logits/rejected": -2.559021234512329, + "logps/chosen": -464.109130859375, + "logps/rejected": -722.0537109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.004006385803223, + "rewards/margins": 10.09663200378418, + "rewards/rejected": -20.10063934326172, + "step": 15639 + }, + { + "epoch": 2.43, + "learning_rate": 2.676324498158618e-06, + "logits/chosen": -2.675039291381836, + "logits/rejected": -2.6388473510742188, + "logps/chosen": -461.93377685546875, + "logps/rejected": -720.1203002929688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.455931663513184, + "rewards/margins": 15.749104499816895, + "rewards/rejected": -22.205036163330078, + "step": 15640 + }, + { + "epoch": 2.43, + "learning_rate": 2.67559105762747e-06, + "logits/chosen": -3.047933578491211, + "logits/rejected": -2.7111659049987793, + "logps/chosen": -191.1133270263672, + "logps/rejected": -208.00531005859375, + "loss": 0.2013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.003808975219727, + "rewards/margins": 5.850480079650879, + "rewards/rejected": -13.854288101196289, + "step": 15641 + }, + { + "epoch": 2.43, + "learning_rate": 2.6748576170963224e-06, + "logits/chosen": -2.0571699142456055, + "logits/rejected": -2.9157931804656982, + "logps/chosen": -236.36122131347656, + "logps/rejected": -574.7965698242188, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.651944637298584, + "rewards/margins": 7.356047630310059, + "rewards/rejected": -15.007991790771484, + "step": 15642 + }, + { + "epoch": 2.43, + "learning_rate": 2.6741241765651747e-06, + "logits/chosen": -2.1969552040100098, + "logits/rejected": -2.7146317958831787, + "logps/chosen": -596.1452026367188, + "logps/rejected": -399.6214599609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.314960479736328, + "rewards/margins": 10.692176818847656, + "rewards/rejected": -16.007137298583984, + "step": 15643 + }, + { + "epoch": 2.43, + "learning_rate": 2.673390736034027e-06, + "logits/chosen": -2.8999876976013184, + "logits/rejected": -2.6996688842773438, + "logps/chosen": -872.7073364257812, + "logps/rejected": -701.2069091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.690179824829102, + "rewards/margins": 10.650236129760742, + "rewards/rejected": -19.340415954589844, + "step": 15644 + }, + { + "epoch": 2.43, + "learning_rate": 2.672657295502879e-06, + "logits/chosen": -1.7321909666061401, + "logits/rejected": -2.7610669136047363, + "logps/chosen": -427.84906005859375, + "logps/rejected": -596.271728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.323179721832275, + "rewards/margins": 10.978937149047852, + "rewards/rejected": -17.30211639404297, + "step": 15645 + }, + { + "epoch": 2.43, + "learning_rate": 2.671923854971731e-06, + "logits/chosen": -1.76256263256073, + "logits/rejected": -2.4023046493530273, + "logps/chosen": -306.1153564453125, + "logps/rejected": -445.00823974609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.429925918579102, + "rewards/margins": 9.848020553588867, + "rewards/rejected": -18.27794647216797, + "step": 15646 + }, + { + "epoch": 2.43, + "learning_rate": 2.671190414440583e-06, + "logits/chosen": -1.518487811088562, + "logits/rejected": -2.3696374893188477, + "logps/chosen": -148.7095947265625, + "logps/rejected": -465.73089599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.844266891479492, + "rewards/margins": 12.125097274780273, + "rewards/rejected": -18.969364166259766, + "step": 15647 + }, + { + "epoch": 2.43, + "learning_rate": 2.670456973909435e-06, + "logits/chosen": -2.262605667114258, + "logits/rejected": -2.960181474685669, + "logps/chosen": -160.63771057128906, + "logps/rejected": -384.835205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.133476257324219, + "rewards/margins": 12.183380126953125, + "rewards/rejected": -21.316856384277344, + "step": 15648 + }, + { + "epoch": 2.43, + "learning_rate": 2.669723533378287e-06, + "logits/chosen": -2.8667690753936768, + "logits/rejected": -2.750702142715454, + "logps/chosen": -481.96148681640625, + "logps/rejected": -425.18084716796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.591657638549805, + "rewards/margins": 8.22751235961914, + "rewards/rejected": -13.819169998168945, + "step": 15649 + }, + { + "epoch": 2.43, + "learning_rate": 2.668990092847139e-06, + "logits/chosen": -2.174147367477417, + "logits/rejected": -1.1070067882537842, + "logps/chosen": -290.9931945800781, + "logps/rejected": -277.735107421875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.732412338256836, + "rewards/margins": 7.405465602874756, + "rewards/rejected": -19.13787841796875, + "step": 15650 + }, + { + "epoch": 2.43, + "learning_rate": 2.6682566523159914e-06, + "logits/chosen": -0.7191799283027649, + "logits/rejected": -2.2038686275482178, + "logps/chosen": -257.0660705566406, + "logps/rejected": -487.10003662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.891119956970215, + "rewards/margins": 13.55385971069336, + "rewards/rejected": -22.44498062133789, + "step": 15651 + }, + { + "epoch": 2.43, + "learning_rate": 2.6675232117848437e-06, + "logits/chosen": -1.134048581123352, + "logits/rejected": -1.6317411661148071, + "logps/chosen": -167.13162231445312, + "logps/rejected": -406.37249755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.920570373535156, + "rewards/margins": 11.271208763122559, + "rewards/rejected": -19.19178009033203, + "step": 15652 + }, + { + "epoch": 2.43, + "learning_rate": 2.666789771253696e-06, + "logits/chosen": -1.6361054182052612, + "logits/rejected": -2.3785617351531982, + "logps/chosen": -192.8987579345703, + "logps/rejected": -528.1453857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.15692138671875, + "rewards/margins": 15.959392547607422, + "rewards/rejected": -25.116313934326172, + "step": 15653 + }, + { + "epoch": 2.43, + "learning_rate": 2.666056330722548e-06, + "logits/chosen": -2.0032315254211426, + "logits/rejected": -2.6885218620300293, + "logps/chosen": -271.13299560546875, + "logps/rejected": -449.774169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.987452030181885, + "rewards/margins": 11.736934661865234, + "rewards/rejected": -17.72438621520996, + "step": 15654 + }, + { + "epoch": 2.43, + "learning_rate": 2.6653228901914e-06, + "logits/chosen": -1.9913288354873657, + "logits/rejected": -2.251343250274658, + "logps/chosen": -146.32093811035156, + "logps/rejected": -267.31884765625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.484111785888672, + "rewards/margins": 8.84861946105957, + "rewards/rejected": -19.332731246948242, + "step": 15655 + }, + { + "epoch": 2.43, + "learning_rate": 2.664589449660252e-06, + "logits/chosen": -2.4706532955169678, + "logits/rejected": -2.6750807762145996, + "logps/chosen": -181.470703125, + "logps/rejected": -396.9701843261719, + "loss": 0.0279, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.336782455444336, + "rewards/margins": 8.350839614868164, + "rewards/rejected": -15.6876220703125, + "step": 15656 + }, + { + "epoch": 2.43, + "learning_rate": 2.6638560091291043e-06, + "logits/chosen": -2.179511547088623, + "logits/rejected": -2.685885429382324, + "logps/chosen": -380.58935546875, + "logps/rejected": -372.48779296875, + "loss": 2.6027, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.022902488708496, + "rewards/margins": 2.5874617099761963, + "rewards/rejected": -11.610363960266113, + "step": 15657 + }, + { + "epoch": 2.44, + "learning_rate": 2.6631225685979562e-06, + "logits/chosen": -2.5770926475524902, + "logits/rejected": -1.853340983390808, + "logps/chosen": -297.27655029296875, + "logps/rejected": -355.483154296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.958548545837402, + "rewards/margins": 8.683570861816406, + "rewards/rejected": -18.642120361328125, + "step": 15658 + }, + { + "epoch": 2.44, + "learning_rate": 2.662389128066808e-06, + "logits/chosen": -2.234318256378174, + "logits/rejected": -2.4587464332580566, + "logps/chosen": -595.9224853515625, + "logps/rejected": -600.27294921875, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.476140975952148, + "rewards/margins": 7.9223408699035645, + "rewards/rejected": -21.398483276367188, + "step": 15659 + }, + { + "epoch": 2.44, + "learning_rate": 2.661655687535661e-06, + "logits/chosen": -3.0554139614105225, + "logits/rejected": -2.3425495624542236, + "logps/chosen": -906.076171875, + "logps/rejected": -788.7882080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.618178367614746, + "rewards/margins": 14.392232894897461, + "rewards/rejected": -23.01041030883789, + "step": 15660 + }, + { + "epoch": 2.44, + "learning_rate": 2.6609222470045127e-06, + "logits/chosen": -2.853959798812866, + "logits/rejected": -2.902055501937866, + "logps/chosen": -246.41070556640625, + "logps/rejected": -268.0374755859375, + "loss": 0.0417, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.604692459106445, + "rewards/margins": 6.091883659362793, + "rewards/rejected": -18.696575164794922, + "step": 15661 + }, + { + "epoch": 2.44, + "learning_rate": 2.660188806473365e-06, + "logits/chosen": -2.7834231853485107, + "logits/rejected": -2.9286856651306152, + "logps/chosen": -205.53778076171875, + "logps/rejected": -498.5374755859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.275165557861328, + "rewards/margins": 12.621044158935547, + "rewards/rejected": -20.896209716796875, + "step": 15662 + }, + { + "epoch": 2.44, + "learning_rate": 2.659455365942217e-06, + "logits/chosen": -1.6774842739105225, + "logits/rejected": -2.620898962020874, + "logps/chosen": -381.5635986328125, + "logps/rejected": -657.1719360351562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.671974182128906, + "rewards/margins": 9.824922561645508, + "rewards/rejected": -20.496896743774414, + "step": 15663 + }, + { + "epoch": 2.44, + "learning_rate": 2.658721925411069e-06, + "logits/chosen": -1.9506728649139404, + "logits/rejected": -2.5271458625793457, + "logps/chosen": -130.4329376220703, + "logps/rejected": -358.96728515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.146329879760742, + "rewards/margins": 8.513416290283203, + "rewards/rejected": -17.659746170043945, + "step": 15664 + }, + { + "epoch": 2.44, + "learning_rate": 2.657988484879921e-06, + "logits/chosen": -1.7908039093017578, + "logits/rejected": -2.771941900253296, + "logps/chosen": -262.2111511230469, + "logps/rejected": -659.69775390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.323121070861816, + "rewards/margins": 11.94320011138916, + "rewards/rejected": -19.266321182250977, + "step": 15665 + }, + { + "epoch": 2.44, + "learning_rate": 2.6572550443487734e-06, + "logits/chosen": -2.217453956604004, + "logits/rejected": -2.5466244220733643, + "logps/chosen": -152.23548889160156, + "logps/rejected": -381.12359619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.840778827667236, + "rewards/margins": 14.505025863647461, + "rewards/rejected": -21.34580421447754, + "step": 15666 + }, + { + "epoch": 2.44, + "learning_rate": 2.6565216038176252e-06, + "logits/chosen": -2.5661938190460205, + "logits/rejected": -2.9720070362091064, + "logps/chosen": -120.04119110107422, + "logps/rejected": -303.11688232421875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.52357006072998, + "rewards/margins": 8.721639633178711, + "rewards/rejected": -17.245208740234375, + "step": 15667 + }, + { + "epoch": 2.44, + "learning_rate": 2.6557881632864775e-06, + "logits/chosen": -1.303792119026184, + "logits/rejected": -2.588697910308838, + "logps/chosen": -198.05166625976562, + "logps/rejected": -405.8743896484375, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.328713417053223, + "rewards/margins": 7.99879264831543, + "rewards/rejected": -18.32750701904297, + "step": 15668 + }, + { + "epoch": 2.44, + "learning_rate": 2.65505472275533e-06, + "logits/chosen": -2.235245704650879, + "logits/rejected": -2.8089261054992676, + "logps/chosen": -203.1353759765625, + "logps/rejected": -630.3731689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.448455810546875, + "rewards/margins": 15.16100788116455, + "rewards/rejected": -26.60946273803711, + "step": 15669 + }, + { + "epoch": 2.44, + "learning_rate": 2.6543212822241817e-06, + "logits/chosen": -2.0343375205993652, + "logits/rejected": -2.358201742172241, + "logps/chosen": -96.87850952148438, + "logps/rejected": -303.18450927734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.918154716491699, + "rewards/margins": 11.571435928344727, + "rewards/rejected": -19.489591598510742, + "step": 15670 + }, + { + "epoch": 2.44, + "learning_rate": 2.653587841693034e-06, + "logits/chosen": -2.665238380432129, + "logits/rejected": -2.82619047164917, + "logps/chosen": -829.1103515625, + "logps/rejected": -909.0714721679688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.951939582824707, + "rewards/margins": 11.23709487915039, + "rewards/rejected": -24.18903350830078, + "step": 15671 + }, + { + "epoch": 2.44, + "learning_rate": 2.652854401161886e-06, + "logits/chosen": -2.787684679031372, + "logits/rejected": -2.790156602859497, + "logps/chosen": -132.2263946533203, + "logps/rejected": -293.024658203125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.497511863708496, + "rewards/margins": 7.819974422454834, + "rewards/rejected": -15.317486763000488, + "step": 15672 + }, + { + "epoch": 2.44, + "learning_rate": 2.652120960630738e-06, + "logits/chosen": -2.5444986820220947, + "logits/rejected": -2.5127503871917725, + "logps/chosen": -483.8265380859375, + "logps/rejected": -473.5438232421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.744522094726562, + "rewards/margins": 9.69074821472168, + "rewards/rejected": -24.435270309448242, + "step": 15673 + }, + { + "epoch": 2.44, + "learning_rate": 2.65138752009959e-06, + "logits/chosen": -1.9009650945663452, + "logits/rejected": -2.712576389312744, + "logps/chosen": -300.4144287109375, + "logps/rejected": -532.2255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.277365684509277, + "rewards/margins": 10.798362731933594, + "rewards/rejected": -21.075729370117188, + "step": 15674 + }, + { + "epoch": 2.44, + "learning_rate": 2.6506540795684424e-06, + "logits/chosen": -1.690232515335083, + "logits/rejected": -2.7234103679656982, + "logps/chosen": -180.31158447265625, + "logps/rejected": -720.988525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.144783973693848, + "rewards/margins": 11.21876335144043, + "rewards/rejected": -21.363548278808594, + "step": 15675 + }, + { + "epoch": 2.44, + "learning_rate": 2.6499206390372943e-06, + "logits/chosen": -1.2184100151062012, + "logits/rejected": -2.537752151489258, + "logps/chosen": -159.33558654785156, + "logps/rejected": -496.3976135253906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.375585556030273, + "rewards/margins": 12.420995712280273, + "rewards/rejected": -22.796581268310547, + "step": 15676 + }, + { + "epoch": 2.44, + "learning_rate": 2.649187198506147e-06, + "logits/chosen": -2.6616334915161133, + "logits/rejected": -3.015596866607666, + "logps/chosen": -139.63560485839844, + "logps/rejected": -266.113525390625, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.924931526184082, + "rewards/margins": 8.197674751281738, + "rewards/rejected": -16.12260627746582, + "step": 15677 + }, + { + "epoch": 2.44, + "learning_rate": 2.648453757974999e-06, + "logits/chosen": -2.5016934871673584, + "logits/rejected": -2.1380434036254883, + "logps/chosen": -873.5052490234375, + "logps/rejected": -567.6962280273438, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.966033935546875, + "rewards/margins": 8.608564376831055, + "rewards/rejected": -20.57459831237793, + "step": 15678 + }, + { + "epoch": 2.44, + "learning_rate": 2.6477203174438507e-06, + "logits/chosen": -2.5148611068725586, + "logits/rejected": -1.5621188879013062, + "logps/chosen": -302.9731140136719, + "logps/rejected": -278.7739562988281, + "loss": 0.6437, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.441587448120117, + "rewards/margins": 5.195239543914795, + "rewards/rejected": -14.63682746887207, + "step": 15679 + }, + { + "epoch": 2.44, + "learning_rate": 2.646986876912703e-06, + "logits/chosen": -1.7425239086151123, + "logits/rejected": -1.976603388786316, + "logps/chosen": -157.54107666015625, + "logps/rejected": -404.8531799316406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.110416412353516, + "rewards/margins": 10.233388900756836, + "rewards/rejected": -17.34380531311035, + "step": 15680 + }, + { + "epoch": 2.44, + "learning_rate": 2.646253436381555e-06, + "logits/chosen": -1.6990340948104858, + "logits/rejected": -2.673335552215576, + "logps/chosen": -258.8224792480469, + "logps/rejected": -467.1911926269531, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.048519134521484, + "rewards/margins": 11.708971977233887, + "rewards/rejected": -22.757490158081055, + "step": 15681 + }, + { + "epoch": 2.44, + "learning_rate": 2.6455199958504072e-06, + "logits/chosen": -1.7934372425079346, + "logits/rejected": -2.5191192626953125, + "logps/chosen": -225.7467041015625, + "logps/rejected": -472.23492431640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.788132667541504, + "rewards/margins": 12.065317153930664, + "rewards/rejected": -22.85344886779785, + "step": 15682 + }, + { + "epoch": 2.44, + "learning_rate": 2.644786555319259e-06, + "logits/chosen": -1.7454276084899902, + "logits/rejected": -2.3203587532043457, + "logps/chosen": -170.4654083251953, + "logps/rejected": -357.927734375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.0902681350708, + "rewards/margins": 10.276771545410156, + "rewards/rejected": -18.36703872680664, + "step": 15683 + }, + { + "epoch": 2.44, + "learning_rate": 2.6440531147881114e-06, + "logits/chosen": -1.5857590436935425, + "logits/rejected": -2.7159807682037354, + "logps/chosen": -234.6514434814453, + "logps/rejected": -501.7992248535156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.354711532592773, + "rewards/margins": 14.499954223632812, + "rewards/rejected": -23.854665756225586, + "step": 15684 + }, + { + "epoch": 2.44, + "learning_rate": 2.6433196742569637e-06, + "logits/chosen": -2.6943325996398926, + "logits/rejected": -2.8866212368011475, + "logps/chosen": -148.35638427734375, + "logps/rejected": -223.82452392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.781949996948242, + "rewards/margins": 10.429107666015625, + "rewards/rejected": -16.211057662963867, + "step": 15685 + }, + { + "epoch": 2.44, + "learning_rate": 2.642586233725816e-06, + "logits/chosen": -2.4358043670654297, + "logits/rejected": -2.7414422035217285, + "logps/chosen": -542.7491455078125, + "logps/rejected": -556.3878173828125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.993734359741211, + "rewards/margins": 8.270448684692383, + "rewards/rejected": -21.264183044433594, + "step": 15686 + }, + { + "epoch": 2.44, + "learning_rate": 2.641852793194668e-06, + "logits/chosen": -2.8838765621185303, + "logits/rejected": -2.7965352535247803, + "logps/chosen": -118.65164184570312, + "logps/rejected": -349.47210693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.757989406585693, + "rewards/margins": 12.91917610168457, + "rewards/rejected": -18.677165985107422, + "step": 15687 + }, + { + "epoch": 2.44, + "learning_rate": 2.6411193526635198e-06, + "logits/chosen": -2.5379838943481445, + "logits/rejected": -2.7772655487060547, + "logps/chosen": -236.16653442382812, + "logps/rejected": -278.697509765625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.566625595092773, + "rewards/margins": 7.025030136108398, + "rewards/rejected": -16.591655731201172, + "step": 15688 + }, + { + "epoch": 2.44, + "learning_rate": 2.640385912132372e-06, + "logits/chosen": -2.6260852813720703, + "logits/rejected": -2.6004467010498047, + "logps/chosen": -257.2461242675781, + "logps/rejected": -222.19688415527344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.90208101272583, + "rewards/margins": 8.87017822265625, + "rewards/rejected": -13.772258758544922, + "step": 15689 + }, + { + "epoch": 2.44, + "learning_rate": 2.639652471601224e-06, + "logits/chosen": -1.1711950302124023, + "logits/rejected": -1.7679274082183838, + "logps/chosen": -304.3817138671875, + "logps/rejected": -376.46990966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.645946025848389, + "rewards/margins": 12.262710571289062, + "rewards/rejected": -19.90865707397461, + "step": 15690 + }, + { + "epoch": 2.44, + "learning_rate": 2.6389190310700763e-06, + "logits/chosen": -2.0785179138183594, + "logits/rejected": -2.810502767562866, + "logps/chosen": -143.5010223388672, + "logps/rejected": -456.354736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.110339164733887, + "rewards/margins": 13.920938491821289, + "rewards/rejected": -23.03127670288086, + "step": 15691 + }, + { + "epoch": 2.44, + "learning_rate": 2.638185590538928e-06, + "logits/chosen": -1.709523320198059, + "logits/rejected": -2.673682928085327, + "logps/chosen": -407.54364013671875, + "logps/rejected": -589.9868774414062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.284162521362305, + "rewards/margins": 12.997419357299805, + "rewards/rejected": -21.28158187866211, + "step": 15692 + }, + { + "epoch": 2.44, + "learning_rate": 2.6374521500077804e-06, + "logits/chosen": -2.7108771800994873, + "logits/rejected": -0.9961903095245361, + "logps/chosen": -396.2078857421875, + "logps/rejected": -352.82989501953125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.897870063781738, + "rewards/margins": 7.419367790222168, + "rewards/rejected": -16.317237854003906, + "step": 15693 + }, + { + "epoch": 2.44, + "learning_rate": 2.6367187094766327e-06, + "logits/chosen": -2.5040431022644043, + "logits/rejected": -2.5045974254608154, + "logps/chosen": -151.18972778320312, + "logps/rejected": -366.22137451171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.879448890686035, + "rewards/margins": 11.517045021057129, + "rewards/rejected": -21.396493911743164, + "step": 15694 + }, + { + "epoch": 2.44, + "learning_rate": 2.635985268945485e-06, + "logits/chosen": -1.8274190425872803, + "logits/rejected": -2.60528302192688, + "logps/chosen": -106.14414978027344, + "logps/rejected": -444.17706298828125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.584643363952637, + "rewards/margins": 14.251012802124023, + "rewards/rejected": -23.835655212402344, + "step": 15695 + }, + { + "epoch": 2.44, + "learning_rate": 2.635251828414337e-06, + "logits/chosen": -2.5159361362457275, + "logits/rejected": -2.8636281490325928, + "logps/chosen": -232.6038818359375, + "logps/rejected": -304.08905029296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.878056049346924, + "rewards/margins": 9.822881698608398, + "rewards/rejected": -16.700937271118164, + "step": 15696 + }, + { + "epoch": 2.44, + "learning_rate": 2.634518387883189e-06, + "logits/chosen": -2.8347229957580566, + "logits/rejected": -2.815091133117676, + "logps/chosen": -84.65853118896484, + "logps/rejected": -324.354248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.397671222686768, + "rewards/margins": 12.688302993774414, + "rewards/rejected": -17.085973739624023, + "step": 15697 + }, + { + "epoch": 2.44, + "learning_rate": 2.633784947352041e-06, + "logits/chosen": -2.7890982627868652, + "logits/rejected": -2.9763615131378174, + "logps/chosen": -130.544189453125, + "logps/rejected": -200.2695770263672, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.99173641204834, + "rewards/margins": 7.978309154510498, + "rewards/rejected": -13.97004508972168, + "step": 15698 + }, + { + "epoch": 2.44, + "learning_rate": 2.633051506820893e-06, + "logits/chosen": -2.950348377227783, + "logits/rejected": -2.405510425567627, + "logps/chosen": -911.4660034179688, + "logps/rejected": -606.6322021484375, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.270251274108887, + "rewards/margins": 9.770715713500977, + "rewards/rejected": -17.040966033935547, + "step": 15699 + }, + { + "epoch": 2.44, + "learning_rate": 2.6323180662897453e-06, + "logits/chosen": -2.612990140914917, + "logits/rejected": -2.622945547103882, + "logps/chosen": -194.992431640625, + "logps/rejected": -397.6357421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.955073833465576, + "rewards/margins": 10.298299789428711, + "rewards/rejected": -17.253374099731445, + "step": 15700 + }, + { + "epoch": 2.44, + "learning_rate": 2.631584625758597e-06, + "logits/chosen": -1.0295591354370117, + "logits/rejected": -2.3787052631378174, + "logps/chosen": -116.4349594116211, + "logps/rejected": -354.50390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.264739036560059, + "rewards/margins": 9.980615615844727, + "rewards/rejected": -18.24535369873047, + "step": 15701 + }, + { + "epoch": 2.44, + "learning_rate": 2.63085118522745e-06, + "logits/chosen": -2.930920124053955, + "logits/rejected": -2.8858869075775146, + "logps/chosen": -293.4790344238281, + "logps/rejected": -374.1258544921875, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.440988540649414, + "rewards/margins": 6.59739351272583, + "rewards/rejected": -13.038381576538086, + "step": 15702 + }, + { + "epoch": 2.44, + "learning_rate": 2.6301177446963018e-06, + "logits/chosen": -1.0503060817718506, + "logits/rejected": -2.4007046222686768, + "logps/chosen": -114.65431213378906, + "logps/rejected": -346.0779724121094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.053910255432129, + "rewards/margins": 10.166619300842285, + "rewards/rejected": -20.220529556274414, + "step": 15703 + }, + { + "epoch": 2.44, + "learning_rate": 2.629384304165154e-06, + "logits/chosen": -1.2948766946792603, + "logits/rejected": -2.082895278930664, + "logps/chosen": -331.82318115234375, + "logps/rejected": -515.6318359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.513370513916016, + "rewards/margins": 11.764654159545898, + "rewards/rejected": -20.278024673461914, + "step": 15704 + }, + { + "epoch": 2.44, + "learning_rate": 2.628650863634006e-06, + "logits/chosen": -2.831881046295166, + "logits/rejected": -2.8640198707580566, + "logps/chosen": -227.93801879882812, + "logps/rejected": -426.58154296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.783013343811035, + "rewards/margins": 9.092472076416016, + "rewards/rejected": -18.875486373901367, + "step": 15705 + }, + { + "epoch": 2.44, + "learning_rate": 2.6279174231028582e-06, + "logits/chosen": -2.6702327728271484, + "logits/rejected": -2.9798667430877686, + "logps/chosen": -416.06927490234375, + "logps/rejected": -366.9151611328125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.450071334838867, + "rewards/margins": 8.361431121826172, + "rewards/rejected": -13.811502456665039, + "step": 15706 + }, + { + "epoch": 2.44, + "learning_rate": 2.62718398257171e-06, + "logits/chosen": -2.951936960220337, + "logits/rejected": -2.7728986740112305, + "logps/chosen": -613.1436157226562, + "logps/rejected": -455.2972412109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.17745304107666, + "rewards/margins": 7.093577861785889, + "rewards/rejected": -16.27103042602539, + "step": 15707 + }, + { + "epoch": 2.44, + "learning_rate": 2.626450542040562e-06, + "logits/chosen": -2.5905187129974365, + "logits/rejected": -2.5950472354888916, + "logps/chosen": -325.5057373046875, + "logps/rejected": -409.4061279296875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.569343566894531, + "rewards/margins": 7.8628764152526855, + "rewards/rejected": -18.432220458984375, + "step": 15708 + }, + { + "epoch": 2.44, + "learning_rate": 2.6257171015094143e-06, + "logits/chosen": -1.2895482778549194, + "logits/rejected": -2.62451434135437, + "logps/chosen": -170.48143005371094, + "logps/rejected": -586.4593505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.809594631195068, + "rewards/margins": 17.174211502075195, + "rewards/rejected": -23.983806610107422, + "step": 15709 + }, + { + "epoch": 2.44, + "learning_rate": 2.6249836609782666e-06, + "logits/chosen": -1.81724214553833, + "logits/rejected": -2.2342870235443115, + "logps/chosen": -140.998046875, + "logps/rejected": -514.9857177734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.003185749053955, + "rewards/margins": 17.791160583496094, + "rewards/rejected": -24.79434585571289, + "step": 15710 + }, + { + "epoch": 2.44, + "learning_rate": 2.624250220447119e-06, + "logits/chosen": -2.5694918632507324, + "logits/rejected": -2.7697765827178955, + "logps/chosen": -117.18680572509766, + "logps/rejected": -423.842041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.482985019683838, + "rewards/margins": 12.122747421264648, + "rewards/rejected": -19.605731964111328, + "step": 15711 + }, + { + "epoch": 2.44, + "learning_rate": 2.6235167799159708e-06, + "logits/chosen": -1.4795994758605957, + "logits/rejected": -2.1861250400543213, + "logps/chosen": -208.04530334472656, + "logps/rejected": -453.5611572265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.342071533203125, + "rewards/margins": 11.639795303344727, + "rewards/rejected": -23.98186683654785, + "step": 15712 + }, + { + "epoch": 2.44, + "learning_rate": 2.622783339384823e-06, + "logits/chosen": -2.8156349658966064, + "logits/rejected": -2.1199209690093994, + "logps/chosen": -383.2164306640625, + "logps/rejected": -550.36865234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5434417724609375, + "rewards/margins": 8.757095336914062, + "rewards/rejected": -15.300537109375, + "step": 15713 + }, + { + "epoch": 2.44, + "learning_rate": 2.622049898853675e-06, + "logits/chosen": -1.6578741073608398, + "logits/rejected": -2.0583410263061523, + "logps/chosen": -176.73980712890625, + "logps/rejected": -393.6592102050781, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.014366149902344, + "rewards/margins": 6.775030136108398, + "rewards/rejected": -15.789396286010742, + "step": 15714 + }, + { + "epoch": 2.44, + "learning_rate": 2.6213164583225273e-06, + "logits/chosen": -0.9187621474266052, + "logits/rejected": -2.8612728118896484, + "logps/chosen": -275.81146240234375, + "logps/rejected": -546.2606201171875, + "loss": 0.2274, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.05959701538086, + "rewards/margins": 6.68192195892334, + "rewards/rejected": -20.741519927978516, + "step": 15715 + }, + { + "epoch": 2.44, + "learning_rate": 2.620583017791379e-06, + "logits/chosen": -2.891073703765869, + "logits/rejected": -2.4227054119110107, + "logps/chosen": -165.9368896484375, + "logps/rejected": -403.0540771484375, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.311114311218262, + "rewards/margins": 7.6288371086120605, + "rewards/rejected": -18.939950942993164, + "step": 15716 + }, + { + "epoch": 2.44, + "learning_rate": 2.619849577260231e-06, + "logits/chosen": -2.685225009918213, + "logits/rejected": -2.7121171951293945, + "logps/chosen": -288.0379943847656, + "logps/rejected": -350.02728271484375, + "loss": 0.4768, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.253186225891113, + "rewards/margins": 3.258716106414795, + "rewards/rejected": -16.51190185546875, + "step": 15717 + }, + { + "epoch": 2.44, + "learning_rate": 2.6191161367290833e-06, + "logits/chosen": -2.7349746227264404, + "logits/rejected": -1.2803926467895508, + "logps/chosen": -253.51397705078125, + "logps/rejected": -217.13168334960938, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.650674343109131, + "rewards/margins": 8.273481369018555, + "rewards/rejected": -15.924156188964844, + "step": 15718 + }, + { + "epoch": 2.44, + "learning_rate": 2.6183826961979356e-06, + "logits/chosen": -2.1612064838409424, + "logits/rejected": -2.9178102016448975, + "logps/chosen": -210.16632080078125, + "logps/rejected": -422.489501953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.500983238220215, + "rewards/margins": 8.422035217285156, + "rewards/rejected": -16.923017501831055, + "step": 15719 + }, + { + "epoch": 2.44, + "learning_rate": 2.617649255666788e-06, + "logits/chosen": -2.0105276107788086, + "logits/rejected": -2.9666645526885986, + "logps/chosen": -106.51375579833984, + "logps/rejected": -575.56591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.478086471557617, + "rewards/margins": 14.656096458435059, + "rewards/rejected": -22.13418197631836, + "step": 15720 + }, + { + "epoch": 2.44, + "learning_rate": 2.61691581513564e-06, + "logits/chosen": -2.8206770420074463, + "logits/rejected": -2.864741802215576, + "logps/chosen": -169.8973388671875, + "logps/rejected": -455.65289306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.327397346496582, + "rewards/margins": 11.784741401672363, + "rewards/rejected": -20.112138748168945, + "step": 15721 + }, + { + "epoch": 2.45, + "learning_rate": 2.616182374604492e-06, + "logits/chosen": -2.663194179534912, + "logits/rejected": -2.8362557888031006, + "logps/chosen": -643.864013671875, + "logps/rejected": -736.917236328125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.988792419433594, + "rewards/margins": 7.397047519683838, + "rewards/rejected": -16.385841369628906, + "step": 15722 + }, + { + "epoch": 2.45, + "learning_rate": 2.615448934073344e-06, + "logits/chosen": -1.7088160514831543, + "logits/rejected": -2.873382091522217, + "logps/chosen": -270.7742919921875, + "logps/rejected": -452.42254638671875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7805023193359375, + "rewards/margins": 6.62575626373291, + "rewards/rejected": -14.406259536743164, + "step": 15723 + }, + { + "epoch": 2.45, + "learning_rate": 2.6147154935421963e-06, + "logits/chosen": -1.7595068216323853, + "logits/rejected": -2.7142345905303955, + "logps/chosen": -241.8968505859375, + "logps/rejected": -570.8673706054688, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.224117279052734, + "rewards/margins": 10.561463356018066, + "rewards/rejected": -19.785579681396484, + "step": 15724 + }, + { + "epoch": 2.45, + "learning_rate": 2.613982053011048e-06, + "logits/chosen": -2.7274417877197266, + "logits/rejected": -2.911017656326294, + "logps/chosen": -279.9112548828125, + "logps/rejected": -425.0637512207031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.431282997131348, + "rewards/margins": 11.022151947021484, + "rewards/rejected": -18.453433990478516, + "step": 15725 + }, + { + "epoch": 2.45, + "learning_rate": 2.6132486124799005e-06, + "logits/chosen": -2.397448778152466, + "logits/rejected": -2.569091796875, + "logps/chosen": -251.74778747558594, + "logps/rejected": -460.7080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.718692779541016, + "rewards/margins": 11.67575454711914, + "rewards/rejected": -22.394447326660156, + "step": 15726 + }, + { + "epoch": 2.45, + "learning_rate": 2.6125151719487528e-06, + "logits/chosen": -2.058727979660034, + "logits/rejected": -2.7757537364959717, + "logps/chosen": -173.28367614746094, + "logps/rejected": -536.2479248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.026607513427734, + "rewards/margins": 14.832006454467773, + "rewards/rejected": -23.858613967895508, + "step": 15727 + }, + { + "epoch": 2.45, + "learning_rate": 2.6117817314176046e-06, + "logits/chosen": -2.4920597076416016, + "logits/rejected": -2.782547950744629, + "logps/chosen": -613.42333984375, + "logps/rejected": -851.1016235351562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.485214948654175, + "rewards/margins": 17.040494918823242, + "rewards/rejected": -20.525711059570312, + "step": 15728 + }, + { + "epoch": 2.45, + "learning_rate": 2.611048290886457e-06, + "logits/chosen": -2.253528118133545, + "logits/rejected": -2.7355339527130127, + "logps/chosen": -146.54298400878906, + "logps/rejected": -280.49237060546875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.907288551330566, + "rewards/margins": 8.851840019226074, + "rewards/rejected": -15.75912857055664, + "step": 15729 + }, + { + "epoch": 2.45, + "learning_rate": 2.610314850355309e-06, + "logits/chosen": -2.842475175857544, + "logits/rejected": -2.9631898403167725, + "logps/chosen": -198.8587646484375, + "logps/rejected": -400.5680847167969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.547893524169922, + "rewards/margins": 12.399110794067383, + "rewards/rejected": -15.947004318237305, + "step": 15730 + }, + { + "epoch": 2.45, + "learning_rate": 2.609581409824161e-06, + "logits/chosen": -0.9758972525596619, + "logits/rejected": -2.5587284564971924, + "logps/chosen": -207.3675537109375, + "logps/rejected": -566.6663818359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.001101016998291, + "rewards/margins": 12.088056564331055, + "rewards/rejected": -18.089157104492188, + "step": 15731 + }, + { + "epoch": 2.45, + "learning_rate": 2.608847969293013e-06, + "logits/chosen": -2.0384156703948975, + "logits/rejected": -2.7520904541015625, + "logps/chosen": -278.2855529785156, + "logps/rejected": -444.41455078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.050786018371582, + "rewards/margins": 9.11547565460205, + "rewards/rejected": -18.166261672973633, + "step": 15732 + }, + { + "epoch": 2.45, + "learning_rate": 2.6081145287618653e-06, + "logits/chosen": -1.7483235597610474, + "logits/rejected": -2.132812738418579, + "logps/chosen": -282.21466064453125, + "logps/rejected": -572.859130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.7941255569458, + "rewards/margins": 14.15713882446289, + "rewards/rejected": -25.951263427734375, + "step": 15733 + }, + { + "epoch": 2.45, + "learning_rate": 2.607381088230717e-06, + "logits/chosen": -1.6326662302017212, + "logits/rejected": -2.513814687728882, + "logps/chosen": -135.04151916503906, + "logps/rejected": -487.1809997558594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.080501556396484, + "rewards/margins": 12.133100509643555, + "rewards/rejected": -22.21360206604004, + "step": 15734 + }, + { + "epoch": 2.45, + "learning_rate": 2.6066476476995695e-06, + "logits/chosen": -2.568181276321411, + "logits/rejected": -2.837042808532715, + "logps/chosen": -107.38380432128906, + "logps/rejected": -284.65325927734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.887321949005127, + "rewards/margins": 8.4373140335083, + "rewards/rejected": -16.324636459350586, + "step": 15735 + }, + { + "epoch": 2.45, + "learning_rate": 2.605914207168422e-06, + "logits/chosen": -2.4802792072296143, + "logits/rejected": -2.476452589035034, + "logps/chosen": -195.31271362304688, + "logps/rejected": -378.60174560546875, + "loss": 2.3128, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.913555145263672, + "rewards/margins": 8.771995544433594, + "rewards/rejected": -17.685550689697266, + "step": 15736 + }, + { + "epoch": 2.45, + "learning_rate": 2.6051807666372737e-06, + "logits/chosen": -1.7568784952163696, + "logits/rejected": -1.735897183418274, + "logps/chosen": -264.1714782714844, + "logps/rejected": -412.29736328125, + "loss": 0.0707, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.156052589416504, + "rewards/margins": 4.028505325317383, + "rewards/rejected": -13.184557914733887, + "step": 15737 + }, + { + "epoch": 2.45, + "learning_rate": 2.604447326106126e-06, + "logits/chosen": -2.852088212966919, + "logits/rejected": -2.288597345352173, + "logps/chosen": -265.7027587890625, + "logps/rejected": -197.3640594482422, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.134891510009766, + "rewards/margins": 6.313854694366455, + "rewards/rejected": -11.448745727539062, + "step": 15738 + }, + { + "epoch": 2.45, + "learning_rate": 2.603713885574978e-06, + "logits/chosen": -2.9728598594665527, + "logits/rejected": -2.4948813915252686, + "logps/chosen": -645.0408935546875, + "logps/rejected": -512.4796142578125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8485822677612305, + "rewards/margins": 9.514118194580078, + "rewards/rejected": -17.362701416015625, + "step": 15739 + }, + { + "epoch": 2.45, + "learning_rate": 2.60298044504383e-06, + "logits/chosen": -2.0479977130889893, + "logits/rejected": -2.8090827465057373, + "logps/chosen": -181.24270629882812, + "logps/rejected": -567.4970092773438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.780361652374268, + "rewards/margins": 14.06321907043457, + "rewards/rejected": -20.843582153320312, + "step": 15740 + }, + { + "epoch": 2.45, + "learning_rate": 2.602247004512682e-06, + "logits/chosen": -2.0221359729766846, + "logits/rejected": -2.392824649810791, + "logps/chosen": -205.96951293945312, + "logps/rejected": -267.92205810546875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.802057266235352, + "rewards/margins": 6.559549808502197, + "rewards/rejected": -14.36160659790039, + "step": 15741 + }, + { + "epoch": 2.45, + "learning_rate": 2.6015135639815343e-06, + "logits/chosen": -1.6133440732955933, + "logits/rejected": -2.3840935230255127, + "logps/chosen": -188.96627807617188, + "logps/rejected": -474.6081848144531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.64586353302002, + "rewards/margins": 13.787721633911133, + "rewards/rejected": -23.43358612060547, + "step": 15742 + }, + { + "epoch": 2.45, + "learning_rate": 2.600780123450386e-06, + "logits/chosen": -2.7083587646484375, + "logits/rejected": -3.03281831741333, + "logps/chosen": -109.261474609375, + "logps/rejected": -398.8409423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.376983642578125, + "rewards/margins": 11.527782440185547, + "rewards/rejected": -19.904766082763672, + "step": 15743 + }, + { + "epoch": 2.45, + "learning_rate": 2.600046682919239e-06, + "logits/chosen": -1.9412580728530884, + "logits/rejected": -2.645956516265869, + "logps/chosen": -321.01678466796875, + "logps/rejected": -591.0142822265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.506586074829102, + "rewards/margins": 10.58183479309082, + "rewards/rejected": -21.088420867919922, + "step": 15744 + }, + { + "epoch": 2.45, + "learning_rate": 2.599313242388091e-06, + "logits/chosen": -2.8935911655426025, + "logits/rejected": -2.760216474533081, + "logps/chosen": -1218.808837890625, + "logps/rejected": -956.8984985351562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.331866264343262, + "rewards/margins": 9.357267379760742, + "rewards/rejected": -16.689132690429688, + "step": 15745 + }, + { + "epoch": 2.45, + "learning_rate": 2.598579801856943e-06, + "logits/chosen": -3.059441328048706, + "logits/rejected": -2.9396140575408936, + "logps/chosen": -244.60003662109375, + "logps/rejected": -242.2271728515625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.905234336853027, + "rewards/margins": 8.088212966918945, + "rewards/rejected": -14.993447303771973, + "step": 15746 + }, + { + "epoch": 2.45, + "learning_rate": 2.597846361325795e-06, + "logits/chosen": -0.7226172089576721, + "logits/rejected": -2.3611905574798584, + "logps/chosen": -135.6781768798828, + "logps/rejected": -393.29296875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.862337112426758, + "rewards/margins": 7.945070266723633, + "rewards/rejected": -18.80740737915039, + "step": 15747 + }, + { + "epoch": 2.45, + "learning_rate": 2.597112920794647e-06, + "logits/chosen": -2.145754098892212, + "logits/rejected": -2.895111560821533, + "logps/chosen": -163.3204345703125, + "logps/rejected": -538.06298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.282577514648438, + "rewards/margins": 11.977304458618164, + "rewards/rejected": -21.2598819732666, + "step": 15748 + }, + { + "epoch": 2.45, + "learning_rate": 2.596379480263499e-06, + "logits/chosen": -2.602529525756836, + "logits/rejected": -2.987016201019287, + "logps/chosen": -372.6051940917969, + "logps/rejected": -552.3883056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.572169303894043, + "rewards/margins": 12.971233367919922, + "rewards/rejected": -20.54340362548828, + "step": 15749 + }, + { + "epoch": 2.45, + "learning_rate": 2.595646039732351e-06, + "logits/chosen": -1.7289599180221558, + "logits/rejected": -2.5485994815826416, + "logps/chosen": -221.75608825683594, + "logps/rejected": -558.2963256835938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.078518867492676, + "rewards/margins": 13.812418937683105, + "rewards/rejected": -25.89093780517578, + "step": 15750 + }, + { + "epoch": 2.45, + "learning_rate": 2.5949125992012034e-06, + "logits/chosen": -2.2968127727508545, + "logits/rejected": -2.6814591884613037, + "logps/chosen": -365.6529235839844, + "logps/rejected": -872.7652587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.374330520629883, + "rewards/margins": 11.039628982543945, + "rewards/rejected": -21.413959503173828, + "step": 15751 + }, + { + "epoch": 2.45, + "learning_rate": 2.5941791586700557e-06, + "logits/chosen": -1.5379606485366821, + "logits/rejected": -2.616219997406006, + "logps/chosen": -141.20481872558594, + "logps/rejected": -269.8145751953125, + "loss": 0.0614, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2884931564331055, + "rewards/margins": 6.201683044433594, + "rewards/rejected": -13.490175247192383, + "step": 15752 + }, + { + "epoch": 2.45, + "learning_rate": 2.593445718138908e-06, + "logits/chosen": -2.394179582595825, + "logits/rejected": -1.9693878889083862, + "logps/chosen": -123.05046844482422, + "logps/rejected": -223.99879455566406, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.633726119995117, + "rewards/margins": 8.1900053024292, + "rewards/rejected": -15.823731422424316, + "step": 15753 + }, + { + "epoch": 2.45, + "learning_rate": 2.59271227760776e-06, + "logits/chosen": -1.0900945663452148, + "logits/rejected": -2.5051114559173584, + "logps/chosen": -271.1209411621094, + "logps/rejected": -426.7904968261719, + "loss": 0.9474, + "rewards/accuracies": 0.5, + "rewards/chosen": -14.631410598754883, + "rewards/margins": 3.632798671722412, + "rewards/rejected": -18.264209747314453, + "step": 15754 + }, + { + "epoch": 2.45, + "learning_rate": 2.591978837076612e-06, + "logits/chosen": -2.7891407012939453, + "logits/rejected": -2.423107147216797, + "logps/chosen": -678.0258178710938, + "logps/rejected": -517.04296875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.891669273376465, + "rewards/margins": 8.8411283493042, + "rewards/rejected": -24.732797622680664, + "step": 15755 + }, + { + "epoch": 2.45, + "learning_rate": 2.591245396545464e-06, + "logits/chosen": -2.1576955318450928, + "logits/rejected": -2.743147373199463, + "logps/chosen": -170.7962646484375, + "logps/rejected": -437.455810546875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.731103897094727, + "rewards/margins": 7.399261951446533, + "rewards/rejected": -20.1303653717041, + "step": 15756 + }, + { + "epoch": 2.45, + "learning_rate": 2.590511956014316e-06, + "logits/chosen": -1.1829755306243896, + "logits/rejected": -2.5755481719970703, + "logps/chosen": -143.03321838378906, + "logps/rejected": -388.8336181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.106861114501953, + "rewards/margins": 10.148963928222656, + "rewards/rejected": -21.25582504272461, + "step": 15757 + }, + { + "epoch": 2.45, + "learning_rate": 2.589778515483168e-06, + "logits/chosen": -1.6807963848114014, + "logits/rejected": -2.591726303100586, + "logps/chosen": -261.2492370605469, + "logps/rejected": -484.63983154296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.884476661682129, + "rewards/margins": 9.108080863952637, + "rewards/rejected": -19.992557525634766, + "step": 15758 + }, + { + "epoch": 2.45, + "learning_rate": 2.58904507495202e-06, + "logits/chosen": -1.1898947954177856, + "logits/rejected": -2.8029561042785645, + "logps/chosen": -343.0666198730469, + "logps/rejected": -788.6157836914062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.507533073425293, + "rewards/margins": 12.489082336425781, + "rewards/rejected": -21.99661636352539, + "step": 15759 + }, + { + "epoch": 2.45, + "learning_rate": 2.5883116344208724e-06, + "logits/chosen": -2.44887638092041, + "logits/rejected": -2.3301000595092773, + "logps/chosen": -301.833740234375, + "logps/rejected": -435.560302734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.399435043334961, + "rewards/margins": 15.103370666503906, + "rewards/rejected": -24.502805709838867, + "step": 15760 + }, + { + "epoch": 2.45, + "learning_rate": 2.5875781938897247e-06, + "logits/chosen": -2.0728824138641357, + "logits/rejected": -2.8070735931396484, + "logps/chosen": -225.98348999023438, + "logps/rejected": -616.1227416992188, + "loss": 0.03, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.568502426147461, + "rewards/margins": 6.493198394775391, + "rewards/rejected": -17.06170082092285, + "step": 15761 + }, + { + "epoch": 2.45, + "learning_rate": 2.586844753358577e-06, + "logits/chosen": -1.2575269937515259, + "logits/rejected": -2.6746344566345215, + "logps/chosen": -171.91665649414062, + "logps/rejected": -504.54168701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.253470420837402, + "rewards/margins": 11.964126586914062, + "rewards/rejected": -20.21759796142578, + "step": 15762 + }, + { + "epoch": 2.45, + "learning_rate": 2.586111312827429e-06, + "logits/chosen": -1.5732417106628418, + "logits/rejected": -2.4467689990997314, + "logps/chosen": -295.97308349609375, + "logps/rejected": -565.2005004882812, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.066877841949463, + "rewards/margins": 11.860822677612305, + "rewards/rejected": -17.92770004272461, + "step": 15763 + }, + { + "epoch": 2.45, + "learning_rate": 2.585377872296281e-06, + "logits/chosen": -0.9969704151153564, + "logits/rejected": -2.680800199508667, + "logps/chosen": -115.9637451171875, + "logps/rejected": -359.8451843261719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.113053321838379, + "rewards/margins": 12.738683700561523, + "rewards/rejected": -20.85173797607422, + "step": 15764 + }, + { + "epoch": 2.45, + "learning_rate": 2.584644431765133e-06, + "logits/chosen": -2.858426332473755, + "logits/rejected": -1.7335349321365356, + "logps/chosen": -522.358642578125, + "logps/rejected": -402.3525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.747929573059082, + "rewards/margins": 11.068772315979004, + "rewards/rejected": -19.816701889038086, + "step": 15765 + }, + { + "epoch": 2.45, + "learning_rate": 2.583910991233985e-06, + "logits/chosen": -2.3823540210723877, + "logits/rejected": -2.9228222370147705, + "logps/chosen": -180.61923217773438, + "logps/rejected": -355.807861328125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.866189956665039, + "rewards/margins": 6.6579718589782715, + "rewards/rejected": -15.524162292480469, + "step": 15766 + }, + { + "epoch": 2.45, + "learning_rate": 2.5831775507028372e-06, + "logits/chosen": -1.8374371528625488, + "logits/rejected": -2.660998582839966, + "logps/chosen": -162.76773071289062, + "logps/rejected": -504.5641784667969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.693632125854492, + "rewards/margins": 15.584121704101562, + "rewards/rejected": -23.277755737304688, + "step": 15767 + }, + { + "epoch": 2.45, + "learning_rate": 2.582444110171689e-06, + "logits/chosen": -2.20565128326416, + "logits/rejected": -2.8419296741485596, + "logps/chosen": -575.3953247070312, + "logps/rejected": -691.5059204101562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.167478561401367, + "rewards/margins": 11.314676284790039, + "rewards/rejected": -21.482154846191406, + "step": 15768 + }, + { + "epoch": 2.45, + "learning_rate": 2.581710669640542e-06, + "logits/chosen": -1.0330586433410645, + "logits/rejected": -2.053548574447632, + "logps/chosen": -368.26751708984375, + "logps/rejected": -661.74853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.419120788574219, + "rewards/margins": 14.931954383850098, + "rewards/rejected": -23.35107421875, + "step": 15769 + }, + { + "epoch": 2.45, + "learning_rate": 2.5809772291093937e-06, + "logits/chosen": -2.3228955268859863, + "logits/rejected": -2.8337032794952393, + "logps/chosen": -381.57073974609375, + "logps/rejected": -295.01104736328125, + "loss": 0.2839, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.392229080200195, + "rewards/margins": 1.1847748756408691, + "rewards/rejected": -13.577003479003906, + "step": 15770 + }, + { + "epoch": 2.45, + "learning_rate": 2.580243788578246e-06, + "logits/chosen": -2.32698392868042, + "logits/rejected": -2.6894028186798096, + "logps/chosen": -328.6474304199219, + "logps/rejected": -487.6589050292969, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.362268447875977, + "rewards/margins": 6.775077819824219, + "rewards/rejected": -17.137346267700195, + "step": 15771 + }, + { + "epoch": 2.45, + "learning_rate": 2.579510348047098e-06, + "logits/chosen": -2.5356638431549072, + "logits/rejected": -2.7852964401245117, + "logps/chosen": -109.88275146484375, + "logps/rejected": -231.81764221191406, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.121562004089355, + "rewards/margins": 6.917552947998047, + "rewards/rejected": -16.03911590576172, + "step": 15772 + }, + { + "epoch": 2.45, + "learning_rate": 2.57877690751595e-06, + "logits/chosen": -1.5172429084777832, + "logits/rejected": -2.6151340007781982, + "logps/chosen": -161.32601928710938, + "logps/rejected": -568.452880859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.044802665710449, + "rewards/margins": 12.372138977050781, + "rewards/rejected": -19.416940689086914, + "step": 15773 + }, + { + "epoch": 2.45, + "learning_rate": 2.578043466984802e-06, + "logits/chosen": -1.6904798746109009, + "logits/rejected": -2.674929618835449, + "logps/chosen": -209.14822387695312, + "logps/rejected": -370.77947998046875, + "loss": 0.2399, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.150372505187988, + "rewards/margins": 7.416933059692383, + "rewards/rejected": -18.567306518554688, + "step": 15774 + }, + { + "epoch": 2.45, + "learning_rate": 2.5773100264536544e-06, + "logits/chosen": -2.7326574325561523, + "logits/rejected": -2.7916574478149414, + "logps/chosen": -194.71478271484375, + "logps/rejected": -388.87420654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.311577796936035, + "rewards/margins": 13.766800880432129, + "rewards/rejected": -22.078378677368164, + "step": 15775 + }, + { + "epoch": 2.45, + "learning_rate": 2.5765765859225062e-06, + "logits/chosen": -2.8872294425964355, + "logits/rejected": -2.4324190616607666, + "logps/chosen": -320.03515625, + "logps/rejected": -572.9197998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.694945335388184, + "rewards/margins": 13.119771003723145, + "rewards/rejected": -22.814716339111328, + "step": 15776 + }, + { + "epoch": 2.45, + "learning_rate": 2.5758431453913585e-06, + "logits/chosen": -2.2477526664733887, + "logits/rejected": -2.540384292602539, + "logps/chosen": -241.98336791992188, + "logps/rejected": -321.7547912597656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.354875564575195, + "rewards/margins": 7.865077018737793, + "rewards/rejected": -15.219952583312988, + "step": 15777 + }, + { + "epoch": 2.45, + "learning_rate": 2.575109704860211e-06, + "logits/chosen": -2.0506060123443604, + "logits/rejected": -2.908289909362793, + "logps/chosen": -449.3828125, + "logps/rejected": -653.6505126953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.439873695373535, + "rewards/margins": 9.475051879882812, + "rewards/rejected": -17.91492462158203, + "step": 15778 + }, + { + "epoch": 2.45, + "learning_rate": 2.5743762643290627e-06, + "logits/chosen": -1.921447515487671, + "logits/rejected": -2.714297294616699, + "logps/chosen": -179.73040771484375, + "logps/rejected": -291.46044921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.213226795196533, + "rewards/margins": 9.841694831848145, + "rewards/rejected": -16.054922103881836, + "step": 15779 + }, + { + "epoch": 2.45, + "learning_rate": 2.573642823797915e-06, + "logits/chosen": -2.8779423236846924, + "logits/rejected": -2.879132032394409, + "logps/chosen": -430.0419006347656, + "logps/rejected": -484.67626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.265267372131348, + "rewards/margins": 11.743925094604492, + "rewards/rejected": -20.009193420410156, + "step": 15780 + }, + { + "epoch": 2.45, + "learning_rate": 2.572909383266767e-06, + "logits/chosen": -1.930898904800415, + "logits/rejected": -2.5724501609802246, + "logps/chosen": -176.4186553955078, + "logps/rejected": -350.74481201171875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.671205520629883, + "rewards/margins": 7.6659255027771, + "rewards/rejected": -18.33713150024414, + "step": 15781 + }, + { + "epoch": 2.45, + "learning_rate": 2.572175942735619e-06, + "logits/chosen": -2.9508137702941895, + "logits/rejected": -2.803103446960449, + "logps/chosen": -105.93426513671875, + "logps/rejected": -199.87266540527344, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.855005264282227, + "rewards/margins": 6.802776336669922, + "rewards/rejected": -12.657781600952148, + "step": 15782 + }, + { + "epoch": 2.45, + "learning_rate": 2.571442502204471e-06, + "logits/chosen": -2.8398630619049072, + "logits/rejected": -1.7990167140960693, + "logps/chosen": -320.7037048339844, + "logps/rejected": -214.32931518554688, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6314496994018555, + "rewards/margins": 8.219396591186523, + "rewards/rejected": -15.850847244262695, + "step": 15783 + }, + { + "epoch": 2.45, + "learning_rate": 2.5707090616733234e-06, + "logits/chosen": -2.897836446762085, + "logits/rejected": -2.9805991649627686, + "logps/chosen": -143.99191284179688, + "logps/rejected": -210.27194213867188, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.956193447113037, + "rewards/margins": 6.248197555541992, + "rewards/rejected": -12.204391479492188, + "step": 15784 + }, + { + "epoch": 2.45, + "learning_rate": 2.5699756211421753e-06, + "logits/chosen": -2.90799617767334, + "logits/rejected": -2.7051913738250732, + "logps/chosen": -227.25357055664062, + "logps/rejected": -377.118408203125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.793448448181152, + "rewards/margins": 8.922561645507812, + "rewards/rejected": -16.71600914001465, + "step": 15785 + }, + { + "epoch": 2.46, + "learning_rate": 2.5692421806110276e-06, + "logits/chosen": -2.4325759410858154, + "logits/rejected": -2.8292043209075928, + "logps/chosen": -136.96865844726562, + "logps/rejected": -249.0455322265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.762749671936035, + "rewards/margins": 8.92536735534668, + "rewards/rejected": -17.68811798095703, + "step": 15786 + }, + { + "epoch": 2.46, + "learning_rate": 2.56850874007988e-06, + "logits/chosen": -1.8955885171890259, + "logits/rejected": -2.962709665298462, + "logps/chosen": -597.8963623046875, + "logps/rejected": -771.1325073242188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.575812339782715, + "rewards/margins": 11.573715209960938, + "rewards/rejected": -19.149526596069336, + "step": 15787 + }, + { + "epoch": 2.46, + "learning_rate": 2.5677752995487317e-06, + "logits/chosen": -2.752260208129883, + "logits/rejected": -1.8837827444076538, + "logps/chosen": -315.5324401855469, + "logps/rejected": -460.6077575683594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.557063102722168, + "rewards/margins": 12.015412330627441, + "rewards/rejected": -18.57247543334961, + "step": 15788 + }, + { + "epoch": 2.46, + "learning_rate": 2.567041859017584e-06, + "logits/chosen": -0.9085550904273987, + "logits/rejected": -2.317966938018799, + "logps/chosen": -167.41156005859375, + "logps/rejected": -554.73828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.495696067810059, + "rewards/margins": 15.060149192810059, + "rewards/rejected": -23.55584716796875, + "step": 15789 + }, + { + "epoch": 2.46, + "learning_rate": 2.566308418486436e-06, + "logits/chosen": -1.1610769033432007, + "logits/rejected": -2.6924335956573486, + "logps/chosen": -146.97622680664062, + "logps/rejected": -447.8389587402344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.653871536254883, + "rewards/margins": 10.327091217041016, + "rewards/rejected": -19.9809627532959, + "step": 15790 + }, + { + "epoch": 2.46, + "learning_rate": 2.5655749779552882e-06, + "logits/chosen": -2.944854497909546, + "logits/rejected": -2.565028190612793, + "logps/chosen": -548.6541137695312, + "logps/rejected": -718.0011596679688, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.490001678466797, + "rewards/margins": 8.273307800292969, + "rewards/rejected": -18.763309478759766, + "step": 15791 + }, + { + "epoch": 2.46, + "learning_rate": 2.56484153742414e-06, + "logits/chosen": -2.815870523452759, + "logits/rejected": -1.939902663230896, + "logps/chosen": -576.6356811523438, + "logps/rejected": -429.86737060546875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.041586875915527, + "rewards/margins": 6.822063446044922, + "rewards/rejected": -18.863651275634766, + "step": 15792 + }, + { + "epoch": 2.46, + "learning_rate": 2.5641080968929924e-06, + "logits/chosen": -2.6117451190948486, + "logits/rejected": -2.7056968212127686, + "logps/chosen": -255.300537109375, + "logps/rejected": -329.51751708984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.289161205291748, + "rewards/margins": 9.552299499511719, + "rewards/rejected": -15.841461181640625, + "step": 15793 + }, + { + "epoch": 2.46, + "learning_rate": 2.5633746563618443e-06, + "logits/chosen": -2.4189670085906982, + "logits/rejected": -1.8889094591140747, + "logps/chosen": -689.9931640625, + "logps/rejected": -540.6522827148438, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.796719551086426, + "rewards/margins": 6.334373474121094, + "rewards/rejected": -19.131092071533203, + "step": 15794 + }, + { + "epoch": 2.46, + "learning_rate": 2.562641215830697e-06, + "logits/chosen": -1.8819884061813354, + "logits/rejected": -2.460845470428467, + "logps/chosen": -534.1304321289062, + "logps/rejected": -651.451416015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.098467826843262, + "rewards/margins": 9.209436416625977, + "rewards/rejected": -16.307903289794922, + "step": 15795 + }, + { + "epoch": 2.46, + "learning_rate": 2.561907775299549e-06, + "logits/chosen": -0.9589069485664368, + "logits/rejected": -2.794579267501831, + "logps/chosen": -178.05987548828125, + "logps/rejected": -868.0543212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.320538520812988, + "rewards/margins": 13.317769050598145, + "rewards/rejected": -21.638307571411133, + "step": 15796 + }, + { + "epoch": 2.46, + "learning_rate": 2.5611743347684008e-06, + "logits/chosen": -1.25582754611969, + "logits/rejected": -2.6840250492095947, + "logps/chosen": -314.23028564453125, + "logps/rejected": -510.3396911621094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.162013053894043, + "rewards/margins": 12.077075958251953, + "rewards/rejected": -17.239089965820312, + "step": 15797 + }, + { + "epoch": 2.46, + "learning_rate": 2.560440894237253e-06, + "logits/chosen": -2.6622815132141113, + "logits/rejected": -1.8089874982833862, + "logps/chosen": -660.5618896484375, + "logps/rejected": -458.7071533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.061955451965332, + "rewards/margins": 13.878644943237305, + "rewards/rejected": -22.940601348876953, + "step": 15798 + }, + { + "epoch": 2.46, + "learning_rate": 2.559707453706105e-06, + "logits/chosen": -1.6240051984786987, + "logits/rejected": -2.494753122329712, + "logps/chosen": -406.291015625, + "logps/rejected": -683.3574829101562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.676326751708984, + "rewards/margins": 11.608427047729492, + "rewards/rejected": -22.284753799438477, + "step": 15799 + }, + { + "epoch": 2.46, + "learning_rate": 2.5589740131749572e-06, + "logits/chosen": -2.850728988647461, + "logits/rejected": -1.7795112133026123, + "logps/chosen": -899.9365234375, + "logps/rejected": -653.9429931640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.612518310546875, + "rewards/margins": 10.113913536071777, + "rewards/rejected": -19.72643280029297, + "step": 15800 + }, + { + "epoch": 2.46, + "learning_rate": 2.558240572643809e-06, + "logits/chosen": -2.2370970249176025, + "logits/rejected": -1.5911953449249268, + "logps/chosen": -352.08587646484375, + "logps/rejected": -372.9208984375, + "loss": 0.331, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.574012756347656, + "rewards/margins": 6.408225059509277, + "rewards/rejected": -17.982236862182617, + "step": 15801 + }, + { + "epoch": 2.46, + "learning_rate": 2.5575071321126614e-06, + "logits/chosen": -1.4102915525436401, + "logits/rejected": -2.812028408050537, + "logps/chosen": -165.2378692626953, + "logps/rejected": -275.979736328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.372539520263672, + "rewards/margins": 9.993602752685547, + "rewards/rejected": -18.36614227294922, + "step": 15802 + }, + { + "epoch": 2.46, + "learning_rate": 2.5567736915815137e-06, + "logits/chosen": -2.7405850887298584, + "logits/rejected": -2.9496936798095703, + "logps/chosen": -100.40795135498047, + "logps/rejected": -232.48748779296875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.251406192779541, + "rewards/margins": 7.015590667724609, + "rewards/rejected": -14.266996383666992, + "step": 15803 + }, + { + "epoch": 2.46, + "learning_rate": 2.556040251050366e-06, + "logits/chosen": -2.926814317703247, + "logits/rejected": -2.499586820602417, + "logps/chosen": -196.29730224609375, + "logps/rejected": -279.82598876953125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.601485252380371, + "rewards/margins": 7.673358917236328, + "rewards/rejected": -13.2748441696167, + "step": 15804 + }, + { + "epoch": 2.46, + "learning_rate": 2.555306810519218e-06, + "logits/chosen": -2.234518527984619, + "logits/rejected": -2.7708215713500977, + "logps/chosen": -184.29165649414062, + "logps/rejected": -390.83819580078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.261355400085449, + "rewards/margins": 8.24934196472168, + "rewards/rejected": -14.510696411132812, + "step": 15805 + }, + { + "epoch": 2.46, + "learning_rate": 2.5545733699880698e-06, + "logits/chosen": -1.9603872299194336, + "logits/rejected": -2.880319118499756, + "logps/chosen": -180.10971069335938, + "logps/rejected": -376.8951721191406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.910687446594238, + "rewards/margins": 10.061140060424805, + "rewards/rejected": -21.97182846069336, + "step": 15806 + }, + { + "epoch": 2.46, + "learning_rate": 2.553839929456922e-06, + "logits/chosen": -2.331618547439575, + "logits/rejected": -2.8534562587738037, + "logps/chosen": -105.98015594482422, + "logps/rejected": -214.28387451171875, + "loss": 0.2452, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.340799331665039, + "rewards/margins": 5.439472198486328, + "rewards/rejected": -12.780271530151367, + "step": 15807 + }, + { + "epoch": 2.46, + "learning_rate": 2.553106488925774e-06, + "logits/chosen": -2.674694299697876, + "logits/rejected": -2.1202046871185303, + "logps/chosen": -177.02993774414062, + "logps/rejected": -259.2326965332031, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.286708831787109, + "rewards/margins": 7.7997283935546875, + "rewards/rejected": -14.086437225341797, + "step": 15808 + }, + { + "epoch": 2.46, + "learning_rate": 2.5523730483946263e-06, + "logits/chosen": -2.3817949295043945, + "logits/rejected": -2.375725746154785, + "logps/chosen": -475.5027770996094, + "logps/rejected": -911.2486572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.793395042419434, + "rewards/margins": 16.50851058959961, + "rewards/rejected": -26.301904678344727, + "step": 15809 + }, + { + "epoch": 2.46, + "learning_rate": 2.551639607863478e-06, + "logits/chosen": -2.582430362701416, + "logits/rejected": -2.9460511207580566, + "logps/chosen": -217.89907836914062, + "logps/rejected": -429.716064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.589382171630859, + "rewards/margins": 11.677200317382812, + "rewards/rejected": -16.266582489013672, + "step": 15810 + }, + { + "epoch": 2.46, + "learning_rate": 2.5509061673323304e-06, + "logits/chosen": -2.3787050247192383, + "logits/rejected": -2.738447904586792, + "logps/chosen": -263.5865173339844, + "logps/rejected": -301.9234313964844, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.661663055419922, + "rewards/margins": 7.921170234680176, + "rewards/rejected": -14.582833290100098, + "step": 15811 + }, + { + "epoch": 2.46, + "learning_rate": 2.5501727268011827e-06, + "logits/chosen": -2.6171507835388184, + "logits/rejected": -1.611053705215454, + "logps/chosen": -348.65411376953125, + "logps/rejected": -292.52734375, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.408210754394531, + "rewards/margins": 7.455446243286133, + "rewards/rejected": -17.863656997680664, + "step": 15812 + }, + { + "epoch": 2.46, + "learning_rate": 2.549439286270035e-06, + "logits/chosen": -2.1028637886047363, + "logits/rejected": -2.4287242889404297, + "logps/chosen": -324.2464599609375, + "logps/rejected": -405.0608215332031, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.462263107299805, + "rewards/margins": 6.169711112976074, + "rewards/rejected": -18.631973266601562, + "step": 15813 + }, + { + "epoch": 2.46, + "learning_rate": 2.548705845738887e-06, + "logits/chosen": -2.7871644496917725, + "logits/rejected": -2.822866916656494, + "logps/chosen": -291.8838806152344, + "logps/rejected": -435.0162658691406, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.305526733398438, + "rewards/margins": 8.31222152709961, + "rewards/rejected": -19.617748260498047, + "step": 15814 + }, + { + "epoch": 2.46, + "learning_rate": 2.5479724052077392e-06, + "logits/chosen": -1.6198437213897705, + "logits/rejected": -2.4232499599456787, + "logps/chosen": -281.16583251953125, + "logps/rejected": -421.42791748046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.113718032836914, + "rewards/margins": 8.359333992004395, + "rewards/rejected": -19.473052978515625, + "step": 15815 + }, + { + "epoch": 2.46, + "learning_rate": 2.547238964676591e-06, + "logits/chosen": -2.0318362712860107, + "logits/rejected": -2.5268640518188477, + "logps/chosen": -186.6407470703125, + "logps/rejected": -551.6416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.329289436340332, + "rewards/margins": 14.682016372680664, + "rewards/rejected": -23.011306762695312, + "step": 15816 + }, + { + "epoch": 2.46, + "learning_rate": 2.546505524145443e-06, + "logits/chosen": -2.323452949523926, + "logits/rejected": -2.675340414047241, + "logps/chosen": -219.72726440429688, + "logps/rejected": -483.17596435546875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.638602256774902, + "rewards/margins": 8.326053619384766, + "rewards/rejected": -15.964654922485352, + "step": 15817 + }, + { + "epoch": 2.46, + "learning_rate": 2.5457720836142953e-06, + "logits/chosen": -1.793824553489685, + "logits/rejected": -2.911087989807129, + "logps/chosen": -922.70263671875, + "logps/rejected": -1234.363525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.103421211242676, + "rewards/margins": 20.156532287597656, + "rewards/rejected": -26.25995445251465, + "step": 15818 + }, + { + "epoch": 2.46, + "learning_rate": 2.545038643083147e-06, + "logits/chosen": -1.3838152885437012, + "logits/rejected": -2.651576519012451, + "logps/chosen": -383.05950927734375, + "logps/rejected": -481.0453796386719, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.313705444335938, + "rewards/margins": 8.003822326660156, + "rewards/rejected": -18.317527770996094, + "step": 15819 + }, + { + "epoch": 2.46, + "learning_rate": 2.544305202552e-06, + "logits/chosen": -2.816865921020508, + "logits/rejected": -2.3645687103271484, + "logps/chosen": -497.61798095703125, + "logps/rejected": -375.74896240234375, + "loss": 0.0598, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.429482460021973, + "rewards/margins": 4.654879093170166, + "rewards/rejected": -13.084362030029297, + "step": 15820 + }, + { + "epoch": 2.46, + "learning_rate": 2.5435717620208518e-06, + "logits/chosen": -2.882272243499756, + "logits/rejected": -1.7962431907653809, + "logps/chosen": -301.7221984863281, + "logps/rejected": -232.40936279296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.883321523666382, + "rewards/margins": 10.89109992980957, + "rewards/rejected": -13.774421691894531, + "step": 15821 + }, + { + "epoch": 2.46, + "learning_rate": 2.542838321489704e-06, + "logits/chosen": -1.062752604484558, + "logits/rejected": -2.500906467437744, + "logps/chosen": -211.95660400390625, + "logps/rejected": -505.9841613769531, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.659246444702148, + "rewards/margins": 8.033976554870605, + "rewards/rejected": -17.693222045898438, + "step": 15822 + }, + { + "epoch": 2.46, + "learning_rate": 2.542104880958556e-06, + "logits/chosen": -2.6275594234466553, + "logits/rejected": -2.207486152648926, + "logps/chosen": -344.97735595703125, + "logps/rejected": -359.2346496582031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.331428527832031, + "rewards/margins": 13.615520477294922, + "rewards/rejected": -19.946949005126953, + "step": 15823 + }, + { + "epoch": 2.46, + "learning_rate": 2.5413714404274083e-06, + "logits/chosen": -2.639988422393799, + "logits/rejected": -2.8214640617370605, + "logps/chosen": -216.56698608398438, + "logps/rejected": -238.59384155273438, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.314979553222656, + "rewards/margins": 6.194858074188232, + "rewards/rejected": -15.50983715057373, + "step": 15824 + }, + { + "epoch": 2.46, + "learning_rate": 2.54063799989626e-06, + "logits/chosen": -2.4153730869293213, + "logits/rejected": -2.8699700832366943, + "logps/chosen": -226.70237731933594, + "logps/rejected": -468.924560546875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.068090438842773, + "rewards/margins": 6.454855918884277, + "rewards/rejected": -18.522945404052734, + "step": 15825 + }, + { + "epoch": 2.46, + "learning_rate": 2.539904559365112e-06, + "logits/chosen": -2.1822519302368164, + "logits/rejected": -2.6027746200561523, + "logps/chosen": -105.49746704101562, + "logps/rejected": -282.25946044921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.019114971160889, + "rewards/margins": 9.292778015136719, + "rewards/rejected": -15.311893463134766, + "step": 15826 + }, + { + "epoch": 2.46, + "learning_rate": 2.5391711188339643e-06, + "logits/chosen": -2.330157518386841, + "logits/rejected": -2.3882534503936768, + "logps/chosen": -247.63836669921875, + "logps/rejected": -341.1557922363281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.006704330444336, + "rewards/margins": 8.842935562133789, + "rewards/rejected": -17.849639892578125, + "step": 15827 + }, + { + "epoch": 2.46, + "learning_rate": 2.5384376783028166e-06, + "logits/chosen": -1.3622088432312012, + "logits/rejected": -2.5327389240264893, + "logps/chosen": -126.85688781738281, + "logps/rejected": -555.3106079101562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.226044654846191, + "rewards/margins": 14.794381141662598, + "rewards/rejected": -24.02042579650879, + "step": 15828 + }, + { + "epoch": 2.46, + "learning_rate": 2.537704237771669e-06, + "logits/chosen": -2.6704018115997314, + "logits/rejected": -2.336663007736206, + "logps/chosen": -306.22515869140625, + "logps/rejected": -352.00482177734375, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.897391319274902, + "rewards/margins": 5.4734601974487305, + "rewards/rejected": -13.370851516723633, + "step": 15829 + }, + { + "epoch": 2.46, + "learning_rate": 2.536970797240521e-06, + "logits/chosen": -1.5929783582687378, + "logits/rejected": -2.551539182662964, + "logps/chosen": -197.23666381835938, + "logps/rejected": -454.72344970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.500829696655273, + "rewards/margins": 16.120386123657227, + "rewards/rejected": -23.6212158203125, + "step": 15830 + }, + { + "epoch": 2.46, + "learning_rate": 2.536237356709373e-06, + "logits/chosen": -2.098867893218994, + "logits/rejected": -2.545666456222534, + "logps/chosen": -272.2698974609375, + "logps/rejected": -439.5655517578125, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.4758882522583, + "rewards/margins": 9.038395881652832, + "rewards/rejected": -20.514284133911133, + "step": 15831 + }, + { + "epoch": 2.46, + "learning_rate": 2.535503916178225e-06, + "logits/chosen": -1.5814027786254883, + "logits/rejected": -2.793092727661133, + "logps/chosen": -253.40916442871094, + "logps/rejected": -474.40875244140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.566654205322266, + "rewards/margins": 13.284647941589355, + "rewards/rejected": -21.851303100585938, + "step": 15832 + }, + { + "epoch": 2.46, + "learning_rate": 2.5347704756470773e-06, + "logits/chosen": -1.823629379272461, + "logits/rejected": -2.3859355449676514, + "logps/chosen": -273.6153259277344, + "logps/rejected": -432.01190185546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.92008113861084, + "rewards/margins": 10.842820167541504, + "rewards/rejected": -21.762901306152344, + "step": 15833 + }, + { + "epoch": 2.46, + "learning_rate": 2.534037035115929e-06, + "logits/chosen": -2.314344882965088, + "logits/rejected": -2.5407421588897705, + "logps/chosen": -201.365234375, + "logps/rejected": -328.39202880859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.280406951904297, + "rewards/margins": 13.180030822753906, + "rewards/rejected": -18.460437774658203, + "step": 15834 + }, + { + "epoch": 2.46, + "learning_rate": 2.533303594584781e-06, + "logits/chosen": -2.412851095199585, + "logits/rejected": -3.002854824066162, + "logps/chosen": -165.70062255859375, + "logps/rejected": -492.76373291015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.987347602844238, + "rewards/margins": 10.366880416870117, + "rewards/rejected": -16.35422706604004, + "step": 15835 + }, + { + "epoch": 2.46, + "learning_rate": 2.5325701540536333e-06, + "logits/chosen": -2.5940492153167725, + "logits/rejected": -2.7824785709381104, + "logps/chosen": -124.19805908203125, + "logps/rejected": -356.79290771484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9163312911987305, + "rewards/margins": 12.085227012634277, + "rewards/rejected": -19.001558303833008, + "step": 15836 + }, + { + "epoch": 2.46, + "learning_rate": 2.5318367135224856e-06, + "logits/chosen": -2.9194424152374268, + "logits/rejected": -2.089158535003662, + "logps/chosen": -716.030029296875, + "logps/rejected": -568.11669921875, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.01788330078125, + "rewards/margins": 7.721589088439941, + "rewards/rejected": -14.739472389221191, + "step": 15837 + }, + { + "epoch": 2.46, + "learning_rate": 2.531103272991338e-06, + "logits/chosen": -2.762113571166992, + "logits/rejected": -2.865318775177002, + "logps/chosen": -189.19918823242188, + "logps/rejected": -244.30014038085938, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.316052436828613, + "rewards/margins": 7.58928108215332, + "rewards/rejected": -14.905333518981934, + "step": 15838 + }, + { + "epoch": 2.46, + "learning_rate": 2.53036983246019e-06, + "logits/chosen": -1.8738504648208618, + "logits/rejected": -2.4029364585876465, + "logps/chosen": -161.21595764160156, + "logps/rejected": -229.79129028320312, + "loss": 0.0332, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.149267196655273, + "rewards/margins": 7.742414474487305, + "rewards/rejected": -15.891681671142578, + "step": 15839 + }, + { + "epoch": 2.46, + "learning_rate": 2.529636391929042e-06, + "logits/chosen": -2.7595527172088623, + "logits/rejected": -1.530166745185852, + "logps/chosen": -476.4364318847656, + "logps/rejected": -295.4279479980469, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.38480281829834, + "rewards/margins": 5.81003475189209, + "rewards/rejected": -15.19483757019043, + "step": 15840 + }, + { + "epoch": 2.46, + "learning_rate": 2.528902951397894e-06, + "logits/chosen": -2.6598169803619385, + "logits/rejected": -2.4030356407165527, + "logps/chosen": -658.07275390625, + "logps/rejected": -569.7581787109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.529226779937744, + "rewards/margins": 9.153488159179688, + "rewards/rejected": -16.682714462280273, + "step": 15841 + }, + { + "epoch": 2.46, + "learning_rate": 2.5281695108667463e-06, + "logits/chosen": -1.5019077062606812, + "logits/rejected": -2.5699989795684814, + "logps/chosen": -152.58984375, + "logps/rejected": -370.7720031738281, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.147150039672852, + "rewards/margins": 10.416582107543945, + "rewards/rejected": -19.563732147216797, + "step": 15842 + }, + { + "epoch": 2.46, + "learning_rate": 2.527436070335598e-06, + "logits/chosen": -2.82955002784729, + "logits/rejected": -1.3653416633605957, + "logps/chosen": -353.57220458984375, + "logps/rejected": -201.88265991210938, + "loss": 0.0528, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.744712829589844, + "rewards/margins": 3.4783031940460205, + "rewards/rejected": -11.223016738891602, + "step": 15843 + }, + { + "epoch": 2.46, + "learning_rate": 2.5267026298044505e-06, + "logits/chosen": -1.8057630062103271, + "logits/rejected": -2.562269449234009, + "logps/chosen": -389.5255432128906, + "logps/rejected": -523.8900146484375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.124942779541016, + "rewards/margins": 9.038440704345703, + "rewards/rejected": -18.16338348388672, + "step": 15844 + }, + { + "epoch": 2.46, + "learning_rate": 2.5259691892733028e-06, + "logits/chosen": -2.9177043437957764, + "logits/rejected": -3.003387689590454, + "logps/chosen": -234.22349548339844, + "logps/rejected": -243.72998046875, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.939879417419434, + "rewards/margins": 7.69188928604126, + "rewards/rejected": -14.631769180297852, + "step": 15845 + }, + { + "epoch": 2.46, + "learning_rate": 2.5252357487421547e-06, + "logits/chosen": -1.8042603731155396, + "logits/rejected": -2.901914358139038, + "logps/chosen": -267.5643615722656, + "logps/rejected": -459.7834167480469, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.971421241760254, + "rewards/margins": 5.398517608642578, + "rewards/rejected": -16.369937896728516, + "step": 15846 + }, + { + "epoch": 2.46, + "learning_rate": 2.524502308211007e-06, + "logits/chosen": -2.449214458465576, + "logits/rejected": -2.962071418762207, + "logps/chosen": -427.1647644042969, + "logps/rejected": -602.040283203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.244651794433594, + "rewards/margins": 13.137430191040039, + "rewards/rejected": -22.382081985473633, + "step": 15847 + }, + { + "epoch": 2.46, + "learning_rate": 2.523768867679859e-06, + "logits/chosen": -3.0453953742980957, + "logits/rejected": -2.6748154163360596, + "logps/chosen": -301.48779296875, + "logps/rejected": -361.1440124511719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.187358856201172, + "rewards/margins": 10.667612075805664, + "rewards/rejected": -19.854970932006836, + "step": 15848 + }, + { + "epoch": 2.46, + "learning_rate": 2.523035427148711e-06, + "logits/chosen": -2.409036874771118, + "logits/rejected": -2.716531753540039, + "logps/chosen": -119.78279876708984, + "logps/rejected": -365.2230224609375, + "loss": 0.0643, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.074041366577148, + "rewards/margins": 7.525001525878906, + "rewards/rejected": -14.599042892456055, + "step": 15849 + }, + { + "epoch": 2.47, + "learning_rate": 2.522301986617563e-06, + "logits/chosen": -2.7458603382110596, + "logits/rejected": -2.7583560943603516, + "logps/chosen": -427.2657775878906, + "logps/rejected": -341.43914794921875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.27993392944336, + "rewards/margins": 7.8152546882629395, + "rewards/rejected": -19.09518814086914, + "step": 15850 + }, + { + "epoch": 2.47, + "learning_rate": 2.5215685460864153e-06, + "logits/chosen": -2.7970192432403564, + "logits/rejected": -2.461580991744995, + "logps/chosen": -529.9617919921875, + "logps/rejected": -471.94537353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.014066696166992, + "rewards/margins": 10.408441543579102, + "rewards/rejected": -19.422508239746094, + "step": 15851 + }, + { + "epoch": 2.47, + "learning_rate": 2.520835105555267e-06, + "logits/chosen": -1.87078058719635, + "logits/rejected": -2.0017306804656982, + "logps/chosen": -594.4623413085938, + "logps/rejected": -594.4207153320312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.421733856201172, + "rewards/margins": 11.195537567138672, + "rewards/rejected": -22.617271423339844, + "step": 15852 + }, + { + "epoch": 2.47, + "learning_rate": 2.5201016650241195e-06, + "logits/chosen": -1.9210997819900513, + "logits/rejected": -2.8945515155792236, + "logps/chosen": -514.772705078125, + "logps/rejected": -666.0164794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.399663925170898, + "rewards/margins": 11.216480255126953, + "rewards/rejected": -23.61614418029785, + "step": 15853 + }, + { + "epoch": 2.47, + "learning_rate": 2.519368224492972e-06, + "logits/chosen": -1.7736587524414062, + "logits/rejected": -2.6017940044403076, + "logps/chosen": -207.32968139648438, + "logps/rejected": -264.4521179199219, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.71963119506836, + "rewards/margins": 6.213512420654297, + "rewards/rejected": -14.933143615722656, + "step": 15854 + }, + { + "epoch": 2.47, + "learning_rate": 2.5186347839618237e-06, + "logits/chosen": -1.9907886981964111, + "logits/rejected": -2.6807913780212402, + "logps/chosen": -436.9425964355469, + "logps/rejected": -536.880859375, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9943461418151855, + "rewards/margins": 7.832208633422852, + "rewards/rejected": -15.826555252075195, + "step": 15855 + }, + { + "epoch": 2.47, + "learning_rate": 2.517901343430676e-06, + "logits/chosen": -2.9032511711120605, + "logits/rejected": -2.8064513206481934, + "logps/chosen": -128.3211669921875, + "logps/rejected": -339.8542175292969, + "loss": 0.1692, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.830991744995117, + "rewards/margins": 2.611844539642334, + "rewards/rejected": -13.44283676147461, + "step": 15856 + }, + { + "epoch": 2.47, + "learning_rate": 2.517167902899528e-06, + "logits/chosen": -2.6910207271575928, + "logits/rejected": -1.3065704107284546, + "logps/chosen": -415.6676940917969, + "logps/rejected": -200.62814331054688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.997740268707275, + "rewards/margins": 9.372233390808105, + "rewards/rejected": -16.36997413635254, + "step": 15857 + }, + { + "epoch": 2.47, + "learning_rate": 2.51643446236838e-06, + "logits/chosen": -2.1299126148223877, + "logits/rejected": -2.8336215019226074, + "logps/chosen": -249.23524475097656, + "logps/rejected": -420.8638916015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.972493171691895, + "rewards/margins": 9.319350242614746, + "rewards/rejected": -20.29184341430664, + "step": 15858 + }, + { + "epoch": 2.47, + "learning_rate": 2.515701021837232e-06, + "logits/chosen": -2.4460127353668213, + "logits/rejected": -2.189744472503662, + "logps/chosen": -259.33831787109375, + "logps/rejected": -295.56317138671875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.135368347167969, + "rewards/margins": 8.14619255065918, + "rewards/rejected": -16.28156089782715, + "step": 15859 + }, + { + "epoch": 2.47, + "learning_rate": 2.5149675813060843e-06, + "logits/chosen": -0.481275737285614, + "logits/rejected": -1.8328027725219727, + "logps/chosen": -230.50192260742188, + "logps/rejected": -718.1381225585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.898155212402344, + "rewards/margins": 14.046363830566406, + "rewards/rejected": -24.94451904296875, + "step": 15860 + }, + { + "epoch": 2.47, + "learning_rate": 2.5142341407749362e-06, + "logits/chosen": -1.558679461479187, + "logits/rejected": -2.9287266731262207, + "logps/chosen": -398.04046630859375, + "logps/rejected": -792.909912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.538873672485352, + "rewards/margins": 14.266586303710938, + "rewards/rejected": -23.80545997619629, + "step": 15861 + }, + { + "epoch": 2.47, + "learning_rate": 2.513500700243789e-06, + "logits/chosen": -1.8107506036758423, + "logits/rejected": -2.1347603797912598, + "logps/chosen": -189.42408752441406, + "logps/rejected": -353.7469482421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.283561706542969, + "rewards/margins": 9.039348602294922, + "rewards/rejected": -20.32291030883789, + "step": 15862 + }, + { + "epoch": 2.47, + "learning_rate": 2.512767259712641e-06, + "logits/chosen": -2.8368802070617676, + "logits/rejected": -1.8911197185516357, + "logps/chosen": -514.46630859375, + "logps/rejected": -501.67333984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.953012466430664, + "rewards/margins": 9.910667419433594, + "rewards/rejected": -18.86368179321289, + "step": 15863 + }, + { + "epoch": 2.47, + "learning_rate": 2.512033819181493e-06, + "logits/chosen": -1.91347074508667, + "logits/rejected": -2.129711389541626, + "logps/chosen": -284.080810546875, + "logps/rejected": -505.99151611328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.182637214660645, + "rewards/margins": 13.040166854858398, + "rewards/rejected": -25.22280502319336, + "step": 15864 + }, + { + "epoch": 2.47, + "learning_rate": 2.511300378650345e-06, + "logits/chosen": -0.8471997380256653, + "logits/rejected": -1.6201210021972656, + "logps/chosen": -275.9936828613281, + "logps/rejected": -558.8408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.665583610534668, + "rewards/margins": 14.113336563110352, + "rewards/rejected": -21.778919219970703, + "step": 15865 + }, + { + "epoch": 2.47, + "learning_rate": 2.510566938119197e-06, + "logits/chosen": -2.3441903591156006, + "logits/rejected": -2.807431936264038, + "logps/chosen": -117.21502685546875, + "logps/rejected": -269.8423767089844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.240017890930176, + "rewards/margins": 11.031997680664062, + "rewards/rejected": -19.272014617919922, + "step": 15866 + }, + { + "epoch": 2.47, + "learning_rate": 2.509833497588049e-06, + "logits/chosen": -2.8581385612487793, + "logits/rejected": -2.5540659427642822, + "logps/chosen": -234.9097442626953, + "logps/rejected": -320.50469970703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.221452713012695, + "rewards/margins": 9.319549560546875, + "rewards/rejected": -18.541004180908203, + "step": 15867 + }, + { + "epoch": 2.47, + "learning_rate": 2.509100057056901e-06, + "logits/chosen": -1.402402639389038, + "logits/rejected": -2.2117886543273926, + "logps/chosen": -170.0137176513672, + "logps/rejected": -369.9273681640625, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.403241157531738, + "rewards/margins": 6.414756774902344, + "rewards/rejected": -18.817996978759766, + "step": 15868 + }, + { + "epoch": 2.47, + "learning_rate": 2.5083666165257534e-06, + "logits/chosen": -1.2537386417388916, + "logits/rejected": -2.6114706993103027, + "logps/chosen": -250.40382385253906, + "logps/rejected": -403.939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.988090515136719, + "rewards/margins": 11.879667282104492, + "rewards/rejected": -21.86775779724121, + "step": 15869 + }, + { + "epoch": 2.47, + "learning_rate": 2.5076331759946057e-06, + "logits/chosen": -2.633375883102417, + "logits/rejected": -2.467824935913086, + "logps/chosen": -442.82598876953125, + "logps/rejected": -576.6973876953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.62363052368164, + "rewards/margins": 9.750368118286133, + "rewards/rejected": -23.37399673461914, + "step": 15870 + }, + { + "epoch": 2.47, + "learning_rate": 2.506899735463458e-06, + "logits/chosen": -2.6344611644744873, + "logits/rejected": -1.9587832689285278, + "logps/chosen": -353.04388427734375, + "logps/rejected": -660.510498046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.445453643798828, + "rewards/margins": 9.407079696655273, + "rewards/rejected": -17.8525333404541, + "step": 15871 + }, + { + "epoch": 2.47, + "learning_rate": 2.50616629493231e-06, + "logits/chosen": -2.2230706214904785, + "logits/rejected": -3.068774938583374, + "logps/chosen": -317.99298095703125, + "logps/rejected": -481.2921142578125, + "loss": 0.3437, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.326251983642578, + "rewards/margins": 5.3604736328125, + "rewards/rejected": -13.686725616455078, + "step": 15872 + }, + { + "epoch": 2.47, + "learning_rate": 2.505432854401162e-06, + "logits/chosen": -2.820478916168213, + "logits/rejected": -2.4611449241638184, + "logps/chosen": -561.1263427734375, + "logps/rejected": -582.5428466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.099366188049316, + "rewards/margins": 13.32748794555664, + "rewards/rejected": -21.42685317993164, + "step": 15873 + }, + { + "epoch": 2.47, + "learning_rate": 2.504699413870014e-06, + "logits/chosen": -3.0648486614227295, + "logits/rejected": -2.7714321613311768, + "logps/chosen": -198.17459106445312, + "logps/rejected": -227.80604553222656, + "loss": 0.0902, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.700931549072266, + "rewards/margins": 6.527885437011719, + "rewards/rejected": -14.228816986083984, + "step": 15874 + }, + { + "epoch": 2.47, + "learning_rate": 2.503965973338866e-06, + "logits/chosen": -0.48941633105278015, + "logits/rejected": -2.6110496520996094, + "logps/chosen": -140.42184448242188, + "logps/rejected": -613.1829223632812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.601152420043945, + "rewards/margins": 10.73937702178955, + "rewards/rejected": -20.340530395507812, + "step": 15875 + }, + { + "epoch": 2.47, + "learning_rate": 2.503232532807718e-06, + "logits/chosen": -2.5967116355895996, + "logits/rejected": -2.538668155670166, + "logps/chosen": -197.74465942382812, + "logps/rejected": -337.146728515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.091251373291016, + "rewards/margins": 7.711679935455322, + "rewards/rejected": -18.802932739257812, + "step": 15876 + }, + { + "epoch": 2.47, + "learning_rate": 2.50249909227657e-06, + "logits/chosen": -2.338846206665039, + "logits/rejected": -2.886007070541382, + "logps/chosen": -286.51116943359375, + "logps/rejected": -425.32489013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.441009044647217, + "rewards/margins": 9.463726997375488, + "rewards/rejected": -15.904735565185547, + "step": 15877 + }, + { + "epoch": 2.47, + "learning_rate": 2.5017656517454224e-06, + "logits/chosen": -2.5856432914733887, + "logits/rejected": -2.8562657833099365, + "logps/chosen": -394.42681884765625, + "logps/rejected": -374.0221862792969, + "loss": 0.1208, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.841495513916016, + "rewards/margins": 4.893761157989502, + "rewards/rejected": -16.73525619506836, + "step": 15878 + }, + { + "epoch": 2.47, + "learning_rate": 2.5010322112142747e-06, + "logits/chosen": -2.4901528358459473, + "logits/rejected": -2.7785985469818115, + "logps/chosen": -376.82989501953125, + "logps/rejected": -517.0323486328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.841303825378418, + "rewards/margins": 11.772773742675781, + "rewards/rejected": -18.614078521728516, + "step": 15879 + }, + { + "epoch": 2.47, + "learning_rate": 2.500298770683127e-06, + "logits/chosen": -2.620759963989258, + "logits/rejected": -2.849411725997925, + "logps/chosen": -88.27951049804688, + "logps/rejected": -394.47088623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.880117893218994, + "rewards/margins": 11.09852409362793, + "rewards/rejected": -17.978641510009766, + "step": 15880 + }, + { + "epoch": 2.47, + "learning_rate": 2.499565330151979e-06, + "logits/chosen": -1.0950689315795898, + "logits/rejected": -2.436828374862671, + "logps/chosen": -269.9046936035156, + "logps/rejected": -406.20263671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7097883224487305, + "rewards/margins": 9.18929386138916, + "rewards/rejected": -15.89908218383789, + "step": 15881 + }, + { + "epoch": 2.47, + "learning_rate": 2.498831889620831e-06, + "logits/chosen": -2.4173710346221924, + "logits/rejected": -2.691755771636963, + "logps/chosen": -343.8546142578125, + "logps/rejected": -403.9520263671875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.643280029296875, + "rewards/margins": 8.989889144897461, + "rewards/rejected": -19.633169174194336, + "step": 15882 + }, + { + "epoch": 2.47, + "learning_rate": 2.498098449089683e-06, + "logits/chosen": -2.8047678470611572, + "logits/rejected": -3.0204930305480957, + "logps/chosen": -134.3568572998047, + "logps/rejected": -269.7893371582031, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.337493896484375, + "rewards/margins": 8.298820495605469, + "rewards/rejected": -20.636314392089844, + "step": 15883 + }, + { + "epoch": 2.47, + "learning_rate": 2.497365008558535e-06, + "logits/chosen": -2.787666082382202, + "logits/rejected": -2.616270065307617, + "logps/chosen": -139.4556884765625, + "logps/rejected": -271.7557373046875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.012016296386719, + "rewards/margins": 6.8424296379089355, + "rewards/rejected": -16.854446411132812, + "step": 15884 + }, + { + "epoch": 2.47, + "learning_rate": 2.4966315680273872e-06, + "logits/chosen": -1.4880412817001343, + "logits/rejected": -2.7203991413116455, + "logps/chosen": -232.35000610351562, + "logps/rejected": -432.14337158203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.405008316040039, + "rewards/margins": 8.143144607543945, + "rewards/rejected": -17.548152923583984, + "step": 15885 + }, + { + "epoch": 2.47, + "learning_rate": 2.495898127496239e-06, + "logits/chosen": -1.756923794746399, + "logits/rejected": -2.67629337310791, + "logps/chosen": -161.87124633789062, + "logps/rejected": -331.6385192871094, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.30433464050293, + "rewards/margins": 6.351863384246826, + "rewards/rejected": -18.65619659423828, + "step": 15886 + }, + { + "epoch": 2.47, + "learning_rate": 2.495164686965092e-06, + "logits/chosen": -1.4888986349105835, + "logits/rejected": -2.7469868659973145, + "logps/chosen": -356.364990234375, + "logps/rejected": -806.8614501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.47270393371582, + "rewards/margins": 15.733421325683594, + "rewards/rejected": -25.206125259399414, + "step": 15887 + }, + { + "epoch": 2.47, + "learning_rate": 2.4944312464339437e-06, + "logits/chosen": -2.5313756465911865, + "logits/rejected": -2.572431802749634, + "logps/chosen": -248.17967224121094, + "logps/rejected": -427.34124755859375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.065235137939453, + "rewards/margins": 9.13874626159668, + "rewards/rejected": -19.203981399536133, + "step": 15888 + }, + { + "epoch": 2.47, + "learning_rate": 2.493697805902796e-06, + "logits/chosen": -2.6848151683807373, + "logits/rejected": -3.001985788345337, + "logps/chosen": -89.84407043457031, + "logps/rejected": -383.17218017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.512872695922852, + "rewards/margins": 14.764274597167969, + "rewards/rejected": -22.27714729309082, + "step": 15889 + }, + { + "epoch": 2.47, + "learning_rate": 2.492964365371648e-06, + "logits/chosen": -2.2289516925811768, + "logits/rejected": -2.637808084487915, + "logps/chosen": -464.5123596191406, + "logps/rejected": -616.903564453125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.05136489868164, + "rewards/margins": 10.097295761108398, + "rewards/rejected": -23.14866065979004, + "step": 15890 + }, + { + "epoch": 2.47, + "learning_rate": 2.4922309248405e-06, + "logits/chosen": -2.9192376136779785, + "logits/rejected": -3.0134785175323486, + "logps/chosen": -196.23635864257812, + "logps/rejected": -338.946533203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.637234687805176, + "rewards/margins": 9.910467147827148, + "rewards/rejected": -18.547700881958008, + "step": 15891 + }, + { + "epoch": 2.47, + "learning_rate": 2.491497484309352e-06, + "logits/chosen": -1.9204175472259521, + "logits/rejected": -2.27316951751709, + "logps/chosen": -227.5797882080078, + "logps/rejected": -570.44384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.420050621032715, + "rewards/margins": 13.185684204101562, + "rewards/rejected": -20.605735778808594, + "step": 15892 + }, + { + "epoch": 2.47, + "learning_rate": 2.4907640437782044e-06, + "logits/chosen": -0.8268027305603027, + "logits/rejected": -2.163964033126831, + "logps/chosen": -298.57135009765625, + "logps/rejected": -612.362548828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.91655445098877, + "rewards/margins": 12.184247016906738, + "rewards/rejected": -22.100801467895508, + "step": 15893 + }, + { + "epoch": 2.47, + "learning_rate": 2.4900306032470563e-06, + "logits/chosen": -2.5061891078948975, + "logits/rejected": -2.2412405014038086, + "logps/chosen": -296.1060485839844, + "logps/rejected": -419.4183349609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.588208198547363, + "rewards/margins": 10.361474990844727, + "rewards/rejected": -18.949684143066406, + "step": 15894 + }, + { + "epoch": 2.47, + "learning_rate": 2.4892971627159086e-06, + "logits/chosen": -2.9921069145202637, + "logits/rejected": -2.9245996475219727, + "logps/chosen": -223.27476501464844, + "logps/rejected": -401.27313232421875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.168127059936523, + "rewards/margins": 9.056880950927734, + "rewards/rejected": -18.225008010864258, + "step": 15895 + }, + { + "epoch": 2.47, + "learning_rate": 2.488563722184761e-06, + "logits/chosen": -2.215585470199585, + "logits/rejected": -2.6893036365509033, + "logps/chosen": -473.8860168457031, + "logps/rejected": -543.8770141601562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.947026252746582, + "rewards/margins": 9.455343246459961, + "rewards/rejected": -16.40237045288086, + "step": 15896 + }, + { + "epoch": 2.47, + "learning_rate": 2.4878302816536127e-06, + "logits/chosen": -2.756382465362549, + "logits/rejected": -0.48079991340637207, + "logps/chosen": -386.44317626953125, + "logps/rejected": -236.27279663085938, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.349335670471191, + "rewards/margins": 5.858343124389648, + "rewards/rejected": -15.207679748535156, + "step": 15897 + }, + { + "epoch": 2.47, + "learning_rate": 2.487096841122465e-06, + "logits/chosen": -2.469548225402832, + "logits/rejected": -2.5195159912109375, + "logps/chosen": -399.35919189453125, + "logps/rejected": -499.7886962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.581147193908691, + "rewards/margins": 11.306875228881836, + "rewards/rejected": -20.888023376464844, + "step": 15898 + }, + { + "epoch": 2.47, + "learning_rate": 2.486363400591317e-06, + "logits/chosen": -1.1081937551498413, + "logits/rejected": -1.4048957824707031, + "logps/chosen": -178.39129638671875, + "logps/rejected": -366.3049011230469, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.334842681884766, + "rewards/margins": 10.530893325805664, + "rewards/rejected": -18.865734100341797, + "step": 15899 + }, + { + "epoch": 2.47, + "learning_rate": 2.4856299600601692e-06, + "logits/chosen": -2.834453821182251, + "logits/rejected": -2.9053995609283447, + "logps/chosen": -345.0960998535156, + "logps/rejected": -551.658935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.043322563171387, + "rewards/margins": 13.345233917236328, + "rewards/rejected": -19.38855743408203, + "step": 15900 + }, + { + "epoch": 2.47, + "learning_rate": 2.484896519529021e-06, + "logits/chosen": -1.8046225309371948, + "logits/rejected": -2.457430601119995, + "logps/chosen": -256.0623474121094, + "logps/rejected": -566.0147094726562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.507086277008057, + "rewards/margins": 15.80443000793457, + "rewards/rejected": -23.31151580810547, + "step": 15901 + }, + { + "epoch": 2.47, + "learning_rate": 2.4841630789978734e-06, + "logits/chosen": -2.6971590518951416, + "logits/rejected": -2.849762439727783, + "logps/chosen": -118.20235443115234, + "logps/rejected": -171.23353576660156, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.645988941192627, + "rewards/margins": 6.477546215057373, + "rewards/rejected": -11.12353515625, + "step": 15902 + }, + { + "epoch": 2.47, + "learning_rate": 2.4834296384667253e-06, + "logits/chosen": -2.7814793586730957, + "logits/rejected": -2.9469168186187744, + "logps/chosen": -148.43341064453125, + "logps/rejected": -393.572265625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.994039535522461, + "rewards/margins": 10.520835876464844, + "rewards/rejected": -18.514877319335938, + "step": 15903 + }, + { + "epoch": 2.47, + "learning_rate": 2.4826961979355776e-06, + "logits/chosen": -2.0320374965667725, + "logits/rejected": -2.5969700813293457, + "logps/chosen": -228.87232971191406, + "logps/rejected": -412.227294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.106929779052734, + "rewards/margins": 12.897075653076172, + "rewards/rejected": -22.004005432128906, + "step": 15904 + }, + { + "epoch": 2.47, + "learning_rate": 2.48196275740443e-06, + "logits/chosen": -2.3434431552886963, + "logits/rejected": -2.9379560947418213, + "logps/chosen": -349.8665466308594, + "logps/rejected": -513.3999633789062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.447291851043701, + "rewards/margins": 12.42360782623291, + "rewards/rejected": -18.870899200439453, + "step": 15905 + }, + { + "epoch": 2.47, + "learning_rate": 2.4812293168732818e-06, + "logits/chosen": -1.7582752704620361, + "logits/rejected": -2.4998857975006104, + "logps/chosen": -262.8155517578125, + "logps/rejected": -361.154052734375, + "loss": 0.0556, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.755050659179688, + "rewards/margins": 3.2992734909057617, + "rewards/rejected": -17.054325103759766, + "step": 15906 + }, + { + "epoch": 2.47, + "learning_rate": 2.480495876342134e-06, + "logits/chosen": -2.7350990772247314, + "logits/rejected": -2.8613884449005127, + "logps/chosen": -262.1820068359375, + "logps/rejected": -438.0630798339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9245829582214355, + "rewards/margins": 13.364347457885742, + "rewards/rejected": -20.288930892944336, + "step": 15907 + }, + { + "epoch": 2.47, + "learning_rate": 2.479762435810986e-06, + "logits/chosen": -1.8349201679229736, + "logits/rejected": -2.551278829574585, + "logps/chosen": -186.53237915039062, + "logps/rejected": -334.81024169921875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.981595993041992, + "rewards/margins": 8.212972640991211, + "rewards/rejected": -19.194568634033203, + "step": 15908 + }, + { + "epoch": 2.47, + "learning_rate": 2.4790289952798382e-06, + "logits/chosen": -1.3266515731811523, + "logits/rejected": -2.850001096725464, + "logps/chosen": -180.12249755859375, + "logps/rejected": -741.7066650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.501855850219727, + "rewards/margins": 12.29063892364502, + "rewards/rejected": -21.792495727539062, + "step": 15909 + }, + { + "epoch": 2.47, + "learning_rate": 2.47829555474869e-06, + "logits/chosen": -1.7464885711669922, + "logits/rejected": -2.526228904724121, + "logps/chosen": -271.5244140625, + "logps/rejected": -463.0242614746094, + "loss": 0.3258, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.393135070800781, + "rewards/margins": 7.7364912033081055, + "rewards/rejected": -17.129627227783203, + "step": 15910 + }, + { + "epoch": 2.47, + "learning_rate": 2.4775621142175424e-06, + "logits/chosen": -1.557504415512085, + "logits/rejected": -2.4827699661254883, + "logps/chosen": -185.73263549804688, + "logps/rejected": -370.57916259765625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.216511726379395, + "rewards/margins": 7.892938137054443, + "rewards/rejected": -18.109451293945312, + "step": 15911 + }, + { + "epoch": 2.47, + "learning_rate": 2.4768286736863947e-06, + "logits/chosen": -2.3884105682373047, + "logits/rejected": -3.0463480949401855, + "logps/chosen": -235.5082550048828, + "logps/rejected": -391.60931396484375, + "loss": 2.5607, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.327381134033203, + "rewards/margins": 3.6911020278930664, + "rewards/rejected": -14.01848316192627, + "step": 15912 + }, + { + "epoch": 2.47, + "learning_rate": 2.476095233155247e-06, + "logits/chosen": -2.5446600914001465, + "logits/rejected": -3.068801164627075, + "logps/chosen": -150.5098876953125, + "logps/rejected": -253.50238037109375, + "loss": 0.081, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.651811599731445, + "rewards/margins": 5.464175701141357, + "rewards/rejected": -15.115986824035645, + "step": 15913 + }, + { + "epoch": 2.47, + "learning_rate": 2.475361792624099e-06, + "logits/chosen": -1.999823808670044, + "logits/rejected": -2.974703550338745, + "logps/chosen": -146.69471740722656, + "logps/rejected": -329.3098449707031, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.028332233428955, + "rewards/margins": 7.898686408996582, + "rewards/rejected": -13.927019119262695, + "step": 15914 + }, + { + "epoch": 2.48, + "learning_rate": 2.4746283520929508e-06, + "logits/chosen": -1.6731613874435425, + "logits/rejected": -2.766056776046753, + "logps/chosen": -218.70672607421875, + "logps/rejected": -554.263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.027327537536621, + "rewards/margins": 12.22549057006836, + "rewards/rejected": -21.252817153930664, + "step": 15915 + }, + { + "epoch": 2.48, + "learning_rate": 2.473894911561803e-06, + "logits/chosen": -2.844207763671875, + "logits/rejected": -3.0700337886810303, + "logps/chosen": -210.8555908203125, + "logps/rejected": -387.46759033203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.614168643951416, + "rewards/margins": 11.256790161132812, + "rewards/rejected": -18.87095832824707, + "step": 15916 + }, + { + "epoch": 2.48, + "learning_rate": 2.473161471030655e-06, + "logits/chosen": -2.6419007778167725, + "logits/rejected": -1.8473049402236938, + "logps/chosen": -194.69967651367188, + "logps/rejected": -192.2332763671875, + "loss": 0.0524, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.205291748046875, + "rewards/margins": 2.956435203552246, + "rewards/rejected": -16.161725997924805, + "step": 15917 + }, + { + "epoch": 2.48, + "learning_rate": 2.4724280304995073e-06, + "logits/chosen": -2.159651041030884, + "logits/rejected": -2.7245852947235107, + "logps/chosen": -123.09342956542969, + "logps/rejected": -365.3319091796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.647836685180664, + "rewards/margins": 10.16905403137207, + "rewards/rejected": -17.816890716552734, + "step": 15918 + }, + { + "epoch": 2.48, + "learning_rate": 2.471694589968359e-06, + "logits/chosen": -2.619408369064331, + "logits/rejected": -2.854901075363159, + "logps/chosen": -300.54315185546875, + "logps/rejected": -492.6689453125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.22865104675293, + "rewards/margins": 7.394920349121094, + "rewards/rejected": -18.623571395874023, + "step": 15919 + }, + { + "epoch": 2.48, + "learning_rate": 2.4709611494372114e-06, + "logits/chosen": -1.8496301174163818, + "logits/rejected": -2.5241222381591797, + "logps/chosen": -409.3594665527344, + "logps/rejected": -618.9511108398438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.650413513183594, + "rewards/margins": 13.588212013244629, + "rewards/rejected": -23.23862648010254, + "step": 15920 + }, + { + "epoch": 2.48, + "learning_rate": 2.4702277089060637e-06, + "logits/chosen": -2.1328301429748535, + "logits/rejected": -2.6691441535949707, + "logps/chosen": -324.9835510253906, + "logps/rejected": -546.9085693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.053488731384277, + "rewards/margins": 12.770261764526367, + "rewards/rejected": -20.823749542236328, + "step": 15921 + }, + { + "epoch": 2.48, + "learning_rate": 2.469494268374916e-06, + "logits/chosen": -2.677666425704956, + "logits/rejected": -2.576791286468506, + "logps/chosen": -529.4276123046875, + "logps/rejected": -488.22314453125, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.6708984375, + "rewards/margins": 5.350211143493652, + "rewards/rejected": -15.021109580993652, + "step": 15922 + }, + { + "epoch": 2.48, + "learning_rate": 2.468760827843768e-06, + "logits/chosen": -2.652435779571533, + "logits/rejected": -2.9398858547210693, + "logps/chosen": -183.39527893066406, + "logps/rejected": -285.65057373046875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.784961700439453, + "rewards/margins": 7.154983997344971, + "rewards/rejected": -14.939945220947266, + "step": 15923 + }, + { + "epoch": 2.48, + "learning_rate": 2.46802738731262e-06, + "logits/chosen": -1.719021201133728, + "logits/rejected": -2.4133384227752686, + "logps/chosen": -237.80654907226562, + "logps/rejected": -293.8569030761719, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.816893577575684, + "rewards/margins": 6.060376167297363, + "rewards/rejected": -14.877269744873047, + "step": 15924 + }, + { + "epoch": 2.48, + "learning_rate": 2.467293946781472e-06, + "logits/chosen": -2.094632863998413, + "logits/rejected": -2.936222553253174, + "logps/chosen": -134.9755401611328, + "logps/rejected": -376.92059326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.736098289489746, + "rewards/margins": 9.964112281799316, + "rewards/rejected": -17.700210571289062, + "step": 15925 + }, + { + "epoch": 2.48, + "learning_rate": 2.466560506250324e-06, + "logits/chosen": -1.996612787246704, + "logits/rejected": -2.7408273220062256, + "logps/chosen": -165.4159698486328, + "logps/rejected": -294.81201171875, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.208568572998047, + "rewards/margins": 5.42649507522583, + "rewards/rejected": -17.63506317138672, + "step": 15926 + }, + { + "epoch": 2.48, + "learning_rate": 2.4658270657191763e-06, + "logits/chosen": -2.157451629638672, + "logits/rejected": -2.763781785964966, + "logps/chosen": -182.26837158203125, + "logps/rejected": -292.31597900390625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.652259826660156, + "rewards/margins": 6.629524230957031, + "rewards/rejected": -15.281784057617188, + "step": 15927 + }, + { + "epoch": 2.48, + "learning_rate": 2.465093625188028e-06, + "logits/chosen": -2.2659497261047363, + "logits/rejected": -2.429063081741333, + "logps/chosen": -205.06668090820312, + "logps/rejected": -449.0968933105469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.314176559448242, + "rewards/margins": 13.354615211486816, + "rewards/rejected": -20.668790817260742, + "step": 15928 + }, + { + "epoch": 2.48, + "learning_rate": 2.464360184656881e-06, + "logits/chosen": -1.1626486778259277, + "logits/rejected": -2.711827278137207, + "logps/chosen": -216.869873046875, + "logps/rejected": -514.7708740234375, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.318707466125488, + "rewards/margins": 6.412345886230469, + "rewards/rejected": -18.73105239868164, + "step": 15929 + }, + { + "epoch": 2.48, + "learning_rate": 2.4636267441257328e-06, + "logits/chosen": -2.7018649578094482, + "logits/rejected": -2.8733556270599365, + "logps/chosen": -947.8079223632812, + "logps/rejected": -852.9722900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3579301834106445, + "rewards/margins": 12.620414733886719, + "rewards/rejected": -19.978343963623047, + "step": 15930 + }, + { + "epoch": 2.48, + "learning_rate": 2.462893303594585e-06, + "logits/chosen": -2.668138265609741, + "logits/rejected": -2.7753043174743652, + "logps/chosen": -97.15737915039062, + "logps/rejected": -246.81585693359375, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.279293060302734, + "rewards/margins": 5.696502208709717, + "rewards/rejected": -13.97579574584961, + "step": 15931 + }, + { + "epoch": 2.48, + "learning_rate": 2.462159863063437e-06, + "logits/chosen": -2.6340227127075195, + "logits/rejected": -2.2232444286346436, + "logps/chosen": -355.890380859375, + "logps/rejected": -459.3273010253906, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.502578735351562, + "rewards/margins": 6.699397563934326, + "rewards/rejected": -16.201976776123047, + "step": 15932 + }, + { + "epoch": 2.48, + "learning_rate": 2.4614264225322892e-06, + "logits/chosen": -2.350937604904175, + "logits/rejected": -2.7443439960479736, + "logps/chosen": -210.75108337402344, + "logps/rejected": -506.7709655761719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.55004596710205, + "rewards/margins": 10.699348449707031, + "rewards/rejected": -20.249393463134766, + "step": 15933 + }, + { + "epoch": 2.48, + "learning_rate": 2.460692982001141e-06, + "logits/chosen": -2.612464189529419, + "logits/rejected": -1.8258857727050781, + "logps/chosen": -317.10113525390625, + "logps/rejected": -348.2245788574219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.660091400146484, + "rewards/margins": 10.204395294189453, + "rewards/rejected": -18.864486694335938, + "step": 15934 + }, + { + "epoch": 2.48, + "learning_rate": 2.459959541469993e-06, + "logits/chosen": -1.988287329673767, + "logits/rejected": -2.83833909034729, + "logps/chosen": -297.8003234863281, + "logps/rejected": -638.6674194335938, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.899301528930664, + "rewards/margins": 8.781103134155273, + "rewards/rejected": -20.680404663085938, + "step": 15935 + }, + { + "epoch": 2.48, + "learning_rate": 2.4592261009388453e-06, + "logits/chosen": -2.8292667865753174, + "logits/rejected": -2.9280593395233154, + "logps/chosen": -77.99214935302734, + "logps/rejected": -240.06869506835938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4863691329956055, + "rewards/margins": 10.111822128295898, + "rewards/rejected": -15.59819221496582, + "step": 15936 + }, + { + "epoch": 2.48, + "learning_rate": 2.4584926604076976e-06, + "logits/chosen": -2.6522905826568604, + "logits/rejected": -2.914978265762329, + "logps/chosen": -240.34292602539062, + "logps/rejected": -196.55088806152344, + "loss": 0.0347, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.4487886428833, + "rewards/margins": 7.052629470825195, + "rewards/rejected": -16.501419067382812, + "step": 15937 + }, + { + "epoch": 2.48, + "learning_rate": 2.45775921987655e-06, + "logits/chosen": -1.259676218032837, + "logits/rejected": -2.615877389907837, + "logps/chosen": -166.52154541015625, + "logps/rejected": -435.4145202636719, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.239572525024414, + "rewards/margins": 6.810824871063232, + "rewards/rejected": -18.050397872924805, + "step": 15938 + }, + { + "epoch": 2.48, + "learning_rate": 2.4570257793454018e-06, + "logits/chosen": -1.657760739326477, + "logits/rejected": -2.6575276851654053, + "logps/chosen": -168.39109802246094, + "logps/rejected": -541.5396118164062, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.300433158874512, + "rewards/margins": 12.742084503173828, + "rewards/rejected": -20.042516708374023, + "step": 15939 + }, + { + "epoch": 2.48, + "learning_rate": 2.456292338814254e-06, + "logits/chosen": -2.505289077758789, + "logits/rejected": -2.859006643295288, + "logps/chosen": -379.28912353515625, + "logps/rejected": -353.4329833984375, + "loss": 0.0758, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.533291816711426, + "rewards/margins": 4.95424222946167, + "rewards/rejected": -15.487533569335938, + "step": 15940 + }, + { + "epoch": 2.48, + "learning_rate": 2.455558898283106e-06, + "logits/chosen": -2.68795108795166, + "logits/rejected": -2.2529122829437256, + "logps/chosen": -278.0555114746094, + "logps/rejected": -213.30067443847656, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.905633926391602, + "rewards/margins": 6.470154762268066, + "rewards/rejected": -14.375788688659668, + "step": 15941 + }, + { + "epoch": 2.48, + "learning_rate": 2.4548254577519583e-06, + "logits/chosen": -1.5044982433319092, + "logits/rejected": -2.627253293991089, + "logps/chosen": -273.15997314453125, + "logps/rejected": -473.4571838378906, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.02147102355957, + "rewards/margins": 9.165226936340332, + "rewards/rejected": -20.18669891357422, + "step": 15942 + }, + { + "epoch": 2.48, + "learning_rate": 2.45409201722081e-06, + "logits/chosen": -2.7509684562683105, + "logits/rejected": -1.4760169982910156, + "logps/chosen": -617.67822265625, + "logps/rejected": -641.9993896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.623110294342041, + "rewards/margins": 14.265769004821777, + "rewards/rejected": -17.888879776000977, + "step": 15943 + }, + { + "epoch": 2.48, + "learning_rate": 2.453358576689662e-06, + "logits/chosen": -1.984876036643982, + "logits/rejected": -2.247082233428955, + "logps/chosen": -160.2863311767578, + "logps/rejected": -334.4565734863281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.162393569946289, + "rewards/margins": 8.89942741394043, + "rewards/rejected": -18.06182098388672, + "step": 15944 + }, + { + "epoch": 2.48, + "learning_rate": 2.4526251361585143e-06, + "logits/chosen": -2.490422248840332, + "logits/rejected": -2.6513359546661377, + "logps/chosen": -297.5050048828125, + "logps/rejected": -340.8013916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.1432523727417, + "rewards/margins": 11.427785873413086, + "rewards/rejected": -19.5710391998291, + "step": 15945 + }, + { + "epoch": 2.48, + "learning_rate": 2.4518916956273666e-06, + "logits/chosen": -2.5507359504699707, + "logits/rejected": -2.9808058738708496, + "logps/chosen": -235.3571319580078, + "logps/rejected": -554.6002197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.324761390686035, + "rewards/margins": 14.041793823242188, + "rewards/rejected": -21.366554260253906, + "step": 15946 + }, + { + "epoch": 2.48, + "learning_rate": 2.451158255096219e-06, + "logits/chosen": -2.4523797035217285, + "logits/rejected": -2.8193953037261963, + "logps/chosen": -636.845703125, + "logps/rejected": -483.9104919433594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.339336395263672, + "rewards/margins": 10.934566497802734, + "rewards/rejected": -21.273902893066406, + "step": 15947 + }, + { + "epoch": 2.48, + "learning_rate": 2.450424814565071e-06, + "logits/chosen": -2.406386375427246, + "logits/rejected": -2.874305248260498, + "logps/chosen": -154.2272491455078, + "logps/rejected": -441.415283203125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.132421493530273, + "rewards/margins": 10.21135139465332, + "rewards/rejected": -18.343772888183594, + "step": 15948 + }, + { + "epoch": 2.48, + "learning_rate": 2.449691374033923e-06, + "logits/chosen": -2.91141414642334, + "logits/rejected": -2.901658058166504, + "logps/chosen": -156.9635009765625, + "logps/rejected": -383.5362548828125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.964987754821777, + "rewards/margins": 8.69031810760498, + "rewards/rejected": -18.655305862426758, + "step": 15949 + }, + { + "epoch": 2.48, + "learning_rate": 2.448957933502775e-06, + "logits/chosen": -2.879690408706665, + "logits/rejected": -2.923738956451416, + "logps/chosen": -149.86062622070312, + "logps/rejected": -267.46112060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.666875839233398, + "rewards/margins": 10.974027633666992, + "rewards/rejected": -17.64090347290039, + "step": 15950 + }, + { + "epoch": 2.48, + "learning_rate": 2.4482244929716273e-06, + "logits/chosen": -2.2345223426818848, + "logits/rejected": -2.8902390003204346, + "logps/chosen": -263.0167236328125, + "logps/rejected": -389.33441162109375, + "loss": 0.7526, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.946375846862793, + "rewards/margins": 5.224903106689453, + "rewards/rejected": -12.17127799987793, + "step": 15951 + }, + { + "epoch": 2.48, + "learning_rate": 2.447491052440479e-06, + "logits/chosen": -2.342454433441162, + "logits/rejected": -2.5083606243133545, + "logps/chosen": -193.091064453125, + "logps/rejected": -301.0887451171875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.07693099975586, + "rewards/margins": 6.4290008544921875, + "rewards/rejected": -15.505931854248047, + "step": 15952 + }, + { + "epoch": 2.48, + "learning_rate": 2.446757611909331e-06, + "logits/chosen": -1.5911370515823364, + "logits/rejected": -2.7211475372314453, + "logps/chosen": -219.94964599609375, + "logps/rejected": -402.64996337890625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.692960739135742, + "rewards/margins": 9.73800277709961, + "rewards/rejected": -20.43096351623535, + "step": 15953 + }, + { + "epoch": 2.48, + "learning_rate": 2.4460241713781838e-06, + "logits/chosen": -2.796454668045044, + "logits/rejected": -2.83042573928833, + "logps/chosen": -378.78765869140625, + "logps/rejected": -374.987548828125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.520936965942383, + "rewards/margins": 6.855863571166992, + "rewards/rejected": -15.376800537109375, + "step": 15954 + }, + { + "epoch": 2.48, + "learning_rate": 2.4452907308470356e-06, + "logits/chosen": -1.3997101783752441, + "logits/rejected": -2.4864389896392822, + "logps/chosen": -174.08990478515625, + "logps/rejected": -444.023193359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.815347671508789, + "rewards/margins": 10.091135025024414, + "rewards/rejected": -21.906482696533203, + "step": 15955 + }, + { + "epoch": 2.48, + "learning_rate": 2.444557290315888e-06, + "logits/chosen": -2.197521924972534, + "logits/rejected": -1.8638558387756348, + "logps/chosen": -823.4708251953125, + "logps/rejected": -707.0028686523438, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.874207496643066, + "rewards/margins": 8.821700096130371, + "rewards/rejected": -18.695907592773438, + "step": 15956 + }, + { + "epoch": 2.48, + "learning_rate": 2.44382384978474e-06, + "logits/chosen": -2.870217800140381, + "logits/rejected": -2.2719953060150146, + "logps/chosen": -706.9933471679688, + "logps/rejected": -557.6156616210938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.328960418701172, + "rewards/margins": 9.641660690307617, + "rewards/rejected": -18.97062110900879, + "step": 15957 + }, + { + "epoch": 2.48, + "learning_rate": 2.443090409253592e-06, + "logits/chosen": -2.451272487640381, + "logits/rejected": -1.718121886253357, + "logps/chosen": -387.09600830078125, + "logps/rejected": -432.98675537109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.89686393737793, + "rewards/margins": 9.890263557434082, + "rewards/rejected": -18.787128448486328, + "step": 15958 + }, + { + "epoch": 2.48, + "learning_rate": 2.442356968722444e-06, + "logits/chosen": -0.8357003927230835, + "logits/rejected": -2.606715202331543, + "logps/chosen": -180.63189697265625, + "logps/rejected": -609.6110229492188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.305364608764648, + "rewards/margins": 10.143852233886719, + "rewards/rejected": -19.449216842651367, + "step": 15959 + }, + { + "epoch": 2.48, + "learning_rate": 2.4416235281912963e-06, + "logits/chosen": -2.1577396392822266, + "logits/rejected": -2.842238664627075, + "logps/chosen": -211.1455535888672, + "logps/rejected": -449.7622375488281, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.70595932006836, + "rewards/margins": 7.654230117797852, + "rewards/rejected": -19.36018943786621, + "step": 15960 + }, + { + "epoch": 2.48, + "learning_rate": 2.440890087660148e-06, + "logits/chosen": -2.9292678833007812, + "logits/rejected": -2.638502597808838, + "logps/chosen": -180.235107421875, + "logps/rejected": -243.46917724609375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.212869644165039, + "rewards/margins": 8.452001571655273, + "rewards/rejected": -20.664871215820312, + "step": 15961 + }, + { + "epoch": 2.48, + "learning_rate": 2.4401566471290005e-06, + "logits/chosen": -3.0175821781158447, + "logits/rejected": -2.8751449584960938, + "logps/chosen": -150.9595489501953, + "logps/rejected": -328.4107971191406, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.133384704589844, + "rewards/margins": 8.566274642944336, + "rewards/rejected": -17.699661254882812, + "step": 15962 + }, + { + "epoch": 2.48, + "learning_rate": 2.439423206597853e-06, + "logits/chosen": -2.008634090423584, + "logits/rejected": -2.813383102416992, + "logps/chosen": -397.0924377441406, + "logps/rejected": -597.9034423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.2407865524292, + "rewards/margins": 10.81178092956543, + "rewards/rejected": -19.052566528320312, + "step": 15963 + }, + { + "epoch": 2.48, + "learning_rate": 2.4386897660667047e-06, + "logits/chosen": -2.941953659057617, + "logits/rejected": -2.83418607711792, + "logps/chosen": -260.16241455078125, + "logps/rejected": -348.01708984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.941318035125732, + "rewards/margins": 8.698518753051758, + "rewards/rejected": -16.639835357666016, + "step": 15964 + }, + { + "epoch": 2.48, + "learning_rate": 2.437956325535557e-06, + "logits/chosen": -2.820822238922119, + "logits/rejected": -2.1445693969726562, + "logps/chosen": -481.396484375, + "logps/rejected": -430.490966796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.82341480255127, + "rewards/margins": 8.575387954711914, + "rewards/rejected": -17.3988037109375, + "step": 15965 + }, + { + "epoch": 2.48, + "learning_rate": 2.437222885004409e-06, + "logits/chosen": -1.9355266094207764, + "logits/rejected": -2.685974359512329, + "logps/chosen": -220.71658325195312, + "logps/rejected": -458.0866394042969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.212730407714844, + "rewards/margins": 11.8223295211792, + "rewards/rejected": -21.03506088256836, + "step": 15966 + }, + { + "epoch": 2.48, + "learning_rate": 2.436489444473261e-06, + "logits/chosen": -2.3805668354034424, + "logits/rejected": -2.4137892723083496, + "logps/chosen": -384.2423095703125, + "logps/rejected": -460.1282043457031, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.966343402862549, + "rewards/margins": 13.753395080566406, + "rewards/rejected": -18.719738006591797, + "step": 15967 + }, + { + "epoch": 2.48, + "learning_rate": 2.435756003942113e-06, + "logits/chosen": -2.813143014907837, + "logits/rejected": -2.6597867012023926, + "logps/chosen": -868.2799072265625, + "logps/rejected": -668.2572631835938, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.137761116027832, + "rewards/margins": 11.213497161865234, + "rewards/rejected": -18.35125732421875, + "step": 15968 + }, + { + "epoch": 2.48, + "learning_rate": 2.4350225634109653e-06, + "logits/chosen": -2.093488931655884, + "logits/rejected": -2.459993839263916, + "logps/chosen": -282.18072509765625, + "logps/rejected": -505.9673156738281, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.204598426818848, + "rewards/margins": 15.431787490844727, + "rewards/rejected": -23.63638687133789, + "step": 15969 + }, + { + "epoch": 2.48, + "learning_rate": 2.4342891228798172e-06, + "logits/chosen": -2.8410046100616455, + "logits/rejected": -2.147548198699951, + "logps/chosen": -267.97967529296875, + "logps/rejected": -370.36199951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.610151290893555, + "rewards/margins": 12.665441513061523, + "rewards/rejected": -18.275592803955078, + "step": 15970 + }, + { + "epoch": 2.48, + "learning_rate": 2.43355568234867e-06, + "logits/chosen": -2.4675424098968506, + "logits/rejected": -2.954700469970703, + "logps/chosen": -97.0914306640625, + "logps/rejected": -271.6804504394531, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0239481925964355, + "rewards/margins": 8.598871231079102, + "rewards/rejected": -15.622819900512695, + "step": 15971 + }, + { + "epoch": 2.48, + "learning_rate": 2.432822241817522e-06, + "logits/chosen": -2.832329273223877, + "logits/rejected": -2.5546257495880127, + "logps/chosen": -450.3374328613281, + "logps/rejected": -423.83807373046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.90103006362915, + "rewards/margins": 9.578742980957031, + "rewards/rejected": -14.479772567749023, + "step": 15972 + }, + { + "epoch": 2.48, + "learning_rate": 2.4320888012863737e-06, + "logits/chosen": -2.915701150894165, + "logits/rejected": -2.908918857574463, + "logps/chosen": -147.69981384277344, + "logps/rejected": -337.4700927734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.59953498840332, + "rewards/margins": 13.077963829040527, + "rewards/rejected": -18.67749786376953, + "step": 15973 + }, + { + "epoch": 2.48, + "learning_rate": 2.431355360755226e-06, + "logits/chosen": -1.565443515777588, + "logits/rejected": -2.1347618103027344, + "logps/chosen": -220.55821228027344, + "logps/rejected": -429.41119384765625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.114029884338379, + "rewards/margins": 8.302352905273438, + "rewards/rejected": -17.4163818359375, + "step": 15974 + }, + { + "epoch": 2.48, + "learning_rate": 2.430621920224078e-06, + "logits/chosen": -2.8359556198120117, + "logits/rejected": -2.205693244934082, + "logps/chosen": -695.0382690429688, + "logps/rejected": -588.7503662109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.138666152954102, + "rewards/margins": 8.696419715881348, + "rewards/rejected": -16.835086822509766, + "step": 15975 + }, + { + "epoch": 2.48, + "learning_rate": 2.42988847969293e-06, + "logits/chosen": -2.3534796237945557, + "logits/rejected": -2.7479031085968018, + "logps/chosen": -291.1527099609375, + "logps/rejected": -273.4020080566406, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.039817810058594, + "rewards/margins": 5.942357540130615, + "rewards/rejected": -15.982175827026367, + "step": 15976 + }, + { + "epoch": 2.48, + "learning_rate": 2.429155039161782e-06, + "logits/chosen": -1.5915367603302002, + "logits/rejected": -2.7001450061798096, + "logps/chosen": -171.4581298828125, + "logps/rejected": -533.29443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.444259643554688, + "rewards/margins": 10.945131301879883, + "rewards/rejected": -23.389389038085938, + "step": 15977 + }, + { + "epoch": 2.48, + "learning_rate": 2.4284215986306344e-06, + "logits/chosen": -1.790667176246643, + "logits/rejected": -2.578662395477295, + "logps/chosen": -455.3310546875, + "logps/rejected": -1298.091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.62834358215332, + "rewards/margins": 19.205156326293945, + "rewards/rejected": -26.833499908447266, + "step": 15978 + }, + { + "epoch": 2.49, + "learning_rate": 2.4276881580994867e-06, + "logits/chosen": -2.1344289779663086, + "logits/rejected": -1.937747836112976, + "logps/chosen": -420.5501708984375, + "logps/rejected": -494.60400390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.241602897644043, + "rewards/margins": 11.091469764709473, + "rewards/rejected": -17.333072662353516, + "step": 15979 + }, + { + "epoch": 2.49, + "learning_rate": 2.426954717568339e-06, + "logits/chosen": -2.895803451538086, + "logits/rejected": -1.296918511390686, + "logps/chosen": -299.53131103515625, + "logps/rejected": -296.5229797363281, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.172367095947266, + "rewards/margins": 5.67714262008667, + "rewards/rejected": -14.849510192871094, + "step": 15980 + }, + { + "epoch": 2.49, + "learning_rate": 2.426221277037191e-06, + "logits/chosen": -2.655939817428589, + "logits/rejected": -2.1230580806732178, + "logps/chosen": -284.49652099609375, + "logps/rejected": -400.1976013183594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.38003158569336, + "rewards/margins": 13.012203216552734, + "rewards/rejected": -21.392234802246094, + "step": 15981 + }, + { + "epoch": 2.49, + "learning_rate": 2.425487836506043e-06, + "logits/chosen": -1.5827590227127075, + "logits/rejected": -2.6842048168182373, + "logps/chosen": -213.09555053710938, + "logps/rejected": -620.1626586914062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.622196197509766, + "rewards/margins": 11.609230041503906, + "rewards/rejected": -18.231426239013672, + "step": 15982 + }, + { + "epoch": 2.49, + "learning_rate": 2.424754395974895e-06, + "logits/chosen": -2.0211727619171143, + "logits/rejected": -2.8234360218048096, + "logps/chosen": -511.4141845703125, + "logps/rejected": -1041.499267578125, + "loss": 0.0522, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.70567798614502, + "rewards/margins": 4.177907943725586, + "rewards/rejected": -14.883585929870605, + "step": 15983 + }, + { + "epoch": 2.49, + "learning_rate": 2.424020955443747e-06, + "logits/chosen": -2.491675853729248, + "logits/rejected": -2.686424732208252, + "logps/chosen": -380.6876525878906, + "logps/rejected": -587.9586791992188, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.631573677062988, + "rewards/margins": 9.168410301208496, + "rewards/rejected": -20.799983978271484, + "step": 15984 + }, + { + "epoch": 2.49, + "learning_rate": 2.423287514912599e-06, + "logits/chosen": -2.3821725845336914, + "logits/rejected": -2.871206760406494, + "logps/chosen": -456.3635559082031, + "logps/rejected": -569.7452392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.128551483154297, + "rewards/margins": 12.86503791809082, + "rewards/rejected": -19.99359130859375, + "step": 15985 + }, + { + "epoch": 2.49, + "learning_rate": 2.422554074381451e-06, + "logits/chosen": -2.4347705841064453, + "logits/rejected": -2.3434600830078125, + "logps/chosen": -215.4674530029297, + "logps/rejected": -498.8214111328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.891358375549316, + "rewards/margins": 13.07885456085205, + "rewards/rejected": -20.970212936401367, + "step": 15986 + }, + { + "epoch": 2.49, + "learning_rate": 2.4218206338503034e-06, + "logits/chosen": -2.71378755569458, + "logits/rejected": -2.416872501373291, + "logps/chosen": -434.94482421875, + "logps/rejected": -509.59326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.53106689453125, + "rewards/margins": 11.016456604003906, + "rewards/rejected": -16.547523498535156, + "step": 15987 + }, + { + "epoch": 2.49, + "learning_rate": 2.4210871933191557e-06, + "logits/chosen": -2.956050157546997, + "logits/rejected": -2.149024248123169, + "logps/chosen": -276.4073486328125, + "logps/rejected": -151.3261260986328, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.313032150268555, + "rewards/margins": 6.110025405883789, + "rewards/rejected": -13.423057556152344, + "step": 15988 + }, + { + "epoch": 2.49, + "learning_rate": 2.420353752788008e-06, + "logits/chosen": -2.8686320781707764, + "logits/rejected": -2.564943552017212, + "logps/chosen": -582.7689208984375, + "logps/rejected": -768.315673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.245527267456055, + "rewards/margins": 14.877391815185547, + "rewards/rejected": -24.1229190826416, + "step": 15989 + }, + { + "epoch": 2.49, + "learning_rate": 2.41962031225686e-06, + "logits/chosen": -2.6688942909240723, + "logits/rejected": -2.4184563159942627, + "logps/chosen": -501.115478515625, + "logps/rejected": -564.399658203125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.093421936035156, + "rewards/margins": 9.848814964294434, + "rewards/rejected": -21.942237854003906, + "step": 15990 + }, + { + "epoch": 2.49, + "learning_rate": 2.418886871725712e-06, + "logits/chosen": -2.5834460258483887, + "logits/rejected": -2.8443639278411865, + "logps/chosen": -148.02273559570312, + "logps/rejected": -517.5501708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.039670944213867, + "rewards/margins": 14.566354751586914, + "rewards/rejected": -24.60602569580078, + "step": 15991 + }, + { + "epoch": 2.49, + "learning_rate": 2.418153431194564e-06, + "logits/chosen": -2.9171018600463867, + "logits/rejected": -2.8475072383880615, + "logps/chosen": -289.15191650390625, + "logps/rejected": -448.4617004394531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.192540168762207, + "rewards/margins": 12.602030754089355, + "rewards/rejected": -21.794570922851562, + "step": 15992 + }, + { + "epoch": 2.49, + "learning_rate": 2.417419990663416e-06, + "logits/chosen": -2.8822808265686035, + "logits/rejected": -2.7776870727539062, + "logps/chosen": -199.390625, + "logps/rejected": -187.34188842773438, + "loss": 0.6417, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.190509796142578, + "rewards/margins": 4.508811950683594, + "rewards/rejected": -11.699321746826172, + "step": 15993 + }, + { + "epoch": 2.49, + "learning_rate": 2.4166865501322682e-06, + "logits/chosen": -2.4234607219696045, + "logits/rejected": -2.785397529602051, + "logps/chosen": -106.87602233886719, + "logps/rejected": -244.63897705078125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.375226020812988, + "rewards/margins": 6.81768798828125, + "rewards/rejected": -15.192914009094238, + "step": 15994 + }, + { + "epoch": 2.49, + "learning_rate": 2.41595310960112e-06, + "logits/chosen": -1.8994332551956177, + "logits/rejected": -2.500565767288208, + "logps/chosen": -208.47930908203125, + "logps/rejected": -405.23052978515625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.376675605773926, + "rewards/margins": 6.3798418045043945, + "rewards/rejected": -15.75651741027832, + "step": 15995 + }, + { + "epoch": 2.49, + "learning_rate": 2.4152196690699724e-06, + "logits/chosen": -2.504204511642456, + "logits/rejected": -1.9175554513931274, + "logps/chosen": -522.5213012695312, + "logps/rejected": -571.400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.990274429321289, + "rewards/margins": 13.034309387207031, + "rewards/rejected": -21.02458381652832, + "step": 15996 + }, + { + "epoch": 2.49, + "learning_rate": 2.4144862285388247e-06, + "logits/chosen": -2.8960344791412354, + "logits/rejected": -2.972069025039673, + "logps/chosen": -174.50619506835938, + "logps/rejected": -326.19482421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.916275978088379, + "rewards/margins": 8.220864295959473, + "rewards/rejected": -15.137140274047852, + "step": 15997 + }, + { + "epoch": 2.49, + "learning_rate": 2.413752788007677e-06, + "logits/chosen": -2.0328104496002197, + "logits/rejected": -2.791238784790039, + "logps/chosen": -193.0997314453125, + "logps/rejected": -435.1199035644531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5786638259887695, + "rewards/margins": 10.015342712402344, + "rewards/rejected": -17.594005584716797, + "step": 15998 + }, + { + "epoch": 2.49, + "learning_rate": 2.413019347476529e-06, + "logits/chosen": -2.73498272895813, + "logits/rejected": -2.7202186584472656, + "logps/chosen": -316.7295227050781, + "logps/rejected": -520.9249267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.253961086273193, + "rewards/margins": 14.907503128051758, + "rewards/rejected": -19.16146469116211, + "step": 15999 + }, + { + "epoch": 2.49, + "learning_rate": 2.412285906945381e-06, + "logits/chosen": -2.7636380195617676, + "logits/rejected": -2.9894580841064453, + "logps/chosen": -108.8466567993164, + "logps/rejected": -334.1434326171875, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.520073413848877, + "rewards/margins": 8.140678405761719, + "rewards/rejected": -15.660751342773438, + "step": 16000 + }, + { + "epoch": 2.49, + "learning_rate": 2.411552466414233e-06, + "logits/chosen": -2.8614675998687744, + "logits/rejected": -2.8563272953033447, + "logps/chosen": -469.45013427734375, + "logps/rejected": -560.3908081054688, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.37917709350586, + "rewards/margins": 12.3961820602417, + "rewards/rejected": -20.775358200073242, + "step": 16001 + }, + { + "epoch": 2.49, + "learning_rate": 2.4108190258830854e-06, + "logits/chosen": -2.5401391983032227, + "logits/rejected": -2.613189935684204, + "logps/chosen": -133.9098663330078, + "logps/rejected": -408.3072509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.328859329223633, + "rewards/margins": 11.561137199401855, + "rewards/rejected": -17.889995574951172, + "step": 16002 + }, + { + "epoch": 2.49, + "learning_rate": 2.4100855853519372e-06, + "logits/chosen": -2.8810229301452637, + "logits/rejected": -1.908631682395935, + "logps/chosen": -600.6818237304688, + "logps/rejected": -552.9153442382812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.29069709777832, + "rewards/margins": 9.540599822998047, + "rewards/rejected": -19.831298828125, + "step": 16003 + }, + { + "epoch": 2.49, + "learning_rate": 2.409352144820789e-06, + "logits/chosen": -1.1130927801132202, + "logits/rejected": -2.6203701496124268, + "logps/chosen": -238.9819793701172, + "logps/rejected": -740.69775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.611784934997559, + "rewards/margins": 14.195358276367188, + "rewards/rejected": -22.807144165039062, + "step": 16004 + }, + { + "epoch": 2.49, + "learning_rate": 2.408618704289642e-06, + "logits/chosen": -3.0307908058166504, + "logits/rejected": -2.1957130432128906, + "logps/chosen": -379.35003662109375, + "logps/rejected": -323.71875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.371665954589844, + "rewards/margins": 8.885019302368164, + "rewards/rejected": -19.256685256958008, + "step": 16005 + }, + { + "epoch": 2.49, + "learning_rate": 2.4078852637584937e-06, + "logits/chosen": -2.86261248588562, + "logits/rejected": -2.4304137229919434, + "logps/chosen": -528.54931640625, + "logps/rejected": -538.51318359375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.30340576171875, + "rewards/margins": 8.164266586303711, + "rewards/rejected": -15.467672348022461, + "step": 16006 + }, + { + "epoch": 2.49, + "learning_rate": 2.407151823227346e-06, + "logits/chosen": -2.757633686065674, + "logits/rejected": -2.7454023361206055, + "logps/chosen": -138.60018920898438, + "logps/rejected": -328.94903564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.665491104125977, + "rewards/margins": 10.717034339904785, + "rewards/rejected": -19.382524490356445, + "step": 16007 + }, + { + "epoch": 2.49, + "learning_rate": 2.406418382696198e-06, + "logits/chosen": -1.5784285068511963, + "logits/rejected": -2.5290613174438477, + "logps/chosen": -256.6125183105469, + "logps/rejected": -564.564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.864730358123779, + "rewards/margins": 15.397313117980957, + "rewards/rejected": -20.262042999267578, + "step": 16008 + }, + { + "epoch": 2.49, + "learning_rate": 2.40568494216505e-06, + "logits/chosen": -2.642760992050171, + "logits/rejected": -1.5225014686584473, + "logps/chosen": -306.524658203125, + "logps/rejected": -402.8959045410156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.759190559387207, + "rewards/margins": 13.574670791625977, + "rewards/rejected": -20.3338623046875, + "step": 16009 + }, + { + "epoch": 2.49, + "learning_rate": 2.404951501633902e-06, + "logits/chosen": -2.4520089626312256, + "logits/rejected": -2.0195419788360596, + "logps/chosen": -255.0716552734375, + "logps/rejected": -368.962890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.214249610900879, + "rewards/margins": 9.312996864318848, + "rewards/rejected": -18.527246475219727, + "step": 16010 + }, + { + "epoch": 2.49, + "learning_rate": 2.4042180611027544e-06, + "logits/chosen": -2.8041367530822754, + "logits/rejected": -2.476341724395752, + "logps/chosen": -390.14910888671875, + "logps/rejected": -374.96270751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.148454666137695, + "rewards/margins": 11.935900688171387, + "rewards/rejected": -23.084354400634766, + "step": 16011 + }, + { + "epoch": 2.49, + "learning_rate": 2.4034846205716063e-06, + "logits/chosen": -1.5472602844238281, + "logits/rejected": -2.6727054119110107, + "logps/chosen": -224.36831665039062, + "logps/rejected": -476.0252380371094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.671566963195801, + "rewards/margins": 13.159750938415527, + "rewards/rejected": -20.831317901611328, + "step": 16012 + }, + { + "epoch": 2.49, + "learning_rate": 2.4027511800404586e-06, + "logits/chosen": -2.6891837120056152, + "logits/rejected": -2.1008167266845703, + "logps/chosen": -621.5306396484375, + "logps/rejected": -640.0526123046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.504646301269531, + "rewards/margins": 11.352487564086914, + "rewards/rejected": -21.857133865356445, + "step": 16013 + }, + { + "epoch": 2.49, + "learning_rate": 2.402017739509311e-06, + "logits/chosen": -2.4771037101745605, + "logits/rejected": -2.904819965362549, + "logps/chosen": -420.85498046875, + "logps/rejected": -521.5191650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.388314247131348, + "rewards/margins": 11.863384246826172, + "rewards/rejected": -20.251697540283203, + "step": 16014 + }, + { + "epoch": 2.49, + "learning_rate": 2.4012842989781627e-06, + "logits/chosen": -2.092241048812866, + "logits/rejected": -2.6566882133483887, + "logps/chosen": -340.7291259765625, + "logps/rejected": -805.0618896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.338774681091309, + "rewards/margins": 14.895999908447266, + "rewards/rejected": -25.23477554321289, + "step": 16015 + }, + { + "epoch": 2.49, + "learning_rate": 2.400550858447015e-06, + "logits/chosen": -1.8960679769515991, + "logits/rejected": -2.662832260131836, + "logps/chosen": -202.1273193359375, + "logps/rejected": -549.708251953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.336719512939453, + "rewards/margins": 9.914324760437012, + "rewards/rejected": -20.25104522705078, + "step": 16016 + }, + { + "epoch": 2.49, + "learning_rate": 2.399817417915867e-06, + "logits/chosen": -1.4593342542648315, + "logits/rejected": -2.7205827236175537, + "logps/chosen": -232.45596313476562, + "logps/rejected": -747.6282958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.687010765075684, + "rewards/margins": 12.898568153381348, + "rewards/rejected": -19.58557891845703, + "step": 16017 + }, + { + "epoch": 2.49, + "learning_rate": 2.3990839773847192e-06, + "logits/chosen": -1.6290467977523804, + "logits/rejected": -2.675229549407959, + "logps/chosen": -201.5587615966797, + "logps/rejected": -498.93896484375, + "loss": 0.0937, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.00566291809082, + "rewards/margins": 7.861880302429199, + "rewards/rejected": -19.867542266845703, + "step": 16018 + }, + { + "epoch": 2.49, + "learning_rate": 2.398350536853571e-06, + "logits/chosen": -1.9157887697219849, + "logits/rejected": -2.523216724395752, + "logps/chosen": -140.44052124023438, + "logps/rejected": -355.67181396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.02099895477295, + "rewards/margins": 10.332813262939453, + "rewards/rejected": -18.35381317138672, + "step": 16019 + }, + { + "epoch": 2.49, + "learning_rate": 2.3976170963224234e-06, + "logits/chosen": -2.284132242202759, + "logits/rejected": -2.605196714401245, + "logps/chosen": -288.19677734375, + "logps/rejected": -534.4185180664062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.972897529602051, + "rewards/margins": 10.552248001098633, + "rewards/rejected": -17.525146484375, + "step": 16020 + }, + { + "epoch": 2.49, + "learning_rate": 2.3968836557912753e-06, + "logits/chosen": -2.8695228099823, + "logits/rejected": -2.881840705871582, + "logps/chosen": -326.84979248046875, + "logps/rejected": -371.9891662597656, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.788352966308594, + "rewards/margins": 6.704543590545654, + "rewards/rejected": -18.492897033691406, + "step": 16021 + }, + { + "epoch": 2.49, + "learning_rate": 2.3961502152601276e-06, + "logits/chosen": -1.9529668092727661, + "logits/rejected": -2.6920711994171143, + "logps/chosen": -326.1192626953125, + "logps/rejected": -458.4022216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.567672729492188, + "rewards/margins": 12.457290649414062, + "rewards/rejected": -23.02496337890625, + "step": 16022 + }, + { + "epoch": 2.49, + "learning_rate": 2.39541677472898e-06, + "logits/chosen": -2.934201717376709, + "logits/rejected": -3.0662574768066406, + "logps/chosen": -77.72159576416016, + "logps/rejected": -411.93389892578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.561161994934082, + "rewards/margins": 10.821735382080078, + "rewards/rejected": -16.382898330688477, + "step": 16023 + }, + { + "epoch": 2.49, + "learning_rate": 2.3946833341978318e-06, + "logits/chosen": -1.6768877506256104, + "logits/rejected": -2.5621306896209717, + "logps/chosen": -186.3992919921875, + "logps/rejected": -403.72772216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.44202995300293, + "rewards/margins": 12.598722457885742, + "rewards/rejected": -19.040752410888672, + "step": 16024 + }, + { + "epoch": 2.49, + "learning_rate": 2.393949893666684e-06, + "logits/chosen": -2.5248053073883057, + "logits/rejected": -1.6502211093902588, + "logps/chosen": -252.27444458007812, + "logps/rejected": -402.98760986328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.904987335205078, + "rewards/margins": 9.606380462646484, + "rewards/rejected": -22.511367797851562, + "step": 16025 + }, + { + "epoch": 2.49, + "learning_rate": 2.393216453135536e-06, + "logits/chosen": -2.202165365219116, + "logits/rejected": -2.8374123573303223, + "logps/chosen": -841.0629272460938, + "logps/rejected": -847.1771850585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.610311508178711, + "rewards/margins": 14.202817916870117, + "rewards/rejected": -24.813129425048828, + "step": 16026 + }, + { + "epoch": 2.49, + "learning_rate": 2.3924830126043883e-06, + "logits/chosen": -2.2949373722076416, + "logits/rejected": -2.7061686515808105, + "logps/chosen": -255.91795349121094, + "logps/rejected": -307.33001708984375, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.95574951171875, + "rewards/margins": 6.5688323974609375, + "rewards/rejected": -15.524581909179688, + "step": 16027 + }, + { + "epoch": 2.49, + "learning_rate": 2.39174957207324e-06, + "logits/chosen": -1.3875322341918945, + "logits/rejected": -2.5241119861602783, + "logps/chosen": -307.264404296875, + "logps/rejected": -595.67626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.31325912475586, + "rewards/margins": 11.697677612304688, + "rewards/rejected": -21.010936737060547, + "step": 16028 + }, + { + "epoch": 2.49, + "learning_rate": 2.3910161315420924e-06, + "logits/chosen": -2.78886342048645, + "logits/rejected": -2.1251208782196045, + "logps/chosen": -406.2325439453125, + "logps/rejected": -336.8272705078125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.668010711669922, + "rewards/margins": 6.964138984680176, + "rewards/rejected": -13.632149696350098, + "step": 16029 + }, + { + "epoch": 2.49, + "learning_rate": 2.3902826910109447e-06, + "logits/chosen": -1.8857638835906982, + "logits/rejected": -2.7711944580078125, + "logps/chosen": -328.6294860839844, + "logps/rejected": -464.26837158203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.795040130615234, + "rewards/margins": 9.715286254882812, + "rewards/rejected": -16.510326385498047, + "step": 16030 + }, + { + "epoch": 2.49, + "learning_rate": 2.389549250479797e-06, + "logits/chosen": -1.061272144317627, + "logits/rejected": -2.2434468269348145, + "logps/chosen": -234.8717041015625, + "logps/rejected": -534.3997802734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.887375831604004, + "rewards/margins": 11.73487663269043, + "rewards/rejected": -19.62225341796875, + "step": 16031 + }, + { + "epoch": 2.49, + "learning_rate": 2.388815809948649e-06, + "logits/chosen": -1.009800910949707, + "logits/rejected": -1.699703574180603, + "logps/chosen": -200.9361114501953, + "logps/rejected": -687.9033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.123461723327637, + "rewards/margins": 14.259106636047363, + "rewards/rejected": -19.382568359375, + "step": 16032 + }, + { + "epoch": 2.49, + "learning_rate": 2.388082369417501e-06, + "logits/chosen": -2.6603028774261475, + "logits/rejected": -1.8799517154693604, + "logps/chosen": -484.95947265625, + "logps/rejected": -506.228271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.155228614807129, + "rewards/margins": 10.688770294189453, + "rewards/rejected": -21.843997955322266, + "step": 16033 + }, + { + "epoch": 2.49, + "learning_rate": 2.387348928886353e-06, + "logits/chosen": -1.3868478536605835, + "logits/rejected": -2.1581060886383057, + "logps/chosen": -217.71102905273438, + "logps/rejected": -398.90789794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.570520401000977, + "rewards/margins": 9.806604385375977, + "rewards/rejected": -16.377124786376953, + "step": 16034 + }, + { + "epoch": 2.49, + "learning_rate": 2.386615488355205e-06, + "logits/chosen": -1.8730491399765015, + "logits/rejected": -2.417454957962036, + "logps/chosen": -181.10787963867188, + "logps/rejected": -428.2110595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.349543571472168, + "rewards/margins": 11.432158470153809, + "rewards/rejected": -21.781702041625977, + "step": 16035 + }, + { + "epoch": 2.49, + "learning_rate": 2.3858820478240573e-06, + "logits/chosen": -2.7844724655151367, + "logits/rejected": -2.8706445693969727, + "logps/chosen": -342.020751953125, + "logps/rejected": -337.01568603515625, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.858121871948242, + "rewards/margins": 7.868070602416992, + "rewards/rejected": -16.726192474365234, + "step": 16036 + }, + { + "epoch": 2.49, + "learning_rate": 2.385148607292909e-06, + "logits/chosen": -2.779268264770508, + "logits/rejected": -2.8128409385681152, + "logps/chosen": -126.177490234375, + "logps/rejected": -426.6554870605469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.135835647583008, + "rewards/margins": 10.113188743591309, + "rewards/rejected": -20.2490234375, + "step": 16037 + }, + { + "epoch": 2.49, + "learning_rate": 2.3844151667617615e-06, + "logits/chosen": -2.1981732845306396, + "logits/rejected": -2.661168098449707, + "logps/chosen": -256.0027770996094, + "logps/rejected": -372.3169250488281, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.205986022949219, + "rewards/margins": 9.737022399902344, + "rewards/rejected": -19.943008422851562, + "step": 16038 + }, + { + "epoch": 2.49, + "learning_rate": 2.3836817262306138e-06, + "logits/chosen": -1.7959915399551392, + "logits/rejected": -2.6125495433807373, + "logps/chosen": -257.55352783203125, + "logps/rejected": -442.2911682128906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8278656005859375, + "rewards/margins": 11.56745719909668, + "rewards/rejected": -17.39532470703125, + "step": 16039 + }, + { + "epoch": 2.49, + "learning_rate": 2.382948285699466e-06, + "logits/chosen": -2.1935231685638428, + "logits/rejected": -2.8999104499816895, + "logps/chosen": -749.5411376953125, + "logps/rejected": -734.426513671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.267102241516113, + "rewards/margins": 10.631102561950684, + "rewards/rejected": -20.898204803466797, + "step": 16040 + }, + { + "epoch": 2.49, + "learning_rate": 2.382214845168318e-06, + "logits/chosen": -2.7624073028564453, + "logits/rejected": -3.031836986541748, + "logps/chosen": -169.85211181640625, + "logps/rejected": -284.2193603515625, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3566789627075195, + "rewards/margins": 8.098505973815918, + "rewards/rejected": -12.455184936523438, + "step": 16041 + }, + { + "epoch": 2.49, + "learning_rate": 2.38148140463717e-06, + "logits/chosen": -2.158801794052124, + "logits/rejected": -2.763051986694336, + "logps/chosen": -231.73776245117188, + "logps/rejected": -514.0918579101562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.041397094726562, + "rewards/margins": 14.722915649414062, + "rewards/rejected": -24.764312744140625, + "step": 16042 + }, + { + "epoch": 2.5, + "learning_rate": 2.380747964106022e-06, + "logits/chosen": -2.242136001586914, + "logits/rejected": -2.8977699279785156, + "logps/chosen": -233.40821838378906, + "logps/rejected": -609.1878662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.864847183227539, + "rewards/margins": 12.735689163208008, + "rewards/rejected": -20.600536346435547, + "step": 16043 + }, + { + "epoch": 2.5, + "learning_rate": 2.380014523574874e-06, + "logits/chosen": -2.5034704208374023, + "logits/rejected": -2.1001715660095215, + "logps/chosen": -223.60545349121094, + "logps/rejected": -334.7021789550781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.643925666809082, + "rewards/margins": 10.94166088104248, + "rewards/rejected": -21.585586547851562, + "step": 16044 + }, + { + "epoch": 2.5, + "learning_rate": 2.3792810830437263e-06, + "logits/chosen": -1.6456369161605835, + "logits/rejected": -2.489406108856201, + "logps/chosen": -237.73187255859375, + "logps/rejected": -488.34442138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.620603561401367, + "rewards/margins": 13.509847640991211, + "rewards/rejected": -22.130451202392578, + "step": 16045 + }, + { + "epoch": 2.5, + "learning_rate": 2.378547642512578e-06, + "logits/chosen": -2.0686216354370117, + "logits/rejected": -2.1047229766845703, + "logps/chosen": -1099.6358642578125, + "logps/rejected": -690.409912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.64772891998291, + "rewards/margins": 13.719411849975586, + "rewards/rejected": -23.36713981628418, + "step": 16046 + }, + { + "epoch": 2.5, + "learning_rate": 2.377814201981431e-06, + "logits/chosen": -2.518153429031372, + "logits/rejected": -2.8996422290802, + "logps/chosen": -156.31948852539062, + "logps/rejected": -321.66082763671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.15806770324707, + "rewards/margins": 9.387411117553711, + "rewards/rejected": -16.54547882080078, + "step": 16047 + }, + { + "epoch": 2.5, + "learning_rate": 2.3770807614502828e-06, + "logits/chosen": -2.891103744506836, + "logits/rejected": -1.8726328611373901, + "logps/chosen": -310.9174499511719, + "logps/rejected": -403.0748596191406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8083953857421875, + "rewards/margins": 12.581633567810059, + "rewards/rejected": -20.390029907226562, + "step": 16048 + }, + { + "epoch": 2.5, + "learning_rate": 2.376347320919135e-06, + "logits/chosen": -2.2564544677734375, + "logits/rejected": -2.93192195892334, + "logps/chosen": -153.6261749267578, + "logps/rejected": -263.0446472167969, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.148330211639404, + "rewards/margins": 8.142505645751953, + "rewards/rejected": -13.2908353805542, + "step": 16049 + }, + { + "epoch": 2.5, + "learning_rate": 2.375613880387987e-06, + "logits/chosen": -2.5401992797851562, + "logits/rejected": -3.0135338306427, + "logps/chosen": -349.7738952636719, + "logps/rejected": -440.5694274902344, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.250703811645508, + "rewards/margins": 7.343052387237549, + "rewards/rejected": -17.59375762939453, + "step": 16050 + }, + { + "epoch": 2.5, + "learning_rate": 2.3748804398568393e-06, + "logits/chosen": -2.0373682975769043, + "logits/rejected": -2.4190011024475098, + "logps/chosen": -244.2100067138672, + "logps/rejected": -292.57965087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.048164367675781, + "rewards/margins": 10.770917892456055, + "rewards/rejected": -18.819082260131836, + "step": 16051 + }, + { + "epoch": 2.5, + "learning_rate": 2.374146999325691e-06, + "logits/chosen": -2.012054204940796, + "logits/rejected": -2.716036081314087, + "logps/chosen": -199.63922119140625, + "logps/rejected": -494.2093505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8220720291137695, + "rewards/margins": 12.94517707824707, + "rewards/rejected": -20.767250061035156, + "step": 16052 + }, + { + "epoch": 2.5, + "learning_rate": 2.373413558794543e-06, + "logits/chosen": -0.6426936984062195, + "logits/rejected": -1.9757122993469238, + "logps/chosen": -248.087646484375, + "logps/rejected": -521.574951171875, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.042810440063477, + "rewards/margins": 10.950358390808105, + "rewards/rejected": -20.993167877197266, + "step": 16053 + }, + { + "epoch": 2.5, + "learning_rate": 2.3726801182633953e-06, + "logits/chosen": -2.269481658935547, + "logits/rejected": -2.3518779277801514, + "logps/chosen": -765.9376220703125, + "logps/rejected": -878.4201049804688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.687885284423828, + "rewards/margins": 11.925004959106445, + "rewards/rejected": -21.612890243530273, + "step": 16054 + }, + { + "epoch": 2.5, + "learning_rate": 2.3719466777322476e-06, + "logits/chosen": -2.7499310970306396, + "logits/rejected": -2.292531728744507, + "logps/chosen": -483.46484375, + "logps/rejected": -494.9502258300781, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.484131813049316, + "rewards/margins": 9.996883392333984, + "rewards/rejected": -16.481014251708984, + "step": 16055 + }, + { + "epoch": 2.5, + "learning_rate": 2.3712132372011e-06, + "logits/chosen": -2.596360921859741, + "logits/rejected": -2.640268564224243, + "logps/chosen": -163.09707641601562, + "logps/rejected": -415.48284912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.428143501281738, + "rewards/margins": 13.463228225708008, + "rewards/rejected": -21.89137077331543, + "step": 16056 + }, + { + "epoch": 2.5, + "learning_rate": 2.370479796669952e-06, + "logits/chosen": -2.7830352783203125, + "logits/rejected": -2.661768674850464, + "logps/chosen": -480.20892333984375, + "logps/rejected": -428.4832763671875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.396695137023926, + "rewards/margins": 7.520007610321045, + "rewards/rejected": -15.916702270507812, + "step": 16057 + }, + { + "epoch": 2.5, + "learning_rate": 2.369746356138804e-06, + "logits/chosen": -2.4658241271972656, + "logits/rejected": -2.769166946411133, + "logps/chosen": -128.16619873046875, + "logps/rejected": -305.62542724609375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.174641132354736, + "rewards/margins": 10.158102035522461, + "rewards/rejected": -17.33274269104004, + "step": 16058 + }, + { + "epoch": 2.5, + "learning_rate": 2.369012915607656e-06, + "logits/chosen": -2.7500388622283936, + "logits/rejected": -2.8389861583709717, + "logps/chosen": -461.744384765625, + "logps/rejected": -414.650634765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.331634521484375, + "rewards/margins": 8.366865158081055, + "rewards/rejected": -17.698501586914062, + "step": 16059 + }, + { + "epoch": 2.5, + "learning_rate": 2.3682794750765083e-06, + "logits/chosen": -1.2479392290115356, + "logits/rejected": -2.4092934131622314, + "logps/chosen": -694.5073852539062, + "logps/rejected": -847.7544555664062, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.786087036132812, + "rewards/margins": 11.984561920166016, + "rewards/rejected": -21.770648956298828, + "step": 16060 + }, + { + "epoch": 2.5, + "learning_rate": 2.36754603454536e-06, + "logits/chosen": -2.8452508449554443, + "logits/rejected": -2.481550693511963, + "logps/chosen": -520.3717041015625, + "logps/rejected": -429.3072814941406, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.56857967376709, + "rewards/margins": 6.993279933929443, + "rewards/rejected": -15.561859130859375, + "step": 16061 + }, + { + "epoch": 2.5, + "learning_rate": 2.366812594014212e-06, + "logits/chosen": -2.411362409591675, + "logits/rejected": -2.1559901237487793, + "logps/chosen": -237.359619140625, + "logps/rejected": -485.29107666015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.360698699951172, + "rewards/margins": 9.107404708862305, + "rewards/rejected": -18.468103408813477, + "step": 16062 + }, + { + "epoch": 2.5, + "learning_rate": 2.3660791534830643e-06, + "logits/chosen": -2.862908363342285, + "logits/rejected": -1.5702811479568481, + "logps/chosen": -784.475341796875, + "logps/rejected": -526.6414184570312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.065860748291016, + "rewards/margins": 10.458019256591797, + "rewards/rejected": -18.523880004882812, + "step": 16063 + }, + { + "epoch": 2.5, + "learning_rate": 2.3653457129519166e-06, + "logits/chosen": -2.6627938747406006, + "logits/rejected": -2.148191452026367, + "logps/chosen": -397.8728332519531, + "logps/rejected": -480.19024658203125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.61532974243164, + "rewards/margins": 6.917656421661377, + "rewards/rejected": -15.53298568725586, + "step": 16064 + }, + { + "epoch": 2.5, + "learning_rate": 2.364612272420769e-06, + "logits/chosen": -2.6056337356567383, + "logits/rejected": -2.3257884979248047, + "logps/chosen": -162.2342529296875, + "logps/rejected": -301.55462646484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.528789520263672, + "rewards/margins": 10.087272644042969, + "rewards/rejected": -19.61606216430664, + "step": 16065 + }, + { + "epoch": 2.5, + "learning_rate": 2.363878831889621e-06, + "logits/chosen": -2.730586051940918, + "logits/rejected": -2.6913344860076904, + "logps/chosen": -465.90008544921875, + "logps/rejected": -428.0207214355469, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.722661972045898, + "rewards/margins": 11.17184066772461, + "rewards/rejected": -19.894502639770508, + "step": 16066 + }, + { + "epoch": 2.5, + "learning_rate": 2.363145391358473e-06, + "logits/chosen": -2.734652519226074, + "logits/rejected": -1.8606749773025513, + "logps/chosen": -457.6348876953125, + "logps/rejected": -400.74334716796875, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.143506050109863, + "rewards/margins": 8.942535400390625, + "rewards/rejected": -19.086040496826172, + "step": 16067 + }, + { + "epoch": 2.5, + "learning_rate": 2.362411950827325e-06, + "logits/chosen": -2.9085962772369385, + "logits/rejected": -2.9499642848968506, + "logps/chosen": -90.78873443603516, + "logps/rejected": -228.50160217285156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.828770637512207, + "rewards/margins": 9.486310958862305, + "rewards/rejected": -17.315082550048828, + "step": 16068 + }, + { + "epoch": 2.5, + "learning_rate": 2.3616785102961773e-06, + "logits/chosen": -2.9278812408447266, + "logits/rejected": -2.902808904647827, + "logps/chosen": -180.11419677734375, + "logps/rejected": -255.09878540039062, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.781393051147461, + "rewards/margins": 7.242575168609619, + "rewards/rejected": -17.023967742919922, + "step": 16069 + }, + { + "epoch": 2.5, + "learning_rate": 2.360945069765029e-06, + "logits/chosen": -2.841667652130127, + "logits/rejected": -2.3113269805908203, + "logps/chosen": -223.79010009765625, + "logps/rejected": -336.8815612792969, + "loss": 0.1122, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.977400779724121, + "rewards/margins": 9.894241333007812, + "rewards/rejected": -17.87164306640625, + "step": 16070 + }, + { + "epoch": 2.5, + "learning_rate": 2.360211629233881e-06, + "logits/chosen": -2.671602249145508, + "logits/rejected": -3.017062187194824, + "logps/chosen": -143.3907470703125, + "logps/rejected": -428.39886474609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.60805606842041, + "rewards/margins": 8.266691207885742, + "rewards/rejected": -18.87474822998047, + "step": 16071 + }, + { + "epoch": 2.5, + "learning_rate": 2.3594781887027338e-06, + "logits/chosen": -2.859821319580078, + "logits/rejected": -2.95628023147583, + "logps/chosen": -219.40234375, + "logps/rejected": -269.26702880859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.234289646148682, + "rewards/margins": 8.46524715423584, + "rewards/rejected": -14.699536323547363, + "step": 16072 + }, + { + "epoch": 2.5, + "learning_rate": 2.3587447481715857e-06, + "logits/chosen": -2.084376096725464, + "logits/rejected": -2.6348514556884766, + "logps/chosen": -186.0953369140625, + "logps/rejected": -427.95013427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.707553863525391, + "rewards/margins": 12.054363250732422, + "rewards/rejected": -19.761917114257812, + "step": 16073 + }, + { + "epoch": 2.5, + "learning_rate": 2.358011307640438e-06, + "logits/chosen": -0.9921959638595581, + "logits/rejected": -2.387301445007324, + "logps/chosen": -143.85885620117188, + "logps/rejected": -537.4640502929688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.03278636932373, + "rewards/margins": 15.353490829467773, + "rewards/rejected": -23.38627815246582, + "step": 16074 + }, + { + "epoch": 2.5, + "learning_rate": 2.35727786710929e-06, + "logits/chosen": -1.739454984664917, + "logits/rejected": -2.8650739192962646, + "logps/chosen": -215.87847900390625, + "logps/rejected": -447.90362548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.354091644287109, + "rewards/margins": 16.53231430053711, + "rewards/rejected": -22.88640594482422, + "step": 16075 + }, + { + "epoch": 2.5, + "learning_rate": 2.356544426578142e-06, + "logits/chosen": -2.14422869682312, + "logits/rejected": -2.5903375148773193, + "logps/chosen": -148.85015869140625, + "logps/rejected": -374.08380126953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.718674659729004, + "rewards/margins": 9.596818923950195, + "rewards/rejected": -19.315494537353516, + "step": 16076 + }, + { + "epoch": 2.5, + "learning_rate": 2.355810986046994e-06, + "logits/chosen": -2.8421475887298584, + "logits/rejected": -2.019279718399048, + "logps/chosen": -200.82437133789062, + "logps/rejected": -296.7468566894531, + "loss": 0.0858, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.850217819213867, + "rewards/margins": 8.229486465454102, + "rewards/rejected": -14.079704284667969, + "step": 16077 + }, + { + "epoch": 2.5, + "learning_rate": 2.3550775455158463e-06, + "logits/chosen": -2.7978639602661133, + "logits/rejected": -2.9051733016967773, + "logps/chosen": -171.93862915039062, + "logps/rejected": -482.7847900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.909257888793945, + "rewards/margins": 11.387001037597656, + "rewards/rejected": -20.29625701904297, + "step": 16078 + }, + { + "epoch": 2.5, + "learning_rate": 2.354344104984698e-06, + "logits/chosen": -2.5628952980041504, + "logits/rejected": -1.459922194480896, + "logps/chosen": -310.56378173828125, + "logps/rejected": -235.519775390625, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.559345245361328, + "rewards/margins": 8.487737655639648, + "rewards/rejected": -16.04708480834961, + "step": 16079 + }, + { + "epoch": 2.5, + "learning_rate": 2.3536106644535505e-06, + "logits/chosen": -1.850589394569397, + "logits/rejected": -2.8080568313598633, + "logps/chosen": -166.21029663085938, + "logps/rejected": -329.6273193359375, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.302024841308594, + "rewards/margins": 8.71288013458252, + "rewards/rejected": -19.014904022216797, + "step": 16080 + }, + { + "epoch": 2.5, + "learning_rate": 2.352877223922403e-06, + "logits/chosen": -2.51701283454895, + "logits/rejected": -1.8266957998275757, + "logps/chosen": -312.5957946777344, + "logps/rejected": -388.8214111328125, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.470194816589355, + "rewards/margins": 7.350081920623779, + "rewards/rejected": -16.820276260375977, + "step": 16081 + }, + { + "epoch": 2.5, + "learning_rate": 2.3521437833912547e-06, + "logits/chosen": -2.5718111991882324, + "logits/rejected": -2.4010164737701416, + "logps/chosen": -250.32693481445312, + "logps/rejected": -274.50189208984375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.704170227050781, + "rewards/margins": 6.609955787658691, + "rewards/rejected": -13.314125061035156, + "step": 16082 + }, + { + "epoch": 2.5, + "learning_rate": 2.351410342860107e-06, + "logits/chosen": -2.9844279289245605, + "logits/rejected": -2.7827322483062744, + "logps/chosen": -379.8433837890625, + "logps/rejected": -463.7418212890625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.637032508850098, + "rewards/margins": 7.5513787269592285, + "rewards/rejected": -16.188411712646484, + "step": 16083 + }, + { + "epoch": 2.5, + "learning_rate": 2.350676902328959e-06, + "logits/chosen": -1.6616555452346802, + "logits/rejected": -2.6782169342041016, + "logps/chosen": -264.59454345703125, + "logps/rejected": -357.2569580078125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.00755500793457, + "rewards/margins": 6.986168384552002, + "rewards/rejected": -16.993722915649414, + "step": 16084 + }, + { + "epoch": 2.5, + "learning_rate": 2.349943461797811e-06, + "logits/chosen": -2.69036602973938, + "logits/rejected": -2.6129703521728516, + "logps/chosen": -89.58909606933594, + "logps/rejected": -409.1831970214844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.602707862854004, + "rewards/margins": 13.940349578857422, + "rewards/rejected": -20.54305648803711, + "step": 16085 + }, + { + "epoch": 2.5, + "learning_rate": 2.349210021266663e-06, + "logits/chosen": -2.8047068119049072, + "logits/rejected": -2.2404048442840576, + "logps/chosen": -888.468505859375, + "logps/rejected": -669.9259643554688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.520284652709961, + "rewards/margins": 11.670230865478516, + "rewards/rejected": -20.190515518188477, + "step": 16086 + }, + { + "epoch": 2.5, + "learning_rate": 2.3484765807355153e-06, + "logits/chosen": -2.9223055839538574, + "logits/rejected": -1.5847268104553223, + "logps/chosen": -315.24774169921875, + "logps/rejected": -255.03140258789062, + "loss": 0.0521, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.301253318786621, + "rewards/margins": 6.5498785972595215, + "rewards/rejected": -13.851131439208984, + "step": 16087 + }, + { + "epoch": 2.5, + "learning_rate": 2.3477431402043672e-06, + "logits/chosen": -1.8532016277313232, + "logits/rejected": -2.64652419090271, + "logps/chosen": -290.1087341308594, + "logps/rejected": -373.0772705078125, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.385648727416992, + "rewards/margins": 4.331472396850586, + "rewards/rejected": -15.717121124267578, + "step": 16088 + }, + { + "epoch": 2.5, + "learning_rate": 2.34700969967322e-06, + "logits/chosen": -2.373722791671753, + "logits/rejected": -1.4087114334106445, + "logps/chosen": -275.64886474609375, + "logps/rejected": -278.1712646484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.370159149169922, + "rewards/margins": 9.331892013549805, + "rewards/rejected": -18.702051162719727, + "step": 16089 + }, + { + "epoch": 2.5, + "learning_rate": 2.346276259142072e-06, + "logits/chosen": -1.5496689081192017, + "logits/rejected": -2.75455904006958, + "logps/chosen": -159.75564575195312, + "logps/rejected": -566.9129638671875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.368175983428955, + "rewards/margins": 10.843954086303711, + "rewards/rejected": -17.21213150024414, + "step": 16090 + }, + { + "epoch": 2.5, + "learning_rate": 2.3455428186109237e-06, + "logits/chosen": -0.7163934111595154, + "logits/rejected": -2.722134590148926, + "logps/chosen": -204.70822143554688, + "logps/rejected": -453.35528564453125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.202765464782715, + "rewards/margins": 8.290718078613281, + "rewards/rejected": -18.493484497070312, + "step": 16091 + }, + { + "epoch": 2.5, + "learning_rate": 2.344809378079776e-06, + "logits/chosen": -2.046666145324707, + "logits/rejected": -2.543663501739502, + "logps/chosen": -181.97744750976562, + "logps/rejected": -185.95909118652344, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.95179557800293, + "rewards/margins": 6.597437858581543, + "rewards/rejected": -13.549233436584473, + "step": 16092 + }, + { + "epoch": 2.5, + "learning_rate": 2.344075937548628e-06, + "logits/chosen": -1.8920414447784424, + "logits/rejected": -2.719238758087158, + "logps/chosen": -340.6746826171875, + "logps/rejected": -554.7533569335938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.186841011047363, + "rewards/margins": 12.58635139465332, + "rewards/rejected": -22.773193359375, + "step": 16093 + }, + { + "epoch": 2.5, + "learning_rate": 2.34334249701748e-06, + "logits/chosen": -1.876440405845642, + "logits/rejected": -2.7548394203186035, + "logps/chosen": -276.8796691894531, + "logps/rejected": -715.9970092773438, + "loss": 0.0518, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.705366134643555, + "rewards/margins": 7.3978962898254395, + "rewards/rejected": -23.10326385498047, + "step": 16094 + }, + { + "epoch": 2.5, + "learning_rate": 2.342609056486332e-06, + "logits/chosen": -2.9212636947631836, + "logits/rejected": -2.693554639816284, + "logps/chosen": -396.30645751953125, + "logps/rejected": -292.97052001953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8682355880737305, + "rewards/margins": 9.55622673034668, + "rewards/rejected": -15.424463272094727, + "step": 16095 + }, + { + "epoch": 2.5, + "learning_rate": 2.3418756159551844e-06, + "logits/chosen": -2.896347761154175, + "logits/rejected": -2.3945486545562744, + "logps/chosen": -453.35528564453125, + "logps/rejected": -373.41143798828125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.195374488830566, + "rewards/margins": 6.7512102127075195, + "rewards/rejected": -14.946584701538086, + "step": 16096 + }, + { + "epoch": 2.5, + "learning_rate": 2.3411421754240367e-06, + "logits/chosen": -1.6986907720565796, + "logits/rejected": -2.7804079055786133, + "logps/chosen": -170.5093536376953, + "logps/rejected": -416.2975158691406, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.249024391174316, + "rewards/margins": 9.626731872558594, + "rewards/rejected": -17.875755310058594, + "step": 16097 + }, + { + "epoch": 2.5, + "learning_rate": 2.340408734892889e-06, + "logits/chosen": -2.863077163696289, + "logits/rejected": -2.5630855560302734, + "logps/chosen": -912.9072265625, + "logps/rejected": -725.380126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.695528030395508, + "rewards/margins": 10.437810897827148, + "rewards/rejected": -19.133338928222656, + "step": 16098 + }, + { + "epoch": 2.5, + "learning_rate": 2.339675294361741e-06, + "logits/chosen": -2.8241488933563232, + "logits/rejected": -2.45737361907959, + "logps/chosen": -271.8542785644531, + "logps/rejected": -234.60195922851562, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.53176498413086, + "rewards/margins": 7.11531925201416, + "rewards/rejected": -15.647085189819336, + "step": 16099 + }, + { + "epoch": 2.5, + "learning_rate": 2.338941853830593e-06, + "logits/chosen": -1.509702444076538, + "logits/rejected": -2.3616943359375, + "logps/chosen": -601.304931640625, + "logps/rejected": -941.138916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.295348167419434, + "rewards/margins": 14.245006561279297, + "rewards/rejected": -23.540353775024414, + "step": 16100 + }, + { + "epoch": 2.5, + "learning_rate": 2.338208413299445e-06, + "logits/chosen": -2.6105563640594482, + "logits/rejected": -2.8295369148254395, + "logps/chosen": -395.1065673828125, + "logps/rejected": -507.35418701171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.216709136962891, + "rewards/margins": 10.098793029785156, + "rewards/rejected": -17.315502166748047, + "step": 16101 + }, + { + "epoch": 2.5, + "learning_rate": 2.337474972768297e-06, + "logits/chosen": -2.4598805904388428, + "logits/rejected": -2.0386738777160645, + "logps/chosen": -254.66726684570312, + "logps/rejected": -328.7760009765625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.383100509643555, + "rewards/margins": 8.769441604614258, + "rewards/rejected": -19.152542114257812, + "step": 16102 + }, + { + "epoch": 2.5, + "learning_rate": 2.336741532237149e-06, + "logits/chosen": -3.017326593399048, + "logits/rejected": -2.16530442237854, + "logps/chosen": -542.9154052734375, + "logps/rejected": -408.7805480957031, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.573448181152344, + "rewards/margins": 8.194591522216797, + "rewards/rejected": -17.76803970336914, + "step": 16103 + }, + { + "epoch": 2.5, + "learning_rate": 2.336008091706001e-06, + "logits/chosen": -2.651252269744873, + "logits/rejected": -2.8746447563171387, + "logps/chosen": -98.76099395751953, + "logps/rejected": -240.8833770751953, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.145254135131836, + "rewards/margins": 6.977399826049805, + "rewards/rejected": -14.12265396118164, + "step": 16104 + }, + { + "epoch": 2.5, + "learning_rate": 2.3352746511748534e-06, + "logits/chosen": -1.8050849437713623, + "logits/rejected": -2.9001121520996094, + "logps/chosen": -157.42593383789062, + "logps/rejected": -446.7781982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9015092849731445, + "rewards/margins": 13.79415512084961, + "rewards/rejected": -19.69566535949707, + "step": 16105 + }, + { + "epoch": 2.5, + "learning_rate": 2.3345412106437057e-06, + "logits/chosen": -2.795201301574707, + "logits/rejected": -2.1843936443328857, + "logps/chosen": -330.55218505859375, + "logps/rejected": -401.88922119140625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.27186107635498, + "rewards/margins": 6.219797134399414, + "rewards/rejected": -17.491657257080078, + "step": 16106 + }, + { + "epoch": 2.5, + "learning_rate": 2.333807770112558e-06, + "logits/chosen": -1.802773118019104, + "logits/rejected": -2.821686029434204, + "logps/chosen": -173.06942749023438, + "logps/rejected": -538.737060546875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.826688766479492, + "rewards/margins": 8.833806991577148, + "rewards/rejected": -15.66049575805664, + "step": 16107 + }, + { + "epoch": 2.51, + "learning_rate": 2.33307432958141e-06, + "logits/chosen": -2.23093843460083, + "logits/rejected": -2.6702167987823486, + "logps/chosen": -273.34259033203125, + "logps/rejected": -474.125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.049196243286133, + "rewards/margins": 12.687850952148438, + "rewards/rejected": -20.737045288085938, + "step": 16108 + }, + { + "epoch": 2.51, + "learning_rate": 2.332340889050262e-06, + "logits/chosen": -2.0878148078918457, + "logits/rejected": -2.665153741836548, + "logps/chosen": -198.49839782714844, + "logps/rejected": -435.3965759277344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.319901466369629, + "rewards/margins": 10.494425773620605, + "rewards/rejected": -18.814327239990234, + "step": 16109 + }, + { + "epoch": 2.51, + "learning_rate": 2.331607448519114e-06, + "logits/chosen": -1.796129584312439, + "logits/rejected": -2.3736517429351807, + "logps/chosen": -183.76849365234375, + "logps/rejected": -388.54888916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.663303375244141, + "rewards/margins": 14.594804763793945, + "rewards/rejected": -22.258108139038086, + "step": 16110 + }, + { + "epoch": 2.51, + "learning_rate": 2.330874007987966e-06, + "logits/chosen": -2.363445281982422, + "logits/rejected": -2.7745487689971924, + "logps/chosen": -261.8782958984375, + "logps/rejected": -434.9917297363281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.784126281738281, + "rewards/margins": 8.82670783996582, + "rewards/rejected": -14.610834121704102, + "step": 16111 + }, + { + "epoch": 2.51, + "learning_rate": 2.3301405674568182e-06, + "logits/chosen": -2.761101007461548, + "logits/rejected": -2.907660961151123, + "logps/chosen": -204.0179443359375, + "logps/rejected": -553.9121704101562, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.21163558959961, + "rewards/margins": 11.219766616821289, + "rewards/rejected": -19.4314022064209, + "step": 16112 + }, + { + "epoch": 2.51, + "learning_rate": 2.32940712692567e-06, + "logits/chosen": -1.7059074640274048, + "logits/rejected": -2.602175712585449, + "logps/chosen": -258.5715026855469, + "logps/rejected": -550.8438720703125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.834305763244629, + "rewards/margins": 7.406371593475342, + "rewards/rejected": -14.240676879882812, + "step": 16113 + }, + { + "epoch": 2.51, + "learning_rate": 2.328673686394523e-06, + "logits/chosen": -1.099544882774353, + "logits/rejected": -2.572068929672241, + "logps/chosen": -163.43624877929688, + "logps/rejected": -454.707275390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.069723129272461, + "rewards/margins": 8.69894790649414, + "rewards/rejected": -19.7686710357666, + "step": 16114 + }, + { + "epoch": 2.51, + "learning_rate": 2.3279402458633747e-06, + "logits/chosen": -2.9685072898864746, + "logits/rejected": -1.958363652229309, + "logps/chosen": -533.1519775390625, + "logps/rejected": -305.4510192871094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.598652720451355, + "rewards/margins": 11.500653266906738, + "rewards/rejected": -13.099306106567383, + "step": 16115 + }, + { + "epoch": 2.51, + "learning_rate": 2.327206805332227e-06, + "logits/chosen": -2.766392469406128, + "logits/rejected": -1.3399628400802612, + "logps/chosen": -321.8851623535156, + "logps/rejected": -323.0274353027344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.284327507019043, + "rewards/margins": 10.566839218139648, + "rewards/rejected": -13.851165771484375, + "step": 16116 + }, + { + "epoch": 2.51, + "learning_rate": 2.326473364801079e-06, + "logits/chosen": -3.033247947692871, + "logits/rejected": -3.0698392391204834, + "logps/chosen": -130.61785888671875, + "logps/rejected": -512.8369140625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.252002716064453, + "rewards/margins": 10.413076400756836, + "rewards/rejected": -18.66507911682129, + "step": 16117 + }, + { + "epoch": 2.51, + "learning_rate": 2.325739924269931e-06, + "logits/chosen": -2.708874464035034, + "logits/rejected": -2.471565008163452, + "logps/chosen": -370.3018798828125, + "logps/rejected": -416.7300109863281, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.751487731933594, + "rewards/margins": 6.809622287750244, + "rewards/rejected": -17.561111450195312, + "step": 16118 + }, + { + "epoch": 2.51, + "learning_rate": 2.325006483738783e-06, + "logits/chosen": -2.4672353267669678, + "logits/rejected": -2.0224664211273193, + "logps/chosen": -252.69322204589844, + "logps/rejected": -252.73858642578125, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.4476318359375, + "rewards/margins": 5.465517044067383, + "rewards/rejected": -13.913148880004883, + "step": 16119 + }, + { + "epoch": 2.51, + "learning_rate": 2.3242730432076354e-06, + "logits/chosen": -2.542954444885254, + "logits/rejected": -2.756843090057373, + "logps/chosen": -311.7821044921875, + "logps/rejected": -384.1959228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0115861892700195, + "rewards/margins": 13.932929992675781, + "rewards/rejected": -18.944517135620117, + "step": 16120 + }, + { + "epoch": 2.51, + "learning_rate": 2.3235396026764873e-06, + "logits/chosen": -1.347151517868042, + "logits/rejected": -2.833178997039795, + "logps/chosen": -600.5281982421875, + "logps/rejected": -695.6778564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3024139404296875, + "rewards/margins": 10.248979568481445, + "rewards/rejected": -17.551395416259766, + "step": 16121 + }, + { + "epoch": 2.51, + "learning_rate": 2.3228061621453396e-06, + "logits/chosen": -1.975095272064209, + "logits/rejected": -2.8212387561798096, + "logps/chosen": -295.79876708984375, + "logps/rejected": -298.0546875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.963478088378906, + "rewards/margins": 7.296123504638672, + "rewards/rejected": -13.259601593017578, + "step": 16122 + }, + { + "epoch": 2.51, + "learning_rate": 2.322072721614192e-06, + "logits/chosen": -1.8090696334838867, + "logits/rejected": -2.29093599319458, + "logps/chosen": -576.66796875, + "logps/rejected": -728.376708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.212634086608887, + "rewards/margins": 20.292526245117188, + "rewards/rejected": -27.505159378051758, + "step": 16123 + }, + { + "epoch": 2.51, + "learning_rate": 2.3213392810830437e-06, + "logits/chosen": -2.1150741577148438, + "logits/rejected": -2.799100637435913, + "logps/chosen": -96.955810546875, + "logps/rejected": -250.7677001953125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7476396560668945, + "rewards/margins": 6.736814498901367, + "rewards/rejected": -14.484454154968262, + "step": 16124 + }, + { + "epoch": 2.51, + "learning_rate": 2.320605840551896e-06, + "logits/chosen": -1.3012953996658325, + "logits/rejected": -2.3958353996276855, + "logps/chosen": -186.01589965820312, + "logps/rejected": -562.9138793945312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.44053840637207, + "rewards/margins": 15.819357872009277, + "rewards/rejected": -21.25989532470703, + "step": 16125 + }, + { + "epoch": 2.51, + "learning_rate": 2.319872400020748e-06, + "logits/chosen": -1.5116276741027832, + "logits/rejected": -2.745579481124878, + "logps/chosen": -236.50186157226562, + "logps/rejected": -971.5061645507812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.416074752807617, + "rewards/margins": 16.79546546936035, + "rewards/rejected": -23.21154022216797, + "step": 16126 + }, + { + "epoch": 2.51, + "learning_rate": 2.3191389594896002e-06, + "logits/chosen": -1.983638048171997, + "logits/rejected": -2.819897174835205, + "logps/chosen": -390.97332763671875, + "logps/rejected": -572.0858764648438, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.763388633728027, + "rewards/margins": 8.646419525146484, + "rewards/rejected": -19.409809112548828, + "step": 16127 + }, + { + "epoch": 2.51, + "learning_rate": 2.318405518958452e-06, + "logits/chosen": -3.0385475158691406, + "logits/rejected": -2.796776294708252, + "logps/chosen": -261.8399963378906, + "logps/rejected": -377.73272705078125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.810101509094238, + "rewards/margins": 6.165888786315918, + "rewards/rejected": -15.975990295410156, + "step": 16128 + }, + { + "epoch": 2.51, + "learning_rate": 2.3176720784273044e-06, + "logits/chosen": -2.815061092376709, + "logits/rejected": -1.352982997894287, + "logps/chosen": -484.18145751953125, + "logps/rejected": -413.507568359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.030614852905273, + "rewards/margins": 10.457454681396484, + "rewards/rejected": -18.488069534301758, + "step": 16129 + }, + { + "epoch": 2.51, + "learning_rate": 2.3169386378961563e-06, + "logits/chosen": -2.184713125228882, + "logits/rejected": -2.8984761238098145, + "logps/chosen": -199.72132873535156, + "logps/rejected": -293.8004455566406, + "loss": 1.0507, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.641578674316406, + "rewards/margins": 5.174848556518555, + "rewards/rejected": -15.816427230834961, + "step": 16130 + }, + { + "epoch": 2.51, + "learning_rate": 2.3162051973650086e-06, + "logits/chosen": -2.8184752464294434, + "logits/rejected": -2.8725593090057373, + "logps/chosen": -142.503662109375, + "logps/rejected": -367.53228759765625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2002763748168945, + "rewards/margins": 10.14236068725586, + "rewards/rejected": -16.34263801574707, + "step": 16131 + }, + { + "epoch": 2.51, + "learning_rate": 2.315471756833861e-06, + "logits/chosen": -1.7080737352371216, + "logits/rejected": -2.3541271686553955, + "logps/chosen": -226.0219268798828, + "logps/rejected": -352.9696350097656, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.332929611206055, + "rewards/margins": 6.853416919708252, + "rewards/rejected": -15.186346054077148, + "step": 16132 + }, + { + "epoch": 2.51, + "learning_rate": 2.3147383163027128e-06, + "logits/chosen": -2.330718517303467, + "logits/rejected": -2.7760448455810547, + "logps/chosen": -139.70774841308594, + "logps/rejected": -331.55926513671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.721944808959961, + "rewards/margins": 9.31220531463623, + "rewards/rejected": -20.034149169921875, + "step": 16133 + }, + { + "epoch": 2.51, + "learning_rate": 2.314004875771565e-06, + "logits/chosen": -2.693493604660034, + "logits/rejected": -2.612370729446411, + "logps/chosen": -516.499267578125, + "logps/rejected": -587.5252685546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.790051460266113, + "rewards/margins": 9.67627239227295, + "rewards/rejected": -17.466323852539062, + "step": 16134 + }, + { + "epoch": 2.51, + "learning_rate": 2.313271435240417e-06, + "logits/chosen": -2.6646511554718018, + "logits/rejected": -2.9417412281036377, + "logps/chosen": -918.364013671875, + "logps/rejected": -652.518310546875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.262785911560059, + "rewards/margins": 8.244307518005371, + "rewards/rejected": -16.50709342956543, + "step": 16135 + }, + { + "epoch": 2.51, + "learning_rate": 2.3125379947092692e-06, + "logits/chosen": -2.923710823059082, + "logits/rejected": -2.91890025138855, + "logps/chosen": -210.45318603515625, + "logps/rejected": -374.6728820800781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.82650089263916, + "rewards/margins": 10.017524719238281, + "rewards/rejected": -20.844024658203125, + "step": 16136 + }, + { + "epoch": 2.51, + "learning_rate": 2.311804554178121e-06, + "logits/chosen": -2.910994052886963, + "logits/rejected": -3.008782386779785, + "logps/chosen": -150.85372924804688, + "logps/rejected": -534.047607421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2603864669799805, + "rewards/margins": 10.033149719238281, + "rewards/rejected": -17.293535232543945, + "step": 16137 + }, + { + "epoch": 2.51, + "learning_rate": 2.3110711136469734e-06, + "logits/chosen": -2.450225353240967, + "logits/rejected": -2.1124961376190186, + "logps/chosen": -373.28729248046875, + "logps/rejected": -439.8966064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.785475730895996, + "rewards/margins": 12.64029312133789, + "rewards/rejected": -21.42576789855957, + "step": 16138 + }, + { + "epoch": 2.51, + "learning_rate": 2.3103376731158257e-06, + "logits/chosen": -1.6808407306671143, + "logits/rejected": -2.917484760284424, + "logps/chosen": -333.620361328125, + "logps/rejected": -574.6699829101562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.028658390045166, + "rewards/margins": 11.734046936035156, + "rewards/rejected": -17.762704849243164, + "step": 16139 + }, + { + "epoch": 2.51, + "learning_rate": 2.3096042325846776e-06, + "logits/chosen": -1.937178373336792, + "logits/rejected": -2.6325747966766357, + "logps/chosen": -175.4672088623047, + "logps/rejected": -562.4548950195312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.646553993225098, + "rewards/margins": 8.697803497314453, + "rewards/rejected": -18.344356536865234, + "step": 16140 + }, + { + "epoch": 2.51, + "learning_rate": 2.30887079205353e-06, + "logits/chosen": -2.7920312881469727, + "logits/rejected": -2.913668155670166, + "logps/chosen": -208.7889404296875, + "logps/rejected": -276.0096130371094, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.296178340911865, + "rewards/margins": 7.8023786544799805, + "rewards/rejected": -15.098556518554688, + "step": 16141 + }, + { + "epoch": 2.51, + "learning_rate": 2.3081373515223818e-06, + "logits/chosen": -2.6176040172576904, + "logits/rejected": -2.7510664463043213, + "logps/chosen": -184.7867889404297, + "logps/rejected": -309.6424865722656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.644762992858887, + "rewards/margins": 7.817109107971191, + "rewards/rejected": -18.461872100830078, + "step": 16142 + }, + { + "epoch": 2.51, + "learning_rate": 2.307403910991234e-06, + "logits/chosen": -2.870852470397949, + "logits/rejected": -2.3328990936279297, + "logps/chosen": -770.740234375, + "logps/rejected": -772.7555541992188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.968997001647949, + "rewards/margins": 10.730823516845703, + "rewards/rejected": -18.69982147216797, + "step": 16143 + }, + { + "epoch": 2.51, + "learning_rate": 2.306670470460086e-06, + "logits/chosen": -2.900128126144409, + "logits/rejected": -2.774413585662842, + "logps/chosen": -590.7902221679688, + "logps/rejected": -517.0114135742188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.663577079772949, + "rewards/margins": 9.943023681640625, + "rewards/rejected": -17.60660171508789, + "step": 16144 + }, + { + "epoch": 2.51, + "learning_rate": 2.3059370299289383e-06, + "logits/chosen": -2.750042200088501, + "logits/rejected": -2.8153982162475586, + "logps/chosen": -188.82826232910156, + "logps/rejected": -362.16253662109375, + "loss": 0.0378, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.695119857788086, + "rewards/margins": 4.484726905822754, + "rewards/rejected": -16.179847717285156, + "step": 16145 + }, + { + "epoch": 2.51, + "learning_rate": 2.30520358939779e-06, + "logits/chosen": -0.5989421010017395, + "logits/rejected": -2.600210189819336, + "logps/chosen": -135.70150756835938, + "logps/rejected": -600.846435546875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.681921005249023, + "rewards/margins": 7.632186412811279, + "rewards/rejected": -18.314105987548828, + "step": 16146 + }, + { + "epoch": 2.51, + "learning_rate": 2.3044701488666424e-06, + "logits/chosen": -2.5632309913635254, + "logits/rejected": -1.5087355375289917, + "logps/chosen": -304.3418273925781, + "logps/rejected": -219.27926635742188, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.70074462890625, + "rewards/margins": 7.17900276184082, + "rewards/rejected": -15.87974739074707, + "step": 16147 + }, + { + "epoch": 2.51, + "learning_rate": 2.3037367083354947e-06, + "logits/chosen": -2.005589485168457, + "logits/rejected": -2.8744089603424072, + "logps/chosen": -167.81088256835938, + "logps/rejected": -423.13116455078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.035343170166016, + "rewards/margins": 9.529285430908203, + "rewards/rejected": -18.56462860107422, + "step": 16148 + }, + { + "epoch": 2.51, + "learning_rate": 2.303003267804347e-06, + "logits/chosen": -3.040703058242798, + "logits/rejected": -2.279552459716797, + "logps/chosen": -336.8575439453125, + "logps/rejected": -386.4761047363281, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.814245700836182, + "rewards/margins": 8.150206565856934, + "rewards/rejected": -13.964452743530273, + "step": 16149 + }, + { + "epoch": 2.51, + "learning_rate": 2.302269827273199e-06, + "logits/chosen": -2.0035817623138428, + "logits/rejected": -2.8278968334198, + "logps/chosen": -135.94854736328125, + "logps/rejected": -459.425048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.906576156616211, + "rewards/margins": 13.017951965332031, + "rewards/rejected": -18.924528121948242, + "step": 16150 + }, + { + "epoch": 2.51, + "learning_rate": 2.301536386742051e-06, + "logits/chosen": -2.708383083343506, + "logits/rejected": -0.625063955783844, + "logps/chosen": -243.29629516601562, + "logps/rejected": -193.249755859375, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.603036880493164, + "rewards/margins": 8.011438369750977, + "rewards/rejected": -16.61447525024414, + "step": 16151 + }, + { + "epoch": 2.51, + "learning_rate": 2.300802946210903e-06, + "logits/chosen": -1.475913643836975, + "logits/rejected": -2.611121416091919, + "logps/chosen": -298.5860595703125, + "logps/rejected": -552.4359741210938, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.283841133117676, + "rewards/margins": 10.156994819641113, + "rewards/rejected": -19.44083595275879, + "step": 16152 + }, + { + "epoch": 2.51, + "learning_rate": 2.300069505679755e-06, + "logits/chosen": -1.8357247114181519, + "logits/rejected": -2.7306787967681885, + "logps/chosen": -104.1608657836914, + "logps/rejected": -279.71356201171875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.220856666564941, + "rewards/margins": 7.380555152893066, + "rewards/rejected": -14.601411819458008, + "step": 16153 + }, + { + "epoch": 2.51, + "learning_rate": 2.2993360651486073e-06, + "logits/chosen": -3.009693145751953, + "logits/rejected": -1.5524829626083374, + "logps/chosen": -488.79998779296875, + "logps/rejected": -325.20263671875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.869343757629395, + "rewards/margins": 9.035985946655273, + "rewards/rejected": -18.905330657958984, + "step": 16154 + }, + { + "epoch": 2.51, + "learning_rate": 2.298602624617459e-06, + "logits/chosen": -2.2936038970947266, + "logits/rejected": -2.1280574798583984, + "logps/chosen": -284.69781494140625, + "logps/rejected": -402.93206787109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.818155288696289, + "rewards/margins": 8.211316108703613, + "rewards/rejected": -16.029470443725586, + "step": 16155 + }, + { + "epoch": 2.51, + "learning_rate": 2.297869184086312e-06, + "logits/chosen": -1.2002772092819214, + "logits/rejected": -2.5881597995758057, + "logps/chosen": -189.16326904296875, + "logps/rejected": -357.4273376464844, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.742486000061035, + "rewards/margins": 8.141554832458496, + "rewards/rejected": -18.88404083251953, + "step": 16156 + }, + { + "epoch": 2.51, + "learning_rate": 2.2971357435551638e-06, + "logits/chosen": -2.425920009613037, + "logits/rejected": -3.1386499404907227, + "logps/chosen": -102.15890502929688, + "logps/rejected": -287.8724365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.443273544311523, + "rewards/margins": 10.335771560668945, + "rewards/rejected": -14.779045104980469, + "step": 16157 + }, + { + "epoch": 2.51, + "learning_rate": 2.296402303024016e-06, + "logits/chosen": -2.684793710708618, + "logits/rejected": -1.7894619703292847, + "logps/chosen": -221.04515075683594, + "logps/rejected": -302.2923889160156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.150211334228516, + "rewards/margins": 13.755054473876953, + "rewards/rejected": -18.90526580810547, + "step": 16158 + }, + { + "epoch": 2.51, + "learning_rate": 2.295668862492868e-06, + "logits/chosen": -2.613313674926758, + "logits/rejected": -2.937899112701416, + "logps/chosen": -121.31497955322266, + "logps/rejected": -256.1451416015625, + "loss": 0.5608, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.889142990112305, + "rewards/margins": 4.70526123046875, + "rewards/rejected": -14.594404220581055, + "step": 16159 + }, + { + "epoch": 2.51, + "learning_rate": 2.29493542196172e-06, + "logits/chosen": -2.8337225914001465, + "logits/rejected": -2.5696535110473633, + "logps/chosen": -496.12786865234375, + "logps/rejected": -623.6116943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.125988006591797, + "rewards/margins": 12.017313003540039, + "rewards/rejected": -23.143299102783203, + "step": 16160 + }, + { + "epoch": 2.51, + "learning_rate": 2.294201981430572e-06, + "logits/chosen": -2.806130886077881, + "logits/rejected": -2.977165460586548, + "logps/chosen": -482.1861572265625, + "logps/rejected": -544.9708251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.432834625244141, + "rewards/margins": 11.066570281982422, + "rewards/rejected": -18.499404907226562, + "step": 16161 + }, + { + "epoch": 2.51, + "learning_rate": 2.293468540899424e-06, + "logits/chosen": -2.714812755584717, + "logits/rejected": -2.7746682167053223, + "logps/chosen": -668.7444458007812, + "logps/rejected": -594.3192138671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.451791763305664, + "rewards/margins": 7.286402225494385, + "rewards/rejected": -16.73819351196289, + "step": 16162 + }, + { + "epoch": 2.51, + "learning_rate": 2.2927351003682763e-06, + "logits/chosen": -1.8633311986923218, + "logits/rejected": -2.6283485889434814, + "logps/chosen": -95.62393188476562, + "logps/rejected": -355.20654296875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.746584892272949, + "rewards/margins": 7.800081253051758, + "rewards/rejected": -15.546667098999023, + "step": 16163 + }, + { + "epoch": 2.51, + "learning_rate": 2.2920016598371286e-06, + "logits/chosen": -2.742030620574951, + "logits/rejected": -2.873745918273926, + "logps/chosen": -272.85968017578125, + "logps/rejected": -451.8031005859375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.59350299835205, + "rewards/margins": 6.9115190505981445, + "rewards/rejected": -16.505022048950195, + "step": 16164 + }, + { + "epoch": 2.51, + "learning_rate": 2.291268219305981e-06, + "logits/chosen": -1.6284761428833008, + "logits/rejected": -2.5381245613098145, + "logps/chosen": -284.2369384765625, + "logps/rejected": -636.0399169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.597387313842773, + "rewards/margins": 16.843196868896484, + "rewards/rejected": -27.440582275390625, + "step": 16165 + }, + { + "epoch": 2.51, + "learning_rate": 2.290534778774833e-06, + "logits/chosen": -1.609089732170105, + "logits/rejected": -2.904468536376953, + "logps/chosen": -237.11911010742188, + "logps/rejected": -392.88726806640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.807705879211426, + "rewards/margins": 10.16512680053711, + "rewards/rejected": -16.97283172607422, + "step": 16166 + }, + { + "epoch": 2.51, + "learning_rate": 2.289801338243685e-06, + "logits/chosen": -2.6606905460357666, + "logits/rejected": -3.0030722618103027, + "logps/chosen": -357.8934326171875, + "logps/rejected": -400.90423583984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.763435363769531, + "rewards/margins": 11.157119750976562, + "rewards/rejected": -17.920555114746094, + "step": 16167 + }, + { + "epoch": 2.51, + "learning_rate": 2.289067897712537e-06, + "logits/chosen": -1.9739234447479248, + "logits/rejected": -2.786705732345581, + "logps/chosen": -290.3092041015625, + "logps/rejected": -457.265625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.168227195739746, + "rewards/margins": 9.842411994934082, + "rewards/rejected": -19.010639190673828, + "step": 16168 + }, + { + "epoch": 2.51, + "learning_rate": 2.2883344571813893e-06, + "logits/chosen": -1.6998398303985596, + "logits/rejected": -2.8370680809020996, + "logps/chosen": -146.61402893066406, + "logps/rejected": -411.408935546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.951750755310059, + "rewards/margins": 8.368345260620117, + "rewards/rejected": -16.320096969604492, + "step": 16169 + }, + { + "epoch": 2.51, + "learning_rate": 2.287601016650241e-06, + "logits/chosen": -2.66304087638855, + "logits/rejected": -2.7478244304656982, + "logps/chosen": -120.81108856201172, + "logps/rejected": -331.42608642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.517885684967041, + "rewards/margins": 12.954292297363281, + "rewards/rejected": -19.472177505493164, + "step": 16170 + }, + { + "epoch": 2.51, + "learning_rate": 2.286867576119093e-06, + "logits/chosen": -2.4524409770965576, + "logits/rejected": -1.990740418434143, + "logps/chosen": -248.13796997070312, + "logps/rejected": -339.4009094238281, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.081639289855957, + "rewards/margins": 7.424869537353516, + "rewards/rejected": -15.506508827209473, + "step": 16171 + }, + { + "epoch": 2.52, + "learning_rate": 2.2861341355879453e-06, + "logits/chosen": -2.477839469909668, + "logits/rejected": -2.7016799449920654, + "logps/chosen": -401.3927001953125, + "logps/rejected": -510.39849853515625, + "loss": 0.0464, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.463107109069824, + "rewards/margins": 7.2620134353637695, + "rewards/rejected": -17.725120544433594, + "step": 16172 + }, + { + "epoch": 2.52, + "learning_rate": 2.2854006950567976e-06, + "logits/chosen": -1.761381983757019, + "logits/rejected": -2.2369065284729004, + "logps/chosen": -114.19707489013672, + "logps/rejected": -213.86866760253906, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.82315444946289, + "rewards/margins": 5.393331050872803, + "rewards/rejected": -14.216485977172852, + "step": 16173 + }, + { + "epoch": 2.52, + "learning_rate": 2.28466725452565e-06, + "logits/chosen": -2.5898609161376953, + "logits/rejected": -1.3424209356307983, + "logps/chosen": -649.4177856445312, + "logps/rejected": -390.79058837890625, + "loss": 0.0728, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.425373077392578, + "rewards/margins": 4.491941452026367, + "rewards/rejected": -15.917314529418945, + "step": 16174 + }, + { + "epoch": 2.52, + "learning_rate": 2.283933813994502e-06, + "logits/chosen": -2.514044761657715, + "logits/rejected": -2.895857572555542, + "logps/chosen": -138.07952880859375, + "logps/rejected": -235.76734924316406, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.811705589294434, + "rewards/margins": 8.787942886352539, + "rewards/rejected": -15.599649429321289, + "step": 16175 + }, + { + "epoch": 2.52, + "learning_rate": 2.283200373463354e-06, + "logits/chosen": -2.246602773666382, + "logits/rejected": -1.9658641815185547, + "logps/chosen": -238.80368041992188, + "logps/rejected": -329.73284912109375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.464094161987305, + "rewards/margins": 6.867725372314453, + "rewards/rejected": -15.331819534301758, + "step": 16176 + }, + { + "epoch": 2.52, + "learning_rate": 2.282466932932206e-06, + "logits/chosen": -2.1311912536621094, + "logits/rejected": -2.7130074501037598, + "logps/chosen": -137.99530029296875, + "logps/rejected": -472.61627197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.058311462402344, + "rewards/margins": 13.105655670166016, + "rewards/rejected": -20.16396713256836, + "step": 16177 + }, + { + "epoch": 2.52, + "learning_rate": 2.2817334924010583e-06, + "logits/chosen": -1.9637444019317627, + "logits/rejected": -2.714421272277832, + "logps/chosen": -553.6720581054688, + "logps/rejected": -653.5780639648438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.457908630371094, + "rewards/margins": 13.342653274536133, + "rewards/rejected": -22.800561904907227, + "step": 16178 + }, + { + "epoch": 2.52, + "learning_rate": 2.28100005186991e-06, + "logits/chosen": -2.7626638412475586, + "logits/rejected": -3.0134451389312744, + "logps/chosen": -145.30136108398438, + "logps/rejected": -304.022216796875, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.055912971496582, + "rewards/margins": 6.273049831390381, + "rewards/rejected": -14.328963279724121, + "step": 16179 + }, + { + "epoch": 2.52, + "learning_rate": 2.280266611338762e-06, + "logits/chosen": -2.859781503677368, + "logits/rejected": -1.738794207572937, + "logps/chosen": -537.96337890625, + "logps/rejected": -507.8860168457031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.784736633300781, + "rewards/margins": 14.312973022460938, + "rewards/rejected": -27.09770965576172, + "step": 16180 + }, + { + "epoch": 2.52, + "learning_rate": 2.2795331708076144e-06, + "logits/chosen": -2.643109083175659, + "logits/rejected": -2.9497230052948, + "logps/chosen": -615.9766235351562, + "logps/rejected": -727.2138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.15993881225586, + "rewards/margins": 12.790075302124023, + "rewards/rejected": -22.95001220703125, + "step": 16181 + }, + { + "epoch": 2.52, + "learning_rate": 2.2787997302764667e-06, + "logits/chosen": -2.4970145225524902, + "logits/rejected": -2.7912867069244385, + "logps/chosen": -550.1251831054688, + "logps/rejected": -666.0557861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.450826644897461, + "rewards/margins": 17.640369415283203, + "rewards/rejected": -25.09119415283203, + "step": 16182 + }, + { + "epoch": 2.52, + "learning_rate": 2.278066289745319e-06, + "logits/chosen": -2.9095723628997803, + "logits/rejected": -2.964465379714966, + "logps/chosen": -138.81393432617188, + "logps/rejected": -253.89352416992188, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.724115371704102, + "rewards/margins": 7.483410358428955, + "rewards/rejected": -16.2075252532959, + "step": 16183 + }, + { + "epoch": 2.52, + "learning_rate": 2.277332849214171e-06, + "logits/chosen": -2.096625804901123, + "logits/rejected": -2.4890122413635254, + "logps/chosen": -498.95111083984375, + "logps/rejected": -645.9610595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.991020202636719, + "rewards/margins": 17.58016014099121, + "rewards/rejected": -23.57118034362793, + "step": 16184 + }, + { + "epoch": 2.52, + "learning_rate": 2.276599408683023e-06, + "logits/chosen": -1.3115249872207642, + "logits/rejected": -2.805508613586426, + "logps/chosen": -250.1640625, + "logps/rejected": -485.49609375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.113332748413086, + "rewards/margins": 15.578254699707031, + "rewards/rejected": -23.691587448120117, + "step": 16185 + }, + { + "epoch": 2.52, + "learning_rate": 2.275865968151875e-06, + "logits/chosen": -2.93742299079895, + "logits/rejected": -2.4929070472717285, + "logps/chosen": -262.2164001464844, + "logps/rejected": -257.61700439453125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6504998207092285, + "rewards/margins": 7.56711483001709, + "rewards/rejected": -13.217615127563477, + "step": 16186 + }, + { + "epoch": 2.52, + "learning_rate": 2.2751325276207273e-06, + "logits/chosen": -2.2555954456329346, + "logits/rejected": -2.8044638633728027, + "logps/chosen": -277.6230163574219, + "logps/rejected": -441.64141845703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.727060317993164, + "rewards/margins": 8.533817291259766, + "rewards/rejected": -21.26087760925293, + "step": 16187 + }, + { + "epoch": 2.52, + "learning_rate": 2.274399087089579e-06, + "logits/chosen": -1.6552008390426636, + "logits/rejected": -2.5253794193267822, + "logps/chosen": -267.53204345703125, + "logps/rejected": -602.44384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.731453895568848, + "rewards/margins": 15.758373260498047, + "rewards/rejected": -23.489826202392578, + "step": 16188 + }, + { + "epoch": 2.52, + "learning_rate": 2.2736656465584315e-06, + "logits/chosen": -2.6661062240600586, + "logits/rejected": -2.5401618480682373, + "logps/chosen": -213.27481079101562, + "logps/rejected": -334.9337463378906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.219358921051025, + "rewards/margins": 12.498833656311035, + "rewards/rejected": -16.71819305419922, + "step": 16189 + }, + { + "epoch": 2.52, + "learning_rate": 2.272932206027284e-06, + "logits/chosen": -2.4870479106903076, + "logits/rejected": -2.9105608463287354, + "logps/chosen": -191.23965454101562, + "logps/rejected": -445.08502197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.100825309753418, + "rewards/margins": 12.851045608520508, + "rewards/rejected": -21.95186996459961, + "step": 16190 + }, + { + "epoch": 2.52, + "learning_rate": 2.2721987654961357e-06, + "logits/chosen": -2.159637689590454, + "logits/rejected": -2.3045220375061035, + "logps/chosen": -508.6407470703125, + "logps/rejected": -629.527099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.939285278320312, + "rewards/margins": 16.11590576171875, + "rewards/rejected": -26.055192947387695, + "step": 16191 + }, + { + "epoch": 2.52, + "learning_rate": 2.271465324964988e-06, + "logits/chosen": -2.7413527965545654, + "logits/rejected": -2.4796512126922607, + "logps/chosen": -556.8006591796875, + "logps/rejected": -593.03759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.893191814422607, + "rewards/margins": 12.072559356689453, + "rewards/rejected": -19.96575164794922, + "step": 16192 + }, + { + "epoch": 2.52, + "learning_rate": 2.27073188443384e-06, + "logits/chosen": -3.072999954223633, + "logits/rejected": -2.4960906505584717, + "logps/chosen": -322.8326110839844, + "logps/rejected": -155.5556640625, + "loss": 0.0781, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1491193771362305, + "rewards/margins": 4.417951583862305, + "rewards/rejected": -11.567070960998535, + "step": 16193 + }, + { + "epoch": 2.52, + "learning_rate": 2.269998443902692e-06, + "logits/chosen": -1.621396541595459, + "logits/rejected": -2.4731314182281494, + "logps/chosen": -185.65939331054688, + "logps/rejected": -483.107177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.995050430297852, + "rewards/margins": 14.598541259765625, + "rewards/rejected": -23.593591690063477, + "step": 16194 + }, + { + "epoch": 2.52, + "learning_rate": 2.269265003371544e-06, + "logits/chosen": -2.3513376712799072, + "logits/rejected": -2.2753360271453857, + "logps/chosen": -321.65673828125, + "logps/rejected": -336.672607421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.134482383728027, + "rewards/margins": 10.132668495178223, + "rewards/rejected": -15.26715087890625, + "step": 16195 + }, + { + "epoch": 2.52, + "learning_rate": 2.2685315628403963e-06, + "logits/chosen": -2.767895460128784, + "logits/rejected": -2.9501070976257324, + "logps/chosen": -399.09722900390625, + "logps/rejected": -484.333984375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.100813865661621, + "rewards/margins": 7.338764190673828, + "rewards/rejected": -20.439579010009766, + "step": 16196 + }, + { + "epoch": 2.52, + "learning_rate": 2.2677981223092482e-06, + "logits/chosen": -2.8482792377471924, + "logits/rejected": -1.812923550605774, + "logps/chosen": -757.7350463867188, + "logps/rejected": -388.99755859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.431588649749756, + "rewards/margins": 8.470841407775879, + "rewards/rejected": -14.902429580688477, + "step": 16197 + }, + { + "epoch": 2.52, + "learning_rate": 2.2670646817781005e-06, + "logits/chosen": -2.8979287147521973, + "logits/rejected": -2.400256872177124, + "logps/chosen": -197.39288330078125, + "logps/rejected": -291.8168640136719, + "loss": 1.0606, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.919216156005859, + "rewards/margins": 4.3744401931762695, + "rewards/rejected": -11.293656349182129, + "step": 16198 + }, + { + "epoch": 2.52, + "learning_rate": 2.266331241246953e-06, + "logits/chosen": -2.4147205352783203, + "logits/rejected": -2.6839828491210938, + "logps/chosen": -277.6132507324219, + "logps/rejected": -436.9752197265625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.07833480834961, + "rewards/margins": 8.569379806518555, + "rewards/rejected": -16.64771270751953, + "step": 16199 + }, + { + "epoch": 2.52, + "learning_rate": 2.2655978007158047e-06, + "logits/chosen": -2.855586051940918, + "logits/rejected": -2.7089600563049316, + "logps/chosen": -175.1904296875, + "logps/rejected": -287.2119445800781, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.38570785522461, + "rewards/margins": 9.236566543579102, + "rewards/rejected": -17.62227439880371, + "step": 16200 + }, + { + "epoch": 2.52, + "learning_rate": 2.264864360184657e-06, + "logits/chosen": -2.0315158367156982, + "logits/rejected": -2.2724263668060303, + "logps/chosen": -227.08644104003906, + "logps/rejected": -549.8746948242188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.107186794281006, + "rewards/margins": 17.404024124145508, + "rewards/rejected": -24.511211395263672, + "step": 16201 + }, + { + "epoch": 2.52, + "learning_rate": 2.264130919653509e-06, + "logits/chosen": -2.864988088607788, + "logits/rejected": -0.7296600937843323, + "logps/chosen": -639.0860595703125, + "logps/rejected": -367.59375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.850906372070312, + "rewards/margins": 11.08415412902832, + "rewards/rejected": -19.935060501098633, + "step": 16202 + }, + { + "epoch": 2.52, + "learning_rate": 2.263397479122361e-06, + "logits/chosen": -1.5761003494262695, + "logits/rejected": -2.416069507598877, + "logps/chosen": -148.91372680664062, + "logps/rejected": -454.5831298828125, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.169095993041992, + "rewards/margins": 16.62982940673828, + "rewards/rejected": -25.798925399780273, + "step": 16203 + }, + { + "epoch": 2.52, + "learning_rate": 2.262664038591213e-06, + "logits/chosen": -2.0329158306121826, + "logits/rejected": -2.429112434387207, + "logps/chosen": -703.8316650390625, + "logps/rejected": -771.2821655273438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.085752964019775, + "rewards/margins": 17.67940902709961, + "rewards/rejected": -23.765161514282227, + "step": 16204 + }, + { + "epoch": 2.52, + "learning_rate": 2.2619305980600654e-06, + "logits/chosen": -2.6570520401000977, + "logits/rejected": -2.687117099761963, + "logps/chosen": -530.1322021484375, + "logps/rejected": -562.4217529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.75908899307251, + "rewards/margins": 11.55072021484375, + "rewards/rejected": -19.309810638427734, + "step": 16205 + }, + { + "epoch": 2.52, + "learning_rate": 2.2611971575289172e-06, + "logits/chosen": -1.648691177368164, + "logits/rejected": -2.8259217739105225, + "logps/chosen": -281.791748046875, + "logps/rejected": -586.4921264648438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.666830539703369, + "rewards/margins": 17.177047729492188, + "rewards/rejected": -21.84387969970703, + "step": 16206 + }, + { + "epoch": 2.52, + "learning_rate": 2.26046371699777e-06, + "logits/chosen": -0.8768866658210754, + "logits/rejected": -2.5355000495910645, + "logps/chosen": -176.9207000732422, + "logps/rejected": -590.4768676757812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.779404640197754, + "rewards/margins": 9.46615982055664, + "rewards/rejected": -19.24556541442871, + "step": 16207 + }, + { + "epoch": 2.52, + "learning_rate": 2.259730276466622e-06, + "logits/chosen": -2.913593292236328, + "logits/rejected": -2.898364305496216, + "logps/chosen": -73.34664916992188, + "logps/rejected": -191.30108642578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.682826042175293, + "rewards/margins": 9.625986099243164, + "rewards/rejected": -15.30881118774414, + "step": 16208 + }, + { + "epoch": 2.52, + "learning_rate": 2.2589968359354737e-06, + "logits/chosen": -2.8252792358398438, + "logits/rejected": -0.8235202431678772, + "logps/chosen": -350.786865234375, + "logps/rejected": -297.0063781738281, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.53615951538086, + "rewards/margins": 7.758090496063232, + "rewards/rejected": -17.29425048828125, + "step": 16209 + }, + { + "epoch": 2.52, + "learning_rate": 2.258263395404326e-06, + "logits/chosen": -2.5966880321502686, + "logits/rejected": -1.8135045766830444, + "logps/chosen": -312.38592529296875, + "logps/rejected": -380.33013916015625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.136659622192383, + "rewards/margins": 8.021093368530273, + "rewards/rejected": -16.157752990722656, + "step": 16210 + }, + { + "epoch": 2.52, + "learning_rate": 2.257529954873178e-06, + "logits/chosen": -2.530146360397339, + "logits/rejected": -2.2837326526641846, + "logps/chosen": -309.26544189453125, + "logps/rejected": -481.200439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.299395561218262, + "rewards/margins": 11.19424057006836, + "rewards/rejected": -20.493637084960938, + "step": 16211 + }, + { + "epoch": 2.52, + "learning_rate": 2.25679651434203e-06, + "logits/chosen": -2.1439032554626465, + "logits/rejected": -2.6685688495635986, + "logps/chosen": -418.54180908203125, + "logps/rejected": -492.28509521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.965890884399414, + "rewards/margins": 10.72365951538086, + "rewards/rejected": -19.689550399780273, + "step": 16212 + }, + { + "epoch": 2.52, + "learning_rate": 2.256063073810882e-06, + "logits/chosen": -2.7221574783325195, + "logits/rejected": -1.9491640329360962, + "logps/chosen": -386.87811279296875, + "logps/rejected": -344.7900695800781, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.75800895690918, + "rewards/margins": 11.818588256835938, + "rewards/rejected": -19.576595306396484, + "step": 16213 + }, + { + "epoch": 2.52, + "learning_rate": 2.2553296332797344e-06, + "logits/chosen": -2.7773118019104004, + "logits/rejected": -2.0589895248413086, + "logps/chosen": -666.1784057617188, + "logps/rejected": -633.890380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.876590728759766, + "rewards/margins": 10.614864349365234, + "rewards/rejected": -20.491455078125, + "step": 16214 + }, + { + "epoch": 2.52, + "learning_rate": 2.2545961927485867e-06, + "logits/chosen": -1.961391806602478, + "logits/rejected": -2.713686943054199, + "logps/chosen": -239.695068359375, + "logps/rejected": -415.40216064453125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.648713111877441, + "rewards/margins": 9.382759094238281, + "rewards/rejected": -19.031471252441406, + "step": 16215 + }, + { + "epoch": 2.52, + "learning_rate": 2.253862752217439e-06, + "logits/chosen": -2.4781155586242676, + "logits/rejected": -2.8886585235595703, + "logps/chosen": -122.928955078125, + "logps/rejected": -347.1868896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.463094711303711, + "rewards/margins": 13.945450782775879, + "rewards/rejected": -19.408546447753906, + "step": 16216 + }, + { + "epoch": 2.52, + "learning_rate": 2.253129311686291e-06, + "logits/chosen": -2.9698901176452637, + "logits/rejected": -3.010308027267456, + "logps/chosen": -99.26358032226562, + "logps/rejected": -253.07171630859375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9076738357543945, + "rewards/margins": 7.950179100036621, + "rewards/rejected": -14.857852935791016, + "step": 16217 + }, + { + "epoch": 2.52, + "learning_rate": 2.252395871155143e-06, + "logits/chosen": -1.313636064529419, + "logits/rejected": -2.607407569885254, + "logps/chosen": -146.86227416992188, + "logps/rejected": -509.80615234375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.000697135925293, + "rewards/margins": 12.644447326660156, + "rewards/rejected": -21.645145416259766, + "step": 16218 + }, + { + "epoch": 2.52, + "learning_rate": 2.251662430623995e-06, + "logits/chosen": -2.809175968170166, + "logits/rejected": -2.2493178844451904, + "logps/chosen": -705.15380859375, + "logps/rejected": -481.71087646484375, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.900581359863281, + "rewards/margins": 4.247498989105225, + "rewards/rejected": -18.148080825805664, + "step": 16219 + }, + { + "epoch": 2.52, + "learning_rate": 2.250928990092847e-06, + "logits/chosen": -2.5247254371643066, + "logits/rejected": -2.5558340549468994, + "logps/chosen": -410.66961669921875, + "logps/rejected": -431.88385009765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.50399398803711, + "rewards/margins": 9.578176498413086, + "rewards/rejected": -19.082168579101562, + "step": 16220 + }, + { + "epoch": 2.52, + "learning_rate": 2.2501955495616992e-06, + "logits/chosen": -2.8201520442962646, + "logits/rejected": -1.856552004814148, + "logps/chosen": -213.80450439453125, + "logps/rejected": -418.52880859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.792251110076904, + "rewards/margins": 9.854660987854004, + "rewards/rejected": -17.64691162109375, + "step": 16221 + }, + { + "epoch": 2.52, + "learning_rate": 2.249462109030551e-06, + "logits/chosen": -1.7968982458114624, + "logits/rejected": -2.802788734436035, + "logps/chosen": -151.08126831054688, + "logps/rejected": -556.86962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.70844841003418, + "rewards/margins": 15.391599655151367, + "rewards/rejected": -26.100048065185547, + "step": 16222 + }, + { + "epoch": 2.52, + "learning_rate": 2.2487286684994034e-06, + "logits/chosen": -1.819993257522583, + "logits/rejected": -2.6244657039642334, + "logps/chosen": -180.2916259765625, + "logps/rejected": -342.26397705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.387334823608398, + "rewards/margins": 8.959466934204102, + "rewards/rejected": -15.3468017578125, + "step": 16223 + }, + { + "epoch": 2.52, + "learning_rate": 2.2479952279682557e-06, + "logits/chosen": -2.838958740234375, + "logits/rejected": -2.8175368309020996, + "logps/chosen": -365.3398742675781, + "logps/rejected": -421.0772705078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.480796813964844, + "rewards/margins": 8.976995468139648, + "rewards/rejected": -20.457792282104492, + "step": 16224 + }, + { + "epoch": 2.52, + "learning_rate": 2.247261787437108e-06, + "logits/chosen": -2.2415828704833984, + "logits/rejected": -3.025179386138916, + "logps/chosen": -121.02806091308594, + "logps/rejected": -338.84814453125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.749948501586914, + "rewards/margins": 6.863171577453613, + "rewards/rejected": -14.613120079040527, + "step": 16225 + }, + { + "epoch": 2.52, + "learning_rate": 2.24652834690596e-06, + "logits/chosen": -2.4305765628814697, + "logits/rejected": -2.5021190643310547, + "logps/chosen": -211.3921661376953, + "logps/rejected": -304.9675598144531, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.184450149536133, + "rewards/margins": 8.798454284667969, + "rewards/rejected": -16.9829044342041, + "step": 16226 + }, + { + "epoch": 2.52, + "learning_rate": 2.245794906374812e-06, + "logits/chosen": -2.3349921703338623, + "logits/rejected": -2.093665361404419, + "logps/chosen": -350.4083251953125, + "logps/rejected": -364.15460205078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.721308708190918, + "rewards/margins": 10.650239944458008, + "rewards/rejected": -19.371549606323242, + "step": 16227 + }, + { + "epoch": 2.52, + "learning_rate": 2.245061465843664e-06, + "logits/chosen": -2.64886474609375, + "logits/rejected": -2.8249685764312744, + "logps/chosen": -133.57928466796875, + "logps/rejected": -314.616943359375, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.761348724365234, + "rewards/margins": 10.802928924560547, + "rewards/rejected": -19.56427764892578, + "step": 16228 + }, + { + "epoch": 2.52, + "learning_rate": 2.244328025312516e-06, + "logits/chosen": -2.448847770690918, + "logits/rejected": -2.7443840503692627, + "logps/chosen": -409.16668701171875, + "logps/rejected": -435.7269287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.773414611816406, + "rewards/margins": 11.572563171386719, + "rewards/rejected": -18.345977783203125, + "step": 16229 + }, + { + "epoch": 2.52, + "learning_rate": 2.2435945847813682e-06, + "logits/chosen": -2.4015235900878906, + "logits/rejected": -2.817012310028076, + "logps/chosen": -198.43331909179688, + "logps/rejected": -504.5028076171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.214059829711914, + "rewards/margins": 15.026996612548828, + "rewards/rejected": -22.241056442260742, + "step": 16230 + }, + { + "epoch": 2.52, + "learning_rate": 2.24286114425022e-06, + "logits/chosen": -1.6878913640975952, + "logits/rejected": -2.5232605934143066, + "logps/chosen": -161.28457641601562, + "logps/rejected": -312.3099670410156, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4921345710754395, + "rewards/margins": 7.622790336608887, + "rewards/rejected": -15.114925384521484, + "step": 16231 + }, + { + "epoch": 2.52, + "learning_rate": 2.242127703719073e-06, + "logits/chosen": -2.3945248126983643, + "logits/rejected": -2.5672671794891357, + "logps/chosen": -209.94259643554688, + "logps/rejected": -403.62225341796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.931831359863281, + "rewards/margins": 9.91331672668457, + "rewards/rejected": -15.845148086547852, + "step": 16232 + }, + { + "epoch": 2.52, + "learning_rate": 2.2413942631879247e-06, + "logits/chosen": -2.510424852371216, + "logits/rejected": -2.7156851291656494, + "logps/chosen": -212.48553466796875, + "logps/rejected": -409.9333801269531, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.952311515808105, + "rewards/margins": 8.121490478515625, + "rewards/rejected": -17.073801040649414, + "step": 16233 + }, + { + "epoch": 2.52, + "learning_rate": 2.240660822656777e-06, + "logits/chosen": -3.0049264430999756, + "logits/rejected": -2.7838807106018066, + "logps/chosen": -282.0834655761719, + "logps/rejected": -207.33843994140625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.192852973937988, + "rewards/margins": 5.87700080871582, + "rewards/rejected": -14.069853782653809, + "step": 16234 + }, + { + "epoch": 2.52, + "learning_rate": 2.239927382125629e-06, + "logits/chosen": -2.734361171722412, + "logits/rejected": -2.4991281032562256, + "logps/chosen": -1043.3125, + "logps/rejected": -758.284423828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.3443021774292, + "rewards/margins": 10.321362495422363, + "rewards/rejected": -21.665664672851562, + "step": 16235 + }, + { + "epoch": 2.53, + "learning_rate": 2.239193941594481e-06, + "logits/chosen": -1.1712961196899414, + "logits/rejected": -2.3975555896759033, + "logps/chosen": -142.47653198242188, + "logps/rejected": -383.3569641113281, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.690083503723145, + "rewards/margins": 7.332723140716553, + "rewards/rejected": -16.02280616760254, + "step": 16236 + }, + { + "epoch": 2.53, + "learning_rate": 2.238460501063333e-06, + "logits/chosen": -2.501185178756714, + "logits/rejected": -2.3554251194000244, + "logps/chosen": -316.64129638671875, + "logps/rejected": -432.170654296875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.951915740966797, + "rewards/margins": 9.031242370605469, + "rewards/rejected": -18.983158111572266, + "step": 16237 + }, + { + "epoch": 2.53, + "learning_rate": 2.2377270605321854e-06, + "logits/chosen": -1.6130260229110718, + "logits/rejected": -2.7286465167999268, + "logps/chosen": -271.6708679199219, + "logps/rejected": -534.6380004882812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.181360244750977, + "rewards/margins": 14.090705871582031, + "rewards/rejected": -22.27206802368164, + "step": 16238 + }, + { + "epoch": 2.53, + "learning_rate": 2.2369936200010373e-06, + "logits/chosen": -2.760044813156128, + "logits/rejected": -2.8365302085876465, + "logps/chosen": -560.7342529296875, + "logps/rejected": -634.5399780273438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.029834747314453, + "rewards/margins": 9.47549819946289, + "rewards/rejected": -18.505332946777344, + "step": 16239 + }, + { + "epoch": 2.53, + "learning_rate": 2.2362601794698896e-06, + "logits/chosen": -2.605982780456543, + "logits/rejected": -2.853668451309204, + "logps/chosen": -413.26617431640625, + "logps/rejected": -569.1600341796875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.674494743347168, + "rewards/margins": 9.647014617919922, + "rewards/rejected": -17.321510314941406, + "step": 16240 + }, + { + "epoch": 2.53, + "learning_rate": 2.235526738938742e-06, + "logits/chosen": -2.8320977687835693, + "logits/rejected": -3.016533613204956, + "logps/chosen": -725.561279296875, + "logps/rejected": -332.5399169921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.621004104614258, + "rewards/margins": 8.921417236328125, + "rewards/rejected": -15.542421340942383, + "step": 16241 + }, + { + "epoch": 2.53, + "learning_rate": 2.2347932984075938e-06, + "logits/chosen": -3.013526678085327, + "logits/rejected": -2.8190255165100098, + "logps/chosen": -164.73721313476562, + "logps/rejected": -223.08758544921875, + "loss": 0.26, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.555957794189453, + "rewards/margins": 5.231961250305176, + "rewards/rejected": -16.787919998168945, + "step": 16242 + }, + { + "epoch": 2.53, + "learning_rate": 2.234059857876446e-06, + "logits/chosen": -2.73356294631958, + "logits/rejected": -1.5844218730926514, + "logps/chosen": -425.28717041015625, + "logps/rejected": -452.55059814453125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.907586097717285, + "rewards/margins": 9.625988006591797, + "rewards/rejected": -17.533573150634766, + "step": 16243 + }, + { + "epoch": 2.53, + "learning_rate": 2.233326417345298e-06, + "logits/chosen": -1.173226237297058, + "logits/rejected": -2.5894978046417236, + "logps/chosen": -177.37948608398438, + "logps/rejected": -512.7786254882812, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.562028884887695, + "rewards/margins": 8.651333808898926, + "rewards/rejected": -18.213363647460938, + "step": 16244 + }, + { + "epoch": 2.53, + "learning_rate": 2.2325929768141502e-06, + "logits/chosen": -2.594226360321045, + "logits/rejected": -2.009948492050171, + "logps/chosen": -147.6446533203125, + "logps/rejected": -261.4822998046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.328064918518066, + "rewards/margins": 9.693979263305664, + "rewards/rejected": -14.022043228149414, + "step": 16245 + }, + { + "epoch": 2.53, + "learning_rate": 2.231859536283002e-06, + "logits/chosen": -2.6605286598205566, + "logits/rejected": -2.8902361392974854, + "logps/chosen": -124.3336410522461, + "logps/rejected": -412.9214172363281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.758148193359375, + "rewards/margins": 12.600150108337402, + "rewards/rejected": -22.358299255371094, + "step": 16246 + }, + { + "epoch": 2.53, + "learning_rate": 2.2311260957518544e-06, + "logits/chosen": -1.9348770380020142, + "logits/rejected": -2.7395639419555664, + "logps/chosen": -250.28759765625, + "logps/rejected": -403.82904052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.275018692016602, + "rewards/margins": 10.307685852050781, + "rewards/rejected": -20.582704544067383, + "step": 16247 + }, + { + "epoch": 2.53, + "learning_rate": 2.2303926552207063e-06, + "logits/chosen": -2.416051149368286, + "logits/rejected": -2.7676823139190674, + "logps/chosen": -646.636962890625, + "logps/rejected": -774.4461669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.742822647094727, + "rewards/margins": 11.988819122314453, + "rewards/rejected": -21.731639862060547, + "step": 16248 + }, + { + "epoch": 2.53, + "learning_rate": 2.2296592146895586e-06, + "logits/chosen": -1.4924100637435913, + "logits/rejected": -2.556873083114624, + "logps/chosen": -171.67628479003906, + "logps/rejected": -371.9618225097656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.892451286315918, + "rewards/margins": 7.594861030578613, + "rewards/rejected": -17.48731231689453, + "step": 16249 + }, + { + "epoch": 2.53, + "learning_rate": 2.228925774158411e-06, + "logits/chosen": -2.9189605712890625, + "logits/rejected": -2.231072187423706, + "logps/chosen": -309.72998046875, + "logps/rejected": -413.48858642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.178610801696777, + "rewards/margins": 14.063600540161133, + "rewards/rejected": -21.242210388183594, + "step": 16250 + }, + { + "epoch": 2.53, + "learning_rate": 2.2281923336272628e-06, + "logits/chosen": -2.4140138626098633, + "logits/rejected": -2.8368947505950928, + "logps/chosen": -288.33648681640625, + "logps/rejected": -447.6075134277344, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.640348434448242, + "rewards/margins": 5.0413384437561035, + "rewards/rejected": -13.681686401367188, + "step": 16251 + }, + { + "epoch": 2.53, + "learning_rate": 2.227458893096115e-06, + "logits/chosen": -2.8379435539245605, + "logits/rejected": -1.5275579690933228, + "logps/chosen": -462.5027160644531, + "logps/rejected": -176.5944061279297, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.29860258102417, + "rewards/margins": 4.3771586418151855, + "rewards/rejected": -10.675761222839355, + "step": 16252 + }, + { + "epoch": 2.53, + "learning_rate": 2.226725452564967e-06, + "logits/chosen": -2.753800392150879, + "logits/rejected": -2.923738718032837, + "logps/chosen": -156.5391082763672, + "logps/rejected": -369.8091125488281, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.760408401489258, + "rewards/margins": 7.953522205352783, + "rewards/rejected": -17.713932037353516, + "step": 16253 + }, + { + "epoch": 2.53, + "learning_rate": 2.2259920120338193e-06, + "logits/chosen": -2.5172019004821777, + "logits/rejected": -2.9172279834747314, + "logps/chosen": -117.810546875, + "logps/rejected": -281.59521484375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.979425430297852, + "rewards/margins": 9.530957221984863, + "rewards/rejected": -19.51038360595703, + "step": 16254 + }, + { + "epoch": 2.53, + "learning_rate": 2.225258571502671e-06, + "logits/chosen": -2.4131085872650146, + "logits/rejected": -2.4883041381835938, + "logps/chosen": -126.06781005859375, + "logps/rejected": -305.5166931152344, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.417742729187012, + "rewards/margins": 11.458660125732422, + "rewards/rejected": -19.87640380859375, + "step": 16255 + }, + { + "epoch": 2.53, + "learning_rate": 2.2245251309715234e-06, + "logits/chosen": -2.4033970832824707, + "logits/rejected": -2.586232900619507, + "logps/chosen": -236.01190185546875, + "logps/rejected": -559.498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.941998481750488, + "rewards/margins": 16.93659210205078, + "rewards/rejected": -24.878589630126953, + "step": 16256 + }, + { + "epoch": 2.53, + "learning_rate": 2.2237916904403757e-06, + "logits/chosen": -2.4137773513793945, + "logits/rejected": -2.877319574356079, + "logps/chosen": -461.65960693359375, + "logps/rejected": -456.15106201171875, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.428799629211426, + "rewards/margins": 5.581124305725098, + "rewards/rejected": -14.009923934936523, + "step": 16257 + }, + { + "epoch": 2.53, + "learning_rate": 2.2230582499092276e-06, + "logits/chosen": -1.1640307903289795, + "logits/rejected": -2.6492648124694824, + "logps/chosen": -185.6097869873047, + "logps/rejected": -535.079345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.860224723815918, + "rewards/margins": 15.251818656921387, + "rewards/rejected": -25.112043380737305, + "step": 16258 + }, + { + "epoch": 2.53, + "learning_rate": 2.22232480937808e-06, + "logits/chosen": -2.8223226070404053, + "logits/rejected": -2.8201043605804443, + "logps/chosen": -869.9417114257812, + "logps/rejected": -665.3038940429688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2514753341674805, + "rewards/margins": 10.857864379882812, + "rewards/rejected": -17.10934066772461, + "step": 16259 + }, + { + "epoch": 2.53, + "learning_rate": 2.221591368846932e-06, + "logits/chosen": -2.022488594055176, + "logits/rejected": -2.796253204345703, + "logps/chosen": -233.4511260986328, + "logps/rejected": -458.4883728027344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.08777904510498, + "rewards/margins": 14.487534523010254, + "rewards/rejected": -22.575313568115234, + "step": 16260 + }, + { + "epoch": 2.53, + "learning_rate": 2.220857928315784e-06, + "logits/chosen": -2.6901795864105225, + "logits/rejected": -3.073596715927124, + "logps/chosen": -157.49929809570312, + "logps/rejected": -332.5168762207031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.307918548583984, + "rewards/margins": 10.167621612548828, + "rewards/rejected": -19.475540161132812, + "step": 16261 + }, + { + "epoch": 2.53, + "learning_rate": 2.220124487784636e-06, + "logits/chosen": -1.8820780515670776, + "logits/rejected": -2.4252095222473145, + "logps/chosen": -216.40817260742188, + "logps/rejected": -393.177978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.022472381591797, + "rewards/margins": 10.710847854614258, + "rewards/rejected": -20.733320236206055, + "step": 16262 + }, + { + "epoch": 2.53, + "learning_rate": 2.2193910472534883e-06, + "logits/chosen": -2.454573154449463, + "logits/rejected": -2.624720335006714, + "logps/chosen": -149.60546875, + "logps/rejected": -278.2821044921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.6592378616333, + "rewards/margins": 10.857376098632812, + "rewards/rejected": -19.516613006591797, + "step": 16263 + }, + { + "epoch": 2.53, + "learning_rate": 2.21865760672234e-06, + "logits/chosen": -2.3559162616729736, + "logits/rejected": -2.895542860031128, + "logps/chosen": -340.4971008300781, + "logps/rejected": -408.1700439453125, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.570437431335449, + "rewards/margins": 6.970663070678711, + "rewards/rejected": -14.54110050201416, + "step": 16264 + }, + { + "epoch": 2.53, + "learning_rate": 2.2179241661911925e-06, + "logits/chosen": -1.9366768598556519, + "logits/rejected": -2.665553569793701, + "logps/chosen": -128.88011169433594, + "logps/rejected": -367.9627685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.781401634216309, + "rewards/margins": 10.952747344970703, + "rewards/rejected": -16.734148025512695, + "step": 16265 + }, + { + "epoch": 2.53, + "learning_rate": 2.2171907256600448e-06, + "logits/chosen": -2.5429599285125732, + "logits/rejected": -2.975771188735962, + "logps/chosen": -78.27681732177734, + "logps/rejected": -226.0814208984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.144712448120117, + "rewards/margins": 10.02304458618164, + "rewards/rejected": -14.167757034301758, + "step": 16266 + }, + { + "epoch": 2.53, + "learning_rate": 2.216457285128897e-06, + "logits/chosen": -2.8933660984039307, + "logits/rejected": -2.4968373775482178, + "logps/chosen": -195.19253540039062, + "logps/rejected": -159.30999755859375, + "loss": 0.6385, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.636861801147461, + "rewards/margins": 2.613497734069824, + "rewards/rejected": -14.250358581542969, + "step": 16267 + }, + { + "epoch": 2.53, + "learning_rate": 2.215723844597749e-06, + "logits/chosen": -2.869711399078369, + "logits/rejected": -2.8597569465637207, + "logps/chosen": -106.54258728027344, + "logps/rejected": -508.2730712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.030844688415527, + "rewards/margins": 11.115479469299316, + "rewards/rejected": -20.146324157714844, + "step": 16268 + }, + { + "epoch": 2.53, + "learning_rate": 2.214990404066601e-06, + "logits/chosen": -2.9287631511688232, + "logits/rejected": -2.8613038063049316, + "logps/chosen": -572.5699462890625, + "logps/rejected": -621.300048828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.839347839355469, + "rewards/margins": 9.008827209472656, + "rewards/rejected": -17.848175048828125, + "step": 16269 + }, + { + "epoch": 2.53, + "learning_rate": 2.214256963535453e-06, + "logits/chosen": -1.899506688117981, + "logits/rejected": -2.3201546669006348, + "logps/chosen": -241.55661010742188, + "logps/rejected": -423.4560546875, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.48530101776123, + "rewards/margins": 6.878758907318115, + "rewards/rejected": -16.364059448242188, + "step": 16270 + }, + { + "epoch": 2.53, + "learning_rate": 2.213523523004305e-06, + "logits/chosen": -0.9885565042495728, + "logits/rejected": -1.89666748046875, + "logps/chosen": -364.35882568359375, + "logps/rejected": -744.838134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.30911922454834, + "rewards/margins": 14.289040565490723, + "rewards/rejected": -28.598159790039062, + "step": 16271 + }, + { + "epoch": 2.53, + "learning_rate": 2.2127900824731573e-06, + "logits/chosen": -2.7343854904174805, + "logits/rejected": -2.8384690284729004, + "logps/chosen": -323.9292297363281, + "logps/rejected": -446.4823303222656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.6478910446167, + "rewards/margins": 19.152799606323242, + "rewards/rejected": -27.800689697265625, + "step": 16272 + }, + { + "epoch": 2.53, + "learning_rate": 2.212056641942009e-06, + "logits/chosen": -2.554323673248291, + "logits/rejected": -2.888519048690796, + "logps/chosen": -212.08433532714844, + "logps/rejected": -447.98992919921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.653650283813477, + "rewards/margins": 10.969953536987305, + "rewards/rejected": -23.62360382080078, + "step": 16273 + }, + { + "epoch": 2.53, + "learning_rate": 2.211323201410862e-06, + "logits/chosen": -1.2904032468795776, + "logits/rejected": -2.4010865688323975, + "logps/chosen": -256.1502990722656, + "logps/rejected": -638.71728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.274681091308594, + "rewards/margins": 14.819992065429688, + "rewards/rejected": -26.09467315673828, + "step": 16274 + }, + { + "epoch": 2.53, + "learning_rate": 2.2105897608797138e-06, + "logits/chosen": -2.411945343017578, + "logits/rejected": -2.3523237705230713, + "logps/chosen": -212.4586181640625, + "logps/rejected": -234.52574157714844, + "loss": 0.0733, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.140630722045898, + "rewards/margins": 5.957508563995361, + "rewards/rejected": -16.0981388092041, + "step": 16275 + }, + { + "epoch": 2.53, + "learning_rate": 2.209856320348566e-06, + "logits/chosen": -2.941348075866699, + "logits/rejected": -1.2083277702331543, + "logps/chosen": -338.890625, + "logps/rejected": -311.49237060546875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.182392120361328, + "rewards/margins": 8.034926414489746, + "rewards/rejected": -16.217317581176758, + "step": 16276 + }, + { + "epoch": 2.53, + "learning_rate": 2.209122879817418e-06, + "logits/chosen": -2.5772552490234375, + "logits/rejected": -1.3235782384872437, + "logps/chosen": -429.32586669921875, + "logps/rejected": -403.2016296386719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.237605094909668, + "rewards/margins": 12.706052780151367, + "rewards/rejected": -23.94365692138672, + "step": 16277 + }, + { + "epoch": 2.53, + "learning_rate": 2.20838943928627e-06, + "logits/chosen": -3.03775691986084, + "logits/rejected": -2.547172784805298, + "logps/chosen": -264.6317138671875, + "logps/rejected": -267.54583740234375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7921249866485596, + "rewards/margins": 6.9696831703186035, + "rewards/rejected": -10.761808395385742, + "step": 16278 + }, + { + "epoch": 2.53, + "learning_rate": 2.207655998755122e-06, + "logits/chosen": -2.9382450580596924, + "logits/rejected": -2.250563383102417, + "logps/chosen": -186.0475616455078, + "logps/rejected": -247.353515625, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.271956443786621, + "rewards/margins": 8.573488235473633, + "rewards/rejected": -16.845443725585938, + "step": 16279 + }, + { + "epoch": 2.53, + "learning_rate": 2.206922558223974e-06, + "logits/chosen": -1.5306915044784546, + "logits/rejected": -2.8540284633636475, + "logps/chosen": -115.0057373046875, + "logps/rejected": -459.3980407714844, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.043378829956055, + "rewards/margins": 7.857178211212158, + "rewards/rejected": -17.900556564331055, + "step": 16280 + }, + { + "epoch": 2.53, + "learning_rate": 2.2061891176928263e-06, + "logits/chosen": -2.5156545639038086, + "logits/rejected": -2.599236011505127, + "logps/chosen": -298.984375, + "logps/rejected": -409.89404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.142658233642578, + "rewards/margins": 12.108846664428711, + "rewards/rejected": -23.251502990722656, + "step": 16281 + }, + { + "epoch": 2.53, + "learning_rate": 2.2054556771616786e-06, + "logits/chosen": -3.0438172817230225, + "logits/rejected": -2.6425342559814453, + "logps/chosen": -295.6710205078125, + "logps/rejected": -415.2477722167969, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.278327941894531, + "rewards/margins": 10.74726390838623, + "rewards/rejected": -16.025592803955078, + "step": 16282 + }, + { + "epoch": 2.53, + "learning_rate": 2.204722236630531e-06, + "logits/chosen": -2.645878314971924, + "logits/rejected": -3.0070347785949707, + "logps/chosen": -151.64093017578125, + "logps/rejected": -355.638671875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.490501403808594, + "rewards/margins": 7.182147979736328, + "rewards/rejected": -16.672649383544922, + "step": 16283 + }, + { + "epoch": 2.53, + "learning_rate": 2.203988796099383e-06, + "logits/chosen": -2.7259159088134766, + "logits/rejected": -1.7264325618743896, + "logps/chosen": -301.55902099609375, + "logps/rejected": -369.26190185546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.770644187927246, + "rewards/margins": 9.55933952331543, + "rewards/rejected": -18.32998275756836, + "step": 16284 + }, + { + "epoch": 2.53, + "learning_rate": 2.203255355568235e-06, + "logits/chosen": -2.8642566204071045, + "logits/rejected": -2.976039409637451, + "logps/chosen": -84.46241760253906, + "logps/rejected": -235.06976318359375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.413090705871582, + "rewards/margins": 9.045750617980957, + "rewards/rejected": -14.458841323852539, + "step": 16285 + }, + { + "epoch": 2.53, + "learning_rate": 2.202521915037087e-06, + "logits/chosen": -1.9459397792816162, + "logits/rejected": -2.7278177738189697, + "logps/chosen": -189.84527587890625, + "logps/rejected": -231.05194091796875, + "loss": 0.0379, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.779457092285156, + "rewards/margins": 3.942920207977295, + "rewards/rejected": -12.72237777709961, + "step": 16286 + }, + { + "epoch": 2.53, + "learning_rate": 2.2017884745059393e-06, + "logits/chosen": -2.805633783340454, + "logits/rejected": -2.6645820140838623, + "logps/chosen": -391.6238098144531, + "logps/rejected": -468.13629150390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3455023765563965, + "rewards/margins": 9.456827163696289, + "rewards/rejected": -14.802330017089844, + "step": 16287 + }, + { + "epoch": 2.53, + "learning_rate": 2.201055033974791e-06, + "logits/chosen": -2.689300537109375, + "logits/rejected": -2.6829233169555664, + "logps/chosen": -205.82046508789062, + "logps/rejected": -266.73028564453125, + "loss": 0.9977, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.122389316558838, + "rewards/margins": 4.407164573669434, + "rewards/rejected": -11.52955436706543, + "step": 16288 + }, + { + "epoch": 2.53, + "learning_rate": 2.200321593443643e-06, + "logits/chosen": -2.835632085800171, + "logits/rejected": -2.5595452785491943, + "logps/chosen": -261.40887451171875, + "logps/rejected": -387.0218200683594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.13888168334961, + "rewards/margins": 10.035372734069824, + "rewards/rejected": -18.17425537109375, + "step": 16289 + }, + { + "epoch": 2.53, + "learning_rate": 2.1995881529124953e-06, + "logits/chosen": -2.694394111633301, + "logits/rejected": -2.100360870361328, + "logps/chosen": -260.649169921875, + "logps/rejected": -354.92474365234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.295794486999512, + "rewards/margins": 9.717802047729492, + "rewards/rejected": -15.013596534729004, + "step": 16290 + }, + { + "epoch": 2.53, + "learning_rate": 2.1988547123813476e-06, + "logits/chosen": -2.913043737411499, + "logits/rejected": -2.836585760116577, + "logps/chosen": -400.72222900390625, + "logps/rejected": -501.74041748046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.507038116455078, + "rewards/margins": 8.938251495361328, + "rewards/rejected": -19.445289611816406, + "step": 16291 + }, + { + "epoch": 2.53, + "learning_rate": 2.1981212718502e-06, + "logits/chosen": -2.8279733657836914, + "logits/rejected": -2.5722529888153076, + "logps/chosen": -406.02252197265625, + "logps/rejected": -519.699462890625, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.968536376953125, + "rewards/margins": 9.26781940460205, + "rewards/rejected": -16.23635482788086, + "step": 16292 + }, + { + "epoch": 2.53, + "learning_rate": 2.197387831319052e-06, + "logits/chosen": -0.9245878458023071, + "logits/rejected": -2.677910804748535, + "logps/chosen": -221.51431274414062, + "logps/rejected": -634.805908203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.703121185302734, + "rewards/margins": 10.223943710327148, + "rewards/rejected": -20.927064895629883, + "step": 16293 + }, + { + "epoch": 2.53, + "learning_rate": 2.196654390787904e-06, + "logits/chosen": -2.82861590385437, + "logits/rejected": -2.354928731918335, + "logps/chosen": -209.4940185546875, + "logps/rejected": -222.6427001953125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.894704818725586, + "rewards/margins": 7.138640880584717, + "rewards/rejected": -17.03334617614746, + "step": 16294 + }, + { + "epoch": 2.53, + "learning_rate": 2.195920950256756e-06, + "logits/chosen": -1.3902053833007812, + "logits/rejected": -2.286768913269043, + "logps/chosen": -193.8076171875, + "logps/rejected": -503.81561279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.200554370880127, + "rewards/margins": 12.288324356079102, + "rewards/rejected": -19.488880157470703, + "step": 16295 + }, + { + "epoch": 2.53, + "learning_rate": 2.1951875097256083e-06, + "logits/chosen": -2.667941093444824, + "logits/rejected": -2.8479394912719727, + "logps/chosen": -146.82473754882812, + "logps/rejected": -374.61846923828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.231988906860352, + "rewards/margins": 12.006247520446777, + "rewards/rejected": -22.238235473632812, + "step": 16296 + }, + { + "epoch": 2.53, + "learning_rate": 2.19445406919446e-06, + "logits/chosen": -2.411712408065796, + "logits/rejected": -2.034996509552002, + "logps/chosen": -193.026123046875, + "logps/rejected": -407.38372802734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.221827507019043, + "rewards/margins": 14.488154411315918, + "rewards/rejected": -24.70998191833496, + "step": 16297 + }, + { + "epoch": 2.53, + "learning_rate": 2.193720628663312e-06, + "logits/chosen": -2.3429315090179443, + "logits/rejected": -2.7214386463165283, + "logps/chosen": -444.52618408203125, + "logps/rejected": -573.4166870117188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.121012687683105, + "rewards/margins": 8.669234275817871, + "rewards/rejected": -19.790246963500977, + "step": 16298 + }, + { + "epoch": 2.53, + "learning_rate": 2.192987188132165e-06, + "logits/chosen": -2.2163422107696533, + "logits/rejected": -2.563295364379883, + "logps/chosen": -446.1612243652344, + "logps/rejected": -445.50103759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.037881851196289, + "rewards/margins": 13.08896255493164, + "rewards/rejected": -22.126846313476562, + "step": 16299 + }, + { + "epoch": 2.53, + "learning_rate": 2.1922537476010167e-06, + "logits/chosen": -2.646449089050293, + "logits/rejected": -2.5026979446411133, + "logps/chosen": -289.0243225097656, + "logps/rejected": -254.91119384765625, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.356576919555664, + "rewards/margins": 3.754270553588867, + "rewards/rejected": -15.110847473144531, + "step": 16300 + }, + { + "epoch": 2.54, + "learning_rate": 2.191520307069869e-06, + "logits/chosen": -2.6178200244903564, + "logits/rejected": -2.866698741912842, + "logps/chosen": -602.4066772460938, + "logps/rejected": -582.52880859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.437882423400879, + "rewards/margins": 8.953307151794434, + "rewards/rejected": -18.391189575195312, + "step": 16301 + }, + { + "epoch": 2.54, + "learning_rate": 2.190786866538721e-06, + "logits/chosen": -2.7582790851593018, + "logits/rejected": -2.243511438369751, + "logps/chosen": -259.6801452636719, + "logps/rejected": -271.7669982910156, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.495425701141357, + "rewards/margins": 5.436152935028076, + "rewards/rejected": -12.931578636169434, + "step": 16302 + }, + { + "epoch": 2.54, + "learning_rate": 2.190053426007573e-06, + "logits/chosen": -2.5086519718170166, + "logits/rejected": -1.6125547885894775, + "logps/chosen": -350.22216796875, + "logps/rejected": -294.6074523925781, + "loss": 1.1789, + "rewards/accuracies": 0.5, + "rewards/chosen": -6.775549411773682, + "rewards/margins": 3.546529769897461, + "rewards/rejected": -10.3220796585083, + "step": 16303 + }, + { + "epoch": 2.54, + "learning_rate": 2.189319985476425e-06, + "logits/chosen": -2.5002095699310303, + "logits/rejected": -2.8120079040527344, + "logps/chosen": -160.25485229492188, + "logps/rejected": -337.60546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.07806921005249, + "rewards/margins": 10.257585525512695, + "rewards/rejected": -16.335655212402344, + "step": 16304 + }, + { + "epoch": 2.54, + "learning_rate": 2.1885865449452773e-06, + "logits/chosen": -2.33229923248291, + "logits/rejected": -2.5713436603546143, + "logps/chosen": -163.25299072265625, + "logps/rejected": -361.572509765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.630715370178223, + "rewards/margins": 8.361618041992188, + "rewards/rejected": -19.992332458496094, + "step": 16305 + }, + { + "epoch": 2.54, + "learning_rate": 2.187853104414129e-06, + "logits/chosen": -2.111420154571533, + "logits/rejected": -2.860605239868164, + "logps/chosen": -178.40296936035156, + "logps/rejected": -443.2225036621094, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.74061393737793, + "rewards/margins": 9.011357307434082, + "rewards/rejected": -18.751972198486328, + "step": 16306 + }, + { + "epoch": 2.54, + "learning_rate": 2.1871196638829815e-06, + "logits/chosen": -1.8755214214324951, + "logits/rejected": -2.7287232875823975, + "logps/chosen": -236.24855041503906, + "logps/rejected": -397.5701904296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.174552917480469, + "rewards/margins": 9.12548542022705, + "rewards/rejected": -18.300037384033203, + "step": 16307 + }, + { + "epoch": 2.54, + "learning_rate": 2.186386223351834e-06, + "logits/chosen": -2.2966468334198, + "logits/rejected": -2.4065258502960205, + "logps/chosen": -265.58624267578125, + "logps/rejected": -405.12921142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.190798282623291, + "rewards/margins": 12.017313957214355, + "rewards/rejected": -18.208112716674805, + "step": 16308 + }, + { + "epoch": 2.54, + "learning_rate": 2.1856527828206857e-06, + "logits/chosen": -2.0174999237060547, + "logits/rejected": -1.656244158744812, + "logps/chosen": -942.2752075195312, + "logps/rejected": -724.4525756835938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.053024291992188, + "rewards/margins": 14.663793563842773, + "rewards/rejected": -23.71681785583496, + "step": 16309 + }, + { + "epoch": 2.54, + "learning_rate": 2.184919342289538e-06, + "logits/chosen": -2.879300117492676, + "logits/rejected": -2.692744731903076, + "logps/chosen": -263.78076171875, + "logps/rejected": -288.28375244140625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.916184425354004, + "rewards/margins": 8.031521797180176, + "rewards/rejected": -12.94770622253418, + "step": 16310 + }, + { + "epoch": 2.54, + "learning_rate": 2.18418590175839e-06, + "logits/chosen": -2.908980131149292, + "logits/rejected": -2.4565815925598145, + "logps/chosen": -317.67156982421875, + "logps/rejected": -341.06256103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.506210327148438, + "rewards/margins": 11.407243728637695, + "rewards/rejected": -19.913455963134766, + "step": 16311 + }, + { + "epoch": 2.54, + "learning_rate": 2.183452461227242e-06, + "logits/chosen": -2.6930601596832275, + "logits/rejected": -2.9241750240325928, + "logps/chosen": -303.6175537109375, + "logps/rejected": -441.6619873046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.767459392547607, + "rewards/margins": 9.578008651733398, + "rewards/rejected": -14.345467567443848, + "step": 16312 + }, + { + "epoch": 2.54, + "learning_rate": 2.182719020696094e-06, + "logits/chosen": -2.6377549171447754, + "logits/rejected": -1.9142353534698486, + "logps/chosen": -317.8437194824219, + "logps/rejected": -347.56903076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.458719253540039, + "rewards/margins": 13.065183639526367, + "rewards/rejected": -15.523902893066406, + "step": 16313 + }, + { + "epoch": 2.54, + "learning_rate": 2.1819855801649464e-06, + "logits/chosen": -1.5473352670669556, + "logits/rejected": -2.474625825881958, + "logps/chosen": -214.11651611328125, + "logps/rejected": -492.9788818359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.54990005493164, + "rewards/margins": 12.251836776733398, + "rewards/rejected": -22.80173683166504, + "step": 16314 + }, + { + "epoch": 2.54, + "learning_rate": 2.1812521396337982e-06, + "logits/chosen": -2.1860573291778564, + "logits/rejected": -2.7317593097686768, + "logps/chosen": -245.97381591796875, + "logps/rejected": -384.55718994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.080284118652344, + "rewards/margins": 11.043461799621582, + "rewards/rejected": -16.123746871948242, + "step": 16315 + }, + { + "epoch": 2.54, + "learning_rate": 2.180518699102651e-06, + "logits/chosen": -2.4574263095855713, + "logits/rejected": -2.8561794757843018, + "logps/chosen": -187.647705078125, + "logps/rejected": -404.8077392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.456547260284424, + "rewards/margins": 13.742149353027344, + "rewards/rejected": -20.19869613647461, + "step": 16316 + }, + { + "epoch": 2.54, + "learning_rate": 2.179785258571503e-06, + "logits/chosen": -3.0462911128997803, + "logits/rejected": -3.192204713821411, + "logps/chosen": -137.938232421875, + "logps/rejected": -197.67787170410156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.658567428588867, + "rewards/margins": 9.027008056640625, + "rewards/rejected": -16.685575485229492, + "step": 16317 + }, + { + "epoch": 2.54, + "learning_rate": 2.1790518180403547e-06, + "logits/chosen": -1.2281792163848877, + "logits/rejected": -2.0355708599090576, + "logps/chosen": -234.72177124023438, + "logps/rejected": -532.3001098632812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.106245994567871, + "rewards/margins": 12.731694221496582, + "rewards/rejected": -21.837940216064453, + "step": 16318 + }, + { + "epoch": 2.54, + "learning_rate": 2.178318377509207e-06, + "logits/chosen": -2.661344051361084, + "logits/rejected": -2.9442825317382812, + "logps/chosen": -135.04588317871094, + "logps/rejected": -331.64154052734375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.63996696472168, + "rewards/margins": 8.670958518981934, + "rewards/rejected": -16.310924530029297, + "step": 16319 + }, + { + "epoch": 2.54, + "learning_rate": 2.177584936978059e-06, + "logits/chosen": -2.583623170852661, + "logits/rejected": -1.7507010698318481, + "logps/chosen": -169.50039672851562, + "logps/rejected": -277.1038818359375, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.497200965881348, + "rewards/margins": 5.508206367492676, + "rewards/rejected": -15.005407333374023, + "step": 16320 + }, + { + "epoch": 2.54, + "learning_rate": 2.176851496446911e-06, + "logits/chosen": -2.6635403633117676, + "logits/rejected": -2.000842571258545, + "logps/chosen": -318.4022216796875, + "logps/rejected": -465.906982421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.159814834594727, + "rewards/margins": 11.039745330810547, + "rewards/rejected": -18.199562072753906, + "step": 16321 + }, + { + "epoch": 2.54, + "learning_rate": 2.176118055915763e-06, + "logits/chosen": -2.6362383365631104, + "logits/rejected": -0.5665785074234009, + "logps/chosen": -416.3878173828125, + "logps/rejected": -263.66339111328125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.98476505279541, + "rewards/margins": 7.783207416534424, + "rewards/rejected": -17.767972946166992, + "step": 16322 + }, + { + "epoch": 2.54, + "learning_rate": 2.1753846153846154e-06, + "logits/chosen": -2.0373318195343018, + "logits/rejected": -2.7621281147003174, + "logps/chosen": -112.91598510742188, + "logps/rejected": -287.39013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.67291259765625, + "rewards/margins": 9.92631721496582, + "rewards/rejected": -17.59922981262207, + "step": 16323 + }, + { + "epoch": 2.54, + "learning_rate": 2.1746511748534677e-06, + "logits/chosen": -2.3231852054595947, + "logits/rejected": -2.819854259490967, + "logps/chosen": -266.10748291015625, + "logps/rejected": -333.5208740234375, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.46249008178711, + "rewards/margins": 6.76832389831543, + "rewards/rejected": -17.23081398010254, + "step": 16324 + }, + { + "epoch": 2.54, + "learning_rate": 2.17391773432232e-06, + "logits/chosen": -2.9862539768218994, + "logits/rejected": -1.9997766017913818, + "logps/chosen": -273.19866943359375, + "logps/rejected": -282.3448486328125, + "loss": 0.5082, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.338748931884766, + "rewards/margins": 4.5172858238220215, + "rewards/rejected": -12.856035232543945, + "step": 16325 + }, + { + "epoch": 2.54, + "learning_rate": 2.173184293791172e-06, + "logits/chosen": -2.726722240447998, + "logits/rejected": -1.2730168104171753, + "logps/chosen": -483.2982177734375, + "logps/rejected": -303.46990966796875, + "loss": 0.2719, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.486284255981445, + "rewards/margins": 5.5737504959106445, + "rewards/rejected": -15.06003475189209, + "step": 16326 + }, + { + "epoch": 2.54, + "learning_rate": 2.1724508532600237e-06, + "logits/chosen": -2.0806338787078857, + "logits/rejected": -2.861630916595459, + "logps/chosen": -439.0303649902344, + "logps/rejected": -884.7654418945312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.638193130493164, + "rewards/margins": 14.206926345825195, + "rewards/rejected": -23.84511947631836, + "step": 16327 + }, + { + "epoch": 2.54, + "learning_rate": 2.171717412728876e-06, + "logits/chosen": -1.1818621158599854, + "logits/rejected": -2.7935497760772705, + "logps/chosen": -121.12728118896484, + "logps/rejected": -410.29296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.618888854980469, + "rewards/margins": 12.930061340332031, + "rewards/rejected": -19.5489501953125, + "step": 16328 + }, + { + "epoch": 2.54, + "learning_rate": 2.170983972197728e-06, + "logits/chosen": -2.6889865398406982, + "logits/rejected": -2.5526061058044434, + "logps/chosen": -878.5255126953125, + "logps/rejected": -559.08056640625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.135190010070801, + "rewards/margins": 9.624298095703125, + "rewards/rejected": -16.75948715209961, + "step": 16329 + }, + { + "epoch": 2.54, + "learning_rate": 2.1702505316665802e-06, + "logits/chosen": -1.540420651435852, + "logits/rejected": -2.618368625640869, + "logps/chosen": -320.6910400390625, + "logps/rejected": -418.6628112792969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.326008796691895, + "rewards/margins": 8.971680641174316, + "rewards/rejected": -20.29768943786621, + "step": 16330 + }, + { + "epoch": 2.54, + "learning_rate": 2.169517091135432e-06, + "logits/chosen": -2.115459680557251, + "logits/rejected": -3.0314955711364746, + "logps/chosen": -201.62225341796875, + "logps/rejected": -499.1746826171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.18496036529541, + "rewards/margins": 9.643672943115234, + "rewards/rejected": -18.82863426208496, + "step": 16331 + }, + { + "epoch": 2.54, + "learning_rate": 2.1687836506042844e-06, + "logits/chosen": -1.7001551389694214, + "logits/rejected": -2.662383556365967, + "logps/chosen": -161.37313842773438, + "logps/rejected": -474.4596862792969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.176363945007324, + "rewards/margins": 11.365945816040039, + "rewards/rejected": -19.54231071472168, + "step": 16332 + }, + { + "epoch": 2.54, + "learning_rate": 2.1680502100731367e-06, + "logits/chosen": -1.7696934938430786, + "logits/rejected": -2.2169294357299805, + "logps/chosen": -260.3992919921875, + "logps/rejected": -431.97186279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.067597389221191, + "rewards/margins": 10.055994987487793, + "rewards/rejected": -21.123592376708984, + "step": 16333 + }, + { + "epoch": 2.54, + "learning_rate": 2.167316769541989e-06, + "logits/chosen": -2.6205012798309326, + "logits/rejected": -2.699810743331909, + "logps/chosen": -265.0649108886719, + "logps/rejected": -367.785888671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.263384819030762, + "rewards/margins": 8.704488754272461, + "rewards/rejected": -16.967872619628906, + "step": 16334 + }, + { + "epoch": 2.54, + "learning_rate": 2.166583329010841e-06, + "logits/chosen": -2.4957029819488525, + "logits/rejected": -2.1409912109375, + "logps/chosen": -258.8472900390625, + "logps/rejected": -439.6050720214844, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.152835845947266, + "rewards/margins": 9.250467300415039, + "rewards/rejected": -19.403303146362305, + "step": 16335 + }, + { + "epoch": 2.54, + "learning_rate": 2.165849888479693e-06, + "logits/chosen": -2.6022391319274902, + "logits/rejected": -2.5737643241882324, + "logps/chosen": -407.9123229980469, + "logps/rejected": -418.49078369140625, + "loss": 0.4126, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.525038719177246, + "rewards/margins": 6.987016201019287, + "rewards/rejected": -19.512054443359375, + "step": 16336 + }, + { + "epoch": 2.54, + "learning_rate": 2.165116447948545e-06, + "logits/chosen": -1.3186007738113403, + "logits/rejected": -2.447985887527466, + "logps/chosen": -315.7115783691406, + "logps/rejected": -495.68170166015625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.52454948425293, + "rewards/margins": 7.463644504547119, + "rewards/rejected": -15.98819351196289, + "step": 16337 + }, + { + "epoch": 2.54, + "learning_rate": 2.164383007417397e-06, + "logits/chosen": -1.8812767267227173, + "logits/rejected": -2.9080605506896973, + "logps/chosen": -199.15757751464844, + "logps/rejected": -412.57183837890625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.490117073059082, + "rewards/margins": 7.187524795532227, + "rewards/rejected": -20.677642822265625, + "step": 16338 + }, + { + "epoch": 2.54, + "learning_rate": 2.1636495668862492e-06, + "logits/chosen": -2.5115370750427246, + "logits/rejected": -2.860499858856201, + "logps/chosen": -498.083740234375, + "logps/rejected": -529.8585205078125, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.791976928710938, + "rewards/margins": 8.200584411621094, + "rewards/rejected": -18.99256134033203, + "step": 16339 + }, + { + "epoch": 2.54, + "learning_rate": 2.162916126355101e-06, + "logits/chosen": -1.4617542028427124, + "logits/rejected": -2.6817259788513184, + "logps/chosen": -214.93338012695312, + "logps/rejected": -510.96978759765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.806015014648438, + "rewards/margins": 12.092527389526367, + "rewards/rejected": -20.898542404174805, + "step": 16340 + }, + { + "epoch": 2.54, + "learning_rate": 2.162182685823954e-06, + "logits/chosen": -1.2451220750808716, + "logits/rejected": -2.5545637607574463, + "logps/chosen": -291.2510986328125, + "logps/rejected": -585.1248779296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.256658554077148, + "rewards/margins": 14.538928985595703, + "rewards/rejected": -22.79558753967285, + "step": 16341 + }, + { + "epoch": 2.54, + "learning_rate": 2.1614492452928057e-06, + "logits/chosen": -2.7601332664489746, + "logits/rejected": -2.5229971408843994, + "logps/chosen": -730.1696166992188, + "logps/rejected": -586.319580078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.627593994140625, + "rewards/margins": 9.156739234924316, + "rewards/rejected": -16.784332275390625, + "step": 16342 + }, + { + "epoch": 2.54, + "learning_rate": 2.160715804761658e-06, + "logits/chosen": -2.550785779953003, + "logits/rejected": -2.127189874649048, + "logps/chosen": -261.60986328125, + "logps/rejected": -280.5439147949219, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.757786750793457, + "rewards/margins": 7.062417030334473, + "rewards/rejected": -13.82020378112793, + "step": 16343 + }, + { + "epoch": 2.54, + "learning_rate": 2.15998236423051e-06, + "logits/chosen": -2.680227518081665, + "logits/rejected": -2.7543487548828125, + "logps/chosen": -177.49322509765625, + "logps/rejected": -318.46075439453125, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.013697624206543, + "rewards/margins": 6.5694260597229, + "rewards/rejected": -15.583124160766602, + "step": 16344 + }, + { + "epoch": 2.54, + "learning_rate": 2.159248923699362e-06, + "logits/chosen": -2.737133264541626, + "logits/rejected": -2.306060314178467, + "logps/chosen": -428.77532958984375, + "logps/rejected": -438.5005187988281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.938154220581055, + "rewards/margins": 9.282331466674805, + "rewards/rejected": -17.22048568725586, + "step": 16345 + }, + { + "epoch": 2.54, + "learning_rate": 2.158515483168214e-06, + "logits/chosen": -2.447680711746216, + "logits/rejected": -2.7928314208984375, + "logps/chosen": -225.67160034179688, + "logps/rejected": -369.6460266113281, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.030987739562988, + "rewards/margins": 10.598003387451172, + "rewards/rejected": -18.628990173339844, + "step": 16346 + }, + { + "epoch": 2.54, + "learning_rate": 2.157782042637066e-06, + "logits/chosen": -1.495787262916565, + "logits/rejected": -2.446755886077881, + "logps/chosen": -293.5055847167969, + "logps/rejected": -467.6571350097656, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.885210037231445, + "rewards/margins": 10.315248489379883, + "rewards/rejected": -20.200458526611328, + "step": 16347 + }, + { + "epoch": 2.54, + "learning_rate": 2.1570486021059183e-06, + "logits/chosen": -2.6269612312316895, + "logits/rejected": -1.8781187534332275, + "logps/chosen": -486.8863525390625, + "logps/rejected": -389.1086120605469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.091821670532227, + "rewards/margins": 11.22487735748291, + "rewards/rejected": -23.316699981689453, + "step": 16348 + }, + { + "epoch": 2.54, + "learning_rate": 2.1563151615747706e-06, + "logits/chosen": -1.4774490594863892, + "logits/rejected": -2.386425018310547, + "logps/chosen": -136.20140075683594, + "logps/rejected": -439.0116882324219, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.295272827148438, + "rewards/margins": 9.090810775756836, + "rewards/rejected": -18.386083602905273, + "step": 16349 + }, + { + "epoch": 2.54, + "learning_rate": 2.155581721043623e-06, + "logits/chosen": -1.6666932106018066, + "logits/rejected": -2.5931475162506104, + "logps/chosen": -297.3429260253906, + "logps/rejected": -502.51824951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.911344528198242, + "rewards/margins": 14.868762969970703, + "rewards/rejected": -24.780107498168945, + "step": 16350 + }, + { + "epoch": 2.54, + "learning_rate": 2.1548482805124747e-06, + "logits/chosen": -2.5586276054382324, + "logits/rejected": -1.9981340169906616, + "logps/chosen": -205.22918701171875, + "logps/rejected": -260.20916748046875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.118234634399414, + "rewards/margins": 7.3290910720825195, + "rewards/rejected": -14.447325706481934, + "step": 16351 + }, + { + "epoch": 2.54, + "learning_rate": 2.154114839981327e-06, + "logits/chosen": -2.6607818603515625, + "logits/rejected": -2.473464250564575, + "logps/chosen": -223.3566131591797, + "logps/rejected": -347.5153503417969, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.918415069580078, + "rewards/margins": 4.304394245147705, + "rewards/rejected": -15.222809791564941, + "step": 16352 + }, + { + "epoch": 2.54, + "learning_rate": 2.153381399450179e-06, + "logits/chosen": -2.99945330619812, + "logits/rejected": -3.0411336421966553, + "logps/chosen": -99.96257019042969, + "logps/rejected": -218.38584899902344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.106457710266113, + "rewards/margins": 8.690103530883789, + "rewards/rejected": -15.796561241149902, + "step": 16353 + }, + { + "epoch": 2.54, + "learning_rate": 2.1526479589190312e-06, + "logits/chosen": -2.4815595149993896, + "logits/rejected": -2.0762500762939453, + "logps/chosen": -165.37684631347656, + "logps/rejected": -332.9617919921875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.70816707611084, + "rewards/margins": 8.931109428405762, + "rewards/rejected": -18.6392765045166, + "step": 16354 + }, + { + "epoch": 2.54, + "learning_rate": 2.151914518387883e-06, + "logits/chosen": -2.426276206970215, + "logits/rejected": -2.753021478652954, + "logps/chosen": -291.83489990234375, + "logps/rejected": -424.43450927734375, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.190103530883789, + "rewards/margins": 6.806064605712891, + "rewards/rejected": -13.99616813659668, + "step": 16355 + }, + { + "epoch": 2.54, + "learning_rate": 2.1511810778567354e-06, + "logits/chosen": -2.315786361694336, + "logits/rejected": -2.5903313159942627, + "logps/chosen": -194.19989013671875, + "logps/rejected": -541.5503540039062, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.272307395935059, + "rewards/margins": 10.59899616241455, + "rewards/rejected": -19.87130355834961, + "step": 16356 + }, + { + "epoch": 2.54, + "learning_rate": 2.1504476373255873e-06, + "logits/chosen": -2.4684388637542725, + "logits/rejected": -1.698331594467163, + "logps/chosen": -277.7291259765625, + "logps/rejected": -325.65069580078125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.105785369873047, + "rewards/margins": 7.177079200744629, + "rewards/rejected": -16.28286361694336, + "step": 16357 + }, + { + "epoch": 2.54, + "learning_rate": 2.1497141967944396e-06, + "logits/chosen": -1.8729642629623413, + "logits/rejected": -2.8664000034332275, + "logps/chosen": -119.70703125, + "logps/rejected": -327.30255126953125, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.110711097717285, + "rewards/margins": 4.0644850730896, + "rewards/rejected": -14.175195693969727, + "step": 16358 + }, + { + "epoch": 2.54, + "learning_rate": 2.148980756263292e-06, + "logits/chosen": -2.7130825519561768, + "logits/rejected": -2.0836286544799805, + "logps/chosen": -253.61648559570312, + "logps/rejected": -462.86260986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.575431823730469, + "rewards/margins": 16.46289825439453, + "rewards/rejected": -24.038330078125, + "step": 16359 + }, + { + "epoch": 2.54, + "learning_rate": 2.1482473157321438e-06, + "logits/chosen": -2.1812825202941895, + "logits/rejected": -2.887554407119751, + "logps/chosen": -276.9803466796875, + "logps/rejected": -656.49365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.504120826721191, + "rewards/margins": 15.702603340148926, + "rewards/rejected": -23.206724166870117, + "step": 16360 + }, + { + "epoch": 2.54, + "learning_rate": 2.147513875200996e-06, + "logits/chosen": -1.3036259412765503, + "logits/rejected": -2.9676902294158936, + "logps/chosen": -178.65940856933594, + "logps/rejected": -494.6922607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.134782314300537, + "rewards/margins": 13.15336799621582, + "rewards/rejected": -18.288150787353516, + "step": 16361 + }, + { + "epoch": 2.54, + "learning_rate": 2.146780434669848e-06, + "logits/chosen": -2.6628239154815674, + "logits/rejected": -2.667264938354492, + "logps/chosen": -582.7937622070312, + "logps/rejected": -489.5531005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.110623359680176, + "rewards/margins": 16.23788070678711, + "rewards/rejected": -24.34850311279297, + "step": 16362 + }, + { + "epoch": 2.54, + "learning_rate": 2.1460469941387002e-06, + "logits/chosen": -2.5086264610290527, + "logits/rejected": -2.5612709522247314, + "logps/chosen": -315.54876708984375, + "logps/rejected": -491.7951354980469, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.123202323913574, + "rewards/margins": 8.515024185180664, + "rewards/rejected": -16.638225555419922, + "step": 16363 + }, + { + "epoch": 2.54, + "learning_rate": 2.145313553607552e-06, + "logits/chosen": -2.29119873046875, + "logits/rejected": -2.516838312149048, + "logps/chosen": -113.14279174804688, + "logps/rejected": -284.3155822753906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.925963401794434, + "rewards/margins": 8.847562789916992, + "rewards/rejected": -17.77352523803711, + "step": 16364 + }, + { + "epoch": 2.55, + "learning_rate": 2.1445801130764044e-06, + "logits/chosen": -0.9009377956390381, + "logits/rejected": -2.950026035308838, + "logps/chosen": -199.93174743652344, + "logps/rejected": -694.6796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.347277641296387, + "rewards/margins": 10.93338394165039, + "rewards/rejected": -18.280662536621094, + "step": 16365 + }, + { + "epoch": 2.55, + "learning_rate": 2.1438466725452563e-06, + "logits/chosen": -1.415107250213623, + "logits/rejected": -2.4715940952301025, + "logps/chosen": -164.98089599609375, + "logps/rejected": -429.0774841308594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.574832916259766, + "rewards/margins": 14.07424545288086, + "rewards/rejected": -19.649078369140625, + "step": 16366 + }, + { + "epoch": 2.55, + "learning_rate": 2.1431132320141086e-06, + "logits/chosen": -0.9537608623504639, + "logits/rejected": -2.311069965362549, + "logps/chosen": -170.15115356445312, + "logps/rejected": -521.3482666015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.542840957641602, + "rewards/margins": 9.161163330078125, + "rewards/rejected": -19.704004287719727, + "step": 16367 + }, + { + "epoch": 2.55, + "learning_rate": 2.142379791482961e-06, + "logits/chosen": -2.168107032775879, + "logits/rejected": -2.8660287857055664, + "logps/chosen": -474.64324951171875, + "logps/rejected": -477.38958740234375, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.578852653503418, + "rewards/margins": 6.668348789215088, + "rewards/rejected": -17.247201919555664, + "step": 16368 + }, + { + "epoch": 2.55, + "learning_rate": 2.1416463509518128e-06, + "logits/chosen": -2.2776594161987305, + "logits/rejected": -2.6904170513153076, + "logps/chosen": -136.95016479492188, + "logps/rejected": -375.1300354003906, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.811948776245117, + "rewards/margins": 12.176036834716797, + "rewards/rejected": -22.98798370361328, + "step": 16369 + }, + { + "epoch": 2.55, + "learning_rate": 2.140912910420665e-06, + "logits/chosen": -2.4370105266571045, + "logits/rejected": -2.6860721111297607, + "logps/chosen": -186.52394104003906, + "logps/rejected": -507.0550537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.426215648651123, + "rewards/margins": 15.402013778686523, + "rewards/rejected": -20.828229904174805, + "step": 16370 + }, + { + "epoch": 2.55, + "learning_rate": 2.140179469889517e-06, + "logits/chosen": -2.351052761077881, + "logits/rejected": -2.618647336959839, + "logps/chosen": -168.44061279296875, + "logps/rejected": -380.23675537109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.927308559417725, + "rewards/margins": 8.811464309692383, + "rewards/rejected": -15.738773345947266, + "step": 16371 + }, + { + "epoch": 2.55, + "learning_rate": 2.1394460293583693e-06, + "logits/chosen": -2.965148448944092, + "logits/rejected": -3.1338963508605957, + "logps/chosen": -324.00872802734375, + "logps/rejected": -377.4942321777344, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.481734752655029, + "rewards/margins": 7.920721530914307, + "rewards/rejected": -15.402456283569336, + "step": 16372 + }, + { + "epoch": 2.55, + "learning_rate": 2.138712588827221e-06, + "logits/chosen": -2.3092379570007324, + "logits/rejected": -2.5372095108032227, + "logps/chosen": -308.2477111816406, + "logps/rejected": -464.50042724609375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.73668098449707, + "rewards/margins": 12.875795364379883, + "rewards/rejected": -23.612476348876953, + "step": 16373 + }, + { + "epoch": 2.55, + "learning_rate": 2.1379791482960734e-06, + "logits/chosen": -1.7925927639007568, + "logits/rejected": -2.3512423038482666, + "logps/chosen": -249.01390075683594, + "logps/rejected": -438.8580322265625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.781530380249023, + "rewards/margins": 13.955309867858887, + "rewards/rejected": -23.736841201782227, + "step": 16374 + }, + { + "epoch": 2.55, + "learning_rate": 2.1372457077649258e-06, + "logits/chosen": -1.2286062240600586, + "logits/rejected": -1.2191985845565796, + "logps/chosen": -449.96221923828125, + "logps/rejected": -791.2858276367188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.981680870056152, + "rewards/margins": 12.02614974975586, + "rewards/rejected": -20.007831573486328, + "step": 16375 + }, + { + "epoch": 2.55, + "learning_rate": 2.1365122672337776e-06, + "logits/chosen": -2.482234477996826, + "logits/rejected": -0.9812818169593811, + "logps/chosen": -385.2247619628906, + "logps/rejected": -323.4525451660156, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.275445938110352, + "rewards/margins": 9.887327194213867, + "rewards/rejected": -18.16277313232422, + "step": 16376 + }, + { + "epoch": 2.55, + "learning_rate": 2.13577882670263e-06, + "logits/chosen": -2.2091593742370605, + "logits/rejected": -2.5431745052337646, + "logps/chosen": -181.95306396484375, + "logps/rejected": -422.2208251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.697955131530762, + "rewards/margins": 12.513870239257812, + "rewards/rejected": -23.21182632446289, + "step": 16377 + }, + { + "epoch": 2.55, + "learning_rate": 2.135045386171482e-06, + "logits/chosen": -2.0491485595703125, + "logits/rejected": -2.700620174407959, + "logps/chosen": -210.7396240234375, + "logps/rejected": -322.94525146484375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.463423728942871, + "rewards/margins": 6.481879711151123, + "rewards/rejected": -14.945302963256836, + "step": 16378 + }, + { + "epoch": 2.55, + "learning_rate": 2.134311945640334e-06, + "logits/chosen": -1.0966212749481201, + "logits/rejected": -2.6363024711608887, + "logps/chosen": -161.76492309570312, + "logps/rejected": -608.0738525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.709716796875, + "rewards/margins": 14.242927551269531, + "rewards/rejected": -23.95264434814453, + "step": 16379 + }, + { + "epoch": 2.55, + "learning_rate": 2.133578505109186e-06, + "logits/chosen": -2.7341501712799072, + "logits/rejected": -2.617621421813965, + "logps/chosen": -176.23876953125, + "logps/rejected": -261.05810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1978373527526855, + "rewards/margins": 10.586221694946289, + "rewards/rejected": -17.7840576171875, + "step": 16380 + }, + { + "epoch": 2.55, + "learning_rate": 2.1328450645780383e-06, + "logits/chosen": -1.146526575088501, + "logits/rejected": -2.6268227100372314, + "logps/chosen": -115.89044952392578, + "logps/rejected": -367.64697265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.784633159637451, + "rewards/margins": 11.563737869262695, + "rewards/rejected": -18.348371505737305, + "step": 16381 + }, + { + "epoch": 2.55, + "learning_rate": 2.13211162404689e-06, + "logits/chosen": -2.8134477138519287, + "logits/rejected": -2.026078462600708, + "logps/chosen": -155.1913604736328, + "logps/rejected": -236.67657470703125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.88644027709961, + "rewards/margins": 7.096988201141357, + "rewards/rejected": -16.983428955078125, + "step": 16382 + }, + { + "epoch": 2.55, + "learning_rate": 2.1313781835157425e-06, + "logits/chosen": -1.0373468399047852, + "logits/rejected": -2.633302688598633, + "logps/chosen": -189.26712036132812, + "logps/rejected": -672.036376953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.065393447875977, + "rewards/margins": 13.689053535461426, + "rewards/rejected": -24.75444793701172, + "step": 16383 + }, + { + "epoch": 2.55, + "learning_rate": 2.1306447429845948e-06, + "logits/chosen": -2.8704843521118164, + "logits/rejected": -2.4953043460845947, + "logps/chosen": -666.602294921875, + "logps/rejected": -565.759521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.040518760681152, + "rewards/margins": 11.44403076171875, + "rewards/rejected": -17.48455047607422, + "step": 16384 + }, + { + "epoch": 2.55, + "learning_rate": 2.129911302453447e-06, + "logits/chosen": -2.2852225303649902, + "logits/rejected": -2.5099472999572754, + "logps/chosen": -149.35446166992188, + "logps/rejected": -236.11410522460938, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.264276504516602, + "rewards/margins": 8.832162857055664, + "rewards/rejected": -20.096439361572266, + "step": 16385 + }, + { + "epoch": 2.55, + "learning_rate": 2.129177861922299e-06, + "logits/chosen": -2.4758622646331787, + "logits/rejected": -2.7861979007720947, + "logps/chosen": -320.86376953125, + "logps/rejected": -448.30828857421875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.77279281616211, + "rewards/margins": 6.9910502433776855, + "rewards/rejected": -18.763843536376953, + "step": 16386 + }, + { + "epoch": 2.55, + "learning_rate": 2.128444421391151e-06, + "logits/chosen": -1.5920097827911377, + "logits/rejected": -2.687570095062256, + "logps/chosen": -133.15713500976562, + "logps/rejected": -394.20416259765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.674493789672852, + "rewards/margins": 10.666268348693848, + "rewards/rejected": -21.340763092041016, + "step": 16387 + }, + { + "epoch": 2.55, + "learning_rate": 2.127710980860003e-06, + "logits/chosen": -2.4909987449645996, + "logits/rejected": -2.7186005115509033, + "logps/chosen": -227.7225341796875, + "logps/rejected": -354.2957763671875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.450222015380859, + "rewards/margins": 7.492356300354004, + "rewards/rejected": -14.942578315734863, + "step": 16388 + }, + { + "epoch": 2.55, + "learning_rate": 2.126977540328855e-06, + "logits/chosen": -2.4850943088531494, + "logits/rejected": -2.7833263874053955, + "logps/chosen": -562.2744750976562, + "logps/rejected": -398.5552062988281, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.3656005859375, + "rewards/margins": 4.822090148925781, + "rewards/rejected": -13.187690734863281, + "step": 16389 + }, + { + "epoch": 2.55, + "learning_rate": 2.1262440997977073e-06, + "logits/chosen": -2.264866590499878, + "logits/rejected": -2.7330970764160156, + "logps/chosen": -214.43458557128906, + "logps/rejected": -421.9562683105469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.188790798187256, + "rewards/margins": 12.114920616149902, + "rewards/rejected": -19.3037109375, + "step": 16390 + }, + { + "epoch": 2.55, + "learning_rate": 2.125510659266559e-06, + "logits/chosen": -2.8726534843444824, + "logits/rejected": -2.7727997303009033, + "logps/chosen": -515.9317626953125, + "logps/rejected": -714.795654296875, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.176461219787598, + "rewards/margins": 5.056550979614258, + "rewards/rejected": -13.233012199401855, + "step": 16391 + }, + { + "epoch": 2.55, + "learning_rate": 2.124777218735412e-06, + "logits/chosen": -2.7988157272338867, + "logits/rejected": -1.2439029216766357, + "logps/chosen": -767.388671875, + "logps/rejected": -526.0203857421875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.340929985046387, + "rewards/margins": 9.846569061279297, + "rewards/rejected": -18.1875, + "step": 16392 + }, + { + "epoch": 2.55, + "learning_rate": 2.124043778204264e-06, + "logits/chosen": -2.1553444862365723, + "logits/rejected": -2.4733264446258545, + "logps/chosen": -373.94512939453125, + "logps/rejected": -643.282470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.393245697021484, + "rewards/margins": 11.21566390991211, + "rewards/rejected": -19.608909606933594, + "step": 16393 + }, + { + "epoch": 2.55, + "learning_rate": 2.123310337673116e-06, + "logits/chosen": -1.3851544857025146, + "logits/rejected": -2.7414069175720215, + "logps/chosen": -225.81373596191406, + "logps/rejected": -539.660400390625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.569293022155762, + "rewards/margins": 6.374687194824219, + "rewards/rejected": -17.943981170654297, + "step": 16394 + }, + { + "epoch": 2.55, + "learning_rate": 2.122576897141968e-06, + "logits/chosen": -2.862104654312134, + "logits/rejected": -2.7334206104278564, + "logps/chosen": -481.654541015625, + "logps/rejected": -390.5564270019531, + "loss": 0.7196, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.136792182922363, + "rewards/margins": 3.8159866333007812, + "rewards/rejected": -13.952778816223145, + "step": 16395 + }, + { + "epoch": 2.55, + "learning_rate": 2.12184345661082e-06, + "logits/chosen": -2.536219358444214, + "logits/rejected": -1.7009528875350952, + "logps/chosen": -450.6540222167969, + "logps/rejected": -542.1058959960938, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.52142333984375, + "rewards/margins": 9.764677047729492, + "rewards/rejected": -20.286102294921875, + "step": 16396 + }, + { + "epoch": 2.55, + "learning_rate": 2.121110016079672e-06, + "logits/chosen": -1.3510737419128418, + "logits/rejected": -2.38905668258667, + "logps/chosen": -122.60343933105469, + "logps/rejected": -459.5648193359375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.482059478759766, + "rewards/margins": 10.43856430053711, + "rewards/rejected": -18.920623779296875, + "step": 16397 + }, + { + "epoch": 2.55, + "learning_rate": 2.120376575548524e-06, + "logits/chosen": -2.870156764984131, + "logits/rejected": -0.7051486372947693, + "logps/chosen": -760.35546875, + "logps/rejected": -507.5566711425781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.685493469238281, + "rewards/margins": 11.978496551513672, + "rewards/rejected": -23.663990020751953, + "step": 16398 + }, + { + "epoch": 2.55, + "learning_rate": 2.1196431350173763e-06, + "logits/chosen": -2.1790335178375244, + "logits/rejected": -2.7422983646392822, + "logps/chosen": -456.23944091796875, + "logps/rejected": -669.853271484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.823488235473633, + "rewards/margins": 10.089319229125977, + "rewards/rejected": -17.91280746459961, + "step": 16399 + }, + { + "epoch": 2.55, + "learning_rate": 2.1189096944862286e-06, + "logits/chosen": -1.430793046951294, + "logits/rejected": -2.481492280960083, + "logps/chosen": -144.2674560546875, + "logps/rejected": -379.7938232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.239275932312012, + "rewards/margins": 12.338617324829102, + "rewards/rejected": -19.577892303466797, + "step": 16400 + }, + { + "epoch": 2.55, + "learning_rate": 2.118176253955081e-06, + "logits/chosen": -2.2707858085632324, + "logits/rejected": -2.8860135078430176, + "logps/chosen": -158.03436279296875, + "logps/rejected": -423.3104248046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.412243843078613, + "rewards/margins": 9.181781768798828, + "rewards/rejected": -18.594024658203125, + "step": 16401 + }, + { + "epoch": 2.55, + "learning_rate": 2.117442813423933e-06, + "logits/chosen": -2.546743869781494, + "logits/rejected": -2.989259958267212, + "logps/chosen": -167.51527404785156, + "logps/rejected": -521.7440185546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9237141609191895, + "rewards/margins": 13.725812911987305, + "rewards/rejected": -20.649526596069336, + "step": 16402 + }, + { + "epoch": 2.55, + "learning_rate": 2.116709372892785e-06, + "logits/chosen": -2.796935796737671, + "logits/rejected": -2.9876322746276855, + "logps/chosen": -308.66033935546875, + "logps/rejected": -491.23895263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.750883102416992, + "rewards/margins": 11.0325927734375, + "rewards/rejected": -19.783475875854492, + "step": 16403 + }, + { + "epoch": 2.55, + "learning_rate": 2.115975932361637e-06, + "logits/chosen": -2.203479290008545, + "logits/rejected": -2.6010241508483887, + "logps/chosen": -517.7350463867188, + "logps/rejected": -508.21759033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7044477462768555, + "rewards/margins": 12.534820556640625, + "rewards/rejected": -18.239269256591797, + "step": 16404 + }, + { + "epoch": 2.55, + "learning_rate": 2.1152424918304893e-06, + "logits/chosen": -2.9073026180267334, + "logits/rejected": -2.9767823219299316, + "logps/chosen": -328.8564453125, + "logps/rejected": -293.7696533203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.641321182250977, + "rewards/margins": 9.971595764160156, + "rewards/rejected": -17.612916946411133, + "step": 16405 + }, + { + "epoch": 2.55, + "learning_rate": 2.114509051299341e-06, + "logits/chosen": -2.8039352893829346, + "logits/rejected": -2.46034574508667, + "logps/chosen": -460.4486389160156, + "logps/rejected": -330.78009033203125, + "loss": 1.4041, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.618143081665039, + "rewards/margins": 3.743161201477051, + "rewards/rejected": -13.36130428314209, + "step": 16406 + }, + { + "epoch": 2.55, + "learning_rate": 2.113775610768193e-06, + "logits/chosen": -1.5062072277069092, + "logits/rejected": -2.7786974906921387, + "logps/chosen": -214.40380859375, + "logps/rejected": -377.2589111328125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.198366165161133, + "rewards/margins": 9.372465133666992, + "rewards/rejected": -17.570831298828125, + "step": 16407 + }, + { + "epoch": 2.55, + "learning_rate": 2.1130421702370454e-06, + "logits/chosen": -1.9959335327148438, + "logits/rejected": -2.650205135345459, + "logps/chosen": -306.8487854003906, + "logps/rejected": -473.8043212890625, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.841259002685547, + "rewards/margins": 7.511767387390137, + "rewards/rejected": -17.35302734375, + "step": 16408 + }, + { + "epoch": 2.55, + "learning_rate": 2.1123087297058977e-06, + "logits/chosen": -1.8647360801696777, + "logits/rejected": -2.673374891281128, + "logps/chosen": -332.86676025390625, + "logps/rejected": -421.81781005859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.426527976989746, + "rewards/margins": 8.538370132446289, + "rewards/rejected": -19.96489715576172, + "step": 16409 + }, + { + "epoch": 2.55, + "learning_rate": 2.11157528917475e-06, + "logits/chosen": -2.4269778728485107, + "logits/rejected": -2.4040207862854004, + "logps/chosen": -235.77340698242188, + "logps/rejected": -356.9075927734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.245169639587402, + "rewards/margins": 8.684173583984375, + "rewards/rejected": -21.929344177246094, + "step": 16410 + }, + { + "epoch": 2.55, + "learning_rate": 2.110841848643602e-06, + "logits/chosen": -1.8354601860046387, + "logits/rejected": -2.621931791305542, + "logps/chosen": -179.6222381591797, + "logps/rejected": -375.8695068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3658037185668945, + "rewards/margins": 10.215764045715332, + "rewards/rejected": -16.581567764282227, + "step": 16411 + }, + { + "epoch": 2.55, + "learning_rate": 2.110108408112454e-06, + "logits/chosen": -1.9049804210662842, + "logits/rejected": -2.5460963249206543, + "logps/chosen": -194.4461669921875, + "logps/rejected": -314.63970947265625, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.78541374206543, + "rewards/margins": 7.632279396057129, + "rewards/rejected": -17.417694091796875, + "step": 16412 + }, + { + "epoch": 2.55, + "learning_rate": 2.109374967581306e-06, + "logits/chosen": -2.2507081031799316, + "logits/rejected": -2.6330409049987793, + "logps/chosen": -270.51861572265625, + "logps/rejected": -278.1283264160156, + "loss": 0.9315, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.571168899536133, + "rewards/margins": 3.0810842514038086, + "rewards/rejected": -12.652254104614258, + "step": 16413 + }, + { + "epoch": 2.55, + "learning_rate": 2.1086415270501583e-06, + "logits/chosen": -3.0500693321228027, + "logits/rejected": -2.473154306411743, + "logps/chosen": -388.904052734375, + "logps/rejected": -346.03814697265625, + "loss": 1.0841, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.670602798461914, + "rewards/margins": 2.822718858718872, + "rewards/rejected": -12.493322372436523, + "step": 16414 + }, + { + "epoch": 2.55, + "learning_rate": 2.10790808651901e-06, + "logits/chosen": -1.0681779384613037, + "logits/rejected": -2.78137469291687, + "logps/chosen": -206.1971435546875, + "logps/rejected": -527.409912109375, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.964094638824463, + "rewards/margins": 9.168920516967773, + "rewards/rejected": -16.133014678955078, + "step": 16415 + }, + { + "epoch": 2.55, + "learning_rate": 2.107174645987862e-06, + "logits/chosen": -2.6063716411590576, + "logits/rejected": -1.8808420896530151, + "logps/chosen": -773.6229248046875, + "logps/rejected": -582.971435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.678945541381836, + "rewards/margins": 12.628480911254883, + "rewards/rejected": -22.30742645263672, + "step": 16416 + }, + { + "epoch": 2.55, + "learning_rate": 2.106441205456715e-06, + "logits/chosen": -2.036125659942627, + "logits/rejected": -2.659499406814575, + "logps/chosen": -221.45736694335938, + "logps/rejected": -484.74566650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.85130500793457, + "rewards/margins": 12.279293060302734, + "rewards/rejected": -23.130598068237305, + "step": 16417 + }, + { + "epoch": 2.55, + "learning_rate": 2.1057077649255667e-06, + "logits/chosen": -0.8440791964530945, + "logits/rejected": -2.313995838165283, + "logps/chosen": -123.89262390136719, + "logps/rejected": -459.24530029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.145989418029785, + "rewards/margins": 13.129920959472656, + "rewards/rejected": -22.275909423828125, + "step": 16418 + }, + { + "epoch": 2.55, + "learning_rate": 2.104974324394419e-06, + "logits/chosen": -2.924985885620117, + "logits/rejected": -1.406647801399231, + "logps/chosen": -580.46533203125, + "logps/rejected": -426.07952880859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.845094680786133, + "rewards/margins": 8.461820602416992, + "rewards/rejected": -19.306915283203125, + "step": 16419 + }, + { + "epoch": 2.55, + "learning_rate": 2.104240883863271e-06, + "logits/chosen": -2.6979644298553467, + "logits/rejected": -2.558353900909424, + "logps/chosen": -333.4924621582031, + "logps/rejected": -468.44635009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.758651733398438, + "rewards/margins": 10.07740306854248, + "rewards/rejected": -19.836055755615234, + "step": 16420 + }, + { + "epoch": 2.55, + "learning_rate": 2.103507443332123e-06, + "logits/chosen": -2.493683099746704, + "logits/rejected": -2.1757051944732666, + "logps/chosen": -234.23793029785156, + "logps/rejected": -405.2475891113281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.305035591125488, + "rewards/margins": 8.678133010864258, + "rewards/rejected": -18.98316764831543, + "step": 16421 + }, + { + "epoch": 2.55, + "learning_rate": 2.102774002800975e-06, + "logits/chosen": -2.7230803966522217, + "logits/rejected": -2.385418176651001, + "logps/chosen": -419.544677734375, + "logps/rejected": -491.00323486328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.158551216125488, + "rewards/margins": 9.013086318969727, + "rewards/rejected": -19.17163848876953, + "step": 16422 + }, + { + "epoch": 2.55, + "learning_rate": 2.1020405622698273e-06, + "logits/chosen": -2.6980881690979004, + "logits/rejected": -1.9114842414855957, + "logps/chosen": -354.89483642578125, + "logps/rejected": -343.63812255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.943423271179199, + "rewards/margins": 11.486501693725586, + "rewards/rejected": -18.4299259185791, + "step": 16423 + }, + { + "epoch": 2.55, + "learning_rate": 2.1013071217386792e-06, + "logits/chosen": -2.5677287578582764, + "logits/rejected": -2.772797107696533, + "logps/chosen": -417.7445373535156, + "logps/rejected": -539.8092041015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.137838363647461, + "rewards/margins": 11.386002540588379, + "rewards/rejected": -22.523841857910156, + "step": 16424 + }, + { + "epoch": 2.55, + "learning_rate": 2.1005736812075315e-06, + "logits/chosen": -1.5433332920074463, + "logits/rejected": -2.6399941444396973, + "logps/chosen": -181.5802001953125, + "logps/rejected": -502.9587097167969, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.206192970275879, + "rewards/margins": 10.219193458557129, + "rewards/rejected": -19.425386428833008, + "step": 16425 + }, + { + "epoch": 2.55, + "learning_rate": 2.099840240676384e-06, + "logits/chosen": -2.565805673599243, + "logits/rejected": -2.567073345184326, + "logps/chosen": -213.52407836914062, + "logps/rejected": -362.784423828125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.889795303344727, + "rewards/margins": 8.70329475402832, + "rewards/rejected": -16.593090057373047, + "step": 16426 + }, + { + "epoch": 2.55, + "learning_rate": 2.0991068001452357e-06, + "logits/chosen": -1.392660140991211, + "logits/rejected": -2.509775161743164, + "logps/chosen": -205.53749084472656, + "logps/rejected": -426.42974853515625, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.149686813354492, + "rewards/margins": 5.5647454261779785, + "rewards/rejected": -17.714431762695312, + "step": 16427 + }, + { + "epoch": 2.55, + "learning_rate": 2.098373359614088e-06, + "logits/chosen": -2.8568851947784424, + "logits/rejected": -2.527191400527954, + "logps/chosen": -218.54486083984375, + "logps/rejected": -234.62413024902344, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.63502311706543, + "rewards/margins": 7.956528663635254, + "rewards/rejected": -13.591550827026367, + "step": 16428 + }, + { + "epoch": 2.56, + "learning_rate": 2.09763991908294e-06, + "logits/chosen": -2.535562038421631, + "logits/rejected": -2.6642005443573, + "logps/chosen": -247.0948486328125, + "logps/rejected": -373.0325012207031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.39210319519043, + "rewards/margins": 11.299793243408203, + "rewards/rejected": -19.691896438598633, + "step": 16429 + }, + { + "epoch": 2.56, + "learning_rate": 2.096906478551792e-06, + "logits/chosen": -2.6095783710479736, + "logits/rejected": -2.6924805641174316, + "logps/chosen": -709.3167724609375, + "logps/rejected": -478.39312744140625, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.735904693603516, + "rewards/margins": 9.96395492553711, + "rewards/rejected": -20.699859619140625, + "step": 16430 + }, + { + "epoch": 2.56, + "learning_rate": 2.096173038020644e-06, + "logits/chosen": -1.9737157821655273, + "logits/rejected": -2.4982364177703857, + "logps/chosen": -249.7827606201172, + "logps/rejected": -498.783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.424276351928711, + "rewards/margins": 15.882122039794922, + "rewards/rejected": -22.306398391723633, + "step": 16431 + }, + { + "epoch": 2.56, + "learning_rate": 2.0954395974894964e-06, + "logits/chosen": -2.5639450550079346, + "logits/rejected": -3.051443338394165, + "logps/chosen": -150.56983947753906, + "logps/rejected": -437.498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.124326229095459, + "rewards/margins": 14.400327682495117, + "rewards/rejected": -20.524654388427734, + "step": 16432 + }, + { + "epoch": 2.56, + "learning_rate": 2.0947061569583482e-06, + "logits/chosen": -3.1034677028656006, + "logits/rejected": -2.751984119415283, + "logps/chosen": -293.28338623046875, + "logps/rejected": -291.66607666015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.511235237121582, + "rewards/margins": 10.942788124084473, + "rewards/rejected": -13.454023361206055, + "step": 16433 + }, + { + "epoch": 2.56, + "learning_rate": 2.093972716427201e-06, + "logits/chosen": -1.6953463554382324, + "logits/rejected": -3.0518250465393066, + "logps/chosen": -180.71847534179688, + "logps/rejected": -672.5474853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.669561862945557, + "rewards/margins": 15.4287109375, + "rewards/rejected": -23.0982723236084, + "step": 16434 + }, + { + "epoch": 2.56, + "learning_rate": 2.093239275896053e-06, + "logits/chosen": -2.8592779636383057, + "logits/rejected": -2.900761604309082, + "logps/chosen": -102.64784240722656, + "logps/rejected": -230.10189819335938, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.954845428466797, + "rewards/margins": 8.958488464355469, + "rewards/rejected": -15.913333892822266, + "step": 16435 + }, + { + "epoch": 2.56, + "learning_rate": 2.0925058353649047e-06, + "logits/chosen": -1.2598928213119507, + "logits/rejected": -2.3141720294952393, + "logps/chosen": -186.56753540039062, + "logps/rejected": -492.92645263671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.257509231567383, + "rewards/margins": 9.854747772216797, + "rewards/rejected": -18.11225700378418, + "step": 16436 + }, + { + "epoch": 2.56, + "learning_rate": 2.091772394833757e-06, + "logits/chosen": -2.7026307582855225, + "logits/rejected": -2.7566065788269043, + "logps/chosen": -566.8473510742188, + "logps/rejected": -782.2344970703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.500962257385254, + "rewards/margins": 12.620728492736816, + "rewards/rejected": -20.12169075012207, + "step": 16437 + }, + { + "epoch": 2.56, + "learning_rate": 2.091038954302609e-06, + "logits/chosen": -1.035494089126587, + "logits/rejected": -2.1128642559051514, + "logps/chosen": -139.75888061523438, + "logps/rejected": -565.9718627929688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.939311981201172, + "rewards/margins": 15.809335708618164, + "rewards/rejected": -23.748645782470703, + "step": 16438 + }, + { + "epoch": 2.56, + "learning_rate": 2.090305513771461e-06, + "logits/chosen": -2.8854243755340576, + "logits/rejected": -2.928048849105835, + "logps/chosen": -100.06755065917969, + "logps/rejected": -291.8043518066406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.017560005187988, + "rewards/margins": 9.403611183166504, + "rewards/rejected": -17.421171188354492, + "step": 16439 + }, + { + "epoch": 2.56, + "learning_rate": 2.089572073240313e-06, + "logits/chosen": -2.4642326831817627, + "logits/rejected": -2.4330077171325684, + "logps/chosen": -311.8308410644531, + "logps/rejected": -293.0038146972656, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.039333343505859, + "rewards/margins": 7.884492874145508, + "rewards/rejected": -14.923826217651367, + "step": 16440 + }, + { + "epoch": 2.56, + "learning_rate": 2.0888386327091654e-06, + "logits/chosen": -2.8300929069519043, + "logits/rejected": -2.6784942150115967, + "logps/chosen": -232.3555908203125, + "logps/rejected": -346.5806579589844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.290095329284668, + "rewards/margins": 8.958963394165039, + "rewards/rejected": -17.24905776977539, + "step": 16441 + }, + { + "epoch": 2.56, + "learning_rate": 2.0881051921780177e-06, + "logits/chosen": -2.055168867111206, + "logits/rejected": -2.6609108448028564, + "logps/chosen": -179.11260986328125, + "logps/rejected": -328.6885986328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.414925575256348, + "rewards/margins": 9.811878204345703, + "rewards/rejected": -17.226802825927734, + "step": 16442 + }, + { + "epoch": 2.56, + "learning_rate": 2.08737175164687e-06, + "logits/chosen": -2.8050429821014404, + "logits/rejected": -1.9763842821121216, + "logps/chosen": -479.6368103027344, + "logps/rejected": -252.51612854003906, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5755295753479, + "rewards/margins": 5.746006965637207, + "rewards/rejected": -13.321537017822266, + "step": 16443 + }, + { + "epoch": 2.56, + "learning_rate": 2.086638311115722e-06, + "logits/chosen": -1.8042550086975098, + "logits/rejected": -2.783177375793457, + "logps/chosen": -282.87359619140625, + "logps/rejected": -575.1746826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.795324325561523, + "rewards/margins": 13.03188705444336, + "rewards/rejected": -20.827211380004883, + "step": 16444 + }, + { + "epoch": 2.56, + "learning_rate": 2.0859048705845737e-06, + "logits/chosen": -1.6910167932510376, + "logits/rejected": -2.5889768600463867, + "logps/chosen": -223.62973022460938, + "logps/rejected": -491.38116455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.381566047668457, + "rewards/margins": 10.284181594848633, + "rewards/rejected": -18.665748596191406, + "step": 16445 + }, + { + "epoch": 2.56, + "learning_rate": 2.085171430053426e-06, + "logits/chosen": -1.8585036993026733, + "logits/rejected": -2.597695827484131, + "logps/chosen": -276.6835632324219, + "logps/rejected": -626.0400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.630956649780273, + "rewards/margins": 11.219779014587402, + "rewards/rejected": -20.85073471069336, + "step": 16446 + }, + { + "epoch": 2.56, + "learning_rate": 2.084437989522278e-06, + "logits/chosen": -2.406083822250366, + "logits/rejected": -2.5355749130249023, + "logps/chosen": -191.43960571289062, + "logps/rejected": -324.14776611328125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.890963554382324, + "rewards/margins": 9.311355590820312, + "rewards/rejected": -16.20231819152832, + "step": 16447 + }, + { + "epoch": 2.56, + "learning_rate": 2.0837045489911302e-06, + "logits/chosen": -2.6629891395568848, + "logits/rejected": -1.8735723495483398, + "logps/chosen": -217.57418823242188, + "logps/rejected": -374.74127197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8129513263702393, + "rewards/margins": 15.143901824951172, + "rewards/rejected": -18.956851959228516, + "step": 16448 + }, + { + "epoch": 2.56, + "learning_rate": 2.082971108459982e-06, + "logits/chosen": -2.9171860218048096, + "logits/rejected": -2.6764423847198486, + "logps/chosen": -219.6935577392578, + "logps/rejected": -324.12420654296875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.525212287902832, + "rewards/margins": 6.195168972015381, + "rewards/rejected": -17.720380783081055, + "step": 16449 + }, + { + "epoch": 2.56, + "learning_rate": 2.0822376679288344e-06, + "logits/chosen": -2.1210312843322754, + "logits/rejected": -2.6051042079925537, + "logps/chosen": -224.509765625, + "logps/rejected": -334.1959228515625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.786928176879883, + "rewards/margins": 8.799482345581055, + "rewards/rejected": -17.586410522460938, + "step": 16450 + }, + { + "epoch": 2.56, + "learning_rate": 2.0815042273976867e-06, + "logits/chosen": -2.7680504322052, + "logits/rejected": -3.0695760250091553, + "logps/chosen": -113.2940902709961, + "logps/rejected": -219.41152954101562, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.075904846191406, + "rewards/margins": 6.624483108520508, + "rewards/rejected": -15.700387954711914, + "step": 16451 + }, + { + "epoch": 2.56, + "learning_rate": 2.080770786866539e-06, + "logits/chosen": -2.767996072769165, + "logits/rejected": -2.9192588329315186, + "logps/chosen": -130.35421752929688, + "logps/rejected": -326.58001708984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.953190803527832, + "rewards/margins": 9.627228736877441, + "rewards/rejected": -16.580419540405273, + "step": 16452 + }, + { + "epoch": 2.56, + "learning_rate": 2.080037346335391e-06, + "logits/chosen": -1.7296401262283325, + "logits/rejected": -2.3126864433288574, + "logps/chosen": -174.8126220703125, + "logps/rejected": -443.4057922363281, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.252704620361328, + "rewards/margins": 8.222846984863281, + "rewards/rejected": -20.47555160522461, + "step": 16453 + }, + { + "epoch": 2.56, + "learning_rate": 2.079303905804243e-06, + "logits/chosen": -2.137641429901123, + "logits/rejected": -2.977250099182129, + "logps/chosen": -118.42235565185547, + "logps/rejected": -495.2010192871094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.61698579788208, + "rewards/margins": 12.89832878112793, + "rewards/rejected": -20.51531410217285, + "step": 16454 + }, + { + "epoch": 2.56, + "learning_rate": 2.078570465273095e-06, + "logits/chosen": -2.082775831222534, + "logits/rejected": -2.8200523853302, + "logps/chosen": -141.18167114257812, + "logps/rejected": -403.5874938964844, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.755879402160645, + "rewards/margins": 6.297214508056641, + "rewards/rejected": -16.05309295654297, + "step": 16455 + }, + { + "epoch": 2.56, + "learning_rate": 2.077837024741947e-06, + "logits/chosen": -1.889086365699768, + "logits/rejected": -2.4397103786468506, + "logps/chosen": -197.6165008544922, + "logps/rejected": -419.2023010253906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.159420013427734, + "rewards/margins": 9.134638786315918, + "rewards/rejected": -22.29405975341797, + "step": 16456 + }, + { + "epoch": 2.56, + "learning_rate": 2.0771035842107993e-06, + "logits/chosen": -2.321784257888794, + "logits/rejected": -2.8095548152923584, + "logps/chosen": -317.9182434082031, + "logps/rejected": -454.3074035644531, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.822144508361816, + "rewards/margins": 7.924738883972168, + "rewards/rejected": -20.746883392333984, + "step": 16457 + }, + { + "epoch": 2.56, + "learning_rate": 2.076370143679651e-06, + "logits/chosen": -2.05145001411438, + "logits/rejected": -2.7160563468933105, + "logps/chosen": -588.0609741210938, + "logps/rejected": -748.95458984375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.69686508178711, + "rewards/margins": 8.829431533813477, + "rewards/rejected": -21.526294708251953, + "step": 16458 + }, + { + "epoch": 2.56, + "learning_rate": 2.075636703148504e-06, + "logits/chosen": -2.8450496196746826, + "logits/rejected": -2.253361463546753, + "logps/chosen": -914.7247314453125, + "logps/rejected": -538.5639038085938, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.541217803955078, + "rewards/margins": 5.392062187194824, + "rewards/rejected": -13.933279037475586, + "step": 16459 + }, + { + "epoch": 2.56, + "learning_rate": 2.0749032626173557e-06, + "logits/chosen": -2.0680770874023438, + "logits/rejected": -3.007007122039795, + "logps/chosen": -497.74700927734375, + "logps/rejected": -559.3318481445312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.063644409179688, + "rewards/margins": 10.595256805419922, + "rewards/rejected": -20.65890121459961, + "step": 16460 + }, + { + "epoch": 2.56, + "learning_rate": 2.074169822086208e-06, + "logits/chosen": -2.6256585121154785, + "logits/rejected": -2.6718862056732178, + "logps/chosen": -266.773681640625, + "logps/rejected": -343.4259033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.78382682800293, + "rewards/margins": 11.428082466125488, + "rewards/rejected": -18.211910247802734, + "step": 16461 + }, + { + "epoch": 2.56, + "learning_rate": 2.07343638155506e-06, + "logits/chosen": -2.8722472190856934, + "logits/rejected": -1.8233585357666016, + "logps/chosen": -785.9275512695312, + "logps/rejected": -513.9920043945312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.047342300415039, + "rewards/margins": 15.442277908325195, + "rewards/rejected": -22.489620208740234, + "step": 16462 + }, + { + "epoch": 2.56, + "learning_rate": 2.0727029410239122e-06, + "logits/chosen": -2.513228416442871, + "logits/rejected": -2.739500045776367, + "logps/chosen": -228.23953247070312, + "logps/rejected": -250.89602661132812, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.379888534545898, + "rewards/margins": 8.172088623046875, + "rewards/rejected": -17.551977157592773, + "step": 16463 + }, + { + "epoch": 2.56, + "learning_rate": 2.071969500492764e-06, + "logits/chosen": -2.5161426067352295, + "logits/rejected": -2.8528261184692383, + "logps/chosen": -711.0213012695312, + "logps/rejected": -751.5557861328125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.704511642456055, + "rewards/margins": 6.767767429351807, + "rewards/rejected": -21.472278594970703, + "step": 16464 + }, + { + "epoch": 2.56, + "learning_rate": 2.071236059961616e-06, + "logits/chosen": -2.9849421977996826, + "logits/rejected": -1.915367841720581, + "logps/chosen": -211.4861602783203, + "logps/rejected": -139.86183166503906, + "loss": 2.0124, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.683136940002441, + "rewards/margins": 3.28163743019104, + "rewards/rejected": -11.964774131774902, + "step": 16465 + }, + { + "epoch": 2.56, + "learning_rate": 2.0705026194304683e-06, + "logits/chosen": -2.9575035572052, + "logits/rejected": -1.7004939317703247, + "logps/chosen": -327.0761413574219, + "logps/rejected": -250.9757080078125, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.422510147094727, + "rewards/margins": 8.17176342010498, + "rewards/rejected": -13.594273567199707, + "step": 16466 + }, + { + "epoch": 2.56, + "learning_rate": 2.0697691788993206e-06, + "logits/chosen": -0.9547727704048157, + "logits/rejected": -2.7548129558563232, + "logps/chosen": -223.4613037109375, + "logps/rejected": -580.213623046875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.22451400756836, + "rewards/margins": 8.897978782653809, + "rewards/rejected": -17.122493743896484, + "step": 16467 + }, + { + "epoch": 2.56, + "learning_rate": 2.069035738368173e-06, + "logits/chosen": -2.78027081489563, + "logits/rejected": -2.2428321838378906, + "logps/chosen": -339.5260009765625, + "logps/rejected": -260.89508056640625, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5422868728637695, + "rewards/margins": 6.056323051452637, + "rewards/rejected": -13.598609924316406, + "step": 16468 + }, + { + "epoch": 2.56, + "learning_rate": 2.0683022978370248e-06, + "logits/chosen": -1.7367022037506104, + "logits/rejected": -2.8311729431152344, + "logps/chosen": -158.0127716064453, + "logps/rejected": -397.33831787109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.050125122070312, + "rewards/margins": 8.877384185791016, + "rewards/rejected": -18.927509307861328, + "step": 16469 + }, + { + "epoch": 2.56, + "learning_rate": 2.067568857305877e-06, + "logits/chosen": -2.5291733741760254, + "logits/rejected": -3.0691823959350586, + "logps/chosen": -95.58429718017578, + "logps/rejected": -305.37213134765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.037787437438965, + "rewards/margins": 9.737741470336914, + "rewards/rejected": -17.775527954101562, + "step": 16470 + }, + { + "epoch": 2.56, + "learning_rate": 2.066835416774729e-06, + "logits/chosen": -1.9043447971343994, + "logits/rejected": -2.7182509899139404, + "logps/chosen": -144.08145141601562, + "logps/rejected": -425.18939208984375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.136012077331543, + "rewards/margins": 11.057661056518555, + "rewards/rejected": -22.19367218017578, + "step": 16471 + }, + { + "epoch": 2.56, + "learning_rate": 2.0661019762435812e-06, + "logits/chosen": -2.038959503173828, + "logits/rejected": -2.5423874855041504, + "logps/chosen": -190.98260498046875, + "logps/rejected": -465.5477600097656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.361026763916016, + "rewards/margins": 11.76511287689209, + "rewards/rejected": -22.126140594482422, + "step": 16472 + }, + { + "epoch": 2.56, + "learning_rate": 2.065368535712433e-06, + "logits/chosen": -3.010716199874878, + "logits/rejected": -2.932034730911255, + "logps/chosen": -689.8211669921875, + "logps/rejected": -319.455322265625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.496663093566895, + "rewards/margins": 6.679393768310547, + "rewards/rejected": -15.176056861877441, + "step": 16473 + }, + { + "epoch": 2.56, + "learning_rate": 2.0646350951812854e-06, + "logits/chosen": -2.163830280303955, + "logits/rejected": -2.5684046745300293, + "logps/chosen": -229.76202392578125, + "logps/rejected": -375.9779052734375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.40772819519043, + "rewards/margins": 8.332597732543945, + "rewards/rejected": -18.740325927734375, + "step": 16474 + }, + { + "epoch": 2.56, + "learning_rate": 2.0639016546501373e-06, + "logits/chosen": -2.3994226455688477, + "logits/rejected": -2.3190932273864746, + "logps/chosen": -140.9390411376953, + "logps/rejected": -348.15234375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.205785751342773, + "rewards/margins": 11.52950668334961, + "rewards/rejected": -16.735292434692383, + "step": 16475 + }, + { + "epoch": 2.56, + "learning_rate": 2.0631682141189896e-06, + "logits/chosen": -2.5007007122039795, + "logits/rejected": -1.918447494506836, + "logps/chosen": -348.23724365234375, + "logps/rejected": -355.6744079589844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.84011459350586, + "rewards/margins": 9.400103569030762, + "rewards/rejected": -19.240219116210938, + "step": 16476 + }, + { + "epoch": 2.56, + "learning_rate": 2.062434773587842e-06, + "logits/chosen": -1.4177162647247314, + "logits/rejected": -1.4996933937072754, + "logps/chosen": -394.7923583984375, + "logps/rejected": -659.2318115234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.949573516845703, + "rewards/margins": 9.02783203125, + "rewards/rejected": -23.977405548095703, + "step": 16477 + }, + { + "epoch": 2.56, + "learning_rate": 2.0617013330566938e-06, + "logits/chosen": -2.1450769901275635, + "logits/rejected": -2.7900748252868652, + "logps/chosen": -323.659423828125, + "logps/rejected": -533.6717529296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.836841583251953, + "rewards/margins": 12.162809371948242, + "rewards/rejected": -22.999650955200195, + "step": 16478 + }, + { + "epoch": 2.56, + "learning_rate": 2.060967892525546e-06, + "logits/chosen": -2.6039371490478516, + "logits/rejected": -2.5412323474884033, + "logps/chosen": -185.58551025390625, + "logps/rejected": -265.47418212890625, + "loss": 0.0646, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.507802963256836, + "rewards/margins": 6.289519309997559, + "rewards/rejected": -15.797322273254395, + "step": 16479 + }, + { + "epoch": 2.56, + "learning_rate": 2.060234451994398e-06, + "logits/chosen": -2.414018392562866, + "logits/rejected": -0.8960561752319336, + "logps/chosen": -308.29351806640625, + "logps/rejected": -167.3225555419922, + "loss": 0.5879, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.469426155090332, + "rewards/margins": 2.972701072692871, + "rewards/rejected": -13.442127227783203, + "step": 16480 + }, + { + "epoch": 2.56, + "learning_rate": 2.0595010114632503e-06, + "logits/chosen": -1.3680329322814941, + "logits/rejected": -2.5240378379821777, + "logps/chosen": -158.89633178710938, + "logps/rejected": -440.2023010253906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.937336921691895, + "rewards/margins": 9.806273460388184, + "rewards/rejected": -21.743610382080078, + "step": 16481 + }, + { + "epoch": 2.56, + "learning_rate": 2.058767570932102e-06, + "logits/chosen": -2.1572065353393555, + "logits/rejected": -2.81473708152771, + "logps/chosen": -118.23690795898438, + "logps/rejected": -386.5351257324219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.804672241210938, + "rewards/margins": 10.75719165802002, + "rewards/rejected": -21.56186294555664, + "step": 16482 + }, + { + "epoch": 2.56, + "learning_rate": 2.0580341304009544e-06, + "logits/chosen": -1.2046757936477661, + "logits/rejected": -2.4822440147399902, + "logps/chosen": -217.31430053710938, + "logps/rejected": -545.8839111328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.069348335266113, + "rewards/margins": 10.533798217773438, + "rewards/rejected": -21.603145599365234, + "step": 16483 + }, + { + "epoch": 2.56, + "learning_rate": 2.0573006898698067e-06, + "logits/chosen": -2.068892002105713, + "logits/rejected": -2.8822765350341797, + "logps/chosen": -169.4531707763672, + "logps/rejected": -225.55899047851562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.051949977874756, + "rewards/margins": 10.13503646850586, + "rewards/rejected": -16.186986923217773, + "step": 16484 + }, + { + "epoch": 2.56, + "learning_rate": 2.0565672493386586e-06, + "logits/chosen": -2.5529143810272217, + "logits/rejected": -2.9274144172668457, + "logps/chosen": -738.23095703125, + "logps/rejected": -728.8931884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.015655517578125, + "rewards/margins": 11.383116722106934, + "rewards/rejected": -20.398773193359375, + "step": 16485 + }, + { + "epoch": 2.56, + "learning_rate": 2.055833808807511e-06, + "logits/chosen": -2.613189935684204, + "logits/rejected": -2.7613930702209473, + "logps/chosen": -317.78143310546875, + "logps/rejected": -554.9766235351562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.03903579711914, + "rewards/margins": 9.749476432800293, + "rewards/rejected": -17.78851318359375, + "step": 16486 + }, + { + "epoch": 2.56, + "learning_rate": 2.055100368276363e-06, + "logits/chosen": -2.402869462966919, + "logits/rejected": -2.8020694255828857, + "logps/chosen": -731.16650390625, + "logps/rejected": -801.5870361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.027618408203125, + "rewards/margins": 15.84744644165039, + "rewards/rejected": -24.875064849853516, + "step": 16487 + }, + { + "epoch": 2.56, + "learning_rate": 2.054366927745215e-06, + "logits/chosen": -1.6669256687164307, + "logits/rejected": -2.552208662033081, + "logps/chosen": -286.63848876953125, + "logps/rejected": -487.3487243652344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.937654495239258, + "rewards/margins": 9.108912467956543, + "rewards/rejected": -21.046566009521484, + "step": 16488 + }, + { + "epoch": 2.56, + "learning_rate": 2.053633487214067e-06, + "logits/chosen": -2.9128477573394775, + "logits/rejected": -1.9346948862075806, + "logps/chosen": -274.3790283203125, + "logps/rejected": -173.92190551757812, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2966203689575195, + "rewards/margins": 8.369828224182129, + "rewards/rejected": -15.666448593139648, + "step": 16489 + }, + { + "epoch": 2.56, + "learning_rate": 2.0529000466829193e-06, + "logits/chosen": -2.572195053100586, + "logits/rejected": -2.7891852855682373, + "logps/chosen": -127.40939331054688, + "logps/rejected": -329.730224609375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.434927940368652, + "rewards/margins": 9.274763107299805, + "rewards/rejected": -16.709692001342773, + "step": 16490 + }, + { + "epoch": 2.56, + "learning_rate": 2.052166606151771e-06, + "logits/chosen": -2.6791412830352783, + "logits/rejected": -2.616785764694214, + "logps/chosen": -353.2658996582031, + "logps/rejected": -500.1600036621094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.653617858886719, + "rewards/margins": 10.518668174743652, + "rewards/rejected": -16.172286987304688, + "step": 16491 + }, + { + "epoch": 2.56, + "learning_rate": 2.0514331656206235e-06, + "logits/chosen": -2.032132625579834, + "logits/rejected": -2.3154118061065674, + "logps/chosen": -114.27664184570312, + "logps/rejected": -242.77346801757812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.187063217163086, + "rewards/margins": 10.046579360961914, + "rewards/rejected": -19.233642578125, + "step": 16492 + }, + { + "epoch": 2.57, + "learning_rate": 2.0506997250894758e-06, + "logits/chosen": -2.2912704944610596, + "logits/rejected": -2.8181252479553223, + "logps/chosen": -112.44697570800781, + "logps/rejected": -257.92938232421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.457902908325195, + "rewards/margins": 10.388853073120117, + "rewards/rejected": -19.846755981445312, + "step": 16493 + }, + { + "epoch": 2.57, + "learning_rate": 2.049966284558328e-06, + "logits/chosen": -2.189783811569214, + "logits/rejected": -2.409769058227539, + "logps/chosen": -219.357177734375, + "logps/rejected": -529.0318603515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.680070877075195, + "rewards/margins": 10.20814323425293, + "rewards/rejected": -16.888214111328125, + "step": 16494 + }, + { + "epoch": 2.57, + "learning_rate": 2.04923284402718e-06, + "logits/chosen": -3.027095317840576, + "logits/rejected": -2.5937278270721436, + "logps/chosen": -269.72515869140625, + "logps/rejected": -254.47088623046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.55987548828125, + "rewards/margins": 8.599833488464355, + "rewards/rejected": -13.159708976745605, + "step": 16495 + }, + { + "epoch": 2.57, + "learning_rate": 2.048499403496032e-06, + "logits/chosen": -1.7609944343566895, + "logits/rejected": -2.514369249343872, + "logps/chosen": -272.3091125488281, + "logps/rejected": -657.19384765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.313992500305176, + "rewards/margins": 12.888662338256836, + "rewards/rejected": -19.202653884887695, + "step": 16496 + }, + { + "epoch": 2.57, + "learning_rate": 2.047765962964884e-06, + "logits/chosen": -1.2568535804748535, + "logits/rejected": -2.383784294128418, + "logps/chosen": -234.87033081054688, + "logps/rejected": -592.9185791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.855886936187744, + "rewards/margins": 12.341665267944336, + "rewards/rejected": -20.197551727294922, + "step": 16497 + }, + { + "epoch": 2.57, + "learning_rate": 2.047032522433736e-06, + "logits/chosen": -1.7730612754821777, + "logits/rejected": -2.4705424308776855, + "logps/chosen": -189.26629638671875, + "logps/rejected": -530.0086669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.972499370574951, + "rewards/margins": 17.22418785095215, + "rewards/rejected": -22.196687698364258, + "step": 16498 + }, + { + "epoch": 2.57, + "learning_rate": 2.0462990819025883e-06, + "logits/chosen": -1.6725475788116455, + "logits/rejected": -2.6422643661499023, + "logps/chosen": -323.124755859375, + "logps/rejected": -687.3782348632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.354440689086914, + "rewards/margins": 11.66008186340332, + "rewards/rejected": -23.014522552490234, + "step": 16499 + }, + { + "epoch": 2.57, + "learning_rate": 2.04556564137144e-06, + "logits/chosen": -2.431981325149536, + "logits/rejected": -2.927917242050171, + "logps/chosen": -71.74591827392578, + "logps/rejected": -269.95751953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.567929267883301, + "rewards/margins": 12.109167098999023, + "rewards/rejected": -18.67709732055664, + "step": 16500 + }, + { + "epoch": 2.57, + "learning_rate": 2.044832200840293e-06, + "logits/chosen": -1.2499370574951172, + "logits/rejected": -2.3051950931549072, + "logps/chosen": -164.4041290283203, + "logps/rejected": -611.238037109375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.294783592224121, + "rewards/margins": 7.853209495544434, + "rewards/rejected": -20.147993087768555, + "step": 16501 + }, + { + "epoch": 2.57, + "learning_rate": 2.0440987603091448e-06, + "logits/chosen": -1.6987208127975464, + "logits/rejected": -2.497962713241577, + "logps/chosen": -189.15460205078125, + "logps/rejected": -349.0560607910156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.403925895690918, + "rewards/margins": 8.179475784301758, + "rewards/rejected": -18.58340072631836, + "step": 16502 + }, + { + "epoch": 2.57, + "learning_rate": 2.043365319777997e-06, + "logits/chosen": -1.861293911933899, + "logits/rejected": -2.54964280128479, + "logps/chosen": -247.08013916015625, + "logps/rejected": -489.34051513671875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.115718364715576, + "rewards/margins": 9.837335586547852, + "rewards/rejected": -16.953054428100586, + "step": 16503 + }, + { + "epoch": 2.57, + "learning_rate": 2.042631879246849e-06, + "logits/chosen": -2.4999606609344482, + "logits/rejected": -2.4225430488586426, + "logps/chosen": -311.331298828125, + "logps/rejected": -304.685546875, + "loss": 0.0218, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.218421936035156, + "rewards/margins": 7.103367328643799, + "rewards/rejected": -19.321788787841797, + "step": 16504 + }, + { + "epoch": 2.57, + "learning_rate": 2.041898438715701e-06, + "logits/chosen": -1.7523483037948608, + "logits/rejected": -2.6361701488494873, + "logps/chosen": -360.8189697265625, + "logps/rejected": -444.7686462402344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.08979606628418, + "rewards/margins": 9.694673538208008, + "rewards/rejected": -17.784469604492188, + "step": 16505 + }, + { + "epoch": 2.57, + "learning_rate": 2.041164998184553e-06, + "logits/chosen": -2.429260730743408, + "logits/rejected": -2.969714403152466, + "logps/chosen": -609.8108520507812, + "logps/rejected": -606.7963256835938, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.233726501464844, + "rewards/margins": 7.061612129211426, + "rewards/rejected": -15.295339584350586, + "step": 16506 + }, + { + "epoch": 2.57, + "learning_rate": 2.040431557653405e-06, + "logits/chosen": -2.2867565155029297, + "logits/rejected": -2.8125457763671875, + "logps/chosen": -163.9520721435547, + "logps/rejected": -314.48028564453125, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.044901847839355, + "rewards/margins": 8.99203109741211, + "rewards/rejected": -18.03693199157715, + "step": 16507 + }, + { + "epoch": 2.57, + "learning_rate": 2.0396981171222573e-06, + "logits/chosen": -3.0000267028808594, + "logits/rejected": -2.823120355606079, + "logps/chosen": -313.02935791015625, + "logps/rejected": -560.218017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.227092742919922, + "rewards/margins": 12.864324569702148, + "rewards/rejected": -23.091419219970703, + "step": 16508 + }, + { + "epoch": 2.57, + "learning_rate": 2.0389646765911096e-06, + "logits/chosen": -2.6760311126708984, + "logits/rejected": -1.0632308721542358, + "logps/chosen": -488.4458923339844, + "logps/rejected": -237.04586791992188, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.924751281738281, + "rewards/margins": 8.344226837158203, + "rewards/rejected": -17.268978118896484, + "step": 16509 + }, + { + "epoch": 2.57, + "learning_rate": 2.038231236059962e-06, + "logits/chosen": -2.7182300090789795, + "logits/rejected": -1.9456626176834106, + "logps/chosen": -366.437255859375, + "logps/rejected": -376.2789611816406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.337803840637207, + "rewards/margins": 16.15683364868164, + "rewards/rejected": -26.494638442993164, + "step": 16510 + }, + { + "epoch": 2.57, + "learning_rate": 2.037497795528814e-06, + "logits/chosen": -3.088651657104492, + "logits/rejected": -2.8357582092285156, + "logps/chosen": -202.8255615234375, + "logps/rejected": -230.90159606933594, + "loss": 0.0506, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.755096912384033, + "rewards/margins": 6.086446285247803, + "rewards/rejected": -12.841543197631836, + "step": 16511 + }, + { + "epoch": 2.57, + "learning_rate": 2.036764354997666e-06, + "logits/chosen": -2.400883197784424, + "logits/rejected": -2.777177095413208, + "logps/chosen": -150.19027709960938, + "logps/rejected": -430.34771728515625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.249048233032227, + "rewards/margins": 8.866130828857422, + "rewards/rejected": -16.11518096923828, + "step": 16512 + }, + { + "epoch": 2.57, + "learning_rate": 2.036030914466518e-06, + "logits/chosen": -2.6884560585021973, + "logits/rejected": -1.0655932426452637, + "logps/chosen": -263.696533203125, + "logps/rejected": -230.90713500976562, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.680262088775635, + "rewards/margins": 7.893266201019287, + "rewards/rejected": -15.573528289794922, + "step": 16513 + }, + { + "epoch": 2.57, + "learning_rate": 2.03529747393537e-06, + "logits/chosen": -1.4027655124664307, + "logits/rejected": -2.6542625427246094, + "logps/chosen": -138.5742645263672, + "logps/rejected": -402.895263671875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.683833122253418, + "rewards/margins": 5.420194625854492, + "rewards/rejected": -16.104026794433594, + "step": 16514 + }, + { + "epoch": 2.57, + "learning_rate": 2.034564033404222e-06, + "logits/chosen": -1.4239648580551147, + "logits/rejected": -2.604700803756714, + "logps/chosen": -128.0823516845703, + "logps/rejected": -301.1387939453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.135570526123047, + "rewards/margins": 9.9450101852417, + "rewards/rejected": -20.080581665039062, + "step": 16515 + }, + { + "epoch": 2.57, + "learning_rate": 2.033830592873074e-06, + "logits/chosen": -2.8603415489196777, + "logits/rejected": -2.757341146469116, + "logps/chosen": -157.06439208984375, + "logps/rejected": -289.65570068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.416369438171387, + "rewards/margins": 11.325115203857422, + "rewards/rejected": -16.741483688354492, + "step": 16516 + }, + { + "epoch": 2.57, + "learning_rate": 2.0330971523419263e-06, + "logits/chosen": -2.6482973098754883, + "logits/rejected": -2.5793309211730957, + "logps/chosen": -402.8521728515625, + "logps/rejected": -605.8640747070312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.315813064575195, + "rewards/margins": 9.968365669250488, + "rewards/rejected": -20.2841796875, + "step": 16517 + }, + { + "epoch": 2.57, + "learning_rate": 2.0323637118107787e-06, + "logits/chosen": -1.0040479898452759, + "logits/rejected": -2.0257680416107178, + "logps/chosen": -236.33236694335938, + "logps/rejected": -531.9345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.732052803039551, + "rewards/margins": 11.901998519897461, + "rewards/rejected": -18.634052276611328, + "step": 16518 + }, + { + "epoch": 2.57, + "learning_rate": 2.031630271279631e-06, + "logits/chosen": -2.477128028869629, + "logits/rejected": -2.7775230407714844, + "logps/chosen": -565.1539306640625, + "logps/rejected": -578.5635986328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.17108154296875, + "rewards/margins": 8.830902099609375, + "rewards/rejected": -19.001983642578125, + "step": 16519 + }, + { + "epoch": 2.57, + "learning_rate": 2.030896830748483e-06, + "logits/chosen": -2.00256609916687, + "logits/rejected": -2.959531784057617, + "logps/chosen": -357.38507080078125, + "logps/rejected": -555.7100830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.23006010055542, + "rewards/margins": 13.712067604064941, + "rewards/rejected": -17.942127227783203, + "step": 16520 + }, + { + "epoch": 2.57, + "learning_rate": 2.030163390217335e-06, + "logits/chosen": -2.587826728820801, + "logits/rejected": -2.8639559745788574, + "logps/chosen": -157.59640502929688, + "logps/rejected": -323.92681884765625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.097487449645996, + "rewards/margins": 7.134190082550049, + "rewards/rejected": -15.231677055358887, + "step": 16521 + }, + { + "epoch": 2.57, + "learning_rate": 2.029429949686187e-06, + "logits/chosen": -2.5435173511505127, + "logits/rejected": -1.7080206871032715, + "logps/chosen": -266.2226257324219, + "logps/rejected": -268.38763427734375, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.038289070129395, + "rewards/margins": 5.48290491104126, + "rewards/rejected": -15.521194458007812, + "step": 16522 + }, + { + "epoch": 2.57, + "learning_rate": 2.0286965091550393e-06, + "logits/chosen": -2.563575267791748, + "logits/rejected": -2.192476272583008, + "logps/chosen": -353.0895690917969, + "logps/rejected": -313.94342041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.360779762268066, + "rewards/margins": 13.126299858093262, + "rewards/rejected": -18.487079620361328, + "step": 16523 + }, + { + "epoch": 2.57, + "learning_rate": 2.027963068623891e-06, + "logits/chosen": -1.0873171091079712, + "logits/rejected": -1.4789925813674927, + "logps/chosen": -482.1727600097656, + "logps/rejected": -442.5803527832031, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.594433784484863, + "rewards/margins": 8.308032989501953, + "rewards/rejected": -14.9024658203125, + "step": 16524 + }, + { + "epoch": 2.57, + "learning_rate": 2.027229628092743e-06, + "logits/chosen": -2.468493938446045, + "logits/rejected": -1.222537875175476, + "logps/chosen": -308.7630615234375, + "logps/rejected": -292.28375244140625, + "loss": 0.4835, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.7438249588012695, + "rewards/margins": 4.301013946533203, + "rewards/rejected": -12.044838905334473, + "step": 16525 + }, + { + "epoch": 2.57, + "learning_rate": 2.026496187561596e-06, + "logits/chosen": -2.219179391860962, + "logits/rejected": -2.7918832302093506, + "logps/chosen": -172.2422332763672, + "logps/rejected": -349.0086669921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.56594181060791, + "rewards/margins": 9.991336822509766, + "rewards/rejected": -18.55727767944336, + "step": 16526 + }, + { + "epoch": 2.57, + "learning_rate": 2.0257627470304477e-06, + "logits/chosen": -2.7862353324890137, + "logits/rejected": -2.0028555393218994, + "logps/chosen": -361.49334716796875, + "logps/rejected": -366.62060546875, + "loss": 0.663, + "rewards/accuracies": 0.5, + "rewards/chosen": -14.207691192626953, + "rewards/margins": 0.9462695121765137, + "rewards/rejected": -15.153961181640625, + "step": 16527 + }, + { + "epoch": 2.57, + "learning_rate": 2.0250293064993e-06, + "logits/chosen": -2.294631004333496, + "logits/rejected": -2.4618520736694336, + "logps/chosen": -206.96133422851562, + "logps/rejected": -272.7886047363281, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.11266803741455, + "rewards/margins": 6.995208740234375, + "rewards/rejected": -18.10787582397461, + "step": 16528 + }, + { + "epoch": 2.57, + "learning_rate": 2.024295865968152e-06, + "logits/chosen": -2.517996311187744, + "logits/rejected": -2.990358591079712, + "logps/chosen": -186.77752685546875, + "logps/rejected": -377.4171447753906, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.038358688354492, + "rewards/margins": 6.659006118774414, + "rewards/rejected": -16.697364807128906, + "step": 16529 + }, + { + "epoch": 2.57, + "learning_rate": 2.023562425437004e-06, + "logits/chosen": -1.5764310359954834, + "logits/rejected": -2.6621835231781006, + "logps/chosen": -252.9635009765625, + "logps/rejected": -658.0704956054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.268329620361328, + "rewards/margins": 13.431032180786133, + "rewards/rejected": -22.699363708496094, + "step": 16530 + }, + { + "epoch": 2.57, + "learning_rate": 2.022828984905856e-06, + "logits/chosen": -2.9903016090393066, + "logits/rejected": -2.387474775314331, + "logps/chosen": -318.6872253417969, + "logps/rejected": -171.89620971679688, + "loss": 0.1315, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.439769744873047, + "rewards/margins": 4.4565324783325195, + "rewards/rejected": -12.896302223205566, + "step": 16531 + }, + { + "epoch": 2.57, + "learning_rate": 2.0220955443747083e-06, + "logits/chosen": -1.7626553773880005, + "logits/rejected": -2.2062551975250244, + "logps/chosen": -352.94317626953125, + "logps/rejected": -688.8282470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.287492752075195, + "rewards/margins": 14.417943000793457, + "rewards/rejected": -23.70543670654297, + "step": 16532 + }, + { + "epoch": 2.57, + "learning_rate": 2.0213621038435602e-06, + "logits/chosen": -2.161320924758911, + "logits/rejected": -2.572122097015381, + "logps/chosen": -172.63119506835938, + "logps/rejected": -280.9280090332031, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.49356460571289, + "rewards/margins": 8.428349494934082, + "rewards/rejected": -17.921913146972656, + "step": 16533 + }, + { + "epoch": 2.57, + "learning_rate": 2.0206286633124125e-06, + "logits/chosen": -2.308837652206421, + "logits/rejected": -1.8331793546676636, + "logps/chosen": -414.98486328125, + "logps/rejected": -339.1032409667969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.200271606445312, + "rewards/margins": 9.681314468383789, + "rewards/rejected": -17.8815860748291, + "step": 16534 + }, + { + "epoch": 2.57, + "learning_rate": 2.019895222781265e-06, + "logits/chosen": -2.8355917930603027, + "logits/rejected": -2.8953280448913574, + "logps/chosen": -127.09236907958984, + "logps/rejected": -324.3081970214844, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.736711502075195, + "rewards/margins": 11.183825492858887, + "rewards/rejected": -18.920536041259766, + "step": 16535 + }, + { + "epoch": 2.57, + "learning_rate": 2.0191617822501167e-06, + "logits/chosen": -2.9193310737609863, + "logits/rejected": -2.911881446838379, + "logps/chosen": -526.15283203125, + "logps/rejected": -612.6277465820312, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7235260009765625, + "rewards/margins": 9.388853073120117, + "rewards/rejected": -17.11237907409668, + "step": 16536 + }, + { + "epoch": 2.57, + "learning_rate": 2.018428341718969e-06, + "logits/chosen": -1.9054632186889648, + "logits/rejected": -2.6568689346313477, + "logps/chosen": -156.12149047851562, + "logps/rejected": -507.809326171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.086705207824707, + "rewards/margins": 8.905433654785156, + "rewards/rejected": -17.992137908935547, + "step": 16537 + }, + { + "epoch": 2.57, + "learning_rate": 2.017694901187821e-06, + "logits/chosen": -2.7284367084503174, + "logits/rejected": -2.7494914531707764, + "logps/chosen": -188.09640502929688, + "logps/rejected": -249.27310180664062, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.627458572387695, + "rewards/margins": 5.119852066040039, + "rewards/rejected": -11.747310638427734, + "step": 16538 + }, + { + "epoch": 2.57, + "learning_rate": 2.016961460656673e-06, + "logits/chosen": -2.533432722091675, + "logits/rejected": -2.9625582695007324, + "logps/chosen": -443.97564697265625, + "logps/rejected": -482.4661865234375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.041454792022705, + "rewards/margins": 6.539390563964844, + "rewards/rejected": -13.58084487915039, + "step": 16539 + }, + { + "epoch": 2.57, + "learning_rate": 2.016228020125525e-06, + "logits/chosen": -2.401707410812378, + "logits/rejected": -2.770749568939209, + "logps/chosen": -220.58758544921875, + "logps/rejected": -455.6124267578125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.6201810836792, + "rewards/margins": 9.296784400939941, + "rewards/rejected": -18.91696548461914, + "step": 16540 + }, + { + "epoch": 2.57, + "learning_rate": 2.0154945795943774e-06, + "logits/chosen": -2.865374803543091, + "logits/rejected": -1.950001835823059, + "logps/chosen": -307.91748046875, + "logps/rejected": -286.6387023925781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.883011817932129, + "rewards/margins": 10.677149772644043, + "rewards/rejected": -16.560161590576172, + "step": 16541 + }, + { + "epoch": 2.57, + "learning_rate": 2.0147611390632292e-06, + "logits/chosen": -2.560640335083008, + "logits/rejected": -1.5759351253509521, + "logps/chosen": -188.17352294921875, + "logps/rejected": -220.53280639648438, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.762726783752441, + "rewards/margins": 7.67691707611084, + "rewards/rejected": -19.43964385986328, + "step": 16542 + }, + { + "epoch": 2.57, + "learning_rate": 2.014027698532082e-06, + "logits/chosen": -2.47902512550354, + "logits/rejected": -2.77736759185791, + "logps/chosen": -212.93345642089844, + "logps/rejected": -402.12841796875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.482898712158203, + "rewards/margins": 7.086343288421631, + "rewards/rejected": -15.569242477416992, + "step": 16543 + }, + { + "epoch": 2.57, + "learning_rate": 2.013294258000934e-06, + "logits/chosen": -1.879839539527893, + "logits/rejected": -2.8213508129119873, + "logps/chosen": -218.96002197265625, + "logps/rejected": -526.6322021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.669891357421875, + "rewards/margins": 10.839544296264648, + "rewards/rejected": -22.50943374633789, + "step": 16544 + }, + { + "epoch": 2.57, + "learning_rate": 2.0125608174697857e-06, + "logits/chosen": -2.003861904144287, + "logits/rejected": -2.9138877391815186, + "logps/chosen": -483.46795654296875, + "logps/rejected": -607.5787963867188, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.764780044555664, + "rewards/margins": 8.038522720336914, + "rewards/rejected": -18.803302764892578, + "step": 16545 + }, + { + "epoch": 2.57, + "learning_rate": 2.011827376938638e-06, + "logits/chosen": -2.193877696990967, + "logits/rejected": -2.638214588165283, + "logps/chosen": -199.970947265625, + "logps/rejected": -420.6324462890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.74897575378418, + "rewards/margins": 9.996562004089355, + "rewards/rejected": -16.74553680419922, + "step": 16546 + }, + { + "epoch": 2.57, + "learning_rate": 2.01109393640749e-06, + "logits/chosen": -1.8675568103790283, + "logits/rejected": -2.668281078338623, + "logps/chosen": -139.63351440429688, + "logps/rejected": -320.1524658203125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.088016510009766, + "rewards/margins": 7.811043739318848, + "rewards/rejected": -14.899060249328613, + "step": 16547 + }, + { + "epoch": 2.57, + "learning_rate": 2.010360495876342e-06, + "logits/chosen": -3.029568672180176, + "logits/rejected": -2.8972854614257812, + "logps/chosen": -314.62152099609375, + "logps/rejected": -335.4537353515625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.291762351989746, + "rewards/margins": 8.229230880737305, + "rewards/rejected": -16.520992279052734, + "step": 16548 + }, + { + "epoch": 2.57, + "learning_rate": 2.009627055345194e-06, + "logits/chosen": -2.751818895339966, + "logits/rejected": -2.966618537902832, + "logps/chosen": -166.11676025390625, + "logps/rejected": -268.2088317871094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.616632461547852, + "rewards/margins": 8.49659252166748, + "rewards/rejected": -13.113224983215332, + "step": 16549 + }, + { + "epoch": 2.57, + "learning_rate": 2.0088936148140464e-06, + "logits/chosen": -1.272533893585205, + "logits/rejected": -2.5644371509552, + "logps/chosen": -140.8799591064453, + "logps/rejected": -431.146240234375, + "loss": 0.034, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.088094711303711, + "rewards/margins": 8.044925689697266, + "rewards/rejected": -19.133020401000977, + "step": 16550 + }, + { + "epoch": 2.57, + "learning_rate": 2.0081601742828983e-06, + "logits/chosen": -2.665658473968506, + "logits/rejected": -2.5529887676239014, + "logps/chosen": -201.61947631835938, + "logps/rejected": -271.49285888671875, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.652971267700195, + "rewards/margins": 7.106800556182861, + "rewards/rejected": -15.759771347045898, + "step": 16551 + }, + { + "epoch": 2.57, + "learning_rate": 2.007426733751751e-06, + "logits/chosen": -0.6347591280937195, + "logits/rejected": -2.738872766494751, + "logps/chosen": -145.3008575439453, + "logps/rejected": -626.5679931640625, + "loss": 0.2388, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.901857376098633, + "rewards/margins": 8.020326614379883, + "rewards/rejected": -18.922183990478516, + "step": 16552 + }, + { + "epoch": 2.57, + "learning_rate": 2.006693293220603e-06, + "logits/chosen": -1.9336053133010864, + "logits/rejected": -2.6741561889648438, + "logps/chosen": -443.33575439453125, + "logps/rejected": -564.2860107421875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.40732192993164, + "rewards/margins": 8.377044677734375, + "rewards/rejected": -17.784366607666016, + "step": 16553 + }, + { + "epoch": 2.57, + "learning_rate": 2.0059598526894547e-06, + "logits/chosen": -0.8583928942680359, + "logits/rejected": -2.2672197818756104, + "logps/chosen": -191.434326171875, + "logps/rejected": -380.955078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.621889114379883, + "rewards/margins": 9.836969375610352, + "rewards/rejected": -19.458858489990234, + "step": 16554 + }, + { + "epoch": 2.57, + "learning_rate": 2.005226412158307e-06, + "logits/chosen": -1.6984776258468628, + "logits/rejected": -2.40692400932312, + "logps/chosen": -214.17874145507812, + "logps/rejected": -463.85992431640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.501288414001465, + "rewards/margins": 8.733410835266113, + "rewards/rejected": -20.234699249267578, + "step": 16555 + }, + { + "epoch": 2.57, + "learning_rate": 2.004492971627159e-06, + "logits/chosen": -2.01951003074646, + "logits/rejected": -2.672828435897827, + "logps/chosen": -440.2361755371094, + "logps/rejected": -484.4444580078125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.899705410003662, + "rewards/margins": 6.810649394989014, + "rewards/rejected": -13.710354804992676, + "step": 16556 + }, + { + "epoch": 2.57, + "learning_rate": 2.0037595310960112e-06, + "logits/chosen": -2.3755977153778076, + "logits/rejected": -2.2775228023529053, + "logps/chosen": -191.1369171142578, + "logps/rejected": -274.5281982421875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.809459686279297, + "rewards/margins": 7.088433742523193, + "rewards/rejected": -17.89789390563965, + "step": 16557 + }, + { + "epoch": 2.58, + "learning_rate": 2.003026090564863e-06, + "logits/chosen": -1.727994441986084, + "logits/rejected": -2.625725746154785, + "logps/chosen": -310.05120849609375, + "logps/rejected": -618.3427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.661832809448242, + "rewards/margins": 12.780189514160156, + "rewards/rejected": -22.4420223236084, + "step": 16558 + }, + { + "epoch": 2.58, + "learning_rate": 2.0022926500337154e-06, + "logits/chosen": -1.6891921758651733, + "logits/rejected": -2.649864673614502, + "logps/chosen": -137.21975708007812, + "logps/rejected": -292.71240234375, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.72806167602539, + "rewards/margins": 6.976219177246094, + "rewards/rejected": -18.704280853271484, + "step": 16559 + }, + { + "epoch": 2.58, + "learning_rate": 2.0015592095025677e-06, + "logits/chosen": -2.6176774501800537, + "logits/rejected": -2.0641531944274902, + "logps/chosen": -206.65380859375, + "logps/rejected": -477.1623229980469, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.504970550537109, + "rewards/margins": 10.636113166809082, + "rewards/rejected": -18.141084671020508, + "step": 16560 + }, + { + "epoch": 2.58, + "learning_rate": 2.00082576897142e-06, + "logits/chosen": -1.9312522411346436, + "logits/rejected": -2.2168216705322266, + "logps/chosen": -181.40457153320312, + "logps/rejected": -308.7570495605469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6692962646484375, + "rewards/margins": 10.117969512939453, + "rewards/rejected": -16.78726577758789, + "step": 16561 + }, + { + "epoch": 2.58, + "learning_rate": 2.000092328440272e-06, + "logits/chosen": -2.316540241241455, + "logits/rejected": -2.8010427951812744, + "logps/chosen": -234.91506958007812, + "logps/rejected": -396.63580322265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.609505653381348, + "rewards/margins": 9.684616088867188, + "rewards/rejected": -19.29412078857422, + "step": 16562 + }, + { + "epoch": 2.58, + "learning_rate": 1.9993588879091238e-06, + "logits/chosen": -2.1516964435577393, + "logits/rejected": -2.7709248065948486, + "logps/chosen": -93.6390380859375, + "logps/rejected": -462.1545104980469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.564055442810059, + "rewards/margins": 16.54890251159668, + "rewards/rejected": -23.112957000732422, + "step": 16563 + }, + { + "epoch": 2.58, + "learning_rate": 1.998625447377976e-06, + "logits/chosen": -2.683528184890747, + "logits/rejected": -3.0637738704681396, + "logps/chosen": -172.30026245117188, + "logps/rejected": -476.14794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.363956451416016, + "rewards/margins": 10.207317352294922, + "rewards/rejected": -17.571273803710938, + "step": 16564 + }, + { + "epoch": 2.58, + "learning_rate": 1.997892006846828e-06, + "logits/chosen": -2.1375579833984375, + "logits/rejected": -2.6646151542663574, + "logps/chosen": -213.30917358398438, + "logps/rejected": -491.99853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.18628978729248, + "rewards/margins": 10.304764747619629, + "rewards/rejected": -20.49105453491211, + "step": 16565 + }, + { + "epoch": 2.58, + "learning_rate": 1.9971585663156802e-06, + "logits/chosen": -2.7403979301452637, + "logits/rejected": -2.891526699066162, + "logps/chosen": -113.40174102783203, + "logps/rejected": -442.579833984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.135829925537109, + "rewards/margins": 14.518946647644043, + "rewards/rejected": -20.65477752685547, + "step": 16566 + }, + { + "epoch": 2.58, + "learning_rate": 1.996425125784532e-06, + "logits/chosen": -2.8752503395080566, + "logits/rejected": -2.666857957839966, + "logps/chosen": -376.20648193359375, + "logps/rejected": -358.3656311035156, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.390765190124512, + "rewards/margins": 8.737850189208984, + "rewards/rejected": -17.128616333007812, + "step": 16567 + }, + { + "epoch": 2.58, + "learning_rate": 1.9956916852533844e-06, + "logits/chosen": -2.0995781421661377, + "logits/rejected": -2.499009847640991, + "logps/chosen": -151.493408203125, + "logps/rejected": -296.5535888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.007549285888672, + "rewards/margins": 11.028453826904297, + "rewards/rejected": -19.03600311279297, + "step": 16568 + }, + { + "epoch": 2.58, + "learning_rate": 1.9949582447222367e-06, + "logits/chosen": -1.4823561906814575, + "logits/rejected": -2.3623151779174805, + "logps/chosen": -220.1385498046875, + "logps/rejected": -598.8387451171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.154176712036133, + "rewards/margins": 9.901725769042969, + "rewards/rejected": -18.0559024810791, + "step": 16569 + }, + { + "epoch": 2.58, + "learning_rate": 1.994224804191089e-06, + "logits/chosen": -2.8536393642425537, + "logits/rejected": -3.1003122329711914, + "logps/chosen": -112.17236328125, + "logps/rejected": -302.67840576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.397857189178467, + "rewards/margins": 10.777119636535645, + "rewards/rejected": -17.174976348876953, + "step": 16570 + }, + { + "epoch": 2.58, + "learning_rate": 1.993491363659941e-06, + "logits/chosen": -2.1611790657043457, + "logits/rejected": -2.775233745574951, + "logps/chosen": -361.9422912597656, + "logps/rejected": -402.06756591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.222085952758789, + "rewards/margins": 11.817193984985352, + "rewards/rejected": -18.03927993774414, + "step": 16571 + }, + { + "epoch": 2.58, + "learning_rate": 1.992757923128793e-06, + "logits/chosen": -2.9060301780700684, + "logits/rejected": -2.308394432067871, + "logps/chosen": -667.704345703125, + "logps/rejected": -510.9815979003906, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.212069511413574, + "rewards/margins": 6.361316204071045, + "rewards/rejected": -16.573387145996094, + "step": 16572 + }, + { + "epoch": 2.58, + "learning_rate": 1.992024482597645e-06, + "logits/chosen": -2.746948480606079, + "logits/rejected": -1.3380802869796753, + "logps/chosen": -874.1694946289062, + "logps/rejected": -524.0319213867188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.525274753570557, + "rewards/margins": 9.258255004882812, + "rewards/rejected": -16.783531188964844, + "step": 16573 + }, + { + "epoch": 2.58, + "learning_rate": 1.991291042066497e-06, + "logits/chosen": -2.0377750396728516, + "logits/rejected": -2.889495372772217, + "logps/chosen": -190.13519287109375, + "logps/rejected": -547.0587768554688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.964480400085449, + "rewards/margins": 12.105016708374023, + "rewards/rejected": -20.069496154785156, + "step": 16574 + }, + { + "epoch": 2.58, + "learning_rate": 1.9905576015353493e-06, + "logits/chosen": -2.550722360610962, + "logits/rejected": -1.7247469425201416, + "logps/chosen": -759.8455810546875, + "logps/rejected": -479.6463928222656, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.907173156738281, + "rewards/margins": 9.121060371398926, + "rewards/rejected": -20.02823257446289, + "step": 16575 + }, + { + "epoch": 2.58, + "learning_rate": 1.989824161004201e-06, + "logits/chosen": -2.1774165630340576, + "logits/rejected": -2.8204410076141357, + "logps/chosen": -377.6393737792969, + "logps/rejected": -562.4163208007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.633420944213867, + "rewards/margins": 12.57486343383789, + "rewards/rejected": -19.208284378051758, + "step": 16576 + }, + { + "epoch": 2.58, + "learning_rate": 1.989090720473054e-06, + "logits/chosen": -2.806191921234131, + "logits/rejected": -2.530947208404541, + "logps/chosen": -193.05532836914062, + "logps/rejected": -349.6923522949219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.051820755004883, + "rewards/margins": 11.354223251342773, + "rewards/rejected": -21.406044006347656, + "step": 16577 + }, + { + "epoch": 2.58, + "learning_rate": 1.9883572799419057e-06, + "logits/chosen": -1.864811658859253, + "logits/rejected": -2.895796298980713, + "logps/chosen": -408.73785400390625, + "logps/rejected": -687.1104736328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.790593147277832, + "rewards/margins": 11.074495315551758, + "rewards/rejected": -18.865089416503906, + "step": 16578 + }, + { + "epoch": 2.58, + "learning_rate": 1.987623839410758e-06, + "logits/chosen": -1.5433681011199951, + "logits/rejected": -2.955340623855591, + "logps/chosen": -138.3231658935547, + "logps/rejected": -371.58062744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.666624069213867, + "rewards/margins": 10.344905853271484, + "rewards/rejected": -18.01152992248535, + "step": 16579 + }, + { + "epoch": 2.58, + "learning_rate": 1.98689039887961e-06, + "logits/chosen": -2.362957715988159, + "logits/rejected": -2.210520029067993, + "logps/chosen": -247.66781616210938, + "logps/rejected": -305.90789794921875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.456786155700684, + "rewards/margins": 7.895450115203857, + "rewards/rejected": -16.352235794067383, + "step": 16580 + }, + { + "epoch": 2.58, + "learning_rate": 1.9861569583484622e-06, + "logits/chosen": -2.8460371494293213, + "logits/rejected": -2.9175937175750732, + "logps/chosen": -577.95166015625, + "logps/rejected": -647.8253173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.864665985107422, + "rewards/margins": 12.140764236450195, + "rewards/rejected": -21.005428314208984, + "step": 16581 + }, + { + "epoch": 2.58, + "learning_rate": 1.985423517817314e-06, + "logits/chosen": -2.8441717624664307, + "logits/rejected": -1.525535225868225, + "logps/chosen": -356.40673828125, + "logps/rejected": -204.42507934570312, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.406398296356201, + "rewards/margins": 6.307605743408203, + "rewards/rejected": -12.714004516601562, + "step": 16582 + }, + { + "epoch": 2.58, + "learning_rate": 1.984690077286166e-06, + "logits/chosen": -2.7085986137390137, + "logits/rejected": -2.805391788482666, + "logps/chosen": -140.32090759277344, + "logps/rejected": -405.065185546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.185187339782715, + "rewards/margins": 10.90063762664795, + "rewards/rejected": -20.085824966430664, + "step": 16583 + }, + { + "epoch": 2.58, + "learning_rate": 1.9839566367550183e-06, + "logits/chosen": -2.9201269149780273, + "logits/rejected": -2.9694082736968994, + "logps/chosen": -376.2553405761719, + "logps/rejected": -511.9179992675781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.744492530822754, + "rewards/margins": 10.701415061950684, + "rewards/rejected": -17.445907592773438, + "step": 16584 + }, + { + "epoch": 2.58, + "learning_rate": 1.9832231962238706e-06, + "logits/chosen": -2.869274139404297, + "logits/rejected": -2.8594815731048584, + "logps/chosen": -206.7653045654297, + "logps/rejected": -308.4376220703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.471628665924072, + "rewards/margins": 8.93980884552002, + "rewards/rejected": -14.41143798828125, + "step": 16585 + }, + { + "epoch": 2.58, + "learning_rate": 1.982489755692723e-06, + "logits/chosen": -1.7420074939727783, + "logits/rejected": -2.377898693084717, + "logps/chosen": -221.40292358398438, + "logps/rejected": -449.684814453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.361848831176758, + "rewards/margins": 13.160926818847656, + "rewards/rejected": -23.522775650024414, + "step": 16586 + }, + { + "epoch": 2.58, + "learning_rate": 1.9817563151615748e-06, + "logits/chosen": -2.8208320140838623, + "logits/rejected": -2.612154483795166, + "logps/chosen": -475.01220703125, + "logps/rejected": -736.3189697265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.109626770019531, + "rewards/margins": 11.675573348999023, + "rewards/rejected": -19.785200119018555, + "step": 16587 + }, + { + "epoch": 2.58, + "learning_rate": 1.981022874630427e-06, + "logits/chosen": -2.7149741649627686, + "logits/rejected": -1.6193766593933105, + "logps/chosen": -375.9525146484375, + "logps/rejected": -418.6846923828125, + "loss": 0.0443, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.325234413146973, + "rewards/margins": 6.765081405639648, + "rewards/rejected": -15.090316772460938, + "step": 16588 + }, + { + "epoch": 2.58, + "learning_rate": 1.980289434099279e-06, + "logits/chosen": -1.206539273262024, + "logits/rejected": -2.705760955810547, + "logps/chosen": -197.72439575195312, + "logps/rejected": -570.5728759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.189366340637207, + "rewards/margins": 10.481527328491211, + "rewards/rejected": -19.670894622802734, + "step": 16589 + }, + { + "epoch": 2.58, + "learning_rate": 1.9795559935681313e-06, + "logits/chosen": -2.1682629585266113, + "logits/rejected": -2.7477309703826904, + "logps/chosen": -171.49110412597656, + "logps/rejected": -485.9742736816406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.333847045898438, + "rewards/margins": 10.40365982055664, + "rewards/rejected": -22.737506866455078, + "step": 16590 + }, + { + "epoch": 2.58, + "learning_rate": 1.978822553036983e-06, + "logits/chosen": -2.842461347579956, + "logits/rejected": -2.059279680252075, + "logps/chosen": -439.00689697265625, + "logps/rejected": -409.71795654296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.770362854003906, + "rewards/margins": 10.99906063079834, + "rewards/rejected": -19.76942253112793, + "step": 16591 + }, + { + "epoch": 2.58, + "learning_rate": 1.9780891125058354e-06, + "logits/chosen": -1.4372799396514893, + "logits/rejected": -2.38254714012146, + "logps/chosen": -175.18362426757812, + "logps/rejected": -389.7703857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.026216506958008, + "rewards/margins": 12.190465927124023, + "rewards/rejected": -22.21668243408203, + "step": 16592 + }, + { + "epoch": 2.58, + "learning_rate": 1.9773556719746873e-06, + "logits/chosen": -0.8884218335151672, + "logits/rejected": -2.476994752883911, + "logps/chosen": -212.689453125, + "logps/rejected": -659.9056396484375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.919109344482422, + "rewards/margins": 7.510858058929443, + "rewards/rejected": -21.42996597290039, + "step": 16593 + }, + { + "epoch": 2.58, + "learning_rate": 1.9766222314435396e-06, + "logits/chosen": -1.0520436763763428, + "logits/rejected": -2.5071446895599365, + "logps/chosen": -169.26492309570312, + "logps/rejected": -408.7065124511719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.797793388366699, + "rewards/margins": 12.400039672851562, + "rewards/rejected": -20.197834014892578, + "step": 16594 + }, + { + "epoch": 2.58, + "learning_rate": 1.975888790912392e-06, + "logits/chosen": -1.1092815399169922, + "logits/rejected": -2.250864028930664, + "logps/chosen": -346.20050048828125, + "logps/rejected": -772.8123779296875, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.002433776855469, + "rewards/margins": 12.812284469604492, + "rewards/rejected": -24.814720153808594, + "step": 16595 + }, + { + "epoch": 2.58, + "learning_rate": 1.975155350381244e-06, + "logits/chosen": -2.596111297607422, + "logits/rejected": -2.1763930320739746, + "logps/chosen": -238.52529907226562, + "logps/rejected": -233.8673095703125, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.62654972076416, + "rewards/margins": 8.950431823730469, + "rewards/rejected": -16.576982498168945, + "step": 16596 + }, + { + "epoch": 2.58, + "learning_rate": 1.974421909850096e-06, + "logits/chosen": -2.9501190185546875, + "logits/rejected": -2.206559658050537, + "logps/chosen": -253.20814514160156, + "logps/rejected": -338.7640380859375, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.925509452819824, + "rewards/margins": 11.264208793640137, + "rewards/rejected": -19.18971824645996, + "step": 16597 + }, + { + "epoch": 2.58, + "learning_rate": 1.973688469318948e-06, + "logits/chosen": -2.760066032409668, + "logits/rejected": -2.1803929805755615, + "logps/chosen": -700.1112060546875, + "logps/rejected": -627.6932373046875, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.732587814331055, + "rewards/margins": 6.2594146728515625, + "rewards/rejected": -16.992002487182617, + "step": 16598 + }, + { + "epoch": 2.58, + "learning_rate": 1.9729550287878003e-06, + "logits/chosen": -2.779930830001831, + "logits/rejected": -3.070568323135376, + "logps/chosen": -238.32229614257812, + "logps/rejected": -200.47300720214844, + "loss": 0.1048, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.726295471191406, + "rewards/margins": 3.225125312805176, + "rewards/rejected": -13.951420783996582, + "step": 16599 + }, + { + "epoch": 2.58, + "learning_rate": 1.972221588256652e-06, + "logits/chosen": -1.4481842517852783, + "logits/rejected": -2.6287589073181152, + "logps/chosen": -268.3905944824219, + "logps/rejected": -496.974609375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.44290542602539, + "rewards/margins": 8.862156867980957, + "rewards/rejected": -18.30506134033203, + "step": 16600 + }, + { + "epoch": 2.58, + "learning_rate": 1.9714881477255045e-06, + "logits/chosen": -1.3772295713424683, + "logits/rejected": -1.2920476198196411, + "logps/chosen": -481.8846130371094, + "logps/rejected": -558.07666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.018675804138184, + "rewards/margins": 12.92087459564209, + "rewards/rejected": -20.939550399780273, + "step": 16601 + }, + { + "epoch": 2.58, + "learning_rate": 1.9707547071943568e-06, + "logits/chosen": -2.621692419052124, + "logits/rejected": -2.2417266368865967, + "logps/chosen": -305.03533935546875, + "logps/rejected": -340.76483154296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.885871887207031, + "rewards/margins": 8.72509765625, + "rewards/rejected": -16.61096954345703, + "step": 16602 + }, + { + "epoch": 2.58, + "learning_rate": 1.9700212666632086e-06, + "logits/chosen": -2.3523378372192383, + "logits/rejected": -2.5903050899505615, + "logps/chosen": -191.28851318359375, + "logps/rejected": -316.4291076660156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.048020362854004, + "rewards/margins": 11.450815200805664, + "rewards/rejected": -21.498836517333984, + "step": 16603 + }, + { + "epoch": 2.58, + "learning_rate": 1.969287826132061e-06, + "logits/chosen": -1.26531982421875, + "logits/rejected": -2.481816053390503, + "logps/chosen": -263.53802490234375, + "logps/rejected": -514.0704345703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.557205200195312, + "rewards/margins": 10.438459396362305, + "rewards/rejected": -18.99566650390625, + "step": 16604 + }, + { + "epoch": 2.58, + "learning_rate": 1.968554385600913e-06, + "logits/chosen": -3.1079351902008057, + "logits/rejected": -2.1163880825042725, + "logps/chosen": -360.36004638671875, + "logps/rejected": -459.167724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.261627197265625, + "rewards/margins": 12.856470108032227, + "rewards/rejected": -20.118099212646484, + "step": 16605 + }, + { + "epoch": 2.58, + "learning_rate": 1.967820945069765e-06, + "logits/chosen": -1.4671484231948853, + "logits/rejected": -2.9783377647399902, + "logps/chosen": -97.73951721191406, + "logps/rejected": -555.0311279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.105063438415527, + "rewards/margins": 15.92995548248291, + "rewards/rejected": -21.035018920898438, + "step": 16606 + }, + { + "epoch": 2.58, + "learning_rate": 1.967087504538617e-06, + "logits/chosen": -3.058781147003174, + "logits/rejected": -3.060504198074341, + "logps/chosen": -332.15521240234375, + "logps/rejected": -378.06439208984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.69038200378418, + "rewards/margins": 8.802593231201172, + "rewards/rejected": -19.492977142333984, + "step": 16607 + }, + { + "epoch": 2.58, + "learning_rate": 1.9663540640074693e-06, + "logits/chosen": -2.6252012252807617, + "logits/rejected": -2.06611967086792, + "logps/chosen": -282.68536376953125, + "logps/rejected": -284.6445007324219, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.314599990844727, + "rewards/margins": 5.563503265380859, + "rewards/rejected": -16.878103256225586, + "step": 16608 + }, + { + "epoch": 2.58, + "learning_rate": 1.965620623476321e-06, + "logits/chosen": -2.3287925720214844, + "logits/rejected": -1.7281917333602905, + "logps/chosen": -264.29962158203125, + "logps/rejected": -367.4458312988281, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.045187950134277, + "rewards/margins": 10.401515007019043, + "rewards/rejected": -21.44670295715332, + "step": 16609 + }, + { + "epoch": 2.58, + "learning_rate": 1.9648871829451735e-06, + "logits/chosen": -1.7994047403335571, + "logits/rejected": -2.5885515213012695, + "logps/chosen": -255.68948364257812, + "logps/rejected": -330.82781982421875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9770379066467285, + "rewards/margins": 9.9902982711792, + "rewards/rejected": -16.967336654663086, + "step": 16610 + }, + { + "epoch": 2.58, + "learning_rate": 1.9641537424140258e-06, + "logits/chosen": -1.8839439153671265, + "logits/rejected": -2.6416120529174805, + "logps/chosen": -206.1829833984375, + "logps/rejected": -403.5816650390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.593624114990234, + "rewards/margins": 9.791473388671875, + "rewards/rejected": -20.38509750366211, + "step": 16611 + }, + { + "epoch": 2.58, + "learning_rate": 1.963420301882878e-06, + "logits/chosen": -1.8462330102920532, + "logits/rejected": -2.4933700561523438, + "logps/chosen": -269.18487548828125, + "logps/rejected": -408.8883056640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.264034271240234, + "rewards/margins": 8.414104461669922, + "rewards/rejected": -20.678138732910156, + "step": 16612 + }, + { + "epoch": 2.58, + "learning_rate": 1.96268686135173e-06, + "logits/chosen": -2.5908684730529785, + "logits/rejected": -2.044475793838501, + "logps/chosen": -258.52349853515625, + "logps/rejected": -379.6693420410156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.859394073486328, + "rewards/margins": 11.631515502929688, + "rewards/rejected": -24.490909576416016, + "step": 16613 + }, + { + "epoch": 2.58, + "learning_rate": 1.961953420820582e-06, + "logits/chosen": -2.610614061355591, + "logits/rejected": -2.3676793575286865, + "logps/chosen": -365.28448486328125, + "logps/rejected": -414.9873352050781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.507255554199219, + "rewards/margins": 10.330780982971191, + "rewards/rejected": -20.838035583496094, + "step": 16614 + }, + { + "epoch": 2.58, + "learning_rate": 1.961219980289434e-06, + "logits/chosen": -2.526409864425659, + "logits/rejected": -2.87265682220459, + "logps/chosen": -128.9430694580078, + "logps/rejected": -371.78936767578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.127266883850098, + "rewards/margins": 11.641443252563477, + "rewards/rejected": -20.76871109008789, + "step": 16615 + }, + { + "epoch": 2.58, + "learning_rate": 1.960486539758286e-06, + "logits/chosen": -2.379302501678467, + "logits/rejected": -2.973008632659912, + "logps/chosen": -102.34757995605469, + "logps/rejected": -384.8349609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.98854923248291, + "rewards/margins": 13.520824432373047, + "rewards/rejected": -21.509374618530273, + "step": 16616 + }, + { + "epoch": 2.58, + "learning_rate": 1.9597530992271383e-06, + "logits/chosen": -2.385998010635376, + "logits/rejected": -2.9738519191741943, + "logps/chosen": -246.16357421875, + "logps/rejected": -366.173828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.642541885375977, + "rewards/margins": 10.06967544555664, + "rewards/rejected": -17.712215423583984, + "step": 16617 + }, + { + "epoch": 2.58, + "learning_rate": 1.95901965869599e-06, + "logits/chosen": -2.2526144981384277, + "logits/rejected": -2.6200859546661377, + "logps/chosen": -588.2640380859375, + "logps/rejected": -614.1104736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.849176406860352, + "rewards/margins": 14.352752685546875, + "rewards/rejected": -23.201927185058594, + "step": 16618 + }, + { + "epoch": 2.58, + "learning_rate": 1.958286218164843e-06, + "logits/chosen": -2.024717092514038, + "logits/rejected": -2.1232950687408447, + "logps/chosen": -224.74612426757812, + "logps/rejected": -469.6296081542969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.572397232055664, + "rewards/margins": 13.252738952636719, + "rewards/rejected": -19.82513427734375, + "step": 16619 + }, + { + "epoch": 2.58, + "learning_rate": 1.957552777633695e-06, + "logits/chosen": -1.076708436012268, + "logits/rejected": -2.4646055698394775, + "logps/chosen": -182.46743774414062, + "logps/rejected": -584.6919555664062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.53585433959961, + "rewards/margins": 12.676691055297852, + "rewards/rejected": -25.21254539489746, + "step": 16620 + }, + { + "epoch": 2.58, + "learning_rate": 1.956819337102547e-06, + "logits/chosen": -2.892120838165283, + "logits/rejected": -2.4941060543060303, + "logps/chosen": -164.11227416992188, + "logps/rejected": -298.17401123046875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.082362174987793, + "rewards/margins": 8.108769416809082, + "rewards/rejected": -18.191131591796875, + "step": 16621 + }, + { + "epoch": 2.59, + "learning_rate": 1.956085896571399e-06, + "logits/chosen": -2.9018232822418213, + "logits/rejected": -3.0542852878570557, + "logps/chosen": -283.47052001953125, + "logps/rejected": -561.3858642578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.638870239257812, + "rewards/margins": 9.769197463989258, + "rewards/rejected": -20.40806770324707, + "step": 16622 + }, + { + "epoch": 2.59, + "learning_rate": 1.955352456040251e-06, + "logits/chosen": -2.8937478065490723, + "logits/rejected": -1.9680832624435425, + "logps/chosen": -213.41708374023438, + "logps/rejected": -149.22962951660156, + "loss": 0.1707, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.44281005859375, + "rewards/margins": 3.257596015930176, + "rewards/rejected": -13.700406074523926, + "step": 16623 + }, + { + "epoch": 2.59, + "learning_rate": 1.954619015509103e-06, + "logits/chosen": -1.7310186624526978, + "logits/rejected": -2.34993839263916, + "logps/chosen": -174.02935791015625, + "logps/rejected": -413.04058837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.010580062866211, + "rewards/margins": 10.375091552734375, + "rewards/rejected": -21.385669708251953, + "step": 16624 + }, + { + "epoch": 2.59, + "learning_rate": 1.953885574977955e-06, + "logits/chosen": -2.5143065452575684, + "logits/rejected": -2.5501179695129395, + "logps/chosen": -107.37387084960938, + "logps/rejected": -272.83642578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.485490798950195, + "rewards/margins": 9.752726554870605, + "rewards/rejected": -18.238216400146484, + "step": 16625 + }, + { + "epoch": 2.59, + "learning_rate": 1.9531521344468073e-06, + "logits/chosen": -2.8570332527160645, + "logits/rejected": -2.588404655456543, + "logps/chosen": -273.4746398925781, + "logps/rejected": -366.57147216796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.051671981811523, + "rewards/margins": 11.021141052246094, + "rewards/rejected": -21.07281494140625, + "step": 16626 + }, + { + "epoch": 2.59, + "learning_rate": 1.9524186939156596e-06, + "logits/chosen": -2.7546775341033936, + "logits/rejected": -1.7287909984588623, + "logps/chosen": -347.2458190917969, + "logps/rejected": -296.2039794921875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.077852249145508, + "rewards/margins": 7.617141246795654, + "rewards/rejected": -16.694992065429688, + "step": 16627 + }, + { + "epoch": 2.59, + "learning_rate": 1.951685253384512e-06, + "logits/chosen": -2.0504019260406494, + "logits/rejected": -2.875089645385742, + "logps/chosen": -248.12313842773438, + "logps/rejected": -450.0260009765625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.029390335083008, + "rewards/margins": 9.145694732666016, + "rewards/rejected": -17.175085067749023, + "step": 16628 + }, + { + "epoch": 2.59, + "learning_rate": 1.950951812853364e-06, + "logits/chosen": -2.0656094551086426, + "logits/rejected": -2.839592695236206, + "logps/chosen": -651.8881225585938, + "logps/rejected": -555.2816162109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.764269828796387, + "rewards/margins": 9.723170280456543, + "rewards/rejected": -16.48744010925293, + "step": 16629 + }, + { + "epoch": 2.59, + "learning_rate": 1.950218372322216e-06, + "logits/chosen": -1.6463847160339355, + "logits/rejected": -2.622544765472412, + "logps/chosen": -142.664794921875, + "logps/rejected": -428.2372741699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2892279624938965, + "rewards/margins": 11.891868591308594, + "rewards/rejected": -19.181095123291016, + "step": 16630 + }, + { + "epoch": 2.59, + "learning_rate": 1.949484931791068e-06, + "logits/chosen": -1.6492209434509277, + "logits/rejected": -2.742286205291748, + "logps/chosen": -166.0496063232422, + "logps/rejected": -537.0455322265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.291873931884766, + "rewards/margins": 11.561042785644531, + "rewards/rejected": -21.852916717529297, + "step": 16631 + }, + { + "epoch": 2.59, + "learning_rate": 1.94875149125992e-06, + "logits/chosen": -2.2415237426757812, + "logits/rejected": -2.758875846862793, + "logps/chosen": -190.9298095703125, + "logps/rejected": -321.3748779296875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.359013557434082, + "rewards/margins": 8.776923179626465, + "rewards/rejected": -21.135936737060547, + "step": 16632 + }, + { + "epoch": 2.59, + "learning_rate": 1.948018050728772e-06, + "logits/chosen": -2.7574260234832764, + "logits/rejected": -2.26314640045166, + "logps/chosen": -530.3665161132812, + "logps/rejected": -520.9417724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.506767272949219, + "rewards/margins": 10.709272384643555, + "rewards/rejected": -20.216039657592773, + "step": 16633 + }, + { + "epoch": 2.59, + "learning_rate": 1.947284610197624e-06, + "logits/chosen": -2.8477447032928467, + "logits/rejected": -2.573479652404785, + "logps/chosen": -479.09906005859375, + "logps/rejected": -640.68701171875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.913509368896484, + "rewards/margins": 7.6348443031311035, + "rewards/rejected": -18.54835319519043, + "step": 16634 + }, + { + "epoch": 2.59, + "learning_rate": 1.9465511696664764e-06, + "logits/chosen": -2.8479232788085938, + "logits/rejected": -2.8118810653686523, + "logps/chosen": -162.89309692382812, + "logps/rejected": -380.3189697265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.699612617492676, + "rewards/margins": 11.686573028564453, + "rewards/rejected": -20.386184692382812, + "step": 16635 + }, + { + "epoch": 2.59, + "learning_rate": 1.9458177291353287e-06, + "logits/chosen": -1.61614191532135, + "logits/rejected": -1.9884904623031616, + "logps/chosen": -306.1012878417969, + "logps/rejected": -695.4468383789062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.637744903564453, + "rewards/margins": 8.768414497375488, + "rewards/rejected": -20.406158447265625, + "step": 16636 + }, + { + "epoch": 2.59, + "learning_rate": 1.945084288604181e-06, + "logits/chosen": -2.6400563716888428, + "logits/rejected": -2.5969431400299072, + "logps/chosen": -253.89480590820312, + "logps/rejected": -462.4106140136719, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.368316650390625, + "rewards/margins": 13.595659255981445, + "rewards/rejected": -22.96397590637207, + "step": 16637 + }, + { + "epoch": 2.59, + "learning_rate": 1.944350848073033e-06, + "logits/chosen": -0.712864875793457, + "logits/rejected": -2.7917964458465576, + "logps/chosen": -197.00213623046875, + "logps/rejected": -605.5240478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.419336318969727, + "rewards/margins": 11.501045227050781, + "rewards/rejected": -22.920381546020508, + "step": 16638 + }, + { + "epoch": 2.59, + "learning_rate": 1.943617407541885e-06, + "logits/chosen": -1.5394864082336426, + "logits/rejected": -2.888796806335449, + "logps/chosen": -143.93408203125, + "logps/rejected": -496.203857421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.299428939819336, + "rewards/margins": 13.216856956481934, + "rewards/rejected": -21.516286849975586, + "step": 16639 + }, + { + "epoch": 2.59, + "learning_rate": 1.942883967010737e-06, + "logits/chosen": -1.184456706047058, + "logits/rejected": -2.569329023361206, + "logps/chosen": -158.28561401367188, + "logps/rejected": -512.0068969726562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.195880889892578, + "rewards/margins": 11.192438125610352, + "rewards/rejected": -20.388317108154297, + "step": 16640 + }, + { + "epoch": 2.59, + "learning_rate": 1.9421505264795893e-06, + "logits/chosen": -0.5283637642860413, + "logits/rejected": -2.7172272205352783, + "logps/chosen": -128.43736267089844, + "logps/rejected": -381.542236328125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.737566947937012, + "rewards/margins": 9.532943725585938, + "rewards/rejected": -19.270511627197266, + "step": 16641 + }, + { + "epoch": 2.59, + "learning_rate": 1.941417085948441e-06, + "logits/chosen": -1.3299126625061035, + "logits/rejected": -2.5767149925231934, + "logps/chosen": -367.50750732421875, + "logps/rejected": -632.0654907226562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.12629508972168, + "rewards/margins": 10.187442779541016, + "rewards/rejected": -22.313737869262695, + "step": 16642 + }, + { + "epoch": 2.59, + "learning_rate": 1.940683645417293e-06, + "logits/chosen": -2.517171859741211, + "logits/rejected": -2.9117345809936523, + "logps/chosen": -122.85804748535156, + "logps/rejected": -519.4371337890625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.844087600708008, + "rewards/margins": 8.109329223632812, + "rewards/rejected": -18.95341682434082, + "step": 16643 + }, + { + "epoch": 2.59, + "learning_rate": 1.939950204886146e-06, + "logits/chosen": -2.4875755310058594, + "logits/rejected": -2.956094741821289, + "logps/chosen": -98.86810302734375, + "logps/rejected": -207.4017333984375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.395148277282715, + "rewards/margins": 8.325413703918457, + "rewards/rejected": -15.720561981201172, + "step": 16644 + }, + { + "epoch": 2.59, + "learning_rate": 1.9392167643549977e-06, + "logits/chosen": -1.744376301765442, + "logits/rejected": -2.41086745262146, + "logps/chosen": -237.3831329345703, + "logps/rejected": -629.3292236328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.379913330078125, + "rewards/margins": 9.628561019897461, + "rewards/rejected": -22.008474349975586, + "step": 16645 + }, + { + "epoch": 2.59, + "learning_rate": 1.93848332382385e-06, + "logits/chosen": -2.6523079872131348, + "logits/rejected": -1.5953056812286377, + "logps/chosen": -118.32533264160156, + "logps/rejected": -227.7760772705078, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.43878698348999, + "rewards/margins": 8.173725128173828, + "rewards/rejected": -14.61251163482666, + "step": 16646 + }, + { + "epoch": 2.59, + "learning_rate": 1.937749883292702e-06, + "logits/chosen": -2.988358974456787, + "logits/rejected": -2.7306549549102783, + "logps/chosen": -704.0966186523438, + "logps/rejected": -598.9193115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.664524078369141, + "rewards/margins": 12.55984115600586, + "rewards/rejected": -19.224365234375, + "step": 16647 + }, + { + "epoch": 2.59, + "learning_rate": 1.937016442761554e-06, + "logits/chosen": -1.0468833446502686, + "logits/rejected": -1.4895247220993042, + "logps/chosen": -158.980224609375, + "logps/rejected": -392.97930908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.387152671813965, + "rewards/margins": 12.696836471557617, + "rewards/rejected": -19.083988189697266, + "step": 16648 + }, + { + "epoch": 2.59, + "learning_rate": 1.936283002230406e-06, + "logits/chosen": -1.216913104057312, + "logits/rejected": -2.4322102069854736, + "logps/chosen": -202.6240997314453, + "logps/rejected": -619.3309326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.141935348510742, + "rewards/margins": 12.292289733886719, + "rewards/rejected": -22.434223175048828, + "step": 16649 + }, + { + "epoch": 2.59, + "learning_rate": 1.9355495616992583e-06, + "logits/chosen": -2.017207145690918, + "logits/rejected": -2.8440330028533936, + "logps/chosen": -190.15829467773438, + "logps/rejected": -313.06317138671875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.606804847717285, + "rewards/margins": 7.776627063751221, + "rewards/rejected": -15.383432388305664, + "step": 16650 + }, + { + "epoch": 2.59, + "learning_rate": 1.9348161211681102e-06, + "logits/chosen": -2.3182756900787354, + "logits/rejected": -1.9261717796325684, + "logps/chosen": -233.62646484375, + "logps/rejected": -269.897705078125, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.60640811920166, + "rewards/margins": 6.76785945892334, + "rewards/rejected": -16.374267578125, + "step": 16651 + }, + { + "epoch": 2.59, + "learning_rate": 1.9340826806369625e-06, + "logits/chosen": -1.9959288835525513, + "logits/rejected": -2.681361436843872, + "logps/chosen": -249.876953125, + "logps/rejected": -355.33050537109375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.606091499328613, + "rewards/margins": 7.1514692306518555, + "rewards/rejected": -18.75756072998047, + "step": 16652 + }, + { + "epoch": 2.59, + "learning_rate": 1.933349240105815e-06, + "logits/chosen": -2.5524652004241943, + "logits/rejected": -2.8159661293029785, + "logps/chosen": -89.07832336425781, + "logps/rejected": -243.5930633544922, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.758581161499023, + "rewards/margins": 8.178670883178711, + "rewards/rejected": -15.937252044677734, + "step": 16653 + }, + { + "epoch": 2.59, + "learning_rate": 1.9326157995746667e-06, + "logits/chosen": -2.0575273036956787, + "logits/rejected": -2.577470064163208, + "logps/chosen": -328.4464111328125, + "logps/rejected": -484.0237121582031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.599141120910645, + "rewards/margins": 9.092308044433594, + "rewards/rejected": -20.691448211669922, + "step": 16654 + }, + { + "epoch": 2.59, + "learning_rate": 1.931882359043519e-06, + "logits/chosen": -1.740836262702942, + "logits/rejected": -2.0693705081939697, + "logps/chosen": -160.47415161132812, + "logps/rejected": -340.869384765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.650373458862305, + "rewards/margins": 12.445064544677734, + "rewards/rejected": -23.09543800354004, + "step": 16655 + }, + { + "epoch": 2.59, + "learning_rate": 1.931148918512371e-06, + "logits/chosen": -2.702178478240967, + "logits/rejected": -2.6100521087646484, + "logps/chosen": -359.55853271484375, + "logps/rejected": -297.61041259765625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8351850509643555, + "rewards/margins": 8.079795837402344, + "rewards/rejected": -15.914979934692383, + "step": 16656 + }, + { + "epoch": 2.59, + "learning_rate": 1.930415477981223e-06, + "logits/chosen": -2.091275930404663, + "logits/rejected": -2.7060182094573975, + "logps/chosen": -242.67491149902344, + "logps/rejected": -533.4071655273438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.500739097595215, + "rewards/margins": 9.417853355407715, + "rewards/rejected": -18.91859245300293, + "step": 16657 + }, + { + "epoch": 2.59, + "learning_rate": 1.929682037450075e-06, + "logits/chosen": -2.9803476333618164, + "logits/rejected": -2.131298542022705, + "logps/chosen": -243.20608520507812, + "logps/rejected": -286.55926513671875, + "loss": 0.0855, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.052892684936523, + "rewards/margins": 5.600381851196289, + "rewards/rejected": -16.653274536132812, + "step": 16658 + }, + { + "epoch": 2.59, + "learning_rate": 1.9289485969189274e-06, + "logits/chosen": -2.4041519165039062, + "logits/rejected": -2.58099102973938, + "logps/chosen": -231.5445556640625, + "logps/rejected": -422.05206298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.492709159851074, + "rewards/margins": 10.64614486694336, + "rewards/rejected": -16.13885498046875, + "step": 16659 + }, + { + "epoch": 2.59, + "learning_rate": 1.9282151563877792e-06, + "logits/chosen": -2.7537808418273926, + "logits/rejected": -2.1572811603546143, + "logps/chosen": -574.3257446289062, + "logps/rejected": -605.4596557617188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.64086627960205, + "rewards/margins": 9.806610107421875, + "rewards/rejected": -18.44747543334961, + "step": 16660 + }, + { + "epoch": 2.59, + "learning_rate": 1.927481715856632e-06, + "logits/chosen": -2.107987403869629, + "logits/rejected": -2.5595414638519287, + "logps/chosen": -293.65966796875, + "logps/rejected": -475.75054931640625, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.934710502624512, + "rewards/margins": 10.154052734375, + "rewards/rejected": -19.088764190673828, + "step": 16661 + }, + { + "epoch": 2.59, + "learning_rate": 1.926748275325484e-06, + "logits/chosen": -1.9318773746490479, + "logits/rejected": -2.5118935108184814, + "logps/chosen": -153.92529296875, + "logps/rejected": -303.7884521484375, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.600240707397461, + "rewards/margins": 5.714787006378174, + "rewards/rejected": -17.31502914428711, + "step": 16662 + }, + { + "epoch": 2.59, + "learning_rate": 1.9260148347943357e-06, + "logits/chosen": -2.6050455570220947, + "logits/rejected": -2.6296446323394775, + "logps/chosen": -281.1638488769531, + "logps/rejected": -444.97357177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.119478225708008, + "rewards/margins": 13.990974426269531, + "rewards/rejected": -22.110450744628906, + "step": 16663 + }, + { + "epoch": 2.59, + "learning_rate": 1.925281394263188e-06, + "logits/chosen": -1.9004744291305542, + "logits/rejected": -2.717456579208374, + "logps/chosen": -280.6175231933594, + "logps/rejected": -553.6710815429688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.379964828491211, + "rewards/margins": 12.939287185668945, + "rewards/rejected": -22.319252014160156, + "step": 16664 + }, + { + "epoch": 2.59, + "learning_rate": 1.92454795373204e-06, + "logits/chosen": -2.816108465194702, + "logits/rejected": -1.7374321222305298, + "logps/chosen": -533.9844970703125, + "logps/rejected": -257.3680419921875, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.506327152252197, + "rewards/margins": 8.222322463989258, + "rewards/rejected": -15.728650093078613, + "step": 16665 + }, + { + "epoch": 2.59, + "learning_rate": 1.9238145132008922e-06, + "logits/chosen": -2.576084852218628, + "logits/rejected": -2.865095376968384, + "logps/chosen": -779.1461791992188, + "logps/rejected": -760.1145629882812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3587236404418945, + "rewards/margins": 9.841899871826172, + "rewards/rejected": -16.200624465942383, + "step": 16666 + }, + { + "epoch": 2.59, + "learning_rate": 1.923081072669744e-06, + "logits/chosen": -2.504424571990967, + "logits/rejected": -2.8842153549194336, + "logps/chosen": -291.01806640625, + "logps/rejected": -557.962158203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.850292205810547, + "rewards/margins": 7.694044589996338, + "rewards/rejected": -18.544336318969727, + "step": 16667 + }, + { + "epoch": 2.59, + "learning_rate": 1.9223476321385964e-06, + "logits/chosen": -2.601698875427246, + "logits/rejected": -1.837532877922058, + "logps/chosen": -310.62640380859375, + "logps/rejected": -415.0765380859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.386709213256836, + "rewards/margins": 8.698423385620117, + "rewards/rejected": -19.085132598876953, + "step": 16668 + }, + { + "epoch": 2.59, + "learning_rate": 1.9216141916074487e-06, + "logits/chosen": -2.6245601177215576, + "logits/rejected": -2.7385501861572266, + "logps/chosen": -846.58203125, + "logps/rejected": -654.7528686523438, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.231472969055176, + "rewards/margins": 7.299722671508789, + "rewards/rejected": -17.53119468688965, + "step": 16669 + }, + { + "epoch": 2.59, + "learning_rate": 1.920880751076301e-06, + "logits/chosen": -2.5432536602020264, + "logits/rejected": -2.4075241088867188, + "logps/chosen": -452.2870178222656, + "logps/rejected": -377.05712890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.665849685668945, + "rewards/margins": 9.636398315429688, + "rewards/rejected": -20.30224609375, + "step": 16670 + }, + { + "epoch": 2.59, + "learning_rate": 1.920147310545153e-06, + "logits/chosen": -1.7667934894561768, + "logits/rejected": -2.7185113430023193, + "logps/chosen": -174.2489471435547, + "logps/rejected": -375.49200439453125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.876224517822266, + "rewards/margins": 8.559417724609375, + "rewards/rejected": -18.43564224243164, + "step": 16671 + }, + { + "epoch": 2.59, + "learning_rate": 1.9194138700140048e-06, + "logits/chosen": -2.2673451900482178, + "logits/rejected": -2.568391799926758, + "logps/chosen": -237.06564331054688, + "logps/rejected": -478.7171936035156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.752541542053223, + "rewards/margins": 13.324094772338867, + "rewards/rejected": -22.076637268066406, + "step": 16672 + }, + { + "epoch": 2.59, + "learning_rate": 1.918680429482857e-06, + "logits/chosen": -2.799414873123169, + "logits/rejected": -2.3130552768707275, + "logps/chosen": -460.8079528808594, + "logps/rejected": -528.541748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.396243572235107, + "rewards/margins": 12.42622184753418, + "rewards/rejected": -19.822465896606445, + "step": 16673 + }, + { + "epoch": 2.59, + "learning_rate": 1.917946988951709e-06, + "logits/chosen": -2.551409959793091, + "logits/rejected": -0.9487833380699158, + "logps/chosen": -238.66680908203125, + "logps/rejected": -144.97674560546875, + "loss": 0.1386, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2813825607299805, + "rewards/margins": 5.209486961364746, + "rewards/rejected": -12.490869522094727, + "step": 16674 + }, + { + "epoch": 2.59, + "learning_rate": 1.9172135484205612e-06, + "logits/chosen": -2.6067330837249756, + "logits/rejected": -1.2318676710128784, + "logps/chosen": -240.24124145507812, + "logps/rejected": -237.0891571044922, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.613290786743164, + "rewards/margins": 4.032129764556885, + "rewards/rejected": -16.64542007446289, + "step": 16675 + }, + { + "epoch": 2.59, + "learning_rate": 1.916480107889413e-06, + "logits/chosen": -2.8244504928588867, + "logits/rejected": -1.776767373085022, + "logps/chosen": -1337.4833984375, + "logps/rejected": -809.3770141601562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.82350492477417, + "rewards/margins": 14.388263702392578, + "rewards/rejected": -18.211769104003906, + "step": 16676 + }, + { + "epoch": 2.59, + "learning_rate": 1.9157466673582654e-06, + "logits/chosen": -2.332672119140625, + "logits/rejected": -0.7088856101036072, + "logps/chosen": -232.3219757080078, + "logps/rejected": -202.03662109375, + "loss": 0.3729, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.62463092803955, + "rewards/margins": 1.9095349311828613, + "rewards/rejected": -14.53416633605957, + "step": 16677 + }, + { + "epoch": 2.59, + "learning_rate": 1.9150132268271177e-06, + "logits/chosen": -2.7640655040740967, + "logits/rejected": -2.3105664253234863, + "logps/chosen": -516.7387084960938, + "logps/rejected": -524.71142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.542835235595703, + "rewards/margins": 10.17129898071289, + "rewards/rejected": -20.714134216308594, + "step": 16678 + }, + { + "epoch": 2.59, + "learning_rate": 1.91427978629597e-06, + "logits/chosen": -2.345081329345703, + "logits/rejected": -2.5965681076049805, + "logps/chosen": -178.6241455078125, + "logps/rejected": -364.59014892578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.88141393661499, + "rewards/margins": 9.440013885498047, + "rewards/rejected": -16.321428298950195, + "step": 16679 + }, + { + "epoch": 2.59, + "learning_rate": 1.913546345764822e-06, + "logits/chosen": -1.7979540824890137, + "logits/rejected": -2.519390344619751, + "logps/chosen": -233.51614379882812, + "logps/rejected": -372.5709533691406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.769974708557129, + "rewards/margins": 10.211556434631348, + "rewards/rejected": -18.981531143188477, + "step": 16680 + }, + { + "epoch": 2.59, + "learning_rate": 1.912812905233674e-06, + "logits/chosen": -2.328136444091797, + "logits/rejected": -2.5016841888427734, + "logps/chosen": -281.8755187988281, + "logps/rejected": -544.23486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.59587287902832, + "rewards/margins": 14.430608749389648, + "rewards/rejected": -23.02648162841797, + "step": 16681 + }, + { + "epoch": 2.59, + "learning_rate": 1.912079464702526e-06, + "logits/chosen": -2.730525493621826, + "logits/rejected": -2.9233322143554688, + "logps/chosen": -232.13992309570312, + "logps/rejected": -295.9681396484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.723448753356934, + "rewards/margins": 8.779789924621582, + "rewards/rejected": -17.503238677978516, + "step": 16682 + }, + { + "epoch": 2.59, + "learning_rate": 1.911346024171378e-06, + "logits/chosen": -2.6938726902008057, + "logits/rejected": -1.0362684726715088, + "logps/chosen": -285.0958557128906, + "logps/rejected": -273.22552490234375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.402524948120117, + "rewards/margins": 6.060383319854736, + "rewards/rejected": -19.462907791137695, + "step": 16683 + }, + { + "epoch": 2.59, + "learning_rate": 1.9106125836402303e-06, + "logits/chosen": -2.3084657192230225, + "logits/rejected": -2.7731564044952393, + "logps/chosen": -167.59934997558594, + "logps/rejected": -434.3681945800781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.834770202636719, + "rewards/margins": 12.236246109008789, + "rewards/rejected": -24.07101821899414, + "step": 16684 + }, + { + "epoch": 2.59, + "learning_rate": 1.909879143109082e-06, + "logits/chosen": -2.8733959197998047, + "logits/rejected": -2.9147088527679443, + "logps/chosen": -291.4916076660156, + "logps/rejected": -358.6328125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.088937759399414, + "rewards/margins": 7.021760940551758, + "rewards/rejected": -16.110698699951172, + "step": 16685 + }, + { + "epoch": 2.6, + "learning_rate": 1.909145702577935e-06, + "logits/chosen": -1.5355074405670166, + "logits/rejected": -2.7076773643493652, + "logps/chosen": -520.7072143554688, + "logps/rejected": -783.8935546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.495386123657227, + "rewards/margins": 12.47877311706543, + "rewards/rejected": -20.974159240722656, + "step": 16686 + }, + { + "epoch": 2.6, + "learning_rate": 1.9084122620467867e-06, + "logits/chosen": -1.7666212320327759, + "logits/rejected": -2.5566084384918213, + "logps/chosen": -264.089599609375, + "logps/rejected": -558.98828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.33987808227539, + "rewards/margins": 10.119657516479492, + "rewards/rejected": -20.459535598754883, + "step": 16687 + }, + { + "epoch": 2.6, + "learning_rate": 1.907678821515639e-06, + "logits/chosen": -1.7619091272354126, + "logits/rejected": -2.878969669342041, + "logps/chosen": -173.45994567871094, + "logps/rejected": -293.05615234375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.47372817993164, + "rewards/margins": 6.234220504760742, + "rewards/rejected": -15.707948684692383, + "step": 16688 + }, + { + "epoch": 2.6, + "learning_rate": 1.906945380984491e-06, + "logits/chosen": -1.9417376518249512, + "logits/rejected": -2.8288187980651855, + "logps/chosen": -123.48583221435547, + "logps/rejected": -411.08734130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.758710861206055, + "rewards/margins": 11.646434783935547, + "rewards/rejected": -22.405147552490234, + "step": 16689 + }, + { + "epoch": 2.6, + "learning_rate": 1.9062119404533432e-06, + "logits/chosen": -2.7605323791503906, + "logits/rejected": -2.6030845642089844, + "logps/chosen": -523.9379272460938, + "logps/rejected": -769.9209594726562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.13361930847168, + "rewards/margins": 13.098257064819336, + "rewards/rejected": -22.231876373291016, + "step": 16690 + }, + { + "epoch": 2.6, + "learning_rate": 1.905478499922195e-06, + "logits/chosen": -1.897438645362854, + "logits/rejected": -2.790445327758789, + "logps/chosen": -187.49403381347656, + "logps/rejected": -524.4122924804688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.51791000366211, + "rewards/margins": 9.960258483886719, + "rewards/rejected": -20.478168487548828, + "step": 16691 + }, + { + "epoch": 2.6, + "learning_rate": 1.9047450593910472e-06, + "logits/chosen": -1.7955158948898315, + "logits/rejected": -2.655841827392578, + "logps/chosen": -186.65774536132812, + "logps/rejected": -324.4122009277344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.390424728393555, + "rewards/margins": 8.110960006713867, + "rewards/rejected": -16.501384735107422, + "step": 16692 + }, + { + "epoch": 2.6, + "learning_rate": 1.9040116188598995e-06, + "logits/chosen": -2.5496819019317627, + "logits/rejected": -2.1657772064208984, + "logps/chosen": -216.1231231689453, + "logps/rejected": -270.9997863769531, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.017086029052734, + "rewards/margins": 5.160305500030518, + "rewards/rejected": -16.177391052246094, + "step": 16693 + }, + { + "epoch": 2.6, + "learning_rate": 1.9032781783287514e-06, + "logits/chosen": -1.7491811513900757, + "logits/rejected": -2.4052348136901855, + "logps/chosen": -283.13250732421875, + "logps/rejected": -610.684814453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.631958961486816, + "rewards/margins": 12.27115535736084, + "rewards/rejected": -18.903114318847656, + "step": 16694 + }, + { + "epoch": 2.6, + "learning_rate": 1.9025447377976037e-06, + "logits/chosen": -2.783810615539551, + "logits/rejected": -2.5039098262786865, + "logps/chosen": -649.0052490234375, + "logps/rejected": -583.0010986328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.650285720825195, + "rewards/margins": 9.690116882324219, + "rewards/rejected": -21.340402603149414, + "step": 16695 + }, + { + "epoch": 2.6, + "learning_rate": 1.9018112972664555e-06, + "logits/chosen": -2.485884428024292, + "logits/rejected": -2.774177312850952, + "logps/chosen": -233.7300567626953, + "logps/rejected": -342.8797607421875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.549554824829102, + "rewards/margins": 10.813915252685547, + "rewards/rejected": -21.36347198486328, + "step": 16696 + }, + { + "epoch": 2.6, + "learning_rate": 1.901077856735308e-06, + "logits/chosen": -1.3699711561203003, + "logits/rejected": -2.100646734237671, + "logps/chosen": -168.78012084960938, + "logps/rejected": -362.7625732421875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.321504592895508, + "rewards/margins": 12.329242706298828, + "rewards/rejected": -22.650747299194336, + "step": 16697 + }, + { + "epoch": 2.6, + "learning_rate": 1.90034441620416e-06, + "logits/chosen": -2.598038673400879, + "logits/rejected": -2.081054210662842, + "logps/chosen": -389.1557312011719, + "logps/rejected": -571.6644287109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.698206901550293, + "rewards/margins": 11.75832748413086, + "rewards/rejected": -22.45653533935547, + "step": 16698 + }, + { + "epoch": 2.6, + "learning_rate": 1.8996109756730122e-06, + "logits/chosen": -2.3589460849761963, + "logits/rejected": -2.969728708267212, + "logps/chosen": -858.399169921875, + "logps/rejected": -764.2589111328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.753265380859375, + "rewards/margins": 9.400164604187012, + "rewards/rejected": -14.153429985046387, + "step": 16699 + }, + { + "epoch": 2.6, + "learning_rate": 1.8988775351418641e-06, + "logits/chosen": -1.9547412395477295, + "logits/rejected": -2.737102746963501, + "logps/chosen": -484.7705078125, + "logps/rejected": -528.822998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.916645050048828, + "rewards/margins": 10.5546293258667, + "rewards/rejected": -19.47127342224121, + "step": 16700 + }, + { + "epoch": 2.6, + "learning_rate": 1.8981440946107162e-06, + "logits/chosen": -1.7024401426315308, + "logits/rejected": -2.215927839279175, + "logps/chosen": -270.2149963378906, + "logps/rejected": -469.634521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.691817283630371, + "rewards/margins": 14.802719116210938, + "rewards/rejected": -22.494537353515625, + "step": 16701 + }, + { + "epoch": 2.6, + "learning_rate": 1.8974106540795685e-06, + "logits/chosen": -2.777562141418457, + "logits/rejected": -2.4134256839752197, + "logps/chosen": -560.8531494140625, + "logps/rejected": -376.6232604980469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.745538711547852, + "rewards/margins": 11.65279769897461, + "rewards/rejected": -18.39833641052246, + "step": 16702 + }, + { + "epoch": 2.6, + "learning_rate": 1.8966772135484204e-06, + "logits/chosen": -2.5088558197021484, + "logits/rejected": -2.9760546684265137, + "logps/chosen": -164.04237365722656, + "logps/rejected": -359.7656555175781, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.55582046508789, + "rewards/margins": 7.6768317222595215, + "rewards/rejected": -18.232650756835938, + "step": 16703 + }, + { + "epoch": 2.6, + "learning_rate": 1.8959437730172727e-06, + "logits/chosen": -1.664510726928711, + "logits/rejected": -2.4300520420074463, + "logps/chosen": -177.181396484375, + "logps/rejected": -433.4739990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.769338607788086, + "rewards/margins": 13.438243865966797, + "rewards/rejected": -23.20758056640625, + "step": 16704 + }, + { + "epoch": 2.6, + "learning_rate": 1.8952103324861248e-06, + "logits/chosen": -2.0427048206329346, + "logits/rejected": -2.5629961490631104, + "logps/chosen": -379.88848876953125, + "logps/rejected": -400.2824401855469, + "loss": 0.0318, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.706786155700684, + "rewards/margins": 4.611057758331299, + "rewards/rejected": -17.31784439086914, + "step": 16705 + }, + { + "epoch": 2.6, + "learning_rate": 1.894476891954977e-06, + "logits/chosen": -2.9547624588012695, + "logits/rejected": -1.2721127271652222, + "logps/chosen": -398.9436340332031, + "logps/rejected": -270.88494873046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.86838436126709, + "rewards/margins": 9.506574630737305, + "rewards/rejected": -16.374958038330078, + "step": 16706 + }, + { + "epoch": 2.6, + "learning_rate": 1.893743451423829e-06, + "logits/chosen": -2.6327083110809326, + "logits/rejected": -2.932081699371338, + "logps/chosen": -265.1755065917969, + "logps/rejected": -492.34356689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.575857162475586, + "rewards/margins": 11.010250091552734, + "rewards/rejected": -20.58610725402832, + "step": 16707 + }, + { + "epoch": 2.6, + "learning_rate": 1.8930100108926813e-06, + "logits/chosen": -2.6368930339813232, + "logits/rejected": -2.0904123783111572, + "logps/chosen": -718.213623046875, + "logps/rejected": -518.3468627929688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.251914978027344, + "rewards/margins": 9.853212356567383, + "rewards/rejected": -22.105127334594727, + "step": 16708 + }, + { + "epoch": 2.6, + "learning_rate": 1.8922765703615334e-06, + "logits/chosen": -1.5765970945358276, + "logits/rejected": -2.835452079772949, + "logps/chosen": -176.23892211914062, + "logps/rejected": -653.490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.575960159301758, + "rewards/margins": 12.180295944213867, + "rewards/rejected": -21.756256103515625, + "step": 16709 + }, + { + "epoch": 2.6, + "learning_rate": 1.8915431298303857e-06, + "logits/chosen": -2.8849446773529053, + "logits/rejected": -2.7201764583587646, + "logps/chosen": -524.0319213867188, + "logps/rejected": -636.8071899414062, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.213322639465332, + "rewards/margins": 8.457376480102539, + "rewards/rejected": -17.670700073242188, + "step": 16710 + }, + { + "epoch": 2.6, + "learning_rate": 1.8908096892992375e-06, + "logits/chosen": -2.8362715244293213, + "logits/rejected": -2.886322259902954, + "logps/chosen": -397.6051025390625, + "logps/rejected": -675.7684326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.260051727294922, + "rewards/margins": 13.77743911743164, + "rewards/rejected": -23.037490844726562, + "step": 16711 + }, + { + "epoch": 2.6, + "learning_rate": 1.8900762487680894e-06, + "logits/chosen": -2.9020004272460938, + "logits/rejected": -2.917832136154175, + "logps/chosen": -126.88614654541016, + "logps/rejected": -315.8485107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.76347541809082, + "rewards/margins": 10.956932067871094, + "rewards/rejected": -21.720407485961914, + "step": 16712 + }, + { + "epoch": 2.6, + "learning_rate": 1.8893428082369417e-06, + "logits/chosen": -1.4244765043258667, + "logits/rejected": -2.4239306449890137, + "logps/chosen": -307.9231262207031, + "logps/rejected": -569.5185546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.848960876464844, + "rewards/margins": 13.279705047607422, + "rewards/rejected": -23.128665924072266, + "step": 16713 + }, + { + "epoch": 2.6, + "learning_rate": 1.8886093677057938e-06, + "logits/chosen": -1.8472516536712646, + "logits/rejected": -2.7096023559570312, + "logps/chosen": -143.1328582763672, + "logps/rejected": -465.4526672363281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.576973915100098, + "rewards/margins": 16.592004776000977, + "rewards/rejected": -24.16897964477539, + "step": 16714 + }, + { + "epoch": 2.6, + "learning_rate": 1.8878759271746461e-06, + "logits/chosen": -2.4548494815826416, + "logits/rejected": -2.169674873352051, + "logps/chosen": -278.91925048828125, + "logps/rejected": -416.5299377441406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.684194564819336, + "rewards/margins": 15.509376525878906, + "rewards/rejected": -22.193571090698242, + "step": 16715 + }, + { + "epoch": 2.6, + "learning_rate": 1.887142486643498e-06, + "logits/chosen": -1.3267327547073364, + "logits/rejected": -2.66610050201416, + "logps/chosen": -168.3389892578125, + "logps/rejected": -369.20648193359375, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.357477188110352, + "rewards/margins": 8.416236877441406, + "rewards/rejected": -17.773714065551758, + "step": 16716 + }, + { + "epoch": 2.6, + "learning_rate": 1.8864090461123503e-06, + "logits/chosen": -2.773918628692627, + "logits/rejected": -0.9220566153526306, + "logps/chosen": -286.9158020019531, + "logps/rejected": -228.30587768554688, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.701818466186523, + "rewards/margins": 9.438684463500977, + "rewards/rejected": -18.1405029296875, + "step": 16717 + }, + { + "epoch": 2.6, + "learning_rate": 1.8856756055812024e-06, + "logits/chosen": -2.4410033226013184, + "logits/rejected": -2.913789749145508, + "logps/chosen": -173.98365783691406, + "logps/rejected": -284.4058837890625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.827435493469238, + "rewards/margins": 9.413735389709473, + "rewards/rejected": -16.24117088317871, + "step": 16718 + }, + { + "epoch": 2.6, + "learning_rate": 1.8849421650500547e-06, + "logits/chosen": -1.9467995166778564, + "logits/rejected": -2.6435608863830566, + "logps/chosen": -155.37171936035156, + "logps/rejected": -415.133056640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.571240425109863, + "rewards/margins": 8.873800277709961, + "rewards/rejected": -20.44504165649414, + "step": 16719 + }, + { + "epoch": 2.6, + "learning_rate": 1.8842087245189066e-06, + "logits/chosen": -2.4468088150024414, + "logits/rejected": -2.8615663051605225, + "logps/chosen": -116.25147247314453, + "logps/rejected": -377.409423828125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.523797988891602, + "rewards/margins": 7.932809829711914, + "rewards/rejected": -17.456607818603516, + "step": 16720 + }, + { + "epoch": 2.6, + "learning_rate": 1.8834752839877584e-06, + "logits/chosen": -2.7624125480651855, + "logits/rejected": -2.832166910171509, + "logps/chosen": -98.06880950927734, + "logps/rejected": -241.18408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.076861381530762, + "rewards/margins": 11.157976150512695, + "rewards/rejected": -18.234838485717773, + "step": 16721 + }, + { + "epoch": 2.6, + "learning_rate": 1.882741843456611e-06, + "logits/chosen": -2.337463855743408, + "logits/rejected": -2.6730732917785645, + "logps/chosen": -376.33648681640625, + "logps/rejected": -428.70977783203125, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.953128814697266, + "rewards/margins": 6.810699939727783, + "rewards/rejected": -17.76382827758789, + "step": 16722 + }, + { + "epoch": 2.6, + "learning_rate": 1.8820084029254628e-06, + "logits/chosen": -2.450178623199463, + "logits/rejected": -2.9205706119537354, + "logps/chosen": -209.79302978515625, + "logps/rejected": -307.8729553222656, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.517986297607422, + "rewards/margins": 7.424062252044678, + "rewards/rejected": -13.942048072814941, + "step": 16723 + }, + { + "epoch": 2.6, + "learning_rate": 1.8812749623943151e-06, + "logits/chosen": -2.8135316371917725, + "logits/rejected": -2.6459200382232666, + "logps/chosen": -563.5786743164062, + "logps/rejected": -604.7349243164062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.674522399902344, + "rewards/margins": 12.84942626953125, + "rewards/rejected": -24.523948669433594, + "step": 16724 + }, + { + "epoch": 2.6, + "learning_rate": 1.880541521863167e-06, + "logits/chosen": -2.3531126976013184, + "logits/rejected": -2.470278263092041, + "logps/chosen": -324.37310791015625, + "logps/rejected": -402.49395751953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.682071685791016, + "rewards/margins": 8.685447692871094, + "rewards/rejected": -22.36751937866211, + "step": 16725 + }, + { + "epoch": 2.6, + "learning_rate": 1.8798080813320195e-06, + "logits/chosen": -1.897136926651001, + "logits/rejected": -2.371063709259033, + "logps/chosen": -263.23681640625, + "logps/rejected": -378.4000549316406, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.783716201782227, + "rewards/margins": 8.499910354614258, + "rewards/rejected": -16.283626556396484, + "step": 16726 + }, + { + "epoch": 2.6, + "learning_rate": 1.8790746408008714e-06, + "logits/chosen": -1.8034449815750122, + "logits/rejected": -2.467987298965454, + "logps/chosen": -246.83815002441406, + "logps/rejected": -454.5721130371094, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.822000503540039, + "rewards/margins": 9.857070922851562, + "rewards/rejected": -19.6790714263916, + "step": 16727 + }, + { + "epoch": 2.6, + "learning_rate": 1.8783412002697237e-06, + "logits/chosen": -2.777006149291992, + "logits/rejected": -2.2483065128326416, + "logps/chosen": -614.745361328125, + "logps/rejected": -562.5716552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.193363189697266, + "rewards/margins": 10.959081649780273, + "rewards/rejected": -21.15244483947754, + "step": 16728 + }, + { + "epoch": 2.6, + "learning_rate": 1.8776077597385756e-06, + "logits/chosen": -1.1988738775253296, + "logits/rejected": -2.5814497470855713, + "logps/chosen": -148.43231201171875, + "logps/rejected": -374.94305419921875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.906035423278809, + "rewards/margins": 8.522726058959961, + "rewards/rejected": -18.428760528564453, + "step": 16729 + }, + { + "epoch": 2.6, + "learning_rate": 1.8768743192074279e-06, + "logits/chosen": -2.5183157920837402, + "logits/rejected": -2.5270395278930664, + "logps/chosen": -338.97216796875, + "logps/rejected": -390.1380615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5230560302734375, + "rewards/margins": 12.993318557739258, + "rewards/rejected": -20.516374588012695, + "step": 16730 + }, + { + "epoch": 2.6, + "learning_rate": 1.87614087867628e-06, + "logits/chosen": -1.7762364149093628, + "logits/rejected": -2.629089832305908, + "logps/chosen": -175.3919677734375, + "logps/rejected": -417.7822265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.12574291229248, + "rewards/margins": 8.7051420211792, + "rewards/rejected": -20.83088493347168, + "step": 16731 + }, + { + "epoch": 2.6, + "learning_rate": 1.8754074381451318e-06, + "logits/chosen": -2.998927116394043, + "logits/rejected": -2.5271596908569336, + "logps/chosen": -523.6483154296875, + "logps/rejected": -511.10565185546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.276362419128418, + "rewards/margins": 7.94046688079834, + "rewards/rejected": -16.216829299926758, + "step": 16732 + }, + { + "epoch": 2.6, + "learning_rate": 1.8746739976139842e-06, + "logits/chosen": -2.961153984069824, + "logits/rejected": -2.7387850284576416, + "logps/chosen": -588.541748046875, + "logps/rejected": -443.90802001953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.158136367797852, + "rewards/margins": 8.734138488769531, + "rewards/rejected": -17.892274856567383, + "step": 16733 + }, + { + "epoch": 2.6, + "learning_rate": 1.8739405570828362e-06, + "logits/chosen": -1.1025949716567993, + "logits/rejected": -2.135906457901001, + "logps/chosen": -242.79855346679688, + "logps/rejected": -628.516845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.157859802246094, + "rewards/margins": 16.78461456298828, + "rewards/rejected": -24.942474365234375, + "step": 16734 + }, + { + "epoch": 2.6, + "learning_rate": 1.8732071165516885e-06, + "logits/chosen": -1.8601281642913818, + "logits/rejected": -2.174213409423828, + "logps/chosen": -486.53485107421875, + "logps/rejected": -484.87725830078125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.63909387588501, + "rewards/margins": 7.685733795166016, + "rewards/rejected": -15.324827194213867, + "step": 16735 + }, + { + "epoch": 2.6, + "learning_rate": 1.8724736760205404e-06, + "logits/chosen": -1.735438585281372, + "logits/rejected": -2.9538004398345947, + "logps/chosen": -303.624755859375, + "logps/rejected": -736.916259765625, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.038530349731445, + "rewards/margins": 6.957719326019287, + "rewards/rejected": -17.99625015258789, + "step": 16736 + }, + { + "epoch": 2.6, + "learning_rate": 1.8717402354893927e-06, + "logits/chosen": -2.479698896408081, + "logits/rejected": -0.6793938279151917, + "logps/chosen": -231.8706817626953, + "logps/rejected": -206.53497314453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.77365493774414, + "rewards/margins": 9.582388877868652, + "rewards/rejected": -19.35604476928711, + "step": 16737 + }, + { + "epoch": 2.6, + "learning_rate": 1.8710067949582446e-06, + "logits/chosen": -2.7629406452178955, + "logits/rejected": -2.4351158142089844, + "logps/chosen": -441.57794189453125, + "logps/rejected": -554.9701538085938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.559031009674072, + "rewards/margins": 15.041346549987793, + "rewards/rejected": -22.600378036499023, + "step": 16738 + }, + { + "epoch": 2.6, + "learning_rate": 1.8702733544270971e-06, + "logits/chosen": -2.9216530323028564, + "logits/rejected": -2.3696067333221436, + "logps/chosen": -199.70556640625, + "logps/rejected": -317.8060607910156, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.422338485717773, + "rewards/margins": 6.23780632019043, + "rewards/rejected": -15.660144805908203, + "step": 16739 + }, + { + "epoch": 2.6, + "learning_rate": 1.869539913895949e-06, + "logits/chosen": -2.6871440410614014, + "logits/rejected": -2.914364814758301, + "logps/chosen": -548.1422729492188, + "logps/rejected": -477.328857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.662793159484863, + "rewards/margins": 13.253256797790527, + "rewards/rejected": -19.91604995727539, + "step": 16740 + }, + { + "epoch": 2.6, + "learning_rate": 1.8688064733648009e-06, + "logits/chosen": -2.698521375656128, + "logits/rejected": -2.8244144916534424, + "logps/chosen": -113.3597183227539, + "logps/rejected": -433.9947204589844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.212312698364258, + "rewards/margins": 9.723756790161133, + "rewards/rejected": -18.93606948852539, + "step": 16741 + }, + { + "epoch": 2.6, + "learning_rate": 1.8680730328336532e-06, + "logits/chosen": -1.7882411479949951, + "logits/rejected": -2.321790933609009, + "logps/chosen": -195.1961669921875, + "logps/rejected": -447.05035400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.26856517791748, + "rewards/margins": 11.949956893920898, + "rewards/rejected": -20.218521118164062, + "step": 16742 + }, + { + "epoch": 2.6, + "learning_rate": 1.8673395923025053e-06, + "logits/chosen": -2.5076961517333984, + "logits/rejected": -1.5556119680404663, + "logps/chosen": -337.28472900390625, + "logps/rejected": -283.2808837890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.256587982177734, + "rewards/margins": 9.108369827270508, + "rewards/rejected": -18.364957809448242, + "step": 16743 + }, + { + "epoch": 2.6, + "learning_rate": 1.8666061517713576e-06, + "logits/chosen": -2.2932424545288086, + "logits/rejected": -2.471031427383423, + "logps/chosen": -207.2538604736328, + "logps/rejected": -310.04510498046875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.660604953765869, + "rewards/margins": 7.687699317932129, + "rewards/rejected": -14.348304748535156, + "step": 16744 + }, + { + "epoch": 2.6, + "learning_rate": 1.8658727112402094e-06, + "logits/chosen": -2.7795071601867676, + "logits/rejected": -2.6978678703308105, + "logps/chosen": -245.9609375, + "logps/rejected": -355.50213623046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.900961399078369, + "rewards/margins": 11.311592102050781, + "rewards/rejected": -18.212553024291992, + "step": 16745 + }, + { + "epoch": 2.6, + "learning_rate": 1.8651392707090617e-06, + "logits/chosen": -1.8593968152999878, + "logits/rejected": -2.2520604133605957, + "logps/chosen": -223.99411010742188, + "logps/rejected": -364.7958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.851434707641602, + "rewards/margins": 9.638193130493164, + "rewards/rejected": -18.489627838134766, + "step": 16746 + }, + { + "epoch": 2.6, + "learning_rate": 1.8644058301779138e-06, + "logits/chosen": -2.7147605419158936, + "logits/rejected": -2.815582036972046, + "logps/chosen": -281.28826904296875, + "logps/rejected": -308.5590515136719, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.816030979156494, + "rewards/margins": 8.296029090881348, + "rewards/rejected": -15.112060546875, + "step": 16747 + }, + { + "epoch": 2.6, + "learning_rate": 1.8636723896467661e-06, + "logits/chosen": -2.5054235458374023, + "logits/rejected": -1.788565993309021, + "logps/chosen": -437.8194274902344, + "logps/rejected": -537.5379028320312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.671112060546875, + "rewards/margins": 13.613128662109375, + "rewards/rejected": -21.28424072265625, + "step": 16748 + }, + { + "epoch": 2.6, + "learning_rate": 1.862938949115618e-06, + "logits/chosen": -2.3770320415496826, + "logits/rejected": -2.840045690536499, + "logps/chosen": -507.338623046875, + "logps/rejected": -691.4268798828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.813292980194092, + "rewards/margins": 10.441269874572754, + "rewards/rejected": -18.254562377929688, + "step": 16749 + }, + { + "epoch": 2.6, + "learning_rate": 1.8622055085844699e-06, + "logits/chosen": -2.572394371032715, + "logits/rejected": -2.5870137214660645, + "logps/chosen": -486.83074951171875, + "logps/rejected": -598.2057495117188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.92323637008667, + "rewards/margins": 15.690074920654297, + "rewards/rejected": -22.613311767578125, + "step": 16750 + }, + { + "epoch": 2.61, + "learning_rate": 1.8614720680533224e-06, + "logits/chosen": -2.2978036403656006, + "logits/rejected": -2.727363348007202, + "logps/chosen": -160.34469604492188, + "logps/rejected": -501.14044189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.425240516662598, + "rewards/margins": 12.748455047607422, + "rewards/rejected": -22.173694610595703, + "step": 16751 + }, + { + "epoch": 2.61, + "learning_rate": 1.8607386275221743e-06, + "logits/chosen": -2.9036941528320312, + "logits/rejected": -3.026982545852661, + "logps/chosen": -266.4986877441406, + "logps/rejected": -203.29872131347656, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.167309761047363, + "rewards/margins": 9.705878257751465, + "rewards/rejected": -17.873188018798828, + "step": 16752 + }, + { + "epoch": 2.61, + "learning_rate": 1.8600051869910266e-06, + "logits/chosen": -2.495779275894165, + "logits/rejected": -2.8218681812286377, + "logps/chosen": -203.9385528564453, + "logps/rejected": -353.09375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.378464698791504, + "rewards/margins": 10.610981941223145, + "rewards/rejected": -17.98944664001465, + "step": 16753 + }, + { + "epoch": 2.61, + "learning_rate": 1.8592717464598785e-06, + "logits/chosen": -2.6071434020996094, + "logits/rejected": -2.7397477626800537, + "logps/chosen": -139.1622314453125, + "logps/rejected": -311.1338806152344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.01518440246582, + "rewards/margins": 9.392732620239258, + "rewards/rejected": -17.407917022705078, + "step": 16754 + }, + { + "epoch": 2.61, + "learning_rate": 1.8585383059287308e-06, + "logits/chosen": -2.672098159790039, + "logits/rejected": -2.657566785812378, + "logps/chosen": -291.84747314453125, + "logps/rejected": -585.2271728515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.392138481140137, + "rewards/margins": 15.47809886932373, + "rewards/rejected": -25.870237350463867, + "step": 16755 + }, + { + "epoch": 2.61, + "learning_rate": 1.8578048653975829e-06, + "logits/chosen": -1.6786653995513916, + "logits/rejected": -2.7565555572509766, + "logps/chosen": -218.9914093017578, + "logps/rejected": -494.8641357421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.167728424072266, + "rewards/margins": 10.420848846435547, + "rewards/rejected": -17.588577270507812, + "step": 16756 + }, + { + "epoch": 2.61, + "learning_rate": 1.8570714248664352e-06, + "logits/chosen": -2.710132598876953, + "logits/rejected": -2.917135238647461, + "logps/chosen": -210.96075439453125, + "logps/rejected": -539.2930908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.259111404418945, + "rewards/margins": 12.713078498840332, + "rewards/rejected": -18.972190856933594, + "step": 16757 + }, + { + "epoch": 2.61, + "learning_rate": 1.856337984335287e-06, + "logits/chosen": -3.0870163440704346, + "logits/rejected": -2.892368793487549, + "logps/chosen": -317.4640808105469, + "logps/rejected": -363.68231201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.732911109924316, + "rewards/margins": 9.992907524108887, + "rewards/rejected": -16.725818634033203, + "step": 16758 + }, + { + "epoch": 2.61, + "learning_rate": 1.8556045438041393e-06, + "logits/chosen": -2.5870306491851807, + "logits/rejected": -1.5070605278015137, + "logps/chosen": -192.42230224609375, + "logps/rejected": -332.7602233886719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.599989414215088, + "rewards/margins": 15.720830917358398, + "rewards/rejected": -23.320819854736328, + "step": 16759 + }, + { + "epoch": 2.61, + "learning_rate": 1.8548711032729914e-06, + "logits/chosen": -2.84123158454895, + "logits/rejected": -1.7704439163208008, + "logps/chosen": -220.2382354736328, + "logps/rejected": -309.3019104003906, + "loss": 1.6727, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.590303421020508, + "rewards/margins": 4.249053001403809, + "rewards/rejected": -13.839357376098633, + "step": 16760 + }, + { + "epoch": 2.61, + "learning_rate": 1.8541376627418433e-06, + "logits/chosen": -2.7771408557891846, + "logits/rejected": -2.009089231491089, + "logps/chosen": -483.0302429199219, + "logps/rejected": -445.52386474609375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.098674774169922, + "rewards/margins": 7.605575084686279, + "rewards/rejected": -18.70425033569336, + "step": 16761 + }, + { + "epoch": 2.61, + "learning_rate": 1.8534042222106956e-06, + "logits/chosen": -3.0439846515655518, + "logits/rejected": -2.963191509246826, + "logps/chosen": -300.32769775390625, + "logps/rejected": -266.76348876953125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.854116439819336, + "rewards/margins": 7.9183030128479, + "rewards/rejected": -15.772419929504395, + "step": 16762 + }, + { + "epoch": 2.61, + "learning_rate": 1.8526707816795475e-06, + "logits/chosen": -1.7860299348831177, + "logits/rejected": -2.664720296859741, + "logps/chosen": -239.74879455566406, + "logps/rejected": -557.0953369140625, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.144344329833984, + "rewards/margins": 8.590299606323242, + "rewards/rejected": -18.734643936157227, + "step": 16763 + }, + { + "epoch": 2.61, + "learning_rate": 1.8519373411484e-06, + "logits/chosen": -2.809256076812744, + "logits/rejected": -1.629633903503418, + "logps/chosen": -266.1769714355469, + "logps/rejected": -279.78228759765625, + "loss": 0.6816, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.660664558410645, + "rewards/margins": 2.918222188949585, + "rewards/rejected": -14.578886032104492, + "step": 16764 + }, + { + "epoch": 2.61, + "learning_rate": 1.8512039006172519e-06, + "logits/chosen": -2.8367462158203125, + "logits/rejected": -2.900515079498291, + "logps/chosen": -148.91549682617188, + "logps/rejected": -241.0473175048828, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.552653312683105, + "rewards/margins": 8.442964553833008, + "rewards/rejected": -16.995616912841797, + "step": 16765 + }, + { + "epoch": 2.61, + "learning_rate": 1.8504704600861042e-06, + "logits/chosen": -2.841204881668091, + "logits/rejected": -2.8592844009399414, + "logps/chosen": -173.60140991210938, + "logps/rejected": -301.2809753417969, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.985053062438965, + "rewards/margins": 7.659564971923828, + "rewards/rejected": -18.64461898803711, + "step": 16766 + }, + { + "epoch": 2.61, + "learning_rate": 1.849737019554956e-06, + "logits/chosen": -2.596278429031372, + "logits/rejected": -2.794780731201172, + "logps/chosen": -225.58517456054688, + "logps/rejected": -431.202880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.77450942993164, + "rewards/margins": 11.002184867858887, + "rewards/rejected": -19.776695251464844, + "step": 16767 + }, + { + "epoch": 2.61, + "learning_rate": 1.8490035790238086e-06, + "logits/chosen": -1.502915620803833, + "logits/rejected": -2.676992893218994, + "logps/chosen": -205.36959838867188, + "logps/rejected": -372.63165283203125, + "loss": 0.07, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.901796340942383, + "rewards/margins": 4.853233337402344, + "rewards/rejected": -16.755029678344727, + "step": 16768 + }, + { + "epoch": 2.61, + "learning_rate": 1.8482701384926605e-06, + "logits/chosen": -1.6879801750183105, + "logits/rejected": -2.9107868671417236, + "logps/chosen": -356.95147705078125, + "logps/rejected": -611.0191040039062, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.992738723754883, + "rewards/margins": 11.642142295837402, + "rewards/rejected": -23.63488006591797, + "step": 16769 + }, + { + "epoch": 2.61, + "learning_rate": 1.8475366979615123e-06, + "logits/chosen": -1.3189271688461304, + "logits/rejected": -1.9894119501113892, + "logps/chosen": -233.30184936523438, + "logps/rejected": -508.31939697265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.348037719726562, + "rewards/margins": 9.479825973510742, + "rewards/rejected": -18.827865600585938, + "step": 16770 + }, + { + "epoch": 2.61, + "learning_rate": 1.8468032574303646e-06, + "logits/chosen": -2.6171560287475586, + "logits/rejected": -2.731065511703491, + "logps/chosen": -816.2532348632812, + "logps/rejected": -702.6407470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.260395050048828, + "rewards/margins": 11.605585098266602, + "rewards/rejected": -25.86598014831543, + "step": 16771 + }, + { + "epoch": 2.61, + "learning_rate": 1.8460698168992167e-06, + "logits/chosen": -2.7059061527252197, + "logits/rejected": -2.8522796630859375, + "logps/chosen": -135.43545532226562, + "logps/rejected": -300.40576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.008520126342773, + "rewards/margins": 11.109654426574707, + "rewards/rejected": -19.118175506591797, + "step": 16772 + }, + { + "epoch": 2.61, + "learning_rate": 1.845336376368069e-06, + "logits/chosen": -2.7965967655181885, + "logits/rejected": -1.7407562732696533, + "logps/chosen": -360.3336181640625, + "logps/rejected": -600.5640869140625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.330051422119141, + "rewards/margins": 13.954439163208008, + "rewards/rejected": -20.28449058532715, + "step": 16773 + }, + { + "epoch": 2.61, + "learning_rate": 1.844602935836921e-06, + "logits/chosen": -2.912689685821533, + "logits/rejected": -2.066997766494751, + "logps/chosen": -611.001220703125, + "logps/rejected": -582.6707763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3389840126037598, + "rewards/margins": 16.716936111450195, + "rewards/rejected": -20.055919647216797, + "step": 16774 + }, + { + "epoch": 2.61, + "learning_rate": 1.8438694953057732e-06, + "logits/chosen": -2.6585254669189453, + "logits/rejected": -2.088003158569336, + "logps/chosen": -280.222900390625, + "logps/rejected": -297.97088623046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.733369827270508, + "rewards/margins": 8.463834762573242, + "rewards/rejected": -22.19720458984375, + "step": 16775 + }, + { + "epoch": 2.61, + "learning_rate": 1.8431360547746253e-06, + "logits/chosen": -1.800223469734192, + "logits/rejected": -2.9999287128448486, + "logps/chosen": -278.5535888671875, + "logps/rejected": -449.0238037109375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.434326171875, + "rewards/margins": 6.209209442138672, + "rewards/rejected": -17.643535614013672, + "step": 16776 + }, + { + "epoch": 2.61, + "learning_rate": 1.8424026142434776e-06, + "logits/chosen": -2.620049238204956, + "logits/rejected": -2.84722638130188, + "logps/chosen": -259.06353759765625, + "logps/rejected": -331.21356201171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.036781311035156, + "rewards/margins": 9.22126579284668, + "rewards/rejected": -18.258047103881836, + "step": 16777 + }, + { + "epoch": 2.61, + "learning_rate": 1.8416691737123295e-06, + "logits/chosen": -2.8987326622009277, + "logits/rejected": -1.3194935321807861, + "logps/chosen": -614.0035400390625, + "logps/rejected": -421.8187561035156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.869422912597656, + "rewards/margins": 12.248367309570312, + "rewards/rejected": -22.11779022216797, + "step": 16778 + }, + { + "epoch": 2.61, + "learning_rate": 1.8409357331811818e-06, + "logits/chosen": -2.724264144897461, + "logits/rejected": -2.9065940380096436, + "logps/chosen": -254.2900848388672, + "logps/rejected": -433.2724304199219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.741792678833008, + "rewards/margins": 10.700959205627441, + "rewards/rejected": -19.442750930786133, + "step": 16779 + }, + { + "epoch": 2.61, + "learning_rate": 1.8402022926500337e-06, + "logits/chosen": -2.2496488094329834, + "logits/rejected": -2.8824925422668457, + "logps/chosen": -153.50650024414062, + "logps/rejected": -491.0406494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.354344844818115, + "rewards/margins": 13.150486946105957, + "rewards/rejected": -17.504833221435547, + "step": 16780 + }, + { + "epoch": 2.61, + "learning_rate": 1.8394688521188857e-06, + "logits/chosen": -1.8314285278320312, + "logits/rejected": -2.693286895751953, + "logps/chosen": -252.46588134765625, + "logps/rejected": -600.3900756835938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.135061264038086, + "rewards/margins": 9.700126647949219, + "rewards/rejected": -18.835189819335938, + "step": 16781 + }, + { + "epoch": 2.61, + "learning_rate": 1.838735411587738e-06, + "logits/chosen": -0.40686115622520447, + "logits/rejected": -2.7558176517486572, + "logps/chosen": -177.97088623046875, + "logps/rejected": -588.201416015625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.374726295471191, + "rewards/margins": 6.152137756347656, + "rewards/rejected": -20.52686309814453, + "step": 16782 + }, + { + "epoch": 2.61, + "learning_rate": 1.83800197105659e-06, + "logits/chosen": -2.7845141887664795, + "logits/rejected": -1.9069147109985352, + "logps/chosen": -319.58477783203125, + "logps/rejected": -236.48907470703125, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.083174228668213, + "rewards/margins": 6.436525821685791, + "rewards/rejected": -12.519700050354004, + "step": 16783 + }, + { + "epoch": 2.61, + "learning_rate": 1.8372685305254422e-06, + "logits/chosen": -2.8253612518310547, + "logits/rejected": -2.6063661575317383, + "logps/chosen": -187.05059814453125, + "logps/rejected": -233.5006103515625, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.409669876098633, + "rewards/margins": 6.69028902053833, + "rewards/rejected": -16.099960327148438, + "step": 16784 + }, + { + "epoch": 2.61, + "learning_rate": 1.8365350899942943e-06, + "logits/chosen": -2.4080824851989746, + "logits/rejected": -2.793119430541992, + "logps/chosen": -181.582275390625, + "logps/rejected": -364.82208251953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.831913948059082, + "rewards/margins": 11.142292976379395, + "rewards/rejected": -19.974206924438477, + "step": 16785 + }, + { + "epoch": 2.61, + "learning_rate": 1.8358016494631466e-06, + "logits/chosen": -0.9169510006904602, + "logits/rejected": -1.7739567756652832, + "logps/chosen": -204.7838134765625, + "logps/rejected": -470.9764099121094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.274792194366455, + "rewards/margins": 13.808334350585938, + "rewards/rejected": -21.083126068115234, + "step": 16786 + }, + { + "epoch": 2.61, + "learning_rate": 1.8350682089319985e-06, + "logits/chosen": -2.624267816543579, + "logits/rejected": -2.234606981277466, + "logps/chosen": -385.1349182128906, + "logps/rejected": -448.5523986816406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.492055892944336, + "rewards/margins": 9.948570251464844, + "rewards/rejected": -19.44062614440918, + "step": 16787 + }, + { + "epoch": 2.61, + "learning_rate": 1.8343347684008508e-06, + "logits/chosen": -2.322909116744995, + "logits/rejected": -2.5478768348693848, + "logps/chosen": -233.61962890625, + "logps/rejected": -454.4674072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.244424343109131, + "rewards/margins": 13.344293594360352, + "rewards/rejected": -19.58871841430664, + "step": 16788 + }, + { + "epoch": 2.61, + "learning_rate": 1.8336013278697029e-06, + "logits/chosen": -2.789283037185669, + "logits/rejected": -1.6114166975021362, + "logps/chosen": -275.55938720703125, + "logps/rejected": -231.46389770507812, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.91981029510498, + "rewards/margins": 7.813946723937988, + "rewards/rejected": -16.73375701904297, + "step": 16789 + }, + { + "epoch": 2.61, + "learning_rate": 1.8328678873385548e-06, + "logits/chosen": -1.3497308492660522, + "logits/rejected": -1.542763113975525, + "logps/chosen": -70.41659545898438, + "logps/rejected": -270.82659912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.035529136657715, + "rewards/margins": 9.286097526550293, + "rewards/rejected": -15.321626663208008, + "step": 16790 + }, + { + "epoch": 2.61, + "learning_rate": 1.832134446807407e-06, + "logits/chosen": -2.826406717300415, + "logits/rejected": -2.8305764198303223, + "logps/chosen": -118.71221923828125, + "logps/rejected": -249.0726776123047, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7035088539123535, + "rewards/margins": 10.729028701782227, + "rewards/rejected": -17.432537078857422, + "step": 16791 + }, + { + "epoch": 2.61, + "learning_rate": 1.831401006276259e-06, + "logits/chosen": -1.6679240465164185, + "logits/rejected": -2.6999635696411133, + "logps/chosen": -150.46426391601562, + "logps/rejected": -304.023193359375, + "loss": 0.6125, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.863160133361816, + "rewards/margins": 5.384042263031006, + "rewards/rejected": -19.247201919555664, + "step": 16792 + }, + { + "epoch": 2.61, + "learning_rate": 1.8306675657451112e-06, + "logits/chosen": -1.8043127059936523, + "logits/rejected": -2.522228717803955, + "logps/chosen": -190.78424072265625, + "logps/rejected": -460.005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.564035415649414, + "rewards/margins": 11.535715103149414, + "rewards/rejected": -25.099750518798828, + "step": 16793 + }, + { + "epoch": 2.61, + "learning_rate": 1.8299341252139633e-06, + "logits/chosen": -1.769300937652588, + "logits/rejected": -2.3192901611328125, + "logps/chosen": -270.30364990234375, + "logps/rejected": -274.6861572265625, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.560320854187012, + "rewards/margins": 4.797426700592041, + "rewards/rejected": -18.35774803161621, + "step": 16794 + }, + { + "epoch": 2.61, + "learning_rate": 1.8292006846828156e-06, + "logits/chosen": -2.9180502891540527, + "logits/rejected": -2.8974742889404297, + "logps/chosen": -186.8478546142578, + "logps/rejected": -271.5776672363281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.859478950500488, + "rewards/margins": 9.421207427978516, + "rewards/rejected": -17.280685424804688, + "step": 16795 + }, + { + "epoch": 2.61, + "learning_rate": 1.8284672441516675e-06, + "logits/chosen": -2.444110631942749, + "logits/rejected": -2.6428325176239014, + "logps/chosen": -251.31182861328125, + "logps/rejected": -546.8839721679688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.128579139709473, + "rewards/margins": 14.710460662841797, + "rewards/rejected": -27.839038848876953, + "step": 16796 + }, + { + "epoch": 2.61, + "learning_rate": 1.8277338036205198e-06, + "logits/chosen": -2.6982192993164062, + "logits/rejected": -1.5230953693389893, + "logps/chosen": -311.57012939453125, + "logps/rejected": -220.34246826171875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.618032455444336, + "rewards/margins": 6.569882869720459, + "rewards/rejected": -17.187915802001953, + "step": 16797 + }, + { + "epoch": 2.61, + "learning_rate": 1.827000363089372e-06, + "logits/chosen": -2.3637568950653076, + "logits/rejected": -2.5037388801574707, + "logps/chosen": -229.42848205566406, + "logps/rejected": -255.14630126953125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.304394721984863, + "rewards/margins": 8.27054214477539, + "rewards/rejected": -19.574935913085938, + "step": 16798 + }, + { + "epoch": 2.61, + "learning_rate": 1.8262669225582242e-06, + "logits/chosen": -1.1741894483566284, + "logits/rejected": -2.498880386352539, + "logps/chosen": -166.5731201171875, + "logps/rejected": -524.907958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.77608585357666, + "rewards/margins": 13.661100387573242, + "rewards/rejected": -24.437185287475586, + "step": 16799 + }, + { + "epoch": 2.61, + "learning_rate": 1.825533482027076e-06, + "logits/chosen": -2.1905300617218018, + "logits/rejected": -3.089041233062744, + "logps/chosen": -349.06378173828125, + "logps/rejected": -673.6966552734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.169857025146484, + "rewards/margins": 11.916990280151367, + "rewards/rejected": -21.08684730529785, + "step": 16800 + }, + { + "epoch": 2.61, + "learning_rate": 1.824800041495928e-06, + "logits/chosen": -2.3357186317443848, + "logits/rejected": -2.4374454021453857, + "logps/chosen": -220.12408447265625, + "logps/rejected": -494.8869323730469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.336240768432617, + "rewards/margins": 9.546075820922852, + "rewards/rejected": -22.88231658935547, + "step": 16801 + }, + { + "epoch": 2.61, + "learning_rate": 1.8240666009647805e-06, + "logits/chosen": -2.116239547729492, + "logits/rejected": -2.0246760845184326, + "logps/chosen": -388.302490234375, + "logps/rejected": -780.2205810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.538253784179688, + "rewards/margins": 12.157482147216797, + "rewards/rejected": -23.695735931396484, + "step": 16802 + }, + { + "epoch": 2.61, + "learning_rate": 1.8233331604336324e-06, + "logits/chosen": -1.964337944984436, + "logits/rejected": -2.7058699131011963, + "logps/chosen": -382.50860595703125, + "logps/rejected": -779.483154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.548837661743164, + "rewards/margins": 17.12162208557129, + "rewards/rejected": -26.670459747314453, + "step": 16803 + }, + { + "epoch": 2.61, + "learning_rate": 1.8225997199024847e-06, + "logits/chosen": -2.5471010208129883, + "logits/rejected": -2.1917672157287598, + "logps/chosen": -171.30813598632812, + "logps/rejected": -208.77639770507812, + "loss": 0.548, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.534566879272461, + "rewards/margins": 4.067282199859619, + "rewards/rejected": -17.601848602294922, + "step": 16804 + }, + { + "epoch": 2.61, + "learning_rate": 1.8218662793713365e-06, + "logits/chosen": -2.3860056400299072, + "logits/rejected": -2.6734304428100586, + "logps/chosen": -225.03684997558594, + "logps/rejected": -371.21124267578125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9338884353637695, + "rewards/margins": 10.730424880981445, + "rewards/rejected": -18.6643123626709, + "step": 16805 + }, + { + "epoch": 2.61, + "learning_rate": 1.821132838840189e-06, + "logits/chosen": -1.2027459144592285, + "logits/rejected": -2.5744664669036865, + "logps/chosen": -188.843505859375, + "logps/rejected": -540.8255615234375, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.148025512695312, + "rewards/margins": 8.321181297302246, + "rewards/rejected": -19.469205856323242, + "step": 16806 + }, + { + "epoch": 2.61, + "learning_rate": 1.820399398309041e-06, + "logits/chosen": -2.859771966934204, + "logits/rejected": -2.920703172683716, + "logps/chosen": -108.44037628173828, + "logps/rejected": -257.7153015136719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.422014236450195, + "rewards/margins": 10.017172813415527, + "rewards/rejected": -16.43918800354004, + "step": 16807 + }, + { + "epoch": 2.61, + "learning_rate": 1.8196659577778932e-06, + "logits/chosen": -2.9516165256500244, + "logits/rejected": -2.473215341567993, + "logps/chosen": -864.5321655273438, + "logps/rejected": -586.2059326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7146530151367188, + "rewards/margins": 15.764789581298828, + "rewards/rejected": -18.479442596435547, + "step": 16808 + }, + { + "epoch": 2.61, + "learning_rate": 1.8189325172467451e-06, + "logits/chosen": -0.8809183239936829, + "logits/rejected": -2.687325954437256, + "logps/chosen": -160.99484252929688, + "logps/rejected": -522.1895141601562, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.129169464111328, + "rewards/margins": 9.355255126953125, + "rewards/rejected": -19.484424591064453, + "step": 16809 + }, + { + "epoch": 2.61, + "learning_rate": 1.8181990767155972e-06, + "logits/chosen": -2.6558847427368164, + "logits/rejected": -2.2269272804260254, + "logps/chosen": -345.7876281738281, + "logps/rejected": -461.6791076660156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.206282138824463, + "rewards/margins": 12.438810348510742, + "rewards/rejected": -18.645092010498047, + "step": 16810 + }, + { + "epoch": 2.61, + "learning_rate": 1.8174656361844495e-06, + "logits/chosen": -1.1759706735610962, + "logits/rejected": -2.2535390853881836, + "logps/chosen": -147.8331298828125, + "logps/rejected": -334.0664367675781, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.75915241241455, + "rewards/margins": 6.637666702270508, + "rewards/rejected": -16.396820068359375, + "step": 16811 + }, + { + "epoch": 2.61, + "learning_rate": 1.8167321956533014e-06, + "logits/chosen": -1.5869413614273071, + "logits/rejected": -2.5878560543060303, + "logps/chosen": -205.49740600585938, + "logps/rejected": -422.6899108886719, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.298299789428711, + "rewards/margins": 10.086597442626953, + "rewards/rejected": -25.384897232055664, + "step": 16812 + }, + { + "epoch": 2.61, + "learning_rate": 1.8159987551221537e-06, + "logits/chosen": -2.661198616027832, + "logits/rejected": -2.898757219314575, + "logps/chosen": -217.30776977539062, + "logps/rejected": -316.1182861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.598884582519531, + "rewards/margins": 9.979716300964355, + "rewards/rejected": -16.578601837158203, + "step": 16813 + }, + { + "epoch": 2.61, + "learning_rate": 1.8152653145910058e-06, + "logits/chosen": -2.5767784118652344, + "logits/rejected": -2.344705104827881, + "logps/chosen": -196.45758056640625, + "logps/rejected": -250.04974365234375, + "loss": 0.2279, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.366097450256348, + "rewards/margins": 3.713618278503418, + "rewards/rejected": -15.079715728759766, + "step": 16814 + }, + { + "epoch": 2.62, + "learning_rate": 1.814531874059858e-06, + "logits/chosen": -2.3665707111358643, + "logits/rejected": -2.1572763919830322, + "logps/chosen": -349.228759765625, + "logps/rejected": -376.5600891113281, + "loss": 0.15, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.098199844360352, + "rewards/margins": 2.9566354751586914, + "rewards/rejected": -18.05483627319336, + "step": 16815 + }, + { + "epoch": 2.62, + "learning_rate": 1.81379843352871e-06, + "logits/chosen": -2.3164901733398438, + "logits/rejected": -2.492249011993408, + "logps/chosen": -264.32177734375, + "logps/rejected": -410.03668212890625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.72065544128418, + "rewards/margins": 7.937038421630859, + "rewards/rejected": -22.65769386291504, + "step": 16816 + }, + { + "epoch": 2.62, + "learning_rate": 1.8130649929975623e-06, + "logits/chosen": -1.1470820903778076, + "logits/rejected": -2.447158098220825, + "logps/chosen": -172.18057250976562, + "logps/rejected": -437.0608215332031, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.277971267700195, + "rewards/margins": 6.353908538818359, + "rewards/rejected": -18.631879806518555, + "step": 16817 + }, + { + "epoch": 2.62, + "learning_rate": 1.8123315524664141e-06, + "logits/chosen": -2.738543748855591, + "logits/rejected": -2.525512456893921, + "logps/chosen": -471.4459533691406, + "logps/rejected": -539.5795288085938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.227193832397461, + "rewards/margins": 11.151233673095703, + "rewards/rejected": -20.378427505493164, + "step": 16818 + }, + { + "epoch": 2.62, + "learning_rate": 1.8115981119352662e-06, + "logits/chosen": -0.5040656924247742, + "logits/rejected": -2.693697452545166, + "logps/chosen": -189.04421997070312, + "logps/rejected": -665.659423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.760078430175781, + "rewards/margins": 11.743515014648438, + "rewards/rejected": -21.50359344482422, + "step": 16819 + }, + { + "epoch": 2.62, + "learning_rate": 1.8108646714041185e-06, + "logits/chosen": -1.525804042816162, + "logits/rejected": -2.4789087772369385, + "logps/chosen": -171.8478240966797, + "logps/rejected": -530.665283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.169530868530273, + "rewards/margins": 16.592252731323242, + "rewards/rejected": -22.761783599853516, + "step": 16820 + }, + { + "epoch": 2.62, + "learning_rate": 1.8101312308729704e-06, + "logits/chosen": -2.3967881202697754, + "logits/rejected": -2.605950355529785, + "logps/chosen": -266.15692138671875, + "logps/rejected": -382.70831298828125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.134347915649414, + "rewards/margins": 6.718809127807617, + "rewards/rejected": -14.853157043457031, + "step": 16821 + }, + { + "epoch": 2.62, + "learning_rate": 1.8093977903418227e-06, + "logits/chosen": -2.97931170463562, + "logits/rejected": -2.7032947540283203, + "logps/chosen": -237.5023193359375, + "logps/rejected": -244.5554656982422, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.549566268920898, + "rewards/margins": 5.8056721687316895, + "rewards/rejected": -15.355238914489746, + "step": 16822 + }, + { + "epoch": 2.62, + "learning_rate": 1.8086643498106748e-06, + "logits/chosen": -2.718416452407837, + "logits/rejected": -2.88114857673645, + "logps/chosen": -272.4432678222656, + "logps/rejected": -455.55328369140625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.537040710449219, + "rewards/margins": 10.598617553710938, + "rewards/rejected": -21.135658264160156, + "step": 16823 + }, + { + "epoch": 2.62, + "learning_rate": 1.807930909279527e-06, + "logits/chosen": -2.99263596534729, + "logits/rejected": -2.239725351333618, + "logps/chosen": -568.4657592773438, + "logps/rejected": -649.0115966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.34497356414795, + "rewards/margins": 15.84898567199707, + "rewards/rejected": -28.193958282470703, + "step": 16824 + }, + { + "epoch": 2.62, + "learning_rate": 1.807197468748379e-06, + "logits/chosen": -2.747058153152466, + "logits/rejected": -1.6902426481246948, + "logps/chosen": -787.3592529296875, + "logps/rejected": -459.2090759277344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.883364677429199, + "rewards/margins": 10.252031326293945, + "rewards/rejected": -18.13539695739746, + "step": 16825 + }, + { + "epoch": 2.62, + "learning_rate": 1.8064640282172313e-06, + "logits/chosen": -1.528352975845337, + "logits/rejected": -2.7061543464660645, + "logps/chosen": -139.82797241210938, + "logps/rejected": -645.6665649414062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3405656814575195, + "rewards/margins": 11.678864479064941, + "rewards/rejected": -19.01943016052246, + "step": 16826 + }, + { + "epoch": 2.62, + "learning_rate": 1.8057305876860834e-06, + "logits/chosen": -3.1050803661346436, + "logits/rejected": -2.0019140243530273, + "logps/chosen": -299.46539306640625, + "logps/rejected": -248.6678466796875, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.261556625366211, + "rewards/margins": 7.220674991607666, + "rewards/rejected": -13.482231140136719, + "step": 16827 + }, + { + "epoch": 2.62, + "learning_rate": 1.8049971471549357e-06, + "logits/chosen": -2.017716407775879, + "logits/rejected": -2.7446067333221436, + "logps/chosen": -88.44530487060547, + "logps/rejected": -272.45428466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.181918144226074, + "rewards/margins": 11.194664001464844, + "rewards/rejected": -18.376583099365234, + "step": 16828 + }, + { + "epoch": 2.62, + "learning_rate": 1.8042637066237875e-06, + "logits/chosen": -1.8085511922836304, + "logits/rejected": -2.5636236667633057, + "logps/chosen": -429.804931640625, + "logps/rejected": -479.0221862792969, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.320334434509277, + "rewards/margins": 6.0515618324279785, + "rewards/rejected": -18.371896743774414, + "step": 16829 + }, + { + "epoch": 2.62, + "learning_rate": 1.8035302660926394e-06, + "logits/chosen": -1.5640082359313965, + "logits/rejected": -2.763848304748535, + "logps/chosen": -228.47207641601562, + "logps/rejected": -593.4989013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.69533920288086, + "rewards/margins": 12.518415451049805, + "rewards/rejected": -23.213754653930664, + "step": 16830 + }, + { + "epoch": 2.62, + "learning_rate": 1.802796825561492e-06, + "logits/chosen": -1.809291124343872, + "logits/rejected": -2.5987460613250732, + "logps/chosen": -131.78652954101562, + "logps/rejected": -382.50201416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.863750457763672, + "rewards/margins": 12.05439567565918, + "rewards/rejected": -21.918148040771484, + "step": 16831 + }, + { + "epoch": 2.62, + "learning_rate": 1.8020633850303438e-06, + "logits/chosen": -2.811406373977661, + "logits/rejected": -2.143493175506592, + "logps/chosen": -205.60838317871094, + "logps/rejected": -224.87559509277344, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.18783950805664, + "rewards/margins": 5.256411075592041, + "rewards/rejected": -14.44425106048584, + "step": 16832 + }, + { + "epoch": 2.62, + "learning_rate": 1.8013299444991961e-06, + "logits/chosen": -2.401862382888794, + "logits/rejected": -2.700594902038574, + "logps/chosen": -176.2808380126953, + "logps/rejected": -437.25225830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.362990379333496, + "rewards/margins": 13.611811637878418, + "rewards/rejected": -20.974802017211914, + "step": 16833 + }, + { + "epoch": 2.62, + "learning_rate": 1.800596503968048e-06, + "logits/chosen": -0.8265048265457153, + "logits/rejected": -2.4316329956054688, + "logps/chosen": -252.18218994140625, + "logps/rejected": -504.80682373046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.690213203430176, + "rewards/margins": 9.278538703918457, + "rewards/rejected": -19.968751907348633, + "step": 16834 + }, + { + "epoch": 2.62, + "learning_rate": 1.7998630634369003e-06, + "logits/chosen": -2.8995258808135986, + "logits/rejected": -2.517641305923462, + "logps/chosen": -281.70367431640625, + "logps/rejected": -293.9683837890625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.021584987640381, + "rewards/margins": 7.567061901092529, + "rewards/rejected": -14.58864688873291, + "step": 16835 + }, + { + "epoch": 2.62, + "learning_rate": 1.7991296229057524e-06, + "logits/chosen": -2.716867446899414, + "logits/rejected": -2.878598928451538, + "logps/chosen": -183.52536010742188, + "logps/rejected": -446.833251953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.196585655212402, + "rewards/margins": 13.208051681518555, + "rewards/rejected": -23.40463638305664, + "step": 16836 + }, + { + "epoch": 2.62, + "learning_rate": 1.7983961823746047e-06, + "logits/chosen": -2.2003183364868164, + "logits/rejected": -2.69160795211792, + "logps/chosen": -136.70030212402344, + "logps/rejected": -397.3892517089844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.249191284179688, + "rewards/margins": 10.163965225219727, + "rewards/rejected": -18.413156509399414, + "step": 16837 + }, + { + "epoch": 2.62, + "learning_rate": 1.7976627418434566e-06, + "logits/chosen": -2.879439353942871, + "logits/rejected": -3.058466911315918, + "logps/chosen": -322.70330810546875, + "logps/rejected": -376.223876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.71093225479126, + "rewards/margins": 13.088887214660645, + "rewards/rejected": -17.799819946289062, + "step": 16838 + }, + { + "epoch": 2.62, + "learning_rate": 1.7969293013123087e-06, + "logits/chosen": -2.880763292312622, + "logits/rejected": -2.5603015422821045, + "logps/chosen": -495.1197204589844, + "logps/rejected": -463.958984375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.7139310836792, + "rewards/margins": 7.829974174499512, + "rewards/rejected": -17.54390525817871, + "step": 16839 + }, + { + "epoch": 2.62, + "learning_rate": 1.796195860781161e-06, + "logits/chosen": -2.4625325202941895, + "logits/rejected": -2.6048617362976074, + "logps/chosen": -411.47772216796875, + "logps/rejected": -497.8331604003906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.696687698364258, + "rewards/margins": 11.247782707214355, + "rewards/rejected": -18.944469451904297, + "step": 16840 + }, + { + "epoch": 2.62, + "learning_rate": 1.7954624202500128e-06, + "logits/chosen": -2.5031075477600098, + "logits/rejected": -2.63039231300354, + "logps/chosen": -277.41973876953125, + "logps/rejected": -336.0380859375, + "loss": 0.0739, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.587741851806641, + "rewards/margins": 7.144150733947754, + "rewards/rejected": -13.731892585754395, + "step": 16841 + }, + { + "epoch": 2.62, + "learning_rate": 1.7947289797188651e-06, + "logits/chosen": -2.6502225399017334, + "logits/rejected": -2.3123764991760254, + "logps/chosen": -621.5474243164062, + "logps/rejected": -581.0768432617188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.965517044067383, + "rewards/margins": 8.735347747802734, + "rewards/rejected": -22.700862884521484, + "step": 16842 + }, + { + "epoch": 2.62, + "learning_rate": 1.793995539187717e-06, + "logits/chosen": -2.3721985816955566, + "logits/rejected": -2.616180658340454, + "logps/chosen": -315.8731689453125, + "logps/rejected": -435.94842529296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.27760124206543, + "rewards/margins": 8.411190032958984, + "rewards/rejected": -17.688791275024414, + "step": 16843 + }, + { + "epoch": 2.62, + "learning_rate": 1.7932620986565695e-06, + "logits/chosen": -2.0209171772003174, + "logits/rejected": -2.16329288482666, + "logps/chosen": -178.41732788085938, + "logps/rejected": -351.8424072265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.750968933105469, + "rewards/margins": 9.906789779663086, + "rewards/rejected": -17.657760620117188, + "step": 16844 + }, + { + "epoch": 2.62, + "learning_rate": 1.7925286581254214e-06, + "logits/chosen": -2.1874024868011475, + "logits/rejected": -2.3665928840637207, + "logps/chosen": -211.9783172607422, + "logps/rejected": -389.10552978515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.913671493530273, + "rewards/margins": 10.307449340820312, + "rewards/rejected": -21.221120834350586, + "step": 16845 + }, + { + "epoch": 2.62, + "learning_rate": 1.7917952175942737e-06, + "logits/chosen": -2.2608566284179688, + "logits/rejected": -2.6426327228546143, + "logps/chosen": -137.6259307861328, + "logps/rejected": -426.13433837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.335031509399414, + "rewards/margins": 13.739952087402344, + "rewards/rejected": -22.074981689453125, + "step": 16846 + }, + { + "epoch": 2.62, + "learning_rate": 1.7910617770631256e-06, + "logits/chosen": -2.952944040298462, + "logits/rejected": -2.7460691928863525, + "logps/chosen": -657.4132690429688, + "logps/rejected": -544.3392333984375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.583728790283203, + "rewards/margins": 8.100181579589844, + "rewards/rejected": -15.683910369873047, + "step": 16847 + }, + { + "epoch": 2.62, + "learning_rate": 1.7903283365319781e-06, + "logits/chosen": -2.2440481185913086, + "logits/rejected": -2.733492612838745, + "logps/chosen": -148.7974395751953, + "logps/rejected": -366.771728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.890721321105957, + "rewards/margins": 10.958734512329102, + "rewards/rejected": -20.849456787109375, + "step": 16848 + }, + { + "epoch": 2.62, + "learning_rate": 1.78959489600083e-06, + "logits/chosen": -2.644780397415161, + "logits/rejected": -2.853076696395874, + "logps/chosen": -392.1938781738281, + "logps/rejected": -499.8105773925781, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.657675743103027, + "rewards/margins": 7.325571537017822, + "rewards/rejected": -19.983247756958008, + "step": 16849 + }, + { + "epoch": 2.62, + "learning_rate": 1.7888614554696819e-06, + "logits/chosen": -2.054004192352295, + "logits/rejected": -2.6742327213287354, + "logps/chosen": -133.4290313720703, + "logps/rejected": -402.7763977050781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.403548240661621, + "rewards/margins": 16.80905532836914, + "rewards/rejected": -24.212604522705078, + "step": 16850 + }, + { + "epoch": 2.62, + "learning_rate": 1.7881280149385342e-06, + "logits/chosen": -2.0544979572296143, + "logits/rejected": -2.58492112159729, + "logps/chosen": -264.953857421875, + "logps/rejected": -623.4324340820312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.182817459106445, + "rewards/margins": 17.291641235351562, + "rewards/rejected": -28.474458694458008, + "step": 16851 + }, + { + "epoch": 2.62, + "learning_rate": 1.7873945744073863e-06, + "logits/chosen": -1.0467643737792969, + "logits/rejected": -2.158750057220459, + "logps/chosen": -193.8645477294922, + "logps/rejected": -349.50128173828125, + "loss": 0.1666, + "rewards/accuracies": 1.0, + "rewards/chosen": -16.852357864379883, + "rewards/margins": 2.9040446281433105, + "rewards/rejected": -19.75640296936035, + "step": 16852 + }, + { + "epoch": 2.62, + "learning_rate": 1.7866611338762386e-06, + "logits/chosen": -2.8332018852233887, + "logits/rejected": -2.7092409133911133, + "logps/chosen": -453.565673828125, + "logps/rejected": -531.7345581054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6470537185668945, + "rewards/margins": 10.535623550415039, + "rewards/rejected": -18.18267822265625, + "step": 16853 + }, + { + "epoch": 2.62, + "learning_rate": 1.7859276933450904e-06, + "logits/chosen": -1.9789286851882935, + "logits/rejected": -2.3274903297424316, + "logps/chosen": -186.6881103515625, + "logps/rejected": -464.4493713378906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.120646476745605, + "rewards/margins": 13.67013931274414, + "rewards/rejected": -22.790786743164062, + "step": 16854 + }, + { + "epoch": 2.62, + "learning_rate": 1.7851942528139427e-06, + "logits/chosen": -2.9296340942382812, + "logits/rejected": -2.5360755920410156, + "logps/chosen": -397.6529846191406, + "logps/rejected": -603.5216674804688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.758166313171387, + "rewards/margins": 13.850948333740234, + "rewards/rejected": -21.609115600585938, + "step": 16855 + }, + { + "epoch": 2.62, + "learning_rate": 1.7844608122827948e-06, + "logits/chosen": -1.2286633253097534, + "logits/rejected": -2.490222930908203, + "logps/chosen": -236.8452911376953, + "logps/rejected": -493.4588623046875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.11296272277832, + "rewards/margins": 8.180809020996094, + "rewards/rejected": -18.293771743774414, + "step": 16856 + }, + { + "epoch": 2.62, + "learning_rate": 1.7837273717516471e-06, + "logits/chosen": -2.749288320541382, + "logits/rejected": -2.2139434814453125, + "logps/chosen": -206.719970703125, + "logps/rejected": -189.6210174560547, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.756595611572266, + "rewards/margins": 6.772250652313232, + "rewards/rejected": -17.528846740722656, + "step": 16857 + }, + { + "epoch": 2.62, + "learning_rate": 1.782993931220499e-06, + "logits/chosen": -2.489529609680176, + "logits/rejected": -2.8698477745056152, + "logps/chosen": -107.21461486816406, + "logps/rejected": -273.84039306640625, + "loss": 0.0301, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.472305297851562, + "rewards/margins": 6.5054402351379395, + "rewards/rejected": -15.977745056152344, + "step": 16858 + }, + { + "epoch": 2.62, + "learning_rate": 1.7822604906893509e-06, + "logits/chosen": -2.965115547180176, + "logits/rejected": -2.363901138305664, + "logps/chosen": -293.7112121582031, + "logps/rejected": -502.1087646484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.905603408813477, + "rewards/margins": 8.740041732788086, + "rewards/rejected": -19.645645141601562, + "step": 16859 + }, + { + "epoch": 2.62, + "learning_rate": 1.7815270501582032e-06, + "logits/chosen": -2.775057077407837, + "logits/rejected": -1.7922762632369995, + "logps/chosen": -513.366943359375, + "logps/rejected": -415.21783447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.194877624511719, + "rewards/margins": 10.279060363769531, + "rewards/rejected": -20.47393798828125, + "step": 16860 + }, + { + "epoch": 2.62, + "learning_rate": 1.7807936096270553e-06, + "logits/chosen": -2.498091220855713, + "logits/rejected": -1.9613741636276245, + "logps/chosen": -241.26617431640625, + "logps/rejected": -506.2662048339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.603806495666504, + "rewards/margins": 15.556745529174805, + "rewards/rejected": -26.160552978515625, + "step": 16861 + }, + { + "epoch": 2.62, + "learning_rate": 1.7800601690959076e-06, + "logits/chosen": -1.8285777568817139, + "logits/rejected": -2.2176623344421387, + "logps/chosen": -228.88949584960938, + "logps/rejected": -481.7424621582031, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.358308792114258, + "rewards/margins": 11.112822532653809, + "rewards/rejected": -26.47113037109375, + "step": 16862 + }, + { + "epoch": 2.62, + "learning_rate": 1.7793267285647595e-06, + "logits/chosen": -2.8790056705474854, + "logits/rejected": -2.0342907905578613, + "logps/chosen": -309.61529541015625, + "logps/rejected": -317.57550048828125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.043327331542969, + "rewards/margins": 10.550180435180664, + "rewards/rejected": -19.593507766723633, + "step": 16863 + }, + { + "epoch": 2.62, + "learning_rate": 1.7785932880336118e-06, + "logits/chosen": -1.4898059368133545, + "logits/rejected": -2.538571834564209, + "logps/chosen": -180.41946411132812, + "logps/rejected": -413.74951171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.94670295715332, + "rewards/margins": 10.208856582641602, + "rewards/rejected": -18.155559539794922, + "step": 16864 + }, + { + "epoch": 2.62, + "learning_rate": 1.7778598475024638e-06, + "logits/chosen": -1.9385517835617065, + "logits/rejected": -2.32875394821167, + "logps/chosen": -204.83255004882812, + "logps/rejected": -443.3538513183594, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.700372695922852, + "rewards/margins": 8.051116943359375, + "rewards/rejected": -17.751489639282227, + "step": 16865 + }, + { + "epoch": 2.62, + "learning_rate": 1.7771264069713162e-06, + "logits/chosen": -1.2833448648452759, + "logits/rejected": -2.6285560131073, + "logps/chosen": -171.08871459960938, + "logps/rejected": -359.8039855957031, + "loss": 0.0649, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.873851776123047, + "rewards/margins": 6.454545021057129, + "rewards/rejected": -18.32839584350586, + "step": 16866 + }, + { + "epoch": 2.62, + "learning_rate": 1.776392966440168e-06, + "logits/chosen": -2.7432427406311035, + "logits/rejected": -2.6591074466705322, + "logps/chosen": -206.78350830078125, + "logps/rejected": -354.43865966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.260143280029297, + "rewards/margins": 11.395635604858398, + "rewards/rejected": -20.655778884887695, + "step": 16867 + }, + { + "epoch": 2.62, + "learning_rate": 1.7756595259090203e-06, + "logits/chosen": -2.672182321548462, + "logits/rejected": -2.0355067253112793, + "logps/chosen": -394.8489990234375, + "logps/rejected": -570.9189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.38409423828125, + "rewards/margins": 15.346671104431152, + "rewards/rejected": -23.730764389038086, + "step": 16868 + }, + { + "epoch": 2.62, + "learning_rate": 1.7749260853778724e-06, + "logits/chosen": -1.0966994762420654, + "logits/rejected": -2.113819122314453, + "logps/chosen": -169.18414306640625, + "logps/rejected": -397.1795959472656, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.646716117858887, + "rewards/margins": 10.873507499694824, + "rewards/rejected": -19.52022361755371, + "step": 16869 + }, + { + "epoch": 2.62, + "learning_rate": 1.7741926448467243e-06, + "logits/chosen": -2.552971839904785, + "logits/rejected": -2.9296047687530518, + "logps/chosen": -124.52792358398438, + "logps/rejected": -335.4956970214844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.825063705444336, + "rewards/margins": 9.469022750854492, + "rewards/rejected": -19.294086456298828, + "step": 16870 + }, + { + "epoch": 2.62, + "learning_rate": 1.7734592043155766e-06, + "logits/chosen": -2.711134910583496, + "logits/rejected": -2.3482799530029297, + "logps/chosen": -772.3937377929688, + "logps/rejected": -852.0360107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.10202407836914, + "rewards/margins": 11.805465698242188, + "rewards/rejected": -21.907489776611328, + "step": 16871 + }, + { + "epoch": 2.62, + "learning_rate": 1.7727257637844285e-06, + "logits/chosen": -2.2584800720214844, + "logits/rejected": -2.8123703002929688, + "logps/chosen": -183.53515625, + "logps/rejected": -306.25750732421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.300439834594727, + "rewards/margins": 10.46981430053711, + "rewards/rejected": -18.770254135131836, + "step": 16872 + }, + { + "epoch": 2.62, + "learning_rate": 1.771992323253281e-06, + "logits/chosen": -2.425060272216797, + "logits/rejected": -2.877769947052002, + "logps/chosen": -253.72915649414062, + "logps/rejected": -441.0921936035156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.898970603942871, + "rewards/margins": 8.631213188171387, + "rewards/rejected": -18.530183792114258, + "step": 16873 + }, + { + "epoch": 2.62, + "learning_rate": 1.7712588827221329e-06, + "logits/chosen": -2.2285287380218506, + "logits/rejected": -2.52408504486084, + "logps/chosen": -202.86300659179688, + "logps/rejected": -342.1539306640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.937033653259277, + "rewards/margins": 8.436531066894531, + "rewards/rejected": -18.373565673828125, + "step": 16874 + }, + { + "epoch": 2.62, + "learning_rate": 1.7705254421909852e-06, + "logits/chosen": -2.867095947265625, + "logits/rejected": -2.796008825302124, + "logps/chosen": -214.84930419921875, + "logps/rejected": -279.03411865234375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.368383407592773, + "rewards/margins": 9.541611671447754, + "rewards/rejected": -17.909996032714844, + "step": 16875 + }, + { + "epoch": 2.62, + "learning_rate": 1.769792001659837e-06, + "logits/chosen": -2.565683603286743, + "logits/rejected": -2.7248077392578125, + "logps/chosen": -484.5770263671875, + "logps/rejected": -704.520751953125, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.45879077911377, + "rewards/margins": 7.529151916503906, + "rewards/rejected": -18.98794174194336, + "step": 16876 + }, + { + "epoch": 2.62, + "learning_rate": 1.7690585611286894e-06, + "logits/chosen": -2.5333197116851807, + "logits/rejected": -2.981381893157959, + "logps/chosen": -130.74691772460938, + "logps/rejected": -359.95989990234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.8345308303833, + "rewards/margins": 11.153658866882324, + "rewards/rejected": -19.988189697265625, + "step": 16877 + }, + { + "epoch": 2.62, + "learning_rate": 1.7683251205975414e-06, + "logits/chosen": -2.8023526668548584, + "logits/rejected": -2.2484941482543945, + "logps/chosen": -244.80535888671875, + "logps/rejected": -249.17105102539062, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.555989265441895, + "rewards/margins": 7.899466514587402, + "rewards/rejected": -17.455455780029297, + "step": 16878 + }, + { + "epoch": 2.63, + "learning_rate": 1.7675916800663933e-06, + "logits/chosen": -1.2895534038543701, + "logits/rejected": -2.3100414276123047, + "logps/chosen": -155.97476196289062, + "logps/rejected": -288.5180969238281, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.17915678024292, + "rewards/margins": 9.20870590209961, + "rewards/rejected": -16.387863159179688, + "step": 16879 + }, + { + "epoch": 2.63, + "learning_rate": 1.7668582395352456e-06, + "logits/chosen": -2.827092170715332, + "logits/rejected": -2.2039668560028076, + "logps/chosen": -738.3339233398438, + "logps/rejected": -454.62677001953125, + "loss": 0.0612, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.868194580078125, + "rewards/margins": 4.308231830596924, + "rewards/rejected": -15.17642593383789, + "step": 16880 + }, + { + "epoch": 2.63, + "learning_rate": 1.7661247990040977e-06, + "logits/chosen": -2.3000571727752686, + "logits/rejected": -2.7599072456359863, + "logps/chosen": -188.97705078125, + "logps/rejected": -483.8983154296875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.902848243713379, + "rewards/margins": 11.705238342285156, + "rewards/rejected": -20.60808563232422, + "step": 16881 + }, + { + "epoch": 2.63, + "learning_rate": 1.76539135847295e-06, + "logits/chosen": -2.9093916416168213, + "logits/rejected": -2.796701431274414, + "logps/chosen": -565.7864990234375, + "logps/rejected": -497.2312927246094, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7903594970703125, + "rewards/margins": 12.382024765014648, + "rewards/rejected": -20.17238426208496, + "step": 16882 + }, + { + "epoch": 2.63, + "learning_rate": 1.7646579179418019e-06, + "logits/chosen": -2.6220338344573975, + "logits/rejected": -2.7533161640167236, + "logps/chosen": -346.40753173828125, + "logps/rejected": -770.646240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.267282485961914, + "rewards/margins": 15.410575866699219, + "rewards/rejected": -25.677860260009766, + "step": 16883 + }, + { + "epoch": 2.63, + "learning_rate": 1.763924477410654e-06, + "logits/chosen": -1.2617515325546265, + "logits/rejected": -2.3855204582214355, + "logps/chosen": -182.1085205078125, + "logps/rejected": -442.5276794433594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.084534645080566, + "rewards/margins": 13.86807632446289, + "rewards/rejected": -21.95261001586914, + "step": 16884 + }, + { + "epoch": 2.63, + "learning_rate": 1.763191036879506e-06, + "logits/chosen": -1.6363871097564697, + "logits/rejected": -2.6487133502960205, + "logps/chosen": -135.72608947753906, + "logps/rejected": -389.6827087402344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.265043258666992, + "rewards/margins": 13.491569519042969, + "rewards/rejected": -20.75661277770996, + "step": 16885 + }, + { + "epoch": 2.63, + "learning_rate": 1.7624575963483584e-06, + "logits/chosen": -2.8179125785827637, + "logits/rejected": -2.4899182319641113, + "logps/chosen": -519.98486328125, + "logps/rejected": -500.05181884765625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.916496276855469, + "rewards/margins": 9.146936416625977, + "rewards/rejected": -21.063432693481445, + "step": 16886 + }, + { + "epoch": 2.63, + "learning_rate": 1.7617241558172105e-06, + "logits/chosen": -1.2823219299316406, + "logits/rejected": -2.633139133453369, + "logps/chosen": -279.17669677734375, + "logps/rejected": -613.1473388671875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.590181350708008, + "rewards/margins": 13.521710395812988, + "rewards/rejected": -26.111892700195312, + "step": 16887 + }, + { + "epoch": 2.63, + "learning_rate": 1.7609907152860626e-06, + "logits/chosen": -2.57214093208313, + "logits/rejected": -2.9340507984161377, + "logps/chosen": -144.45248413085938, + "logps/rejected": -376.24273681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.981801986694336, + "rewards/margins": 14.185993194580078, + "rewards/rejected": -22.16779327392578, + "step": 16888 + }, + { + "epoch": 2.63, + "learning_rate": 1.7602572747549146e-06, + "logits/chosen": -2.4613912105560303, + "logits/rejected": -2.5752015113830566, + "logps/chosen": -118.46507263183594, + "logps/rejected": -278.4621887207031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.849228382110596, + "rewards/margins": 10.336149215698242, + "rewards/rejected": -18.185379028320312, + "step": 16889 + }, + { + "epoch": 2.63, + "learning_rate": 1.759523834223767e-06, + "logits/chosen": -1.9010649919509888, + "logits/rejected": -2.2804114818573, + "logps/chosen": -157.7932586669922, + "logps/rejected": -351.5475769042969, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.032693862915039, + "rewards/margins": 8.441505432128906, + "rewards/rejected": -21.474197387695312, + "step": 16890 + }, + { + "epoch": 2.63, + "learning_rate": 1.758790393692619e-06, + "logits/chosen": -2.7658016681671143, + "logits/rejected": -2.774472713470459, + "logps/chosen": -133.9041748046875, + "logps/rejected": -385.8186950683594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.390478134155273, + "rewards/margins": 14.13536548614502, + "rewards/rejected": -22.52584457397461, + "step": 16891 + }, + { + "epoch": 2.63, + "learning_rate": 1.7580569531614711e-06, + "logits/chosen": -2.1101810932159424, + "logits/rejected": -2.590883493423462, + "logps/chosen": -434.426513671875, + "logps/rejected": -639.4243774414062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -18.258554458618164, + "rewards/margins": 11.162820816040039, + "rewards/rejected": -29.421375274658203, + "step": 16892 + }, + { + "epoch": 2.63, + "learning_rate": 1.757323512630323e-06, + "logits/chosen": -2.723931074142456, + "logits/rejected": -0.8496373891830444, + "logps/chosen": -187.28407287597656, + "logps/rejected": -284.83740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.296304702758789, + "rewards/margins": 11.374685287475586, + "rewards/rejected": -18.670989990234375, + "step": 16893 + }, + { + "epoch": 2.63, + "learning_rate": 1.7565900720991753e-06, + "logits/chosen": -2.6636667251586914, + "logits/rejected": -1.5723216533660889, + "logps/chosen": -338.522216796875, + "logps/rejected": -367.2410888671875, + "loss": 0.2105, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.659759521484375, + "rewards/margins": 5.695736885070801, + "rewards/rejected": -16.35549545288086, + "step": 16894 + }, + { + "epoch": 2.63, + "learning_rate": 1.7558566315680274e-06, + "logits/chosen": -2.9115772247314453, + "logits/rejected": -2.3257055282592773, + "logps/chosen": -669.628173828125, + "logps/rejected": -986.6497192382812, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.05606460571289, + "rewards/margins": 9.139366149902344, + "rewards/rejected": -17.195430755615234, + "step": 16895 + }, + { + "epoch": 2.63, + "learning_rate": 1.7551231910368795e-06, + "logits/chosen": -2.514265298843384, + "logits/rejected": -2.5728237628936768, + "logps/chosen": -244.669677734375, + "logps/rejected": -440.6375427246094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.291542053222656, + "rewards/margins": 11.248059272766113, + "rewards/rejected": -23.539600372314453, + "step": 16896 + }, + { + "epoch": 2.63, + "learning_rate": 1.7543897505057316e-06, + "logits/chosen": -2.1840248107910156, + "logits/rejected": -2.66306209564209, + "logps/chosen": -154.5649871826172, + "logps/rejected": -316.6891174316406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.769308090209961, + "rewards/margins": 8.689299583435059, + "rewards/rejected": -18.458606719970703, + "step": 16897 + }, + { + "epoch": 2.63, + "learning_rate": 1.7536563099745837e-06, + "logits/chosen": -2.4758450984954834, + "logits/rejected": -2.7449328899383545, + "logps/chosen": -227.72393798828125, + "logps/rejected": -372.36279296875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.741241455078125, + "rewards/margins": 8.033822059631348, + "rewards/rejected": -15.775063514709473, + "step": 16898 + }, + { + "epoch": 2.63, + "learning_rate": 1.752922869443436e-06, + "logits/chosen": -2.4952704906463623, + "logits/rejected": -2.6899585723876953, + "logps/chosen": -195.62095642089844, + "logps/rejected": -500.7693176269531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.770931243896484, + "rewards/margins": 9.159027099609375, + "rewards/rejected": -20.92995834350586, + "step": 16899 + }, + { + "epoch": 2.63, + "learning_rate": 1.752189428912288e-06, + "logits/chosen": -2.8391976356506348, + "logits/rejected": -2.710993528366089, + "logps/chosen": -542.6248779296875, + "logps/rejected": -512.28173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.798808574676514, + "rewards/margins": 16.46088409423828, + "rewards/rejected": -22.259693145751953, + "step": 16900 + }, + { + "epoch": 2.63, + "learning_rate": 1.7514559883811401e-06, + "logits/chosen": -2.369314670562744, + "logits/rejected": -2.0520200729370117, + "logps/chosen": -470.45294189453125, + "logps/rejected": -535.150634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.771842002868652, + "rewards/margins": 10.651497840881348, + "rewards/rejected": -24.42333984375, + "step": 16901 + }, + { + "epoch": 2.63, + "learning_rate": 1.7507225478499922e-06, + "logits/chosen": -2.095319986343384, + "logits/rejected": -2.5960116386413574, + "logps/chosen": -196.07260131835938, + "logps/rejected": -444.815673828125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.37071418762207, + "rewards/margins": 8.177919387817383, + "rewards/rejected": -18.548633575439453, + "step": 16902 + }, + { + "epoch": 2.63, + "learning_rate": 1.7499891073188443e-06, + "logits/chosen": -2.5917022228240967, + "logits/rejected": -2.863567352294922, + "logps/chosen": -294.1610412597656, + "logps/rejected": -549.8715209960938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.969915390014648, + "rewards/margins": 11.736927032470703, + "rewards/rejected": -23.70684051513672, + "step": 16903 + }, + { + "epoch": 2.63, + "learning_rate": 1.7492556667876964e-06, + "logits/chosen": -2.901371479034424, + "logits/rejected": -2.75829815864563, + "logps/chosen": -869.948974609375, + "logps/rejected": -907.3392333984375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.158421516418457, + "rewards/margins": 7.426220893859863, + "rewards/rejected": -17.58464241027832, + "step": 16904 + }, + { + "epoch": 2.63, + "learning_rate": 1.7485222262565485e-06, + "logits/chosen": -2.977140188217163, + "logits/rejected": -2.8200700283050537, + "logps/chosen": -113.92711639404297, + "logps/rejected": -289.2174377441406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.039369583129883, + "rewards/margins": 12.124370574951172, + "rewards/rejected": -20.163738250732422, + "step": 16905 + }, + { + "epoch": 2.63, + "learning_rate": 1.7477887857254006e-06, + "logits/chosen": -2.52071213722229, + "logits/rejected": -2.642894983291626, + "logps/chosen": -126.542236328125, + "logps/rejected": -333.33319091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.198551177978516, + "rewards/margins": 12.457094192504883, + "rewards/rejected": -18.6556453704834, + "step": 16906 + }, + { + "epoch": 2.63, + "learning_rate": 1.747055345194253e-06, + "logits/chosen": -2.2199604511260986, + "logits/rejected": -2.641632318496704, + "logps/chosen": -215.46170043945312, + "logps/rejected": -469.257080078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.409221649169922, + "rewards/margins": 8.576862335205078, + "rewards/rejected": -22.986083984375, + "step": 16907 + }, + { + "epoch": 2.63, + "learning_rate": 1.746321904663105e-06, + "logits/chosen": -2.0798962116241455, + "logits/rejected": -2.382270097732544, + "logps/chosen": -211.799560546875, + "logps/rejected": -534.9801025390625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.823923110961914, + "rewards/margins": 11.75247573852539, + "rewards/rejected": -22.576398849487305, + "step": 16908 + }, + { + "epoch": 2.63, + "learning_rate": 1.745588464131957e-06, + "logits/chosen": -2.0420336723327637, + "logits/rejected": -2.470552682876587, + "logps/chosen": -188.8133544921875, + "logps/rejected": -603.081298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.936239242553711, + "rewards/margins": 14.029267311096191, + "rewards/rejected": -22.96550750732422, + "step": 16909 + }, + { + "epoch": 2.63, + "learning_rate": 1.7448550236008092e-06, + "logits/chosen": -2.5311806201934814, + "logits/rejected": -2.770885944366455, + "logps/chosen": -624.8338623046875, + "logps/rejected": -536.98388671875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.647905349731445, + "rewards/margins": 11.126439094543457, + "rewards/rejected": -18.77434539794922, + "step": 16910 + }, + { + "epoch": 2.63, + "learning_rate": 1.7441215830696615e-06, + "logits/chosen": -1.9414877891540527, + "logits/rejected": -2.322335958480835, + "logps/chosen": -275.29376220703125, + "logps/rejected": -445.1203308105469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.155538558959961, + "rewards/margins": 12.211584091186523, + "rewards/rejected": -21.367122650146484, + "step": 16911 + }, + { + "epoch": 2.63, + "learning_rate": 1.7433881425385136e-06, + "logits/chosen": -1.462850570678711, + "logits/rejected": -2.393183946609497, + "logps/chosen": -211.82789611816406, + "logps/rejected": -454.30426025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.630716323852539, + "rewards/margins": 15.698408126831055, + "rewards/rejected": -23.329124450683594, + "step": 16912 + }, + { + "epoch": 2.63, + "learning_rate": 1.7426547020073654e-06, + "logits/chosen": -2.2251334190368652, + "logits/rejected": -1.4743316173553467, + "logps/chosen": -212.6939239501953, + "logps/rejected": -213.38888549804688, + "loss": 0.5993, + "rewards/accuracies": 0.5, + "rewards/chosen": -16.58502960205078, + "rewards/margins": 2.6947531700134277, + "rewards/rejected": -19.279783248901367, + "step": 16913 + }, + { + "epoch": 2.63, + "learning_rate": 1.7419212614762175e-06, + "logits/chosen": -2.1474497318267822, + "logits/rejected": -2.9308419227600098, + "logps/chosen": -143.08628845214844, + "logps/rejected": -476.8590087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.779216766357422, + "rewards/margins": 17.223039627075195, + "rewards/rejected": -26.002256393432617, + "step": 16914 + }, + { + "epoch": 2.63, + "learning_rate": 1.7411878209450698e-06, + "logits/chosen": -2.402421712875366, + "logits/rejected": -2.877868890762329, + "logps/chosen": -95.59786224365234, + "logps/rejected": -261.7850341796875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.083927154541016, + "rewards/margins": 5.814694881439209, + "rewards/rejected": -13.898622512817383, + "step": 16915 + }, + { + "epoch": 2.63, + "learning_rate": 1.740454380413922e-06, + "logits/chosen": -1.3812919855117798, + "logits/rejected": -2.2301411628723145, + "logps/chosen": -362.3081359863281, + "logps/rejected": -723.7076416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.585251808166504, + "rewards/margins": 14.99355697631836, + "rewards/rejected": -28.578807830810547, + "step": 16916 + }, + { + "epoch": 2.63, + "learning_rate": 1.739720939882774e-06, + "logits/chosen": -1.2292386293411255, + "logits/rejected": -2.4949705600738525, + "logps/chosen": -291.9062194824219, + "logps/rejected": -396.5049743652344, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.761758804321289, + "rewards/margins": 8.577089309692383, + "rewards/rejected": -21.338848114013672, + "step": 16917 + }, + { + "epoch": 2.63, + "learning_rate": 1.738987499351626e-06, + "logits/chosen": -2.7259509563446045, + "logits/rejected": -2.8763182163238525, + "logps/chosen": -95.02759552001953, + "logps/rejected": -275.322998046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.476797103881836, + "rewards/margins": 10.45637035369873, + "rewards/rejected": -17.93316650390625, + "step": 16918 + }, + { + "epoch": 2.63, + "learning_rate": 1.7382540588204782e-06, + "logits/chosen": -2.2552273273468018, + "logits/rejected": -2.209317922592163, + "logps/chosen": -433.15484619140625, + "logps/rejected": -456.7650451660156, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.850955963134766, + "rewards/margins": 10.689534187316895, + "rewards/rejected": -19.540491104125977, + "step": 16919 + }, + { + "epoch": 2.63, + "learning_rate": 1.7375206182893305e-06, + "logits/chosen": -1.8392531871795654, + "logits/rejected": -2.3965446949005127, + "logps/chosen": -324.34869384765625, + "logps/rejected": -490.095947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.259297370910645, + "rewards/margins": 15.401277542114258, + "rewards/rejected": -24.660573959350586, + "step": 16920 + }, + { + "epoch": 2.63, + "learning_rate": 1.7367871777581826e-06, + "logits/chosen": -1.2776166200637817, + "logits/rejected": -2.551938772201538, + "logps/chosen": -237.825439453125, + "logps/rejected": -610.8216552734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.34247875213623, + "rewards/margins": 11.407176971435547, + "rewards/rejected": -21.749656677246094, + "step": 16921 + }, + { + "epoch": 2.63, + "learning_rate": 1.7360537372270347e-06, + "logits/chosen": -1.1246445178985596, + "logits/rejected": -2.3765082359313965, + "logps/chosen": -157.80670166015625, + "logps/rejected": -458.7278137207031, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.973855972290039, + "rewards/margins": 10.89206314086914, + "rewards/rejected": -24.865917205810547, + "step": 16922 + }, + { + "epoch": 2.63, + "learning_rate": 1.7353202966958866e-06, + "logits/chosen": -2.7961127758026123, + "logits/rejected": -2.9084577560424805, + "logps/chosen": -343.1779479980469, + "logps/rejected": -397.8536071777344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.380106449127197, + "rewards/margins": 10.362478256225586, + "rewards/rejected": -16.742584228515625, + "step": 16923 + }, + { + "epoch": 2.63, + "learning_rate": 1.7345868561647389e-06, + "logits/chosen": -2.603513240814209, + "logits/rejected": -2.519209384918213, + "logps/chosen": -254.36239624023438, + "logps/rejected": -379.0015563964844, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.63579273223877, + "rewards/margins": 10.092696189880371, + "rewards/rejected": -20.72848892211914, + "step": 16924 + }, + { + "epoch": 2.63, + "learning_rate": 1.733853415633591e-06, + "logits/chosen": -2.6374671459198, + "logits/rejected": -2.58894681930542, + "logps/chosen": -860.9319458007812, + "logps/rejected": -660.55810546875, + "loss": 0.182, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.102252006530762, + "rewards/margins": 5.687565803527832, + "rewards/rejected": -18.789817810058594, + "step": 16925 + }, + { + "epoch": 2.63, + "learning_rate": 1.733119975102443e-06, + "logits/chosen": -2.3993477821350098, + "logits/rejected": -2.75698184967041, + "logps/chosen": -284.12615966796875, + "logps/rejected": -519.34130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.929452896118164, + "rewards/margins": 13.374618530273438, + "rewards/rejected": -24.3040714263916, + "step": 16926 + }, + { + "epoch": 2.63, + "learning_rate": 1.7323865345712951e-06, + "logits/chosen": -2.6918723583221436, + "logits/rejected": -2.698026657104492, + "logps/chosen": -171.6791534423828, + "logps/rejected": -381.5605163574219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.441112518310547, + "rewards/margins": 12.392812728881836, + "rewards/rejected": -20.833927154541016, + "step": 16927 + }, + { + "epoch": 2.63, + "learning_rate": 1.7316530940401474e-06, + "logits/chosen": -1.6167103052139282, + "logits/rejected": -2.752119541168213, + "logps/chosen": -338.2454528808594, + "logps/rejected": -574.05322265625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.642636299133301, + "rewards/margins": 8.439918518066406, + "rewards/rejected": -16.082555770874023, + "step": 16928 + }, + { + "epoch": 2.63, + "learning_rate": 1.7309196535089995e-06, + "logits/chosen": -1.9706369638442993, + "logits/rejected": -2.8784117698669434, + "logps/chosen": -186.19967651367188, + "logps/rejected": -375.8931884765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.686513900756836, + "rewards/margins": 8.468647003173828, + "rewards/rejected": -18.155160903930664, + "step": 16929 + }, + { + "epoch": 2.63, + "learning_rate": 1.7301862129778516e-06, + "logits/chosen": -2.7301905155181885, + "logits/rejected": -1.4553077220916748, + "logps/chosen": -474.2557373046875, + "logps/rejected": -400.1607971191406, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.831604957580566, + "rewards/margins": 8.528250694274902, + "rewards/rejected": -21.35985565185547, + "step": 16930 + }, + { + "epoch": 2.63, + "learning_rate": 1.7294527724467037e-06, + "logits/chosen": -2.6121881008148193, + "logits/rejected": -2.6050124168395996, + "logps/chosen": -295.3651123046875, + "logps/rejected": -489.2430725097656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.83049201965332, + "rewards/margins": 12.200490951538086, + "rewards/rejected": -21.030982971191406, + "step": 16931 + }, + { + "epoch": 2.63, + "learning_rate": 1.728719331915556e-06, + "logits/chosen": -2.69486665725708, + "logits/rejected": -2.799288034439087, + "logps/chosen": -157.56776428222656, + "logps/rejected": -516.4117431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.093019485473633, + "rewards/margins": 12.83665943145752, + "rewards/rejected": -20.929677963256836, + "step": 16932 + }, + { + "epoch": 2.63, + "learning_rate": 1.7279858913844079e-06, + "logits/chosen": -2.6795401573181152, + "logits/rejected": -1.347777247428894, + "logps/chosen": -302.5433654785156, + "logps/rejected": -402.2059020996094, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.045088768005371, + "rewards/margins": 7.586145877838135, + "rewards/rejected": -18.63123321533203, + "step": 16933 + }, + { + "epoch": 2.63, + "learning_rate": 1.72725245085326e-06, + "logits/chosen": -2.712970018386841, + "logits/rejected": -2.244295358657837, + "logps/chosen": -461.71881103515625, + "logps/rejected": -479.1534118652344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.767723083496094, + "rewards/margins": 11.006301879882812, + "rewards/rejected": -19.774024963378906, + "step": 16934 + }, + { + "epoch": 2.63, + "learning_rate": 1.726519010322112e-06, + "logits/chosen": -2.440945863723755, + "logits/rejected": -2.7622416019439697, + "logps/chosen": -150.2841339111328, + "logps/rejected": -299.0372314453125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.769355773925781, + "rewards/margins": 7.4464945793151855, + "rewards/rejected": -19.215850830078125, + "step": 16935 + }, + { + "epoch": 2.63, + "learning_rate": 1.7257855697909644e-06, + "logits/chosen": -2.922339677810669, + "logits/rejected": -2.095374584197998, + "logps/chosen": -552.3109130859375, + "logps/rejected": -523.5310668945312, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.693239212036133, + "rewards/margins": 3.300227165222168, + "rewards/rejected": -14.993465423583984, + "step": 16936 + }, + { + "epoch": 2.63, + "learning_rate": 1.7250521292598164e-06, + "logits/chosen": -1.8440310955047607, + "logits/rejected": -2.7293701171875, + "logps/chosen": -174.03970336914062, + "logps/rejected": -442.73162841796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.980355262756348, + "rewards/margins": 13.593475341796875, + "rewards/rejected": -23.57383155822754, + "step": 16937 + }, + { + "epoch": 2.63, + "learning_rate": 1.7243186887286685e-06, + "logits/chosen": -2.217642068862915, + "logits/rejected": -2.866724729537964, + "logps/chosen": -205.46566772460938, + "logps/rejected": -422.637451171875, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.644037246704102, + "rewards/margins": 9.266063690185547, + "rewards/rejected": -19.91010093688965, + "step": 16938 + }, + { + "epoch": 2.63, + "learning_rate": 1.7235852481975206e-06, + "logits/chosen": -2.022378444671631, + "logits/rejected": -2.891329050064087, + "logps/chosen": -211.85427856445312, + "logps/rejected": -474.34075927734375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.598298072814941, + "rewards/margins": 7.7860636711120605, + "rewards/rejected": -16.384361267089844, + "step": 16939 + }, + { + "epoch": 2.63, + "learning_rate": 1.7228518076663727e-06, + "logits/chosen": -0.772240936756134, + "logits/rejected": -2.7398715019226074, + "logps/chosen": -241.84518432617188, + "logps/rejected": -536.8729248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.873458862304688, + "rewards/margins": 13.358749389648438, + "rewards/rejected": -23.232208251953125, + "step": 16940 + }, + { + "epoch": 2.63, + "learning_rate": 1.722118367135225e-06, + "logits/chosen": -1.5501655340194702, + "logits/rejected": -2.781924247741699, + "logps/chosen": -425.2336120605469, + "logps/rejected": -723.731689453125, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.231857299804688, + "rewards/margins": 7.4339165687561035, + "rewards/rejected": -19.665775299072266, + "step": 16941 + }, + { + "epoch": 2.63, + "learning_rate": 1.721384926604077e-06, + "logits/chosen": -2.761284112930298, + "logits/rejected": -1.9598897695541382, + "logps/chosen": -539.5145263671875, + "logps/rejected": -377.437744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.826807022094727, + "rewards/margins": 11.316168785095215, + "rewards/rejected": -22.142974853515625, + "step": 16942 + }, + { + "epoch": 2.63, + "learning_rate": 1.720651486072929e-06, + "logits/chosen": -2.58236026763916, + "logits/rejected": -2.5210938453674316, + "logps/chosen": -368.498046875, + "logps/rejected": -476.63116455078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.52967357635498, + "rewards/margins": 10.00538444519043, + "rewards/rejected": -18.535058975219727, + "step": 16943 + }, + { + "epoch": 2.64, + "learning_rate": 1.719918045541781e-06, + "logits/chosen": -2.6676762104034424, + "logits/rejected": -2.6197335720062256, + "logps/chosen": -197.6209716796875, + "logps/rejected": -354.0832824707031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.118295669555664, + "rewards/margins": 12.925809860229492, + "rewards/rejected": -23.044105529785156, + "step": 16944 + }, + { + "epoch": 2.64, + "learning_rate": 1.7191846050106334e-06, + "logits/chosen": -1.6501286029815674, + "logits/rejected": -2.4960899353027344, + "logps/chosen": -313.34051513671875, + "logps/rejected": -578.6585693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.482452392578125, + "rewards/margins": 12.799823760986328, + "rewards/rejected": -26.282276153564453, + "step": 16945 + }, + { + "epoch": 2.64, + "learning_rate": 1.7184511644794855e-06, + "logits/chosen": -1.3865824937820435, + "logits/rejected": -2.4776031970977783, + "logps/chosen": -219.34934997558594, + "logps/rejected": -357.749267578125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.076051712036133, + "rewards/margins": 7.025389194488525, + "rewards/rejected": -18.1014404296875, + "step": 16946 + }, + { + "epoch": 2.64, + "learning_rate": 1.7177177239483376e-06, + "logits/chosen": -1.4772436618804932, + "logits/rejected": -1.9349071979522705, + "logps/chosen": -155.54257202148438, + "logps/rejected": -391.68560791015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.402084350585938, + "rewards/margins": 8.622978210449219, + "rewards/rejected": -18.025062561035156, + "step": 16947 + }, + { + "epoch": 2.64, + "learning_rate": 1.7169842834171897e-06, + "logits/chosen": -2.050107479095459, + "logits/rejected": -2.403395652770996, + "logps/chosen": -373.25616455078125, + "logps/rejected": -414.0571594238281, + "loss": 0.5178, + "rewards/accuracies": 0.5, + "rewards/chosen": -14.052783966064453, + "rewards/margins": 4.774557590484619, + "rewards/rejected": -18.827341079711914, + "step": 16948 + }, + { + "epoch": 2.64, + "learning_rate": 1.716250842886042e-06, + "logits/chosen": -2.559861898422241, + "logits/rejected": -2.464665412902832, + "logps/chosen": -287.63330078125, + "logps/rejected": -466.65521240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.90371322631836, + "rewards/margins": 11.939627647399902, + "rewards/rejected": -24.843341827392578, + "step": 16949 + }, + { + "epoch": 2.64, + "learning_rate": 1.715517402354894e-06, + "logits/chosen": -2.6777164936065674, + "logits/rejected": -2.965817451477051, + "logps/chosen": -105.90205383300781, + "logps/rejected": -297.6397705078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.041574478149414, + "rewards/margins": 8.868839263916016, + "rewards/rejected": -14.910412788391113, + "step": 16950 + }, + { + "epoch": 2.64, + "learning_rate": 1.7147839618237461e-06, + "logits/chosen": -2.510068893432617, + "logits/rejected": -2.587684392929077, + "logps/chosen": -424.23931884765625, + "logps/rejected": -686.482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.73275375366211, + "rewards/margins": 13.729555130004883, + "rewards/rejected": -22.46230697631836, + "step": 16951 + }, + { + "epoch": 2.64, + "learning_rate": 1.714050521292598e-06, + "logits/chosen": -1.2262723445892334, + "logits/rejected": -2.6112825870513916, + "logps/chosen": -252.4748077392578, + "logps/rejected": -657.8941650390625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.846482276916504, + "rewards/margins": 13.125381469726562, + "rewards/rejected": -25.97186279296875, + "step": 16952 + }, + { + "epoch": 2.64, + "learning_rate": 1.7133170807614503e-06, + "logits/chosen": -1.9366105794906616, + "logits/rejected": -2.804269790649414, + "logps/chosen": -175.24026489257812, + "logps/rejected": -470.410400390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.239424705505371, + "rewards/margins": 10.500597953796387, + "rewards/rejected": -20.740022659301758, + "step": 16953 + }, + { + "epoch": 2.64, + "learning_rate": 1.7125836402303024e-06, + "logits/chosen": -2.3853139877319336, + "logits/rejected": -2.922825336456299, + "logps/chosen": -269.95208740234375, + "logps/rejected": -486.5926513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1471052169799805, + "rewards/margins": 12.232467651367188, + "rewards/rejected": -19.379573822021484, + "step": 16954 + }, + { + "epoch": 2.64, + "learning_rate": 1.7118501996991545e-06, + "logits/chosen": -2.7519280910491943, + "logits/rejected": -2.7105743885040283, + "logps/chosen": -100.68540954589844, + "logps/rejected": -432.55706787109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.29151725769043, + "rewards/margins": 13.0562744140625, + "rewards/rejected": -20.34779167175293, + "step": 16955 + }, + { + "epoch": 2.64, + "learning_rate": 1.7111167591680066e-06, + "logits/chosen": -2.703733205795288, + "logits/rejected": -2.726912498474121, + "logps/chosen": -122.23402404785156, + "logps/rejected": -300.786865234375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.127009391784668, + "rewards/margins": 8.32069206237793, + "rewards/rejected": -15.447701454162598, + "step": 16956 + }, + { + "epoch": 2.64, + "learning_rate": 1.7103833186368589e-06, + "logits/chosen": -2.2384414672851562, + "logits/rejected": -2.4604578018188477, + "logps/chosen": -401.0401611328125, + "logps/rejected": -422.975830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1677350997924805, + "rewards/margins": 14.075074195861816, + "rewards/rejected": -20.242809295654297, + "step": 16957 + }, + { + "epoch": 2.64, + "learning_rate": 1.709649878105711e-06, + "logits/chosen": -1.6643123626708984, + "logits/rejected": -2.5104033946990967, + "logps/chosen": -138.63931274414062, + "logps/rejected": -402.1867370605469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.150336265563965, + "rewards/margins": 10.863276481628418, + "rewards/rejected": -22.013612747192383, + "step": 16958 + }, + { + "epoch": 2.64, + "learning_rate": 1.708916437574563e-06, + "logits/chosen": -2.6791486740112305, + "logits/rejected": -2.098358392715454, + "logps/chosen": -583.9554443359375, + "logps/rejected": -369.94189453125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.029383659362793, + "rewards/margins": 7.847159385681152, + "rewards/rejected": -16.876543045043945, + "step": 16959 + }, + { + "epoch": 2.64, + "learning_rate": 1.7081829970434152e-06, + "logits/chosen": -2.431129217147827, + "logits/rejected": -2.8446733951568604, + "logps/chosen": -209.60987854003906, + "logps/rejected": -282.1238708496094, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.835624694824219, + "rewards/margins": 7.096856594085693, + "rewards/rejected": -16.93248176574707, + "step": 16960 + }, + { + "epoch": 2.64, + "learning_rate": 1.7074495565122672e-06, + "logits/chosen": -1.9730241298675537, + "logits/rejected": -2.792377233505249, + "logps/chosen": -160.7440185546875, + "logps/rejected": -424.42169189453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.366464614868164, + "rewards/margins": 9.670089721679688, + "rewards/rejected": -18.03655433654785, + "step": 16961 + }, + { + "epoch": 2.64, + "learning_rate": 1.7067161159811193e-06, + "logits/chosen": -2.046957015991211, + "logits/rejected": -2.726621389389038, + "logps/chosen": -513.1121826171875, + "logps/rejected": -712.7030029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.970235347747803, + "rewards/margins": 11.77787971496582, + "rewards/rejected": -19.74811553955078, + "step": 16962 + }, + { + "epoch": 2.64, + "learning_rate": 1.7059826754499714e-06, + "logits/chosen": -2.7986207008361816, + "logits/rejected": -2.9771149158477783, + "logps/chosen": -179.266357421875, + "logps/rejected": -481.69775390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.372300148010254, + "rewards/margins": 9.485274314880371, + "rewards/rejected": -19.857574462890625, + "step": 16963 + }, + { + "epoch": 2.64, + "learning_rate": 1.7052492349188235e-06, + "logits/chosen": -2.478778123855591, + "logits/rejected": -2.799027681350708, + "logps/chosen": -264.2792663574219, + "logps/rejected": -549.049560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.014382362365723, + "rewards/margins": 17.11570930480957, + "rewards/rejected": -23.13009262084961, + "step": 16964 + }, + { + "epoch": 2.64, + "learning_rate": 1.7045157943876756e-06, + "logits/chosen": -2.8356924057006836, + "logits/rejected": -2.624838352203369, + "logps/chosen": -360.66864013671875, + "logps/rejected": -418.26904296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.364018440246582, + "rewards/margins": 9.04942512512207, + "rewards/rejected": -15.413443565368652, + "step": 16965 + }, + { + "epoch": 2.64, + "learning_rate": 1.703782353856528e-06, + "logits/chosen": -1.9725464582443237, + "logits/rejected": -2.6168365478515625, + "logps/chosen": -339.1907958984375, + "logps/rejected": -641.396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.378093719482422, + "rewards/margins": 9.974834442138672, + "rewards/rejected": -20.352928161621094, + "step": 16966 + }, + { + "epoch": 2.64, + "learning_rate": 1.70304891332538e-06, + "logits/chosen": -2.514547109603882, + "logits/rejected": -1.8752703666687012, + "logps/chosen": -447.55963134765625, + "logps/rejected": -547.2777709960938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.836410522460938, + "rewards/margins": 11.456443786621094, + "rewards/rejected": -24.29285430908203, + "step": 16967 + }, + { + "epoch": 2.64, + "learning_rate": 1.702315472794232e-06, + "logits/chosen": -1.669788122177124, + "logits/rejected": -2.6018564701080322, + "logps/chosen": -223.2679443359375, + "logps/rejected": -658.26220703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.792945861816406, + "rewards/margins": 13.570758819580078, + "rewards/rejected": -28.363704681396484, + "step": 16968 + }, + { + "epoch": 2.64, + "learning_rate": 1.7015820322630842e-06, + "logits/chosen": -2.7110698223114014, + "logits/rejected": -2.7074108123779297, + "logps/chosen": -461.91656494140625, + "logps/rejected": -621.9527587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.005640983581543, + "rewards/margins": 13.145936965942383, + "rewards/rejected": -21.15157699584961, + "step": 16969 + }, + { + "epoch": 2.64, + "learning_rate": 1.7008485917319365e-06, + "logits/chosen": -1.3475216627120972, + "logits/rejected": -2.832420825958252, + "logps/chosen": -357.106689453125, + "logps/rejected": -586.6167602539062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.043014526367188, + "rewards/margins": 11.555680274963379, + "rewards/rejected": -23.59869384765625, + "step": 16970 + }, + { + "epoch": 2.64, + "learning_rate": 1.7001151512007886e-06, + "logits/chosen": -2.2956244945526123, + "logits/rejected": -2.795353889465332, + "logps/chosen": -118.36576843261719, + "logps/rejected": -326.54931640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.624106407165527, + "rewards/margins": 14.7225923538208, + "rewards/rejected": -20.346698760986328, + "step": 16971 + }, + { + "epoch": 2.64, + "learning_rate": 1.6993817106696404e-06, + "logits/chosen": -2.055102825164795, + "logits/rejected": -2.651998519897461, + "logps/chosen": -250.28651428222656, + "logps/rejected": -411.8203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.156332015991211, + "rewards/margins": 8.034025192260742, + "rewards/rejected": -20.190357208251953, + "step": 16972 + }, + { + "epoch": 2.64, + "learning_rate": 1.6986482701384925e-06, + "logits/chosen": -2.2862884998321533, + "logits/rejected": -2.374210834503174, + "logps/chosen": -213.93130493164062, + "logps/rejected": -377.3733215332031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.420347213745117, + "rewards/margins": 11.053085327148438, + "rewards/rejected": -18.473430633544922, + "step": 16973 + }, + { + "epoch": 2.64, + "learning_rate": 1.6979148296073448e-06, + "logits/chosen": -0.7001585364341736, + "logits/rejected": -1.5356782674789429, + "logps/chosen": -366.400634765625, + "logps/rejected": -638.161865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.916492462158203, + "rewards/margins": 16.376008987426758, + "rewards/rejected": -26.29250144958496, + "step": 16974 + }, + { + "epoch": 2.64, + "learning_rate": 1.697181389076197e-06, + "logits/chosen": -2.0532541275024414, + "logits/rejected": -2.6045608520507812, + "logps/chosen": -263.61480712890625, + "logps/rejected": -795.2809448242188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.904342651367188, + "rewards/margins": 17.673320770263672, + "rewards/rejected": -27.57766342163086, + "step": 16975 + }, + { + "epoch": 2.64, + "learning_rate": 1.696447948545049e-06, + "logits/chosen": -2.6251003742218018, + "logits/rejected": -1.4945595264434814, + "logps/chosen": -342.7462158203125, + "logps/rejected": -280.1831359863281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.973954200744629, + "rewards/margins": 10.240376472473145, + "rewards/rejected": -17.214330673217773, + "step": 16976 + }, + { + "epoch": 2.64, + "learning_rate": 1.6957145080139011e-06, + "logits/chosen": -2.149972677230835, + "logits/rejected": -2.559720277786255, + "logps/chosen": -454.30474853515625, + "logps/rejected": -582.0252685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.280340194702148, + "rewards/margins": 14.259220123291016, + "rewards/rejected": -23.539560317993164, + "step": 16977 + }, + { + "epoch": 2.64, + "learning_rate": 1.6949810674827532e-06, + "logits/chosen": -2.596449851989746, + "logits/rejected": -1.5460067987442017, + "logps/chosen": -322.55560302734375, + "logps/rejected": -323.9603576660156, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.971536636352539, + "rewards/margins": 6.053488731384277, + "rewards/rejected": -16.025026321411133, + "step": 16978 + }, + { + "epoch": 2.64, + "learning_rate": 1.6942476269516055e-06, + "logits/chosen": -1.0263574123382568, + "logits/rejected": -2.4750475883483887, + "logps/chosen": -247.7043914794922, + "logps/rejected": -500.548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.688566207885742, + "rewards/margins": 13.350598335266113, + "rewards/rejected": -26.039165496826172, + "step": 16979 + }, + { + "epoch": 2.64, + "learning_rate": 1.6935141864204576e-06, + "logits/chosen": -1.4136302471160889, + "logits/rejected": -2.6436541080474854, + "logps/chosen": -210.28610229492188, + "logps/rejected": -553.8310546875, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.579689979553223, + "rewards/margins": 11.97598648071289, + "rewards/rejected": -22.555675506591797, + "step": 16980 + }, + { + "epoch": 2.64, + "learning_rate": 1.6927807458893097e-06, + "logits/chosen": -2.666013479232788, + "logits/rejected": -1.8639322519302368, + "logps/chosen": -713.891845703125, + "logps/rejected": -553.6156616210938, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.978864669799805, + "rewards/margins": 10.099985122680664, + "rewards/rejected": -20.07884979248047, + "step": 16981 + }, + { + "epoch": 2.64, + "learning_rate": 1.6920473053581616e-06, + "logits/chosen": -2.8620059490203857, + "logits/rejected": -2.9335098266601562, + "logps/chosen": -155.63409423828125, + "logps/rejected": -268.6226806640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.495578765869141, + "rewards/margins": 11.668858528137207, + "rewards/rejected": -19.16443634033203, + "step": 16982 + }, + { + "epoch": 2.64, + "learning_rate": 1.6913138648270139e-06, + "logits/chosen": -1.0605478286743164, + "logits/rejected": -2.2259371280670166, + "logps/chosen": -134.79080200195312, + "logps/rejected": -361.4815368652344, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.284120559692383, + "rewards/margins": 10.43943977355957, + "rewards/rejected": -20.723560333251953, + "step": 16983 + }, + { + "epoch": 2.64, + "learning_rate": 1.690580424295866e-06, + "logits/chosen": -2.340101718902588, + "logits/rejected": -2.7949743270874023, + "logps/chosen": -235.1236114501953, + "logps/rejected": -439.32415771484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.604840278625488, + "rewards/margins": 9.589813232421875, + "rewards/rejected": -19.194652557373047, + "step": 16984 + }, + { + "epoch": 2.64, + "learning_rate": 1.689846983764718e-06, + "logits/chosen": -2.3836758136749268, + "logits/rejected": -2.7645812034606934, + "logps/chosen": -187.9077606201172, + "logps/rejected": -573.417724609375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.964994430541992, + "rewards/margins": 10.268089294433594, + "rewards/rejected": -19.233083724975586, + "step": 16985 + }, + { + "epoch": 2.64, + "learning_rate": 1.6891135432335701e-06, + "logits/chosen": -2.8739871978759766, + "logits/rejected": -1.6283869743347168, + "logps/chosen": -444.2208251953125, + "logps/rejected": -569.8076782226562, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.854400634765625, + "rewards/margins": 6.587740421295166, + "rewards/rejected": -16.442140579223633, + "step": 16986 + }, + { + "epoch": 2.64, + "learning_rate": 1.6883801027024224e-06, + "logits/chosen": -1.535284161567688, + "logits/rejected": -2.473048448562622, + "logps/chosen": -220.26144409179688, + "logps/rejected": -250.8949737548828, + "loss": 0.1286, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.50716781616211, + "rewards/margins": 4.80015754699707, + "rewards/rejected": -17.30732536315918, + "step": 16987 + }, + { + "epoch": 2.64, + "learning_rate": 1.6876466621712745e-06, + "logits/chosen": -2.8219735622406006, + "logits/rejected": -2.3101260662078857, + "logps/chosen": -422.0721435546875, + "logps/rejected": -616.0137329101562, + "loss": 0.4682, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.990577697753906, + "rewards/margins": 5.710381984710693, + "rewards/rejected": -19.700960159301758, + "step": 16988 + }, + { + "epoch": 2.64, + "learning_rate": 1.6869132216401266e-06, + "logits/chosen": -2.3174476623535156, + "logits/rejected": -2.662020683288574, + "logps/chosen": -409.2236633300781, + "logps/rejected": -422.23748779296875, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.018978118896484, + "rewards/margins": 7.5212531089782715, + "rewards/rejected": -19.54022979736328, + "step": 16989 + }, + { + "epoch": 2.64, + "learning_rate": 1.6861797811089787e-06, + "logits/chosen": -1.5481152534484863, + "logits/rejected": -2.2662155628204346, + "logps/chosen": -124.48015594482422, + "logps/rejected": -349.2674255371094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.06940221786499, + "rewards/margins": 14.465450286865234, + "rewards/rejected": -19.534852981567383, + "step": 16990 + }, + { + "epoch": 2.64, + "learning_rate": 1.685446340577831e-06, + "logits/chosen": -2.688735246658325, + "logits/rejected": -2.1984336376190186, + "logps/chosen": -437.2770080566406, + "logps/rejected": -345.27496337890625, + "loss": 0.1608, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.301277160644531, + "rewards/margins": 3.1207022666931152, + "rewards/rejected": -15.421979904174805, + "step": 16991 + }, + { + "epoch": 2.64, + "learning_rate": 1.6847129000466829e-06, + "logits/chosen": -1.7783151865005493, + "logits/rejected": -2.6788814067840576, + "logps/chosen": -137.26937866210938, + "logps/rejected": -379.176025390625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.024529457092285, + "rewards/margins": 10.272809982299805, + "rewards/rejected": -17.297340393066406, + "step": 16992 + }, + { + "epoch": 2.64, + "learning_rate": 1.683979459515535e-06, + "logits/chosen": -2.736921548843384, + "logits/rejected": -2.7423019409179688, + "logps/chosen": -155.33071899414062, + "logps/rejected": -267.1758117675781, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.056658744812012, + "rewards/margins": 9.10825252532959, + "rewards/rejected": -17.1649112701416, + "step": 16993 + }, + { + "epoch": 2.64, + "learning_rate": 1.683246018984387e-06, + "logits/chosen": -1.4406225681304932, + "logits/rejected": -2.6288721561431885, + "logps/chosen": -225.6402587890625, + "logps/rejected": -694.9532470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.119444847106934, + "rewards/margins": 13.180723190307617, + "rewards/rejected": -23.300167083740234, + "step": 16994 + }, + { + "epoch": 2.64, + "learning_rate": 1.6825125784532394e-06, + "logits/chosen": -1.434309959411621, + "logits/rejected": -2.4246273040771484, + "logps/chosen": -227.72291564941406, + "logps/rejected": -536.2992553710938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.498653411865234, + "rewards/margins": 13.630592346191406, + "rewards/rejected": -25.12924575805664, + "step": 16995 + }, + { + "epoch": 2.64, + "learning_rate": 1.6817791379220915e-06, + "logits/chosen": -2.804192066192627, + "logits/rejected": -2.623554229736328, + "logps/chosen": -189.63668823242188, + "logps/rejected": -470.8909606933594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.660369873046875, + "rewards/margins": 12.734613418579102, + "rewards/rejected": -20.394983291625977, + "step": 16996 + }, + { + "epoch": 2.64, + "learning_rate": 1.6810456973909435e-06, + "logits/chosen": -2.6463019847869873, + "logits/rejected": -1.7628809213638306, + "logps/chosen": -253.16741943359375, + "logps/rejected": -254.57325744628906, + "loss": 0.3965, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.803413391113281, + "rewards/margins": 5.837089538574219, + "rewards/rejected": -16.6405029296875, + "step": 16997 + }, + { + "epoch": 2.64, + "learning_rate": 1.6803122568597956e-06, + "logits/chosen": -2.7715871334075928, + "logits/rejected": -2.9594478607177734, + "logps/chosen": -134.844970703125, + "logps/rejected": -367.9903564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.103348731994629, + "rewards/margins": 10.558809280395508, + "rewards/rejected": -20.662158966064453, + "step": 16998 + }, + { + "epoch": 2.64, + "learning_rate": 1.6795788163286477e-06, + "logits/chosen": -1.9463154077529907, + "logits/rejected": -2.472360610961914, + "logps/chosen": -269.2990417480469, + "logps/rejected": -476.3106384277344, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.092185020446777, + "rewards/margins": 8.752189636230469, + "rewards/rejected": -20.844375610351562, + "step": 16999 + }, + { + "epoch": 2.64, + "learning_rate": 1.6788453757975e-06, + "logits/chosen": -3.0363731384277344, + "logits/rejected": -2.775684356689453, + "logps/chosen": -156.57838439941406, + "logps/rejected": -327.02972412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.770757675170898, + "rewards/margins": 10.24968147277832, + "rewards/rejected": -19.02043914794922, + "step": 17000 + }, + { + "epoch": 2.64, + "learning_rate": 1.678111935266352e-06, + "logits/chosen": -2.0500333309173584, + "logits/rejected": -2.7689390182495117, + "logps/chosen": -656.763427734375, + "logps/rejected": -460.08673095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.825713157653809, + "rewards/margins": 13.079992294311523, + "rewards/rejected": -19.905704498291016, + "step": 17001 + }, + { + "epoch": 2.64, + "learning_rate": 1.677378494735204e-06, + "logits/chosen": -2.7647244930267334, + "logits/rejected": -2.045560598373413, + "logps/chosen": -294.54229736328125, + "logps/rejected": -373.8223571777344, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.612775802612305, + "rewards/margins": 7.120058059692383, + "rewards/rejected": -18.732833862304688, + "step": 17002 + }, + { + "epoch": 2.64, + "learning_rate": 1.676645054204056e-06, + "logits/chosen": -2.7902462482452393, + "logits/rejected": -2.1460013389587402, + "logps/chosen": -184.39471435546875, + "logps/rejected": -221.72557067871094, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.260446548461914, + "rewards/margins": 6.951613903045654, + "rewards/rejected": -18.212059020996094, + "step": 17003 + }, + { + "epoch": 2.64, + "learning_rate": 1.6759116136729084e-06, + "logits/chosen": -2.580895185470581, + "logits/rejected": -2.593226194381714, + "logps/chosen": -554.16455078125, + "logps/rejected": -698.583251953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.333402633666992, + "rewards/margins": 10.039190292358398, + "rewards/rejected": -22.37259292602539, + "step": 17004 + }, + { + "epoch": 2.64, + "learning_rate": 1.6751781731417605e-06, + "logits/chosen": -2.666358709335327, + "logits/rejected": -2.41487455368042, + "logps/chosen": -744.5330810546875, + "logps/rejected": -765.053955078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.345114707946777, + "rewards/margins": 9.139593124389648, + "rewards/rejected": -22.48470687866211, + "step": 17005 + }, + { + "epoch": 2.64, + "learning_rate": 1.6744447326106126e-06, + "logits/chosen": -1.575546145439148, + "logits/rejected": -2.155097723007202, + "logps/chosen": -291.7803955078125, + "logps/rejected": -643.9707641601562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.17611312866211, + "rewards/margins": 17.968719482421875, + "rewards/rejected": -28.144832611083984, + "step": 17006 + }, + { + "epoch": 2.64, + "learning_rate": 1.6737112920794647e-06, + "logits/chosen": -2.4648427963256836, + "logits/rejected": -2.1447372436523438, + "logps/chosen": -424.3370056152344, + "logps/rejected": -437.17572021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.257528305053711, + "rewards/margins": 11.75558090209961, + "rewards/rejected": -20.013111114501953, + "step": 17007 + }, + { + "epoch": 2.65, + "learning_rate": 1.672977851548317e-06, + "logits/chosen": -2.359198570251465, + "logits/rejected": -2.6688756942749023, + "logps/chosen": -249.50408935546875, + "logps/rejected": -474.99627685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.88461685180664, + "rewards/margins": 10.245162963867188, + "rewards/rejected": -19.129779815673828, + "step": 17008 + }, + { + "epoch": 2.65, + "learning_rate": 1.672244411017169e-06, + "logits/chosen": -1.2335619926452637, + "logits/rejected": -2.5563573837280273, + "logps/chosen": -203.6785888671875, + "logps/rejected": -452.251708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.814735412597656, + "rewards/margins": 10.23332405090332, + "rewards/rejected": -21.048059463500977, + "step": 17009 + }, + { + "epoch": 2.65, + "learning_rate": 1.6715109704860211e-06, + "logits/chosen": -2.8225111961364746, + "logits/rejected": -3.0302507877349854, + "logps/chosen": -609.457763671875, + "logps/rejected": -408.47381591796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.263232707977295, + "rewards/margins": 9.69687271118164, + "rewards/rejected": -15.960104942321777, + "step": 17010 + }, + { + "epoch": 2.65, + "learning_rate": 1.670777529954873e-06, + "logits/chosen": -2.977914333343506, + "logits/rejected": -2.3029086589813232, + "logps/chosen": -210.7603302001953, + "logps/rejected": -200.027587890625, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.886447906494141, + "rewards/margins": 4.977481842041016, + "rewards/rejected": -12.863929748535156, + "step": 17011 + }, + { + "epoch": 2.65, + "learning_rate": 1.6700440894237253e-06, + "logits/chosen": -2.6580302715301514, + "logits/rejected": -2.7342326641082764, + "logps/chosen": -339.868408203125, + "logps/rejected": -573.2969970703125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.145276069641113, + "rewards/margins": 8.443581581115723, + "rewards/rejected": -15.588857650756836, + "step": 17012 + }, + { + "epoch": 2.65, + "learning_rate": 1.6693106488925774e-06, + "logits/chosen": -2.700460433959961, + "logits/rejected": -2.4179129600524902, + "logps/chosen": -193.8791961669922, + "logps/rejected": -299.6539001464844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.710562705993652, + "rewards/margins": 11.165472984313965, + "rewards/rejected": -19.876035690307617, + "step": 17013 + }, + { + "epoch": 2.65, + "learning_rate": 1.6685772083614295e-06, + "logits/chosen": -3.0466902256011963, + "logits/rejected": -2.227562189102173, + "logps/chosen": -201.17477416992188, + "logps/rejected": -127.67127990722656, + "loss": 0.8519, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.099741458892822, + "rewards/margins": 3.2398598194122314, + "rewards/rejected": -10.339601516723633, + "step": 17014 + }, + { + "epoch": 2.65, + "learning_rate": 1.6678437678302816e-06, + "logits/chosen": -2.422300338745117, + "logits/rejected": -2.676809787750244, + "logps/chosen": -626.3840942382812, + "logps/rejected": -1023.9188842773438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.775213241577148, + "rewards/margins": 9.846181869506836, + "rewards/rejected": -19.621395111083984, + "step": 17015 + }, + { + "epoch": 2.65, + "learning_rate": 1.6671103272991339e-06, + "logits/chosen": -1.9326581954956055, + "logits/rejected": -2.4337453842163086, + "logps/chosen": -224.15652465820312, + "logps/rejected": -448.5564880371094, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.164247512817383, + "rewards/margins": 10.624485969543457, + "rewards/rejected": -20.788734436035156, + "step": 17016 + }, + { + "epoch": 2.65, + "learning_rate": 1.666376886767986e-06, + "logits/chosen": -2.969937562942505, + "logits/rejected": -2.8565666675567627, + "logps/chosen": -465.8402404785156, + "logps/rejected": -318.1722717285156, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0246734619140625, + "rewards/margins": 12.585225105285645, + "rewards/rejected": -16.609899520874023, + "step": 17017 + }, + { + "epoch": 2.65, + "learning_rate": 1.665643446236838e-06, + "logits/chosen": -2.3251726627349854, + "logits/rejected": -1.6629576683044434, + "logps/chosen": -236.90689086914062, + "logps/rejected": -283.29150390625, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.232617378234863, + "rewards/margins": 7.024707317352295, + "rewards/rejected": -14.25732421875, + "step": 17018 + }, + { + "epoch": 2.65, + "learning_rate": 1.6649100057056902e-06, + "logits/chosen": -2.6721603870391846, + "logits/rejected": -2.4339218139648438, + "logps/chosen": -230.37026977539062, + "logps/rejected": -300.8992919921875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.636839866638184, + "rewards/margins": 8.302180290222168, + "rewards/rejected": -16.93902015686035, + "step": 17019 + }, + { + "epoch": 2.65, + "learning_rate": 1.6641765651745423e-06, + "logits/chosen": -2.6090331077575684, + "logits/rejected": -2.1878061294555664, + "logps/chosen": -452.1407470703125, + "logps/rejected": -339.74261474609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.538599014282227, + "rewards/margins": 10.302927017211914, + "rewards/rejected": -19.84152603149414, + "step": 17020 + }, + { + "epoch": 2.65, + "learning_rate": 1.6634431246433943e-06, + "logits/chosen": -1.9277762174606323, + "logits/rejected": -2.4090099334716797, + "logps/chosen": -137.53823852539062, + "logps/rejected": -265.51226806640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.042688369750977, + "rewards/margins": 9.025421142578125, + "rewards/rejected": -18.0681095123291, + "step": 17021 + }, + { + "epoch": 2.65, + "learning_rate": 1.6627096841122464e-06, + "logits/chosen": -0.624428391456604, + "logits/rejected": -2.031215190887451, + "logps/chosen": -306.269775390625, + "logps/rejected": -606.364013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.226910591125488, + "rewards/margins": 25.477392196655273, + "rewards/rejected": -32.70430374145508, + "step": 17022 + }, + { + "epoch": 2.65, + "learning_rate": 1.6619762435810985e-06, + "logits/chosen": -2.8357906341552734, + "logits/rejected": -3.067594528198242, + "logps/chosen": -93.60188293457031, + "logps/rejected": -295.248046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.384491920471191, + "rewards/margins": 9.420830726623535, + "rewards/rejected": -17.805322647094727, + "step": 17023 + }, + { + "epoch": 2.65, + "learning_rate": 1.6612428030499506e-06, + "logits/chosen": -2.3284640312194824, + "logits/rejected": -2.6551754474639893, + "logps/chosen": -461.2834167480469, + "logps/rejected": -600.002197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.104374885559082, + "rewards/margins": 11.444608688354492, + "rewards/rejected": -19.54898452758789, + "step": 17024 + }, + { + "epoch": 2.65, + "learning_rate": 1.660509362518803e-06, + "logits/chosen": -2.076119899749756, + "logits/rejected": -2.5039467811584473, + "logps/chosen": -401.06121826171875, + "logps/rejected": -460.2016906738281, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.193181991577148, + "rewards/margins": 5.734691143035889, + "rewards/rejected": -16.927873611450195, + "step": 17025 + }, + { + "epoch": 2.65, + "learning_rate": 1.659775921987655e-06, + "logits/chosen": -1.8953348398208618, + "logits/rejected": -2.1101126670837402, + "logps/chosen": -165.46688842773438, + "logps/rejected": -311.6075134277344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.402944564819336, + "rewards/margins": 8.088254928588867, + "rewards/rejected": -20.491199493408203, + "step": 17026 + }, + { + "epoch": 2.65, + "learning_rate": 1.659042481456507e-06, + "logits/chosen": -2.386197566986084, + "logits/rejected": -2.624535083770752, + "logps/chosen": -433.20806884765625, + "logps/rejected": -520.0, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.024795532226562, + "rewards/margins": 13.27450942993164, + "rewards/rejected": -24.299304962158203, + "step": 17027 + }, + { + "epoch": 2.65, + "learning_rate": 1.6583090409253592e-06, + "logits/chosen": -2.2332849502563477, + "logits/rejected": -2.6873857975006104, + "logps/chosen": -175.995849609375, + "logps/rejected": -312.4471130371094, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.270490646362305, + "rewards/margins": 7.481032371520996, + "rewards/rejected": -18.751523971557617, + "step": 17028 + }, + { + "epoch": 2.65, + "learning_rate": 1.6575756003942115e-06, + "logits/chosen": -2.4683151245117188, + "logits/rejected": -2.945357322692871, + "logps/chosen": -150.66978454589844, + "logps/rejected": -397.3013916015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.170174598693848, + "rewards/margins": 9.848881721496582, + "rewards/rejected": -19.01905632019043, + "step": 17029 + }, + { + "epoch": 2.65, + "learning_rate": 1.6568421598630636e-06, + "logits/chosen": -2.8773586750030518, + "logits/rejected": -1.7811658382415771, + "logps/chosen": -609.7408447265625, + "logps/rejected": -501.4735412597656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.004963874816895, + "rewards/margins": 10.641172409057617, + "rewards/rejected": -19.646137237548828, + "step": 17030 + }, + { + "epoch": 2.65, + "learning_rate": 1.6561087193319155e-06, + "logits/chosen": -2.5362942218780518, + "logits/rejected": -2.855597496032715, + "logps/chosen": -107.23040771484375, + "logps/rejected": -218.41526794433594, + "loss": 0.2508, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.611609935760498, + "rewards/margins": 4.955114364624023, + "rewards/rejected": -12.56672477722168, + "step": 17031 + }, + { + "epoch": 2.65, + "learning_rate": 1.6553752788007675e-06, + "logits/chosen": -0.6783000230789185, + "logits/rejected": -1.969366192817688, + "logps/chosen": -142.4952850341797, + "logps/rejected": -524.28076171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.799798011779785, + "rewards/margins": 13.116292953491211, + "rewards/rejected": -22.91609001159668, + "step": 17032 + }, + { + "epoch": 2.65, + "learning_rate": 1.6546418382696198e-06, + "logits/chosen": -2.787766218185425, + "logits/rejected": -2.952603816986084, + "logps/chosen": -166.3782501220703, + "logps/rejected": -329.6510009765625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.458280563354492, + "rewards/margins": 7.915676593780518, + "rewards/rejected": -19.37395668029785, + "step": 17033 + }, + { + "epoch": 2.65, + "learning_rate": 1.653908397738472e-06, + "logits/chosen": -1.9566383361816406, + "logits/rejected": -2.8049144744873047, + "logps/chosen": -113.5926284790039, + "logps/rejected": -288.870361328125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.603157997131348, + "rewards/margins": 7.724912166595459, + "rewards/rejected": -14.328069686889648, + "step": 17034 + }, + { + "epoch": 2.65, + "learning_rate": 1.653174957207324e-06, + "logits/chosen": -2.651611328125, + "logits/rejected": -2.5020480155944824, + "logps/chosen": -190.63067626953125, + "logps/rejected": -481.80950927734375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.940088272094727, + "rewards/margins": 9.844977378845215, + "rewards/rejected": -22.785064697265625, + "step": 17035 + }, + { + "epoch": 2.65, + "learning_rate": 1.6524415166761761e-06, + "logits/chosen": -2.383286476135254, + "logits/rejected": -2.8756823539733887, + "logps/chosen": -197.88865661621094, + "logps/rejected": -408.462890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.042093276977539, + "rewards/margins": 9.974420547485352, + "rewards/rejected": -20.01651382446289, + "step": 17036 + }, + { + "epoch": 2.65, + "learning_rate": 1.6517080761450284e-06, + "logits/chosen": -2.1249125003814697, + "logits/rejected": -2.5540826320648193, + "logps/chosen": -212.4583740234375, + "logps/rejected": -267.5106506347656, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.188015460968018, + "rewards/margins": 8.790688514709473, + "rewards/rejected": -15.978704452514648, + "step": 17037 + }, + { + "epoch": 2.65, + "learning_rate": 1.6509746356138805e-06, + "logits/chosen": -1.3672524690628052, + "logits/rejected": -2.145308017730713, + "logps/chosen": -452.52947998046875, + "logps/rejected": -694.0021362304688, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.468542098999023, + "rewards/margins": 9.447700500488281, + "rewards/rejected": -18.916242599487305, + "step": 17038 + }, + { + "epoch": 2.65, + "learning_rate": 1.6502411950827326e-06, + "logits/chosen": -1.8113394975662231, + "logits/rejected": -2.5522096157073975, + "logps/chosen": -240.20436096191406, + "logps/rejected": -435.5440673828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.498672485351562, + "rewards/margins": 10.68539047241211, + "rewards/rejected": -19.184062957763672, + "step": 17039 + }, + { + "epoch": 2.65, + "learning_rate": 1.6495077545515847e-06, + "logits/chosen": -2.1962385177612305, + "logits/rejected": -2.7156879901885986, + "logps/chosen": -325.4729919433594, + "logps/rejected": -575.6522216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.031759262084961, + "rewards/margins": 13.861207962036133, + "rewards/rejected": -22.892967224121094, + "step": 17040 + }, + { + "epoch": 2.65, + "learning_rate": 1.6487743140204368e-06, + "logits/chosen": -2.2681524753570557, + "logits/rejected": -2.6287407875061035, + "logps/chosen": -354.1441345214844, + "logps/rejected": -347.2994384765625, + "loss": 0.046, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.426490783691406, + "rewards/margins": 5.918944835662842, + "rewards/rejected": -15.345436096191406, + "step": 17041 + }, + { + "epoch": 2.65, + "learning_rate": 1.6480408734892889e-06, + "logits/chosen": -2.2093887329101562, + "logits/rejected": -2.4558603763580322, + "logps/chosen": -137.0946502685547, + "logps/rejected": -385.7359619140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2704620361328125, + "rewards/margins": 11.410341262817383, + "rewards/rejected": -18.680803298950195, + "step": 17042 + }, + { + "epoch": 2.65, + "learning_rate": 1.647307432958141e-06, + "logits/chosen": -2.7897815704345703, + "logits/rejected": -2.856433153152466, + "logps/chosen": -293.34173583984375, + "logps/rejected": -457.3638916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.04987907409668, + "rewards/margins": 13.472156524658203, + "rewards/rejected": -20.522035598754883, + "step": 17043 + }, + { + "epoch": 2.65, + "learning_rate": 1.646573992426993e-06, + "logits/chosen": -1.1691769361495972, + "logits/rejected": -2.4778707027435303, + "logps/chosen": -154.8468780517578, + "logps/rejected": -573.8018798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.801958084106445, + "rewards/margins": 17.039356231689453, + "rewards/rejected": -22.841312408447266, + "step": 17044 + }, + { + "epoch": 2.65, + "learning_rate": 1.6458405518958451e-06, + "logits/chosen": -2.6263856887817383, + "logits/rejected": -3.119662284851074, + "logps/chosen": -120.60083770751953, + "logps/rejected": -330.88330078125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.318885803222656, + "rewards/margins": 7.148186683654785, + "rewards/rejected": -15.467073440551758, + "step": 17045 + }, + { + "epoch": 2.65, + "learning_rate": 1.6451071113646974e-06, + "logits/chosen": -1.225499153137207, + "logits/rejected": -2.673827648162842, + "logps/chosen": -262.66998291015625, + "logps/rejected": -506.64654541015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.369000434875488, + "rewards/margins": 9.301498413085938, + "rewards/rejected": -22.67049789428711, + "step": 17046 + }, + { + "epoch": 2.65, + "learning_rate": 1.6443736708335495e-06, + "logits/chosen": -2.2810871601104736, + "logits/rejected": -2.8506734371185303, + "logps/chosen": -164.81060791015625, + "logps/rejected": -324.02734375, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.731221199035645, + "rewards/margins": 4.215928077697754, + "rewards/rejected": -13.947149276733398, + "step": 17047 + }, + { + "epoch": 2.65, + "learning_rate": 1.6436402303024016e-06, + "logits/chosen": -2.2534735202789307, + "logits/rejected": -2.62607741355896, + "logps/chosen": -203.48666381835938, + "logps/rejected": -426.1828918457031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.266904830932617, + "rewards/margins": 10.105356216430664, + "rewards/rejected": -18.37226104736328, + "step": 17048 + }, + { + "epoch": 2.65, + "learning_rate": 1.6429067897712537e-06, + "logits/chosen": -2.4135892391204834, + "logits/rejected": -2.3486111164093018, + "logps/chosen": -223.05331420898438, + "logps/rejected": -266.5234375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.174446105957031, + "rewards/margins": 7.193709373474121, + "rewards/rejected": -18.36815643310547, + "step": 17049 + }, + { + "epoch": 2.65, + "learning_rate": 1.642173349240106e-06, + "logits/chosen": -2.7803587913513184, + "logits/rejected": -2.2035515308380127, + "logps/chosen": -324.5771789550781, + "logps/rejected": -574.4251708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.931209564208984, + "rewards/margins": 10.963323593139648, + "rewards/rejected": -19.894533157348633, + "step": 17050 + }, + { + "epoch": 2.65, + "learning_rate": 1.6414399087089579e-06, + "logits/chosen": -2.365675926208496, + "logits/rejected": -2.1909098625183105, + "logps/chosen": -283.8722839355469, + "logps/rejected": -337.275634765625, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.193096160888672, + "rewards/margins": 5.054569244384766, + "rewards/rejected": -15.247665405273438, + "step": 17051 + }, + { + "epoch": 2.65, + "learning_rate": 1.64070646817781e-06, + "logits/chosen": -2.48325514793396, + "logits/rejected": -2.861853837966919, + "logps/chosen": -182.9556884765625, + "logps/rejected": -238.36892700195312, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.610221862792969, + "rewards/margins": 6.851977825164795, + "rewards/rejected": -16.462200164794922, + "step": 17052 + }, + { + "epoch": 2.65, + "learning_rate": 1.639973027646662e-06, + "logits/chosen": -2.7689576148986816, + "logits/rejected": -2.3465030193328857, + "logps/chosen": -421.35784912109375, + "logps/rejected": -629.16015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.148798942565918, + "rewards/margins": 15.244526863098145, + "rewards/rejected": -23.393325805664062, + "step": 17053 + }, + { + "epoch": 2.65, + "learning_rate": 1.6392395871155144e-06, + "logits/chosen": -2.666550636291504, + "logits/rejected": -2.7918429374694824, + "logps/chosen": -366.4082946777344, + "logps/rejected": -358.84564208984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.655453681945801, + "rewards/margins": 9.602401733398438, + "rewards/rejected": -17.257856369018555, + "step": 17054 + }, + { + "epoch": 2.65, + "learning_rate": 1.6385061465843665e-06, + "logits/chosen": -1.7159558534622192, + "logits/rejected": -2.805173635482788, + "logps/chosen": -170.20399475097656, + "logps/rejected": -473.5343017578125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.807112693786621, + "rewards/margins": 10.350194931030273, + "rewards/rejected": -23.157306671142578, + "step": 17055 + }, + { + "epoch": 2.65, + "learning_rate": 1.6377727060532186e-06, + "logits/chosen": -1.771554946899414, + "logits/rejected": -2.7211661338806152, + "logps/chosen": -86.96394348144531, + "logps/rejected": -372.88006591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.189102649688721, + "rewards/margins": 12.956502914428711, + "rewards/rejected": -20.145606994628906, + "step": 17056 + }, + { + "epoch": 2.65, + "learning_rate": 1.6370392655220706e-06, + "logits/chosen": -1.1368526220321655, + "logits/rejected": -2.2396373748779297, + "logps/chosen": -365.5171813964844, + "logps/rejected": -775.1748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.904216766357422, + "rewards/margins": 12.48563003540039, + "rewards/rejected": -21.389846801757812, + "step": 17057 + }, + { + "epoch": 2.65, + "learning_rate": 1.636305824990923e-06, + "logits/chosen": -1.5693638324737549, + "logits/rejected": -2.7414283752441406, + "logps/chosen": -225.83633422851562, + "logps/rejected": -364.421630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.155949592590332, + "rewards/margins": 12.028242111206055, + "rewards/rejected": -19.184192657470703, + "step": 17058 + }, + { + "epoch": 2.65, + "learning_rate": 1.635572384459775e-06, + "logits/chosen": -2.8113625049591064, + "logits/rejected": -1.5705773830413818, + "logps/chosen": -290.91363525390625, + "logps/rejected": -297.5090026855469, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.370576858520508, + "rewards/margins": 9.029094696044922, + "rewards/rejected": -18.39967155456543, + "step": 17059 + }, + { + "epoch": 2.65, + "learning_rate": 1.634838943928627e-06, + "logits/chosen": -1.6362652778625488, + "logits/rejected": -2.4846513271331787, + "logps/chosen": -147.69869995117188, + "logps/rejected": -368.064697265625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.01986312866211, + "rewards/margins": 11.571945190429688, + "rewards/rejected": -21.591808319091797, + "step": 17060 + }, + { + "epoch": 2.65, + "learning_rate": 1.634105503397479e-06, + "logits/chosen": -2.681081533432007, + "logits/rejected": -2.8688385486602783, + "logps/chosen": -202.9084014892578, + "logps/rejected": -398.3232421875, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.172822952270508, + "rewards/margins": 4.628767967224121, + "rewards/rejected": -13.801591873168945, + "step": 17061 + }, + { + "epoch": 2.65, + "learning_rate": 1.6333720628663313e-06, + "logits/chosen": -1.8530713319778442, + "logits/rejected": -1.8934268951416016, + "logps/chosen": -622.4203491210938, + "logps/rejected": -580.2178955078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.199522018432617, + "rewards/margins": 11.672042846679688, + "rewards/rejected": -23.871564865112305, + "step": 17062 + }, + { + "epoch": 2.65, + "learning_rate": 1.6326386223351834e-06, + "logits/chosen": -2.707700729370117, + "logits/rejected": -3.0146262645721436, + "logps/chosen": -312.01519775390625, + "logps/rejected": -423.88104248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.637277603149414, + "rewards/margins": 9.763969421386719, + "rewards/rejected": -16.401247024536133, + "step": 17063 + }, + { + "epoch": 2.65, + "learning_rate": 1.6319051818040355e-06, + "logits/chosen": -2.683417797088623, + "logits/rejected": -1.5550155639648438, + "logps/chosen": -614.5946044921875, + "logps/rejected": -508.8179016113281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.485852241516113, + "rewards/margins": 14.193153381347656, + "rewards/rejected": -26.679004669189453, + "step": 17064 + }, + { + "epoch": 2.65, + "learning_rate": 1.6311717412728876e-06, + "logits/chosen": -1.9284570217132568, + "logits/rejected": -2.0564727783203125, + "logps/chosen": -352.4642639160156, + "logps/rejected": -714.861083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.947086334228516, + "rewards/margins": 19.104890823364258, + "rewards/rejected": -30.051979064941406, + "step": 17065 + }, + { + "epoch": 2.65, + "learning_rate": 1.6304383007417397e-06, + "logits/chosen": -2.4832828044891357, + "logits/rejected": -2.6384963989257812, + "logps/chosen": -192.06015014648438, + "logps/rejected": -322.74639892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.639170169830322, + "rewards/margins": 10.946983337402344, + "rewards/rejected": -17.58615493774414, + "step": 17066 + }, + { + "epoch": 2.65, + "learning_rate": 1.629704860210592e-06, + "logits/chosen": -2.026031970977783, + "logits/rejected": -2.746249198913574, + "logps/chosen": -873.38427734375, + "logps/rejected": -710.0084838867188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.814640045166016, + "rewards/margins": 11.632650375366211, + "rewards/rejected": -21.447288513183594, + "step": 17067 + }, + { + "epoch": 2.65, + "learning_rate": 1.628971419679444e-06, + "logits/chosen": -1.3952432870864868, + "logits/rejected": -2.74379301071167, + "logps/chosen": -351.77557373046875, + "logps/rejected": -521.2252807617188, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.672922134399414, + "rewards/margins": 10.210552215576172, + "rewards/rejected": -19.883472442626953, + "step": 17068 + }, + { + "epoch": 2.65, + "learning_rate": 1.6282379791482961e-06, + "logits/chosen": -2.5383477210998535, + "logits/rejected": -2.8502371311187744, + "logps/chosen": -180.9256591796875, + "logps/rejected": -410.0957336425781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.997763633728027, + "rewards/margins": 12.008950233459473, + "rewards/rejected": -19.0067138671875, + "step": 17069 + }, + { + "epoch": 2.65, + "learning_rate": 1.627504538617148e-06, + "logits/chosen": -1.889925479888916, + "logits/rejected": -2.6662790775299072, + "logps/chosen": -143.0653839111328, + "logps/rejected": -301.46917724609375, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.05435562133789, + "rewards/margins": 5.6864752769470215, + "rewards/rejected": -18.74083137512207, + "step": 17070 + }, + { + "epoch": 2.65, + "learning_rate": 1.6267710980860003e-06, + "logits/chosen": -1.3579298257827759, + "logits/rejected": -2.6189441680908203, + "logps/chosen": -156.89772033691406, + "logps/rejected": -481.1821594238281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.849325180053711, + "rewards/margins": 9.380586624145508, + "rewards/rejected": -18.22991180419922, + "step": 17071 + }, + { + "epoch": 2.66, + "learning_rate": 1.6260376575548524e-06, + "logits/chosen": -2.2280807495117188, + "logits/rejected": -2.7619526386260986, + "logps/chosen": -535.9322509765625, + "logps/rejected": -555.6925659179688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.224682807922363, + "rewards/margins": 14.291633605957031, + "rewards/rejected": -23.516315460205078, + "step": 17072 + }, + { + "epoch": 2.66, + "learning_rate": 1.6253042170237045e-06, + "logits/chosen": -1.34687340259552, + "logits/rejected": -2.6623659133911133, + "logps/chosen": -214.05343627929688, + "logps/rejected": -450.6274719238281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.722112655639648, + "rewards/margins": 11.853466033935547, + "rewards/rejected": -18.575578689575195, + "step": 17073 + }, + { + "epoch": 2.66, + "learning_rate": 1.6245707764925566e-06, + "logits/chosen": -2.2835402488708496, + "logits/rejected": -2.544707775115967, + "logps/chosen": -292.4169921875, + "logps/rejected": -505.903564453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.904132843017578, + "rewards/margins": 11.403345108032227, + "rewards/rejected": -22.307477951049805, + "step": 17074 + }, + { + "epoch": 2.66, + "learning_rate": 1.623837335961409e-06, + "logits/chosen": -2.2892768383026123, + "logits/rejected": -2.688241958618164, + "logps/chosen": -114.98463439941406, + "logps/rejected": -279.2751770019531, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.304448127746582, + "rewards/margins": 6.713258743286133, + "rewards/rejected": -17.01770782470703, + "step": 17075 + }, + { + "epoch": 2.66, + "learning_rate": 1.623103895430261e-06, + "logits/chosen": -2.622251272201538, + "logits/rejected": -2.4552149772644043, + "logps/chosen": -289.79779052734375, + "logps/rejected": -416.0406494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.128684043884277, + "rewards/margins": 9.485498428344727, + "rewards/rejected": -19.614181518554688, + "step": 17076 + }, + { + "epoch": 2.66, + "learning_rate": 1.622370454899113e-06, + "logits/chosen": -2.3590729236602783, + "logits/rejected": -2.6812384128570557, + "logps/chosen": -210.70901489257812, + "logps/rejected": -378.6148681640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.051046371459961, + "rewards/margins": 8.56689453125, + "rewards/rejected": -20.617938995361328, + "step": 17077 + }, + { + "epoch": 2.66, + "learning_rate": 1.6216370143679652e-06, + "logits/chosen": -0.8624677658081055, + "logits/rejected": -2.339949607849121, + "logps/chosen": -257.77130126953125, + "logps/rejected": -648.423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.09890079498291, + "rewards/margins": 12.275020599365234, + "rewards/rejected": -22.37392234802246, + "step": 17078 + }, + { + "epoch": 2.66, + "learning_rate": 1.6209035738368173e-06, + "logits/chosen": -2.4841136932373047, + "logits/rejected": -2.837545871734619, + "logps/chosen": -343.67633056640625, + "logps/rejected": -464.3760681152344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.446958541870117, + "rewards/margins": 11.825468063354492, + "rewards/rejected": -20.27242660522461, + "step": 17079 + }, + { + "epoch": 2.66, + "learning_rate": 1.6201701333056693e-06, + "logits/chosen": -1.352335810661316, + "logits/rejected": -1.7957098484039307, + "logps/chosen": -430.21270751953125, + "logps/rejected": -477.34100341796875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.668388366699219, + "rewards/margins": 7.532796382904053, + "rewards/rejected": -18.201183319091797, + "step": 17080 + }, + { + "epoch": 2.66, + "learning_rate": 1.6194366927745214e-06, + "logits/chosen": -1.8350183963775635, + "logits/rejected": -2.517010450363159, + "logps/chosen": -360.4296875, + "logps/rejected": -586.1492309570312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.64047622680664, + "rewards/margins": 11.817192077636719, + "rewards/rejected": -24.45766830444336, + "step": 17081 + }, + { + "epoch": 2.66, + "learning_rate": 1.6187032522433735e-06, + "logits/chosen": -1.6093562841415405, + "logits/rejected": -2.528843879699707, + "logps/chosen": -136.7989501953125, + "logps/rejected": -349.0250244140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.294873237609863, + "rewards/margins": 9.347765922546387, + "rewards/rejected": -18.64263916015625, + "step": 17082 + }, + { + "epoch": 2.66, + "learning_rate": 1.6179698117122256e-06, + "logits/chosen": -2.688659429550171, + "logits/rejected": -2.8454058170318604, + "logps/chosen": -279.31817626953125, + "logps/rejected": -399.0653991699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.010407447814941, + "rewards/margins": 11.1915864944458, + "rewards/rejected": -20.201993942260742, + "step": 17083 + }, + { + "epoch": 2.66, + "learning_rate": 1.617236371181078e-06, + "logits/chosen": -2.194613218307495, + "logits/rejected": -2.6545236110687256, + "logps/chosen": -312.9792785644531, + "logps/rejected": -433.86810302734375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.309045791625977, + "rewards/margins": 6.84618616104126, + "rewards/rejected": -18.155231475830078, + "step": 17084 + }, + { + "epoch": 2.66, + "learning_rate": 1.61650293064993e-06, + "logits/chosen": -2.836854934692383, + "logits/rejected": -2.8167173862457275, + "logps/chosen": -123.89495849609375, + "logps/rejected": -312.1783752441406, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.202653884887695, + "rewards/margins": 7.116508483886719, + "rewards/rejected": -13.319162368774414, + "step": 17085 + }, + { + "epoch": 2.66, + "learning_rate": 1.615769490118782e-06, + "logits/chosen": -1.8606308698654175, + "logits/rejected": -2.4271323680877686, + "logps/chosen": -162.3038787841797, + "logps/rejected": -345.8825988769531, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.68527603149414, + "rewards/margins": 9.641263961791992, + "rewards/rejected": -22.326539993286133, + "step": 17086 + }, + { + "epoch": 2.66, + "learning_rate": 1.6150360495876342e-06, + "logits/chosen": -1.9687353372573853, + "logits/rejected": -2.7019360065460205, + "logps/chosen": -396.2239990234375, + "logps/rejected": -659.5355224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.708499908447266, + "rewards/margins": 15.774588584899902, + "rewards/rejected": -27.483089447021484, + "step": 17087 + }, + { + "epoch": 2.66, + "learning_rate": 1.6143026090564865e-06, + "logits/chosen": -1.9448598623275757, + "logits/rejected": -1.5963962078094482, + "logps/chosen": -372.362548828125, + "logps/rejected": -452.3208312988281, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.901890754699707, + "rewards/margins": 9.134448051452637, + "rewards/rejected": -19.036338806152344, + "step": 17088 + }, + { + "epoch": 2.66, + "learning_rate": 1.6135691685253386e-06, + "logits/chosen": -1.7627716064453125, + "logits/rejected": -2.9058756828308105, + "logps/chosen": -326.87628173828125, + "logps/rejected": -730.3722534179688, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.97544002532959, + "rewards/margins": 8.755167007446289, + "rewards/rejected": -18.730607986450195, + "step": 17089 + }, + { + "epoch": 2.66, + "learning_rate": 1.6128357279941905e-06, + "logits/chosen": -1.099979281425476, + "logits/rejected": -2.597475051879883, + "logps/chosen": -185.89459228515625, + "logps/rejected": -575.4525146484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.805908203125, + "rewards/margins": 9.981498718261719, + "rewards/rejected": -20.78740692138672, + "step": 17090 + }, + { + "epoch": 2.66, + "learning_rate": 1.6121022874630426e-06, + "logits/chosen": -2.5599734783172607, + "logits/rejected": -2.2640740871429443, + "logps/chosen": -334.3514404296875, + "logps/rejected": -319.3142395019531, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.484423637390137, + "rewards/margins": 8.478593826293945, + "rewards/rejected": -18.963016510009766, + "step": 17091 + }, + { + "epoch": 2.66, + "learning_rate": 1.6113688469318949e-06, + "logits/chosen": -2.585279941558838, + "logits/rejected": -2.661158800125122, + "logps/chosen": -231.10342407226562, + "logps/rejected": -293.8910827636719, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.550702095031738, + "rewards/margins": 7.681458473205566, + "rewards/rejected": -18.232160568237305, + "step": 17092 + }, + { + "epoch": 2.66, + "learning_rate": 1.610635406400747e-06, + "logits/chosen": -2.350590944290161, + "logits/rejected": -2.7756073474884033, + "logps/chosen": -448.69659423828125, + "logps/rejected": -627.9005737304688, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.571029663085938, + "rewards/margins": 8.18040657043457, + "rewards/rejected": -18.75143814086914, + "step": 17093 + }, + { + "epoch": 2.66, + "learning_rate": 1.609901965869599e-06, + "logits/chosen": -2.4581174850463867, + "logits/rejected": -1.1335980892181396, + "logps/chosen": -424.70343017578125, + "logps/rejected": -298.98980712890625, + "loss": 0.1277, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.057538986206055, + "rewards/margins": 6.334138870239258, + "rewards/rejected": -15.391677856445312, + "step": 17094 + }, + { + "epoch": 2.66, + "learning_rate": 1.6091685253384511e-06, + "logits/chosen": -2.6045312881469727, + "logits/rejected": -2.4904115200042725, + "logps/chosen": -233.33810424804688, + "logps/rejected": -259.853271484375, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.174198150634766, + "rewards/margins": 7.552570819854736, + "rewards/rejected": -16.726768493652344, + "step": 17095 + }, + { + "epoch": 2.66, + "learning_rate": 1.6084350848073034e-06, + "logits/chosen": -2.083272933959961, + "logits/rejected": -2.6326487064361572, + "logps/chosen": -104.92730712890625, + "logps/rejected": -465.1823425292969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.187970161437988, + "rewards/margins": 13.532954216003418, + "rewards/rejected": -22.720924377441406, + "step": 17096 + }, + { + "epoch": 2.66, + "learning_rate": 1.6077016442761555e-06, + "logits/chosen": -2.7574939727783203, + "logits/rejected": -2.7859408855438232, + "logps/chosen": -498.14801025390625, + "logps/rejected": -463.7113952636719, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.122325897216797, + "rewards/margins": 7.5083160400390625, + "rewards/rejected": -17.63064193725586, + "step": 17097 + }, + { + "epoch": 2.66, + "learning_rate": 1.6069682037450076e-06, + "logits/chosen": -1.6955361366271973, + "logits/rejected": -2.393946409225464, + "logps/chosen": -199.9600830078125, + "logps/rejected": -349.42584228515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.396707534790039, + "rewards/margins": 8.204305648803711, + "rewards/rejected": -16.60101318359375, + "step": 17098 + }, + { + "epoch": 2.66, + "learning_rate": 1.6062347632138597e-06, + "logits/chosen": -2.4641425609588623, + "logits/rejected": -2.7018356323242188, + "logps/chosen": -260.9314880371094, + "logps/rejected": -361.68157958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.236038208007812, + "rewards/margins": 9.156365394592285, + "rewards/rejected": -17.39240264892578, + "step": 17099 + }, + { + "epoch": 2.66, + "learning_rate": 1.6055013226827118e-06, + "logits/chosen": -2.576336145401001, + "logits/rejected": -2.7960760593414307, + "logps/chosen": -153.34063720703125, + "logps/rejected": -318.19476318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.588692665100098, + "rewards/margins": 12.432252883911133, + "rewards/rejected": -19.020944595336914, + "step": 17100 + }, + { + "epoch": 2.66, + "learning_rate": 1.6047678821515639e-06, + "logits/chosen": -2.2101731300354004, + "logits/rejected": -2.3279826641082764, + "logps/chosen": -248.1419677734375, + "logps/rejected": -363.43145751953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.90199089050293, + "rewards/margins": 10.312911987304688, + "rewards/rejected": -20.214902877807617, + "step": 17101 + }, + { + "epoch": 2.66, + "learning_rate": 1.604034441620416e-06, + "logits/chosen": -2.3576180934906006, + "logits/rejected": -2.1862123012542725, + "logps/chosen": -202.3408966064453, + "logps/rejected": -493.02276611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.76214599609375, + "rewards/margins": 13.075469970703125, + "rewards/rejected": -19.837615966796875, + "step": 17102 + }, + { + "epoch": 2.66, + "learning_rate": 1.603301001089268e-06, + "logits/chosen": -1.9471288919448853, + "logits/rejected": -2.296780586242676, + "logps/chosen": -260.785888671875, + "logps/rejected": -363.634521484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.533992767333984, + "rewards/margins": 9.145854949951172, + "rewards/rejected": -18.679847717285156, + "step": 17103 + }, + { + "epoch": 2.66, + "learning_rate": 1.6025675605581201e-06, + "logits/chosen": -2.3010354042053223, + "logits/rejected": -2.7130308151245117, + "logps/chosen": -535.136474609375, + "logps/rejected": -683.9802856445312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.86121940612793, + "rewards/margins": 10.836181640625, + "rewards/rejected": -16.697399139404297, + "step": 17104 + }, + { + "epoch": 2.66, + "learning_rate": 1.6018341200269724e-06, + "logits/chosen": -2.513186454772949, + "logits/rejected": -1.7726960182189941, + "logps/chosen": -277.48284912109375, + "logps/rejected": -407.76922607421875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.810724258422852, + "rewards/margins": 11.43818473815918, + "rewards/rejected": -20.24890899658203, + "step": 17105 + }, + { + "epoch": 2.66, + "learning_rate": 1.6011006794958245e-06, + "logits/chosen": -2.4124369621276855, + "logits/rejected": -2.877297878265381, + "logps/chosen": -171.8666534423828, + "logps/rejected": -345.04754638671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.11978816986084, + "rewards/margins": 11.186943054199219, + "rewards/rejected": -20.306732177734375, + "step": 17106 + }, + { + "epoch": 2.66, + "learning_rate": 1.6003672389646766e-06, + "logits/chosen": -0.8062633275985718, + "logits/rejected": -2.4405932426452637, + "logps/chosen": -139.74240112304688, + "logps/rejected": -410.7872009277344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.11541748046875, + "rewards/margins": 10.625774383544922, + "rewards/rejected": -20.741191864013672, + "step": 17107 + }, + { + "epoch": 2.66, + "learning_rate": 1.5996337984335287e-06, + "logits/chosen": -2.35412335395813, + "logits/rejected": -2.5050487518310547, + "logps/chosen": -392.584228515625, + "logps/rejected": -566.9497680664062, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.441303253173828, + "rewards/margins": 12.351961135864258, + "rewards/rejected": -20.793264389038086, + "step": 17108 + }, + { + "epoch": 2.66, + "learning_rate": 1.598900357902381e-06, + "logits/chosen": -2.3423755168914795, + "logits/rejected": -2.8064091205596924, + "logps/chosen": -138.86106872558594, + "logps/rejected": -377.027099609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.66339111328125, + "rewards/margins": 8.473400115966797, + "rewards/rejected": -18.136791229248047, + "step": 17109 + }, + { + "epoch": 2.66, + "learning_rate": 1.598166917371233e-06, + "logits/chosen": -2.2003283500671387, + "logits/rejected": -2.8522820472717285, + "logps/chosen": -279.336669921875, + "logps/rejected": -725.3681030273438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.414662837982178, + "rewards/margins": 11.42143440246582, + "rewards/rejected": -18.836097717285156, + "step": 17110 + }, + { + "epoch": 2.66, + "learning_rate": 1.597433476840085e-06, + "logits/chosen": -2.7008652687072754, + "logits/rejected": -2.886183977127075, + "logps/chosen": -177.20779418945312, + "logps/rejected": -376.2494201660156, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.63559341430664, + "rewards/margins": 6.698249816894531, + "rewards/rejected": -19.333843231201172, + "step": 17111 + }, + { + "epoch": 2.66, + "learning_rate": 1.596700036308937e-06, + "logits/chosen": -2.73445987701416, + "logits/rejected": -2.880882740020752, + "logps/chosen": -183.3762664794922, + "logps/rejected": -405.3411865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.580960273742676, + "rewards/margins": 11.270421981811523, + "rewards/rejected": -19.851383209228516, + "step": 17112 + }, + { + "epoch": 2.66, + "learning_rate": 1.5959665957777894e-06, + "logits/chosen": -2.497955322265625, + "logits/rejected": -2.6949574947357178, + "logps/chosen": -226.0310821533203, + "logps/rejected": -451.3079833984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.225738525390625, + "rewards/margins": 13.099336624145508, + "rewards/rejected": -18.325075149536133, + "step": 17113 + }, + { + "epoch": 2.66, + "learning_rate": 1.5952331552466415e-06, + "logits/chosen": -2.6265904903411865, + "logits/rejected": -2.7986412048339844, + "logps/chosen": -143.28277587890625, + "logps/rejected": -301.2677917480469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7432780265808105, + "rewards/margins": 12.249557495117188, + "rewards/rejected": -19.992835998535156, + "step": 17114 + }, + { + "epoch": 2.66, + "learning_rate": 1.5944997147154936e-06, + "logits/chosen": -2.554210662841797, + "logits/rejected": -2.9702417850494385, + "logps/chosen": -393.82427978515625, + "logps/rejected": -568.444580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.963375091552734, + "rewards/margins": 10.567916870117188, + "rewards/rejected": -19.531291961669922, + "step": 17115 + }, + { + "epoch": 2.66, + "learning_rate": 1.5937662741843457e-06, + "logits/chosen": -2.783782482147217, + "logits/rejected": -2.1139681339263916, + "logps/chosen": -382.81268310546875, + "logps/rejected": -350.57733154296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.113738536834717, + "rewards/margins": 9.274123191833496, + "rewards/rejected": -16.387861251831055, + "step": 17116 + }, + { + "epoch": 2.66, + "learning_rate": 1.593032833653198e-06, + "logits/chosen": -2.028916358947754, + "logits/rejected": -2.7085347175598145, + "logps/chosen": -197.91439819335938, + "logps/rejected": -590.2010498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.233552932739258, + "rewards/margins": 12.431968688964844, + "rewards/rejected": -20.6655216217041, + "step": 17117 + }, + { + "epoch": 2.66, + "learning_rate": 1.59229939312205e-06, + "logits/chosen": -1.2280287742614746, + "logits/rejected": -2.3763115406036377, + "logps/chosen": -186.32589721679688, + "logps/rejected": -590.0269775390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.553009033203125, + "rewards/margins": 14.850536346435547, + "rewards/rejected": -26.403545379638672, + "step": 17118 + }, + { + "epoch": 2.66, + "learning_rate": 1.5915659525909021e-06, + "logits/chosen": -2.333094596862793, + "logits/rejected": -2.7258825302124023, + "logps/chosen": -219.58831787109375, + "logps/rejected": -268.75128173828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.220614433288574, + "rewards/margins": 9.728949546813965, + "rewards/rejected": -18.94956398010254, + "step": 17119 + }, + { + "epoch": 2.66, + "learning_rate": 1.590832512059754e-06, + "logits/chosen": -2.4007043838500977, + "logits/rejected": -2.8721392154693604, + "logps/chosen": -145.03939819335938, + "logps/rejected": -325.074951171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.380970001220703, + "rewards/margins": 9.470508575439453, + "rewards/rejected": -19.851478576660156, + "step": 17120 + }, + { + "epoch": 2.66, + "learning_rate": 1.5900990715286063e-06, + "logits/chosen": -2.329516649246216, + "logits/rejected": -2.710566997528076, + "logps/chosen": -350.21484375, + "logps/rejected": -449.6407165527344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.87617015838623, + "rewards/margins": 10.669681549072266, + "rewards/rejected": -20.545852661132812, + "step": 17121 + }, + { + "epoch": 2.66, + "learning_rate": 1.5893656309974584e-06, + "logits/chosen": -2.255110502243042, + "logits/rejected": -2.5558950901031494, + "logps/chosen": -141.62445068359375, + "logps/rejected": -642.838623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.233141899108887, + "rewards/margins": 21.45525550842285, + "rewards/rejected": -29.688396453857422, + "step": 17122 + }, + { + "epoch": 2.66, + "learning_rate": 1.5886321904663105e-06, + "logits/chosen": -1.606408715248108, + "logits/rejected": -2.569474458694458, + "logps/chosen": -321.5843505859375, + "logps/rejected": -535.2403564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.271769523620605, + "rewards/margins": 12.37917709350586, + "rewards/rejected": -20.65094757080078, + "step": 17123 + }, + { + "epoch": 2.66, + "learning_rate": 1.5878987499351626e-06, + "logits/chosen": -2.357454538345337, + "logits/rejected": -1.934278130531311, + "logps/chosen": -168.65896606445312, + "logps/rejected": -369.5715637207031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.110197067260742, + "rewards/margins": 13.977275848388672, + "rewards/rejected": -23.087472915649414, + "step": 17124 + }, + { + "epoch": 2.66, + "learning_rate": 1.5871653094040147e-06, + "logits/chosen": -2.004572868347168, + "logits/rejected": -2.768731117248535, + "logps/chosen": -378.541259765625, + "logps/rejected": -353.16522216796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.81616735458374, + "rewards/margins": 10.276918411254883, + "rewards/rejected": -16.09308624267578, + "step": 17125 + }, + { + "epoch": 2.66, + "learning_rate": 1.586431868872867e-06, + "logits/chosen": -2.7255659103393555, + "logits/rejected": -2.365825653076172, + "logps/chosen": -195.38414001464844, + "logps/rejected": -421.5038146972656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.093181610107422, + "rewards/margins": 14.268795013427734, + "rewards/rejected": -21.361976623535156, + "step": 17126 + }, + { + "epoch": 2.66, + "learning_rate": 1.585698428341719e-06, + "logits/chosen": -2.860532283782959, + "logits/rejected": -2.385143280029297, + "logps/chosen": -329.03314208984375, + "logps/rejected": -291.93060302734375, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.731077194213867, + "rewards/margins": 6.614051818847656, + "rewards/rejected": -16.345129013061523, + "step": 17127 + }, + { + "epoch": 2.66, + "learning_rate": 1.5849649878105712e-06, + "logits/chosen": -3.1206533908843994, + "logits/rejected": -3.0669748783111572, + "logps/chosen": -278.9189147949219, + "logps/rejected": -367.6282043457031, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1027655601501465, + "rewards/margins": 9.942892074584961, + "rewards/rejected": -17.045658111572266, + "step": 17128 + }, + { + "epoch": 2.66, + "learning_rate": 1.584231547279423e-06, + "logits/chosen": -2.410261631011963, + "logits/rejected": -2.050766944885254, + "logps/chosen": -247.4080810546875, + "logps/rejected": -408.64434814453125, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.098098754882812, + "rewards/margins": 4.839821815490723, + "rewards/rejected": -16.93791961669922, + "step": 17129 + }, + { + "epoch": 2.66, + "learning_rate": 1.5834981067482753e-06, + "logits/chosen": -2.3536183834075928, + "logits/rejected": -2.4714536666870117, + "logps/chosen": -230.36727905273438, + "logps/rejected": -431.8798828125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.404964447021484, + "rewards/margins": 7.299022197723389, + "rewards/rejected": -19.70398712158203, + "step": 17130 + }, + { + "epoch": 2.66, + "learning_rate": 1.5827646662171274e-06, + "logits/chosen": -2.870196580886841, + "logits/rejected": -1.3378983736038208, + "logps/chosen": -810.5202026367188, + "logps/rejected": -683.5388793945312, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.736846923828125, + "rewards/margins": 8.251976013183594, + "rewards/rejected": -17.98882293701172, + "step": 17131 + }, + { + "epoch": 2.66, + "learning_rate": 1.5820312256859795e-06, + "logits/chosen": -2.8513989448547363, + "logits/rejected": -2.6467862129211426, + "logps/chosen": -229.5542755126953, + "logps/rejected": -505.2373962402344, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.30832052230835, + "rewards/margins": 7.814611434936523, + "rewards/rejected": -15.122931480407715, + "step": 17132 + }, + { + "epoch": 2.66, + "learning_rate": 1.5812977851548316e-06, + "logits/chosen": -2.017878770828247, + "logits/rejected": -2.68033766746521, + "logps/chosen": -137.15936279296875, + "logps/rejected": -349.1673278808594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.884103775024414, + "rewards/margins": 11.85113525390625, + "rewards/rejected": -21.735240936279297, + "step": 17133 + }, + { + "epoch": 2.66, + "learning_rate": 1.580564344623684e-06, + "logits/chosen": -2.7780838012695312, + "logits/rejected": -0.9758139848709106, + "logps/chosen": -254.9138946533203, + "logps/rejected": -245.91604614257812, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.298835754394531, + "rewards/margins": 8.269996643066406, + "rewards/rejected": -18.568832397460938, + "step": 17134 + }, + { + "epoch": 2.66, + "learning_rate": 1.579830904092536e-06, + "logits/chosen": -1.7463364601135254, + "logits/rejected": -2.842634916305542, + "logps/chosen": -123.65028381347656, + "logps/rejected": -620.89404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.380698204040527, + "rewards/margins": 15.69659423828125, + "rewards/rejected": -24.077293395996094, + "step": 17135 + }, + { + "epoch": 2.67, + "learning_rate": 1.579097463561388e-06, + "logits/chosen": -0.8035774230957031, + "logits/rejected": -2.5702474117279053, + "logps/chosen": -182.4266357421875, + "logps/rejected": -434.30157470703125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.101926803588867, + "rewards/margins": 6.932342052459717, + "rewards/rejected": -19.034269332885742, + "step": 17136 + }, + { + "epoch": 2.67, + "learning_rate": 1.5783640230302402e-06, + "logits/chosen": -1.665764570236206, + "logits/rejected": -2.3078415393829346, + "logps/chosen": -321.4015197753906, + "logps/rejected": -411.82012939453125, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.161667823791504, + "rewards/margins": 7.1700592041015625, + "rewards/rejected": -17.331727981567383, + "step": 17137 + }, + { + "epoch": 2.67, + "learning_rate": 1.5776305824990925e-06, + "logits/chosen": -2.0303235054016113, + "logits/rejected": -2.9563817977905273, + "logps/chosen": -122.30754852294922, + "logps/rejected": -477.47821044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.876458644866943, + "rewards/margins": 11.199758529663086, + "rewards/rejected": -19.076217651367188, + "step": 17138 + }, + { + "epoch": 2.67, + "learning_rate": 1.5768971419679444e-06, + "logits/chosen": -2.542945146560669, + "logits/rejected": -2.093751907348633, + "logps/chosen": -434.2706604003906, + "logps/rejected": -520.2030029296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.476879119873047, + "rewards/margins": 8.280926704406738, + "rewards/rejected": -15.757804870605469, + "step": 17139 + }, + { + "epoch": 2.67, + "learning_rate": 1.5761637014367964e-06, + "logits/chosen": -2.3933632373809814, + "logits/rejected": -2.5257630348205566, + "logps/chosen": -242.1861572265625, + "logps/rejected": -436.21923828125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.221858024597168, + "rewards/margins": 8.682025909423828, + "rewards/rejected": -18.903884887695312, + "step": 17140 + }, + { + "epoch": 2.67, + "learning_rate": 1.5754302609056485e-06, + "logits/chosen": -1.1983451843261719, + "logits/rejected": -2.50697922706604, + "logps/chosen": -146.58468627929688, + "logps/rejected": -628.7240600585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.864498138427734, + "rewards/margins": 17.408864974975586, + "rewards/rejected": -28.273361206054688, + "step": 17141 + }, + { + "epoch": 2.67, + "learning_rate": 1.5746968203745008e-06, + "logits/chosen": -2.3551642894744873, + "logits/rejected": -2.6957485675811768, + "logps/chosen": -342.0121765136719, + "logps/rejected": -716.038818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.050174713134766, + "rewards/margins": 13.484975814819336, + "rewards/rejected": -19.53514862060547, + "step": 17142 + }, + { + "epoch": 2.67, + "learning_rate": 1.573963379843353e-06, + "logits/chosen": -3.1438052654266357, + "logits/rejected": -2.9211156368255615, + "logps/chosen": -110.49609375, + "logps/rejected": -143.56759643554688, + "loss": 0.9105, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.1115007400512695, + "rewards/margins": 4.026851654052734, + "rewards/rejected": -11.13835334777832, + "step": 17143 + }, + { + "epoch": 2.67, + "learning_rate": 1.573229939312205e-06, + "logits/chosen": -1.449748158454895, + "logits/rejected": -2.826136350631714, + "logps/chosen": -216.60350036621094, + "logps/rejected": -813.6754150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.150325775146484, + "rewards/margins": 10.476142883300781, + "rewards/rejected": -21.626468658447266, + "step": 17144 + }, + { + "epoch": 2.67, + "learning_rate": 1.5724964987810571e-06, + "logits/chosen": -2.1482372283935547, + "logits/rejected": -2.26227068901062, + "logps/chosen": -434.0389404296875, + "logps/rejected": -490.92657470703125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.843903541564941, + "rewards/margins": 8.490832328796387, + "rewards/rejected": -20.334735870361328, + "step": 17145 + }, + { + "epoch": 2.67, + "learning_rate": 1.5717630582499092e-06, + "logits/chosen": -2.5932865142822266, + "logits/rejected": -2.5994300842285156, + "logps/chosen": -197.800537109375, + "logps/rejected": -368.31256103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.836147308349609, + "rewards/margins": 12.072319984436035, + "rewards/rejected": -19.908466339111328, + "step": 17146 + }, + { + "epoch": 2.67, + "learning_rate": 1.5710296177187615e-06, + "logits/chosen": -2.5969018936157227, + "logits/rejected": -2.1317973136901855, + "logps/chosen": -794.6974487304688, + "logps/rejected": -643.8826904296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.791577339172363, + "rewards/margins": 9.332442283630371, + "rewards/rejected": -18.124019622802734, + "step": 17147 + }, + { + "epoch": 2.67, + "learning_rate": 1.5702961771876136e-06, + "logits/chosen": -2.799527406692505, + "logits/rejected": -2.842644214630127, + "logps/chosen": -106.56358337402344, + "logps/rejected": -453.7802734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.21813678741455, + "rewards/margins": 12.889759063720703, + "rewards/rejected": -21.10789680480957, + "step": 17148 + }, + { + "epoch": 2.67, + "learning_rate": 1.5695627366564655e-06, + "logits/chosen": -2.063624858856201, + "logits/rejected": -2.6115643978118896, + "logps/chosen": -134.92034912109375, + "logps/rejected": -248.3427734375, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.514813423156738, + "rewards/margins": 5.999829292297363, + "rewards/rejected": -17.5146427154541, + "step": 17149 + }, + { + "epoch": 2.67, + "learning_rate": 1.5688292961253176e-06, + "logits/chosen": -2.5649192333221436, + "logits/rejected": -2.4612905979156494, + "logps/chosen": -856.1624755859375, + "logps/rejected": -869.8762817382812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.483301162719727, + "rewards/margins": 12.51682186126709, + "rewards/rejected": -23.0001220703125, + "step": 17150 + }, + { + "epoch": 2.67, + "learning_rate": 1.5680958555941699e-06, + "logits/chosen": -2.7524590492248535, + "logits/rejected": -1.2492895126342773, + "logps/chosen": -622.7230224609375, + "logps/rejected": -452.03021240234375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.198370933532715, + "rewards/margins": 13.239463806152344, + "rewards/rejected": -21.437835693359375, + "step": 17151 + }, + { + "epoch": 2.67, + "learning_rate": 1.567362415063022e-06, + "logits/chosen": -2.3967201709747314, + "logits/rejected": -1.9140651226043701, + "logps/chosen": -176.3894500732422, + "logps/rejected": -453.667236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.206037521362305, + "rewards/margins": 20.377565383911133, + "rewards/rejected": -26.583602905273438, + "step": 17152 + }, + { + "epoch": 2.67, + "learning_rate": 1.566628974531874e-06, + "logits/chosen": -2.449498414993286, + "logits/rejected": -2.510887622833252, + "logps/chosen": -405.7956237792969, + "logps/rejected": -465.670654296875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.990577697753906, + "rewards/margins": 11.599448204040527, + "rewards/rejected": -24.59002685546875, + "step": 17153 + }, + { + "epoch": 2.67, + "learning_rate": 1.5658955340007261e-06, + "logits/chosen": -2.7688674926757812, + "logits/rejected": -2.823401927947998, + "logps/chosen": -134.94400024414062, + "logps/rejected": -335.28033447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.019519805908203, + "rewards/margins": 11.69478702545166, + "rewards/rejected": -17.714305877685547, + "step": 17154 + }, + { + "epoch": 2.67, + "learning_rate": 1.5651620934695784e-06, + "logits/chosen": -2.6449506282806396, + "logits/rejected": -2.0376288890838623, + "logps/chosen": -195.5607147216797, + "logps/rejected": -305.2532958984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.496813774108887, + "rewards/margins": 8.076125144958496, + "rewards/rejected": -14.572938919067383, + "step": 17155 + }, + { + "epoch": 2.67, + "learning_rate": 1.5644286529384305e-06, + "logits/chosen": -2.7361042499542236, + "logits/rejected": -1.7511810064315796, + "logps/chosen": -566.0857543945312, + "logps/rejected": -438.83441162109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.952280044555664, + "rewards/margins": 9.747352600097656, + "rewards/rejected": -21.69963264465332, + "step": 17156 + }, + { + "epoch": 2.67, + "learning_rate": 1.5636952124072826e-06, + "logits/chosen": -2.1738662719726562, + "logits/rejected": -2.2464659214019775, + "logps/chosen": -282.7464599609375, + "logps/rejected": -548.2674560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.858358383178711, + "rewards/margins": 12.770503997802734, + "rewards/rejected": -20.628862380981445, + "step": 17157 + }, + { + "epoch": 2.67, + "learning_rate": 1.5629617718761347e-06, + "logits/chosen": -2.7175285816192627, + "logits/rejected": -2.401564121246338, + "logps/chosen": -241.12847900390625, + "logps/rejected": -255.09426879882812, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.32602071762085, + "rewards/margins": 7.759454727172852, + "rewards/rejected": -14.08547592163086, + "step": 17158 + }, + { + "epoch": 2.67, + "learning_rate": 1.5622283313449868e-06, + "logits/chosen": -1.1622897386550903, + "logits/rejected": -2.2417027950286865, + "logps/chosen": -482.1671142578125, + "logps/rejected": -721.32177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.90985107421875, + "rewards/margins": 15.248336791992188, + "rewards/rejected": -24.158187866210938, + "step": 17159 + }, + { + "epoch": 2.67, + "learning_rate": 1.5614948908138389e-06, + "logits/chosen": -2.904017448425293, + "logits/rejected": -2.957404136657715, + "logps/chosen": -122.72695922851562, + "logps/rejected": -275.0411682128906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.073112964630127, + "rewards/margins": 14.12594223022461, + "rewards/rejected": -18.199054718017578, + "step": 17160 + }, + { + "epoch": 2.67, + "learning_rate": 1.560761450282691e-06, + "logits/chosen": -1.6619504690170288, + "logits/rejected": -2.451110363006592, + "logps/chosen": -162.81436157226562, + "logps/rejected": -430.78363037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.220430374145508, + "rewards/margins": 11.175168991088867, + "rewards/rejected": -22.395599365234375, + "step": 17161 + }, + { + "epoch": 2.67, + "learning_rate": 1.560028009751543e-06, + "logits/chosen": -2.441153049468994, + "logits/rejected": -2.7770628929138184, + "logps/chosen": -1191.574462890625, + "logps/rejected": -1112.21923828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.634540557861328, + "rewards/margins": 8.954191207885742, + "rewards/rejected": -18.588729858398438, + "step": 17162 + }, + { + "epoch": 2.67, + "learning_rate": 1.5592945692203952e-06, + "logits/chosen": -2.675797462463379, + "logits/rejected": -1.51837956905365, + "logps/chosen": -212.51187133789062, + "logps/rejected": -110.78983306884766, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.534523010253906, + "rewards/margins": 3.3418116569519043, + "rewards/rejected": -9.876335144042969, + "step": 17163 + }, + { + "epoch": 2.67, + "learning_rate": 1.5585611286892475e-06, + "logits/chosen": -2.5758018493652344, + "logits/rejected": -2.9025094509124756, + "logps/chosen": -176.50941467285156, + "logps/rejected": -371.8172607421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.514410018920898, + "rewards/margins": 9.121360778808594, + "rewards/rejected": -18.635770797729492, + "step": 17164 + }, + { + "epoch": 2.67, + "learning_rate": 1.5578276881580995e-06, + "logits/chosen": -2.947356939315796, + "logits/rejected": -2.683026075363159, + "logps/chosen": -395.18341064453125, + "logps/rejected": -492.88043212890625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.822203636169434, + "rewards/margins": 11.81515884399414, + "rewards/rejected": -20.63736343383789, + "step": 17165 + }, + { + "epoch": 2.67, + "learning_rate": 1.5570942476269516e-06, + "logits/chosen": -2.7444984912872314, + "logits/rejected": -2.4559006690979004, + "logps/chosen": -231.51307678222656, + "logps/rejected": -527.574951171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.36231803894043, + "rewards/margins": 16.08084487915039, + "rewards/rejected": -26.44316291809082, + "step": 17166 + }, + { + "epoch": 2.67, + "learning_rate": 1.5563608070958037e-06, + "logits/chosen": -1.812603235244751, + "logits/rejected": -2.5539603233337402, + "logps/chosen": -290.2783203125, + "logps/rejected": -504.91998291015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3490400314331055, + "rewards/margins": 9.888555526733398, + "rewards/rejected": -17.237594604492188, + "step": 17167 + }, + { + "epoch": 2.67, + "learning_rate": 1.555627366564656e-06, + "logits/chosen": -1.4466880559921265, + "logits/rejected": -2.660815477371216, + "logps/chosen": -140.10504150390625, + "logps/rejected": -438.78887939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.313902854919434, + "rewards/margins": 13.891651153564453, + "rewards/rejected": -23.205554962158203, + "step": 17168 + }, + { + "epoch": 2.67, + "learning_rate": 1.554893926033508e-06, + "logits/chosen": -3.0037155151367188, + "logits/rejected": -1.918226718902588, + "logps/chosen": -300.5294189453125, + "logps/rejected": -287.98968505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.198192596435547, + "rewards/margins": 9.588347434997559, + "rewards/rejected": -17.78653907775879, + "step": 17169 + }, + { + "epoch": 2.67, + "learning_rate": 1.55416048550236e-06, + "logits/chosen": -0.7967895269393921, + "logits/rejected": -1.5521517992019653, + "logps/chosen": -187.34014892578125, + "logps/rejected": -504.61309814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.377089500427246, + "rewards/margins": 16.138072967529297, + "rewards/rejected": -23.51516342163086, + "step": 17170 + }, + { + "epoch": 2.67, + "learning_rate": 1.553427044971212e-06, + "logits/chosen": -3.0235700607299805, + "logits/rejected": -2.991230010986328, + "logps/chosen": -122.80255889892578, + "logps/rejected": -256.78363037109375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.295782089233398, + "rewards/margins": 6.664129257202148, + "rewards/rejected": -12.959911346435547, + "step": 17171 + }, + { + "epoch": 2.67, + "learning_rate": 1.5526936044400644e-06, + "logits/chosen": -2.4879655838012695, + "logits/rejected": -2.479644775390625, + "logps/chosen": -666.9189453125, + "logps/rejected": -694.6121215820312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.576708793640137, + "rewards/margins": 13.062967300415039, + "rewards/rejected": -21.639677047729492, + "step": 17172 + }, + { + "epoch": 2.67, + "learning_rate": 1.5519601639089165e-06, + "logits/chosen": -2.665132761001587, + "logits/rejected": -1.2584587335586548, + "logps/chosen": -324.53369140625, + "logps/rejected": -317.216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.109651565551758, + "rewards/margins": 14.167143821716309, + "rewards/rejected": -21.276796340942383, + "step": 17173 + }, + { + "epoch": 2.67, + "learning_rate": 1.5512267233777686e-06, + "logits/chosen": -2.797041177749634, + "logits/rejected": -3.031665325164795, + "logps/chosen": -147.77011108398438, + "logps/rejected": -277.65679931640625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.782533645629883, + "rewards/margins": 9.180000305175781, + "rewards/rejected": -17.962533950805664, + "step": 17174 + }, + { + "epoch": 2.67, + "learning_rate": 1.5504932828466207e-06, + "logits/chosen": -1.9999905824661255, + "logits/rejected": -2.9051082134246826, + "logps/chosen": -111.18926239013672, + "logps/rejected": -391.6412353515625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.93648099899292, + "rewards/margins": 10.032732009887695, + "rewards/rejected": -16.969213485717773, + "step": 17175 + }, + { + "epoch": 2.67, + "learning_rate": 1.549759842315473e-06, + "logits/chosen": -1.6726841926574707, + "logits/rejected": -2.6694326400756836, + "logps/chosen": -244.43231201171875, + "logps/rejected": -408.32037353515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.731653213500977, + "rewards/margins": 8.561914443969727, + "rewards/rejected": -19.293567657470703, + "step": 17176 + }, + { + "epoch": 2.67, + "learning_rate": 1.549026401784325e-06, + "logits/chosen": -2.566540002822876, + "logits/rejected": -1.509013295173645, + "logps/chosen": -322.7286682128906, + "logps/rejected": -432.8450927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.772167205810547, + "rewards/margins": 10.97298812866211, + "rewards/rejected": -22.745155334472656, + "step": 17177 + }, + { + "epoch": 2.67, + "learning_rate": 1.5482929612531771e-06, + "logits/chosen": -2.970224380493164, + "logits/rejected": -2.943819046020508, + "logps/chosen": -868.259765625, + "logps/rejected": -657.230224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.419743537902832, + "rewards/margins": 10.533246040344238, + "rewards/rejected": -17.95298957824707, + "step": 17178 + }, + { + "epoch": 2.67, + "learning_rate": 1.547559520722029e-06, + "logits/chosen": -1.8352782726287842, + "logits/rejected": -2.8953895568847656, + "logps/chosen": -194.4559326171875, + "logps/rejected": -372.33154296875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.865934371948242, + "rewards/margins": 6.902677536010742, + "rewards/rejected": -17.768611907958984, + "step": 17179 + }, + { + "epoch": 2.67, + "learning_rate": 1.5468260801908813e-06, + "logits/chosen": -2.498758554458618, + "logits/rejected": -2.2757842540740967, + "logps/chosen": -399.97894287109375, + "logps/rejected": -447.8742370605469, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4029541015625, + "rewards/margins": 12.431638717651367, + "rewards/rejected": -18.834592819213867, + "step": 17180 + }, + { + "epoch": 2.67, + "learning_rate": 1.5460926396597334e-06, + "logits/chosen": -2.8927292823791504, + "logits/rejected": -2.651578426361084, + "logps/chosen": -702.9349365234375, + "logps/rejected": -753.8472900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.992859840393066, + "rewards/margins": 10.998668670654297, + "rewards/rejected": -20.991527557373047, + "step": 17181 + }, + { + "epoch": 2.67, + "learning_rate": 1.5453591991285855e-06, + "logits/chosen": -2.26747465133667, + "logits/rejected": -2.623072385787964, + "logps/chosen": -215.9766082763672, + "logps/rejected": -337.0204162597656, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.550430297851562, + "rewards/margins": 7.889060020446777, + "rewards/rejected": -17.439491271972656, + "step": 17182 + }, + { + "epoch": 2.67, + "learning_rate": 1.5446257585974376e-06, + "logits/chosen": -2.6131858825683594, + "logits/rejected": -2.96978759765625, + "logps/chosen": -96.34642028808594, + "logps/rejected": -259.9263000488281, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.15958309173584, + "rewards/margins": 7.973023891448975, + "rewards/rejected": -17.132606506347656, + "step": 17183 + }, + { + "epoch": 2.67, + "learning_rate": 1.5438923180662897e-06, + "logits/chosen": -1.4507523775100708, + "logits/rejected": -2.394882917404175, + "logps/chosen": -141.29730224609375, + "logps/rejected": -298.13934326171875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.921287536621094, + "rewards/margins": 6.784003734588623, + "rewards/rejected": -18.705291748046875, + "step": 17184 + }, + { + "epoch": 2.67, + "learning_rate": 1.543158877535142e-06, + "logits/chosen": -2.797635078430176, + "logits/rejected": -1.8821758031845093, + "logps/chosen": -382.7681884765625, + "logps/rejected": -278.1655578613281, + "loss": 0.1067, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.893106460571289, + "rewards/margins": 5.162811279296875, + "rewards/rejected": -16.055917739868164, + "step": 17185 + }, + { + "epoch": 2.67, + "learning_rate": 1.542425437003994e-06, + "logits/chosen": -2.4276351928710938, + "logits/rejected": -2.9153873920440674, + "logps/chosen": -527.519775390625, + "logps/rejected": -740.39111328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.487288475036621, + "rewards/margins": 12.714286804199219, + "rewards/rejected": -20.201576232910156, + "step": 17186 + }, + { + "epoch": 2.67, + "learning_rate": 1.5416919964728462e-06, + "logits/chosen": -1.9869827032089233, + "logits/rejected": -2.6308295726776123, + "logps/chosen": -193.36715698242188, + "logps/rejected": -508.3902893066406, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.095939636230469, + "rewards/margins": 10.295230865478516, + "rewards/rejected": -19.391170501708984, + "step": 17187 + }, + { + "epoch": 2.67, + "learning_rate": 1.540958555941698e-06, + "logits/chosen": -2.7535409927368164, + "logits/rejected": -2.3246448040008545, + "logps/chosen": -929.7520751953125, + "logps/rejected": -966.8648681640625, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.935235977172852, + "rewards/margins": 10.614374160766602, + "rewards/rejected": -22.549610137939453, + "step": 17188 + }, + { + "epoch": 2.67, + "learning_rate": 1.5402251154105503e-06, + "logits/chosen": -2.4226675033569336, + "logits/rejected": -2.8491578102111816, + "logps/chosen": -155.11447143554688, + "logps/rejected": -281.8888244628906, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.738959312438965, + "rewards/margins": 6.209131240844727, + "rewards/rejected": -15.948091506958008, + "step": 17189 + }, + { + "epoch": 2.67, + "learning_rate": 1.5394916748794024e-06, + "logits/chosen": -1.5799634456634521, + "logits/rejected": -2.3533098697662354, + "logps/chosen": -196.61016845703125, + "logps/rejected": -396.95648193359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.576398849487305, + "rewards/margins": 9.654644966125488, + "rewards/rejected": -23.23104476928711, + "step": 17190 + }, + { + "epoch": 2.67, + "learning_rate": 1.5387582343482545e-06, + "logits/chosen": -2.3841660022735596, + "logits/rejected": -2.729748010635376, + "logps/chosen": -141.67135620117188, + "logps/rejected": -404.60919189453125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.835145950317383, + "rewards/margins": 6.720840930938721, + "rewards/rejected": -18.555988311767578, + "step": 17191 + }, + { + "epoch": 2.67, + "learning_rate": 1.5380247938171066e-06, + "logits/chosen": -1.3190511465072632, + "logits/rejected": -2.385356903076172, + "logps/chosen": -146.7432098388672, + "logps/rejected": -387.8760681152344, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.13142204284668, + "rewards/margins": 11.305471420288086, + "rewards/rejected": -19.436893463134766, + "step": 17192 + }, + { + "epoch": 2.67, + "learning_rate": 1.537291353285959e-06, + "logits/chosen": -2.70062518119812, + "logits/rejected": -2.6901512145996094, + "logps/chosen": -210.43869018554688, + "logps/rejected": -399.00360107421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.188241958618164, + "rewards/margins": 11.597373962402344, + "rewards/rejected": -21.78561782836914, + "step": 17193 + }, + { + "epoch": 2.67, + "learning_rate": 1.536557912754811e-06, + "logits/chosen": -2.7082011699676514, + "logits/rejected": -1.507059931755066, + "logps/chosen": -580.73046875, + "logps/rejected": -574.4993896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.088859558105469, + "rewards/margins": 12.96531867980957, + "rewards/rejected": -23.05417823791504, + "step": 17194 + }, + { + "epoch": 2.67, + "learning_rate": 1.535824472223663e-06, + "logits/chosen": -2.9254207611083984, + "logits/rejected": -1.7903629541397095, + "logps/chosen": -352.283203125, + "logps/rejected": -174.47103881835938, + "loss": 1.6399, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.097412109375, + "rewards/margins": 0.14376330375671387, + "rewards/rejected": -11.241174697875977, + "step": 17195 + }, + { + "epoch": 2.67, + "learning_rate": 1.5350910316925152e-06, + "logits/chosen": -2.361636161804199, + "logits/rejected": -1.8834972381591797, + "logps/chosen": -267.80267333984375, + "logps/rejected": -316.29443359375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.995585441589355, + "rewards/margins": 10.901374816894531, + "rewards/rejected": -21.896961212158203, + "step": 17196 + }, + { + "epoch": 2.67, + "learning_rate": 1.5343575911613675e-06, + "logits/chosen": -2.2507970333099365, + "logits/rejected": -2.7292447090148926, + "logps/chosen": -317.1253662109375, + "logps/rejected": -496.9969177246094, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.437509536743164, + "rewards/margins": 9.693548202514648, + "rewards/rejected": -20.131057739257812, + "step": 17197 + }, + { + "epoch": 2.67, + "learning_rate": 1.5336241506302194e-06, + "logits/chosen": -2.9657840728759766, + "logits/rejected": -2.462273597717285, + "logps/chosen": -195.5080108642578, + "logps/rejected": -238.44151306152344, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.053413391113281, + "rewards/margins": 10.830437660217285, + "rewards/rejected": -16.88385009765625, + "step": 17198 + }, + { + "epoch": 2.67, + "learning_rate": 1.5328907100990715e-06, + "logits/chosen": -1.5774884223937988, + "logits/rejected": -2.1152915954589844, + "logps/chosen": -247.10052490234375, + "logps/rejected": -461.2864990234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.065603256225586, + "rewards/margins": 14.349536895751953, + "rewards/rejected": -25.415142059326172, + "step": 17199 + }, + { + "epoch": 2.67, + "learning_rate": 1.5321572695679235e-06, + "logits/chosen": -2.7862794399261475, + "logits/rejected": -2.8419382572174072, + "logps/chosen": -124.25593566894531, + "logps/rejected": -355.1275634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.82259464263916, + "rewards/margins": 10.674880027770996, + "rewards/rejected": -19.497474670410156, + "step": 17200 + }, + { + "epoch": 2.68, + "learning_rate": 1.5314238290367758e-06, + "logits/chosen": -2.4882373809814453, + "logits/rejected": -2.497431516647339, + "logps/chosen": -574.0263671875, + "logps/rejected": -571.7083740234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.327763557434082, + "rewards/margins": 11.592239379882812, + "rewards/rejected": -21.920001983642578, + "step": 17201 + }, + { + "epoch": 2.68, + "learning_rate": 1.530690388505628e-06, + "logits/chosen": -2.5829389095306396, + "logits/rejected": -1.5339570045471191, + "logps/chosen": -319.05841064453125, + "logps/rejected": -383.1773681640625, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.994784355163574, + "rewards/margins": 10.300027847290039, + "rewards/rejected": -18.294811248779297, + "step": 17202 + }, + { + "epoch": 2.68, + "learning_rate": 1.52995694797448e-06, + "logits/chosen": -2.433401346206665, + "logits/rejected": -1.494507074356079, + "logps/chosen": -310.6472473144531, + "logps/rejected": -406.73590087890625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.25837516784668, + "rewards/margins": 7.707812786102295, + "rewards/rejected": -17.966188430786133, + "step": 17203 + }, + { + "epoch": 2.68, + "learning_rate": 1.5292235074433321e-06, + "logits/chosen": -1.2884899377822876, + "logits/rejected": -2.5334889888763428, + "logps/chosen": -246.4034423828125, + "logps/rejected": -362.0104064941406, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.918024063110352, + "rewards/margins": 7.110244274139404, + "rewards/rejected": -17.028268814086914, + "step": 17204 + }, + { + "epoch": 2.68, + "learning_rate": 1.5284900669121842e-06, + "logits/chosen": -2.4381299018859863, + "logits/rejected": -2.7681474685668945, + "logps/chosen": -297.3258361816406, + "logps/rejected": -374.90283203125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.85396957397461, + "rewards/margins": 6.7132768630981445, + "rewards/rejected": -17.567245483398438, + "step": 17205 + }, + { + "epoch": 2.68, + "learning_rate": 1.5277566263810365e-06, + "logits/chosen": -2.186689615249634, + "logits/rejected": -2.3366339206695557, + "logps/chosen": -176.590087890625, + "logps/rejected": -633.9624633789062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.386440277099609, + "rewards/margins": 19.82616424560547, + "rewards/rejected": -26.212604522705078, + "step": 17206 + }, + { + "epoch": 2.68, + "learning_rate": 1.5270231858498886e-06, + "logits/chosen": -2.159029722213745, + "logits/rejected": -1.0863147974014282, + "logps/chosen": -1005.1967163085938, + "logps/rejected": -449.10528564453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.377801895141602, + "rewards/margins": 11.691192626953125, + "rewards/rejected": -20.06899642944336, + "step": 17207 + }, + { + "epoch": 2.68, + "learning_rate": 1.5262897453187405e-06, + "logits/chosen": -2.218942403793335, + "logits/rejected": -2.9157800674438477, + "logps/chosen": -379.6258239746094, + "logps/rejected": -577.96826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.343099594116211, + "rewards/margins": 13.369707107543945, + "rewards/rejected": -22.712806701660156, + "step": 17208 + }, + { + "epoch": 2.68, + "learning_rate": 1.5255563047875926e-06, + "logits/chosen": -2.5274033546447754, + "logits/rejected": -1.6536086797714233, + "logps/chosen": -254.9877166748047, + "logps/rejected": -272.68414306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.618043899536133, + "rewards/margins": 10.65573501586914, + "rewards/rejected": -18.273778915405273, + "step": 17209 + }, + { + "epoch": 2.68, + "learning_rate": 1.5248228642564449e-06, + "logits/chosen": -2.5052220821380615, + "logits/rejected": -1.634022831916809, + "logps/chosen": -390.6558837890625, + "logps/rejected": -477.36859130859375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.663248062133789, + "rewards/margins": 8.772500991821289, + "rewards/rejected": -20.435749053955078, + "step": 17210 + }, + { + "epoch": 2.68, + "learning_rate": 1.524089423725297e-06, + "logits/chosen": -2.547703504562378, + "logits/rejected": -2.6800332069396973, + "logps/chosen": -322.9073791503906, + "logps/rejected": -473.7660217285156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.291401863098145, + "rewards/margins": 12.194562911987305, + "rewards/rejected": -21.485963821411133, + "step": 17211 + }, + { + "epoch": 2.68, + "learning_rate": 1.523355983194149e-06, + "logits/chosen": -2.000234603881836, + "logits/rejected": -2.4960250854492188, + "logps/chosen": -427.9629821777344, + "logps/rejected": -457.3814697265625, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.991188049316406, + "rewards/margins": 7.724432468414307, + "rewards/rejected": -17.715621948242188, + "step": 17212 + }, + { + "epoch": 2.68, + "learning_rate": 1.5226225426630011e-06, + "logits/chosen": -2.357672691345215, + "logits/rejected": -2.730720281600952, + "logps/chosen": -284.5624694824219, + "logps/rejected": -500.801513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.088483810424805, + "rewards/margins": 12.784599304199219, + "rewards/rejected": -23.87308120727539, + "step": 17213 + }, + { + "epoch": 2.68, + "learning_rate": 1.5218891021318534e-06, + "logits/chosen": -1.3099353313446045, + "logits/rejected": -2.3309290409088135, + "logps/chosen": -137.40016174316406, + "logps/rejected": -264.35382080078125, + "loss": 0.0516, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.425787925720215, + "rewards/margins": 6.706486701965332, + "rewards/rejected": -16.132274627685547, + "step": 17214 + }, + { + "epoch": 2.68, + "learning_rate": 1.5211556616007055e-06, + "logits/chosen": -2.89054799079895, + "logits/rejected": -2.931323766708374, + "logps/chosen": -118.89909362792969, + "logps/rejected": -389.20709228515625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.222397804260254, + "rewards/margins": 10.379591941833496, + "rewards/rejected": -18.60198974609375, + "step": 17215 + }, + { + "epoch": 2.68, + "learning_rate": 1.5204222210695576e-06, + "logits/chosen": -2.661078453063965, + "logits/rejected": -2.521658182144165, + "logps/chosen": -267.5806884765625, + "logps/rejected": -329.72149658203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.619861602783203, + "rewards/margins": 9.199409484863281, + "rewards/rejected": -19.819271087646484, + "step": 17216 + }, + { + "epoch": 2.68, + "learning_rate": 1.5196887805384097e-06, + "logits/chosen": -1.4475622177124023, + "logits/rejected": -2.4665021896362305, + "logps/chosen": -165.14134216308594, + "logps/rejected": -308.098876953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.469164848327637, + "rewards/margins": 8.213497161865234, + "rewards/rejected": -19.682662963867188, + "step": 17217 + }, + { + "epoch": 2.68, + "learning_rate": 1.5189553400072618e-06, + "logits/chosen": -1.9375966787338257, + "logits/rejected": -2.495543956756592, + "logps/chosen": -376.59735107421875, + "logps/rejected": -381.4524230957031, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.238165855407715, + "rewards/margins": 9.681901931762695, + "rewards/rejected": -17.920068740844727, + "step": 17218 + }, + { + "epoch": 2.68, + "learning_rate": 1.5182218994761139e-06, + "logits/chosen": -2.616912603378296, + "logits/rejected": -2.8844501972198486, + "logps/chosen": -169.279296875, + "logps/rejected": -222.5225830078125, + "loss": 0.1367, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.274822235107422, + "rewards/margins": 2.855617046356201, + "rewards/rejected": -11.130439758300781, + "step": 17219 + }, + { + "epoch": 2.68, + "learning_rate": 1.517488458944966e-06, + "logits/chosen": -2.9556961059570312, + "logits/rejected": -3.0622975826263428, + "logps/chosen": -117.8314208984375, + "logps/rejected": -215.88546752929688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.956876754760742, + "rewards/margins": 11.035684585571289, + "rewards/rejected": -17.99256134033203, + "step": 17220 + }, + { + "epoch": 2.68, + "learning_rate": 1.516755018413818e-06, + "logits/chosen": -2.739917278289795, + "logits/rejected": -3.0526435375213623, + "logps/chosen": -272.32574462890625, + "logps/rejected": -377.42974853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.105108261108398, + "rewards/margins": 11.677229881286621, + "rewards/rejected": -20.782337188720703, + "step": 17221 + }, + { + "epoch": 2.68, + "learning_rate": 1.5160215778826704e-06, + "logits/chosen": -2.5284535884857178, + "logits/rejected": -2.5780563354492188, + "logps/chosen": -330.19427490234375, + "logps/rejected": -443.61968994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.921262264251709, + "rewards/margins": 16.50550079345703, + "rewards/rejected": -22.42676544189453, + "step": 17222 + }, + { + "epoch": 2.68, + "learning_rate": 1.5152881373515225e-06, + "logits/chosen": -2.683490514755249, + "logits/rejected": -2.8386001586914062, + "logps/chosen": -147.79025268554688, + "logps/rejected": -392.4122314453125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.752829551696777, + "rewards/margins": 10.365352630615234, + "rewards/rejected": -19.118183135986328, + "step": 17223 + }, + { + "epoch": 2.68, + "learning_rate": 1.5145546968203746e-06, + "logits/chosen": -2.393735885620117, + "logits/rejected": -2.8396286964416504, + "logps/chosen": -717.289306640625, + "logps/rejected": -667.2005615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.091784477233887, + "rewards/margins": 11.175542831420898, + "rewards/rejected": -18.26732635498047, + "step": 17224 + }, + { + "epoch": 2.68, + "learning_rate": 1.5138212562892266e-06, + "logits/chosen": -2.79646372795105, + "logits/rejected": -2.7024550437927246, + "logps/chosen": -306.4043273925781, + "logps/rejected": -496.2853698730469, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.072717666625977, + "rewards/margins": 7.597235202789307, + "rewards/rejected": -18.669952392578125, + "step": 17225 + }, + { + "epoch": 2.68, + "learning_rate": 1.5130878157580787e-06, + "logits/chosen": -2.5317223072052, + "logits/rejected": -1.515053391456604, + "logps/chosen": -548.657958984375, + "logps/rejected": -388.5770263671875, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.246088027954102, + "rewards/margins": 9.652870178222656, + "rewards/rejected": -19.89896011352539, + "step": 17226 + }, + { + "epoch": 2.68, + "learning_rate": 1.512354375226931e-06, + "logits/chosen": -2.1281769275665283, + "logits/rejected": -2.7993357181549072, + "logps/chosen": -101.45477294921875, + "logps/rejected": -323.49560546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.677982330322266, + "rewards/margins": 10.360502243041992, + "rewards/rejected": -19.038484573364258, + "step": 17227 + }, + { + "epoch": 2.68, + "learning_rate": 1.511620934695783e-06, + "logits/chosen": -2.7415292263031006, + "logits/rejected": -2.821488857269287, + "logps/chosen": -96.6726303100586, + "logps/rejected": -212.75753784179688, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.27646255493164, + "rewards/margins": 9.483868598937988, + "rewards/rejected": -17.760330200195312, + "step": 17228 + }, + { + "epoch": 2.68, + "learning_rate": 1.510887494164635e-06, + "logits/chosen": -2.5423760414123535, + "logits/rejected": -3.0098717212677, + "logps/chosen": -142.58815002441406, + "logps/rejected": -413.4658203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.333497524261475, + "rewards/margins": 10.008148193359375, + "rewards/rejected": -17.341646194458008, + "step": 17229 + }, + { + "epoch": 2.68, + "learning_rate": 1.510154053633487e-06, + "logits/chosen": -1.565375566482544, + "logits/rejected": -2.7107248306274414, + "logps/chosen": -273.535400390625, + "logps/rejected": -550.8133544921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.609874725341797, + "rewards/margins": 9.641359329223633, + "rewards/rejected": -18.251232147216797, + "step": 17230 + }, + { + "epoch": 2.68, + "learning_rate": 1.5094206131023394e-06, + "logits/chosen": -2.6910266876220703, + "logits/rejected": -2.9220035076141357, + "logps/chosen": -143.60972595214844, + "logps/rejected": -279.5635681152344, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.876087188720703, + "rewards/margins": 8.177244186401367, + "rewards/rejected": -15.05333137512207, + "step": 17231 + }, + { + "epoch": 2.68, + "learning_rate": 1.5086871725711915e-06, + "logits/chosen": -1.8614530563354492, + "logits/rejected": -2.8588852882385254, + "logps/chosen": -217.10000610351562, + "logps/rejected": -505.6064147949219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.722770690917969, + "rewards/margins": 12.161255836486816, + "rewards/rejected": -20.88402557373047, + "step": 17232 + }, + { + "epoch": 2.68, + "learning_rate": 1.5079537320400436e-06, + "logits/chosen": -2.6600234508514404, + "logits/rejected": -2.3613946437835693, + "logps/chosen": -294.83099365234375, + "logps/rejected": -224.03567504882812, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.840378761291504, + "rewards/margins": 7.214715003967285, + "rewards/rejected": -15.055093765258789, + "step": 17233 + }, + { + "epoch": 2.68, + "learning_rate": 1.5072202915088957e-06, + "logits/chosen": -2.7139737606048584, + "logits/rejected": -2.5099918842315674, + "logps/chosen": -550.3543701171875, + "logps/rejected": -494.76904296875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.249223709106445, + "rewards/margins": 8.158404350280762, + "rewards/rejected": -21.40762710571289, + "step": 17234 + }, + { + "epoch": 2.68, + "learning_rate": 1.506486850977748e-06, + "logits/chosen": -2.835432529449463, + "logits/rejected": -2.7370927333831787, + "logps/chosen": -342.9751892089844, + "logps/rejected": -322.7420959472656, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.561975479125977, + "rewards/margins": 6.282628536224365, + "rewards/rejected": -17.8446044921875, + "step": 17235 + }, + { + "epoch": 2.68, + "learning_rate": 1.5057534104466e-06, + "logits/chosen": -1.6758075952529907, + "logits/rejected": -2.6423826217651367, + "logps/chosen": -270.9076232910156, + "logps/rejected": -569.8081665039062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.060763359069824, + "rewards/margins": 13.224023818969727, + "rewards/rejected": -22.284788131713867, + "step": 17236 + }, + { + "epoch": 2.68, + "learning_rate": 1.5050199699154521e-06, + "logits/chosen": -1.918793797492981, + "logits/rejected": -2.8619508743286133, + "logps/chosen": -179.18130493164062, + "logps/rejected": -520.5280151367188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.58509349822998, + "rewards/margins": 12.021413803100586, + "rewards/rejected": -22.60650634765625, + "step": 17237 + }, + { + "epoch": 2.68, + "learning_rate": 1.504286529384304e-06, + "logits/chosen": -2.3832688331604004, + "logits/rejected": -2.763018846511841, + "logps/chosen": -193.61007690429688, + "logps/rejected": -348.5521240234375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.53309440612793, + "rewards/margins": 7.326258182525635, + "rewards/rejected": -19.859352111816406, + "step": 17238 + }, + { + "epoch": 2.68, + "learning_rate": 1.5035530888531563e-06, + "logits/chosen": -1.551895260810852, + "logits/rejected": -2.562387228012085, + "logps/chosen": -393.91339111328125, + "logps/rejected": -719.146728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.318377494812012, + "rewards/margins": 10.827472686767578, + "rewards/rejected": -21.145851135253906, + "step": 17239 + }, + { + "epoch": 2.68, + "learning_rate": 1.5028196483220084e-06, + "logits/chosen": -2.385774850845337, + "logits/rejected": -1.9624923467636108, + "logps/chosen": -197.14993286132812, + "logps/rejected": -424.90728759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.939332962036133, + "rewards/margins": 14.0570068359375, + "rewards/rejected": -22.996341705322266, + "step": 17240 + }, + { + "epoch": 2.68, + "learning_rate": 1.5020862077908605e-06, + "logits/chosen": -1.3819236755371094, + "logits/rejected": -2.26568341255188, + "logps/chosen": -279.8935852050781, + "logps/rejected": -645.990478515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.200121879577637, + "rewards/margins": 14.078742027282715, + "rewards/rejected": -23.27886390686035, + "step": 17241 + }, + { + "epoch": 2.68, + "learning_rate": 1.5013527672597126e-06, + "logits/chosen": -2.6948025226593018, + "logits/rejected": -1.3922499418258667, + "logps/chosen": -634.1014404296875, + "logps/rejected": -315.16204833984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.736907958984375, + "rewards/margins": 9.924845695495605, + "rewards/rejected": -15.661754608154297, + "step": 17242 + }, + { + "epoch": 2.68, + "learning_rate": 1.500619326728565e-06, + "logits/chosen": -0.8460280299186707, + "logits/rejected": -1.626001000404358, + "logps/chosen": -159.78945922851562, + "logps/rejected": -438.14263916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.29378604888916, + "rewards/margins": 11.690387725830078, + "rewards/rejected": -19.984172821044922, + "step": 17243 + }, + { + "epoch": 2.68, + "learning_rate": 1.499885886197417e-06, + "logits/chosen": -1.4201927185058594, + "logits/rejected": -2.6384263038635254, + "logps/chosen": -208.8475341796875, + "logps/rejected": -642.290283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.670377731323242, + "rewards/margins": 16.310985565185547, + "rewards/rejected": -27.98136329650879, + "step": 17244 + }, + { + "epoch": 2.68, + "learning_rate": 1.499152445666269e-06, + "logits/chosen": -2.6804299354553223, + "logits/rejected": -1.7650411128997803, + "logps/chosen": -287.3376770019531, + "logps/rejected": -256.36968994140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.898515701293945, + "rewards/margins": 9.107582092285156, + "rewards/rejected": -20.006099700927734, + "step": 17245 + }, + { + "epoch": 2.68, + "learning_rate": 1.4984190051351212e-06, + "logits/chosen": -2.7362844944000244, + "logits/rejected": -1.8342441320419312, + "logps/chosen": -322.85015869140625, + "logps/rejected": -373.32891845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.27871322631836, + "rewards/margins": 11.46649169921875, + "rewards/rejected": -19.74520492553711, + "step": 17246 + }, + { + "epoch": 2.68, + "learning_rate": 1.4976855646039733e-06, + "logits/chosen": -2.5097081661224365, + "logits/rejected": -2.953979015350342, + "logps/chosen": -115.5135498046875, + "logps/rejected": -333.02899169921875, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.542404174804688, + "rewards/margins": 8.08287525177002, + "rewards/rejected": -17.625280380249023, + "step": 17247 + }, + { + "epoch": 2.68, + "learning_rate": 1.4969521240728253e-06, + "logits/chosen": -2.7714266777038574, + "logits/rejected": -2.914022922515869, + "logps/chosen": -175.41311645507812, + "logps/rejected": -197.71163940429688, + "loss": 0.0345, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.910964012145996, + "rewards/margins": 5.071569919586182, + "rewards/rejected": -14.982534408569336, + "step": 17248 + }, + { + "epoch": 2.68, + "learning_rate": 1.4962186835416774e-06, + "logits/chosen": -2.4350879192352295, + "logits/rejected": -3.0003011226654053, + "logps/chosen": -205.71548461914062, + "logps/rejected": -492.3160400390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.55610179901123, + "rewards/margins": 10.229754447937012, + "rewards/rejected": -20.785856246948242, + "step": 17249 + }, + { + "epoch": 2.68, + "learning_rate": 1.4954852430105295e-06, + "logits/chosen": -0.7976529002189636, + "logits/rejected": -2.1694693565368652, + "logps/chosen": -178.41998291015625, + "logps/rejected": -458.0115051269531, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.15102481842041, + "rewards/margins": 7.750979423522949, + "rewards/rejected": -19.90200424194336, + "step": 17250 + }, + { + "epoch": 2.68, + "learning_rate": 1.4947518024793816e-06, + "logits/chosen": -2.2077510356903076, + "logits/rejected": -2.26104736328125, + "logps/chosen": -238.5973663330078, + "logps/rejected": -415.8391418457031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.869123458862305, + "rewards/margins": 13.277316093444824, + "rewards/rejected": -20.146438598632812, + "step": 17251 + }, + { + "epoch": 2.68, + "learning_rate": 1.494018361948234e-06, + "logits/chosen": -2.0539112091064453, + "logits/rejected": -2.7331271171569824, + "logps/chosen": -285.15997314453125, + "logps/rejected": -386.21710205078125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.284308433532715, + "rewards/margins": 7.827964782714844, + "rewards/rejected": -22.112274169921875, + "step": 17252 + }, + { + "epoch": 2.68, + "learning_rate": 1.493284921417086e-06, + "logits/chosen": -2.6172289848327637, + "logits/rejected": -2.5176000595092773, + "logps/chosen": -106.8894271850586, + "logps/rejected": -418.865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.745220184326172, + "rewards/margins": 13.071210861206055, + "rewards/rejected": -21.816431045532227, + "step": 17253 + }, + { + "epoch": 2.68, + "learning_rate": 1.492551480885938e-06, + "logits/chosen": -1.1541526317596436, + "logits/rejected": -2.158513307571411, + "logps/chosen": -145.07000732421875, + "logps/rejected": -421.142333984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.014660835266113, + "rewards/margins": 8.896432876586914, + "rewards/rejected": -18.911094665527344, + "step": 17254 + }, + { + "epoch": 2.68, + "learning_rate": 1.4918180403547902e-06, + "logits/chosen": -1.7449835538864136, + "logits/rejected": -2.7012650966644287, + "logps/chosen": -199.14724731445312, + "logps/rejected": -565.02685546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.958612442016602, + "rewards/margins": 13.982437133789062, + "rewards/rejected": -25.941049575805664, + "step": 17255 + }, + { + "epoch": 2.68, + "learning_rate": 1.4910845998236425e-06, + "logits/chosen": -2.55873703956604, + "logits/rejected": -1.7060493230819702, + "logps/chosen": -199.47982788085938, + "logps/rejected": -347.12823486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.15127944946289, + "rewards/margins": 10.594610214233398, + "rewards/rejected": -19.74588966369629, + "step": 17256 + }, + { + "epoch": 2.68, + "learning_rate": 1.4903511592924944e-06, + "logits/chosen": -1.9172463417053223, + "logits/rejected": -2.8067803382873535, + "logps/chosen": -158.23228454589844, + "logps/rejected": -352.7520751953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.297141075134277, + "rewards/margins": 12.860677719116211, + "rewards/rejected": -20.157819747924805, + "step": 17257 + }, + { + "epoch": 2.68, + "learning_rate": 1.4896177187613465e-06, + "logits/chosen": -2.7319071292877197, + "logits/rejected": -2.6696527004241943, + "logps/chosen": -226.86679077148438, + "logps/rejected": -421.1392822265625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.893293380737305, + "rewards/margins": 8.771463394165039, + "rewards/rejected": -17.664756774902344, + "step": 17258 + }, + { + "epoch": 2.68, + "learning_rate": 1.4888842782301986e-06, + "logits/chosen": -2.526522397994995, + "logits/rejected": -2.808748722076416, + "logps/chosen": -185.58468627929688, + "logps/rejected": -248.59568786621094, + "loss": 0.0611, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.065831184387207, + "rewards/margins": 8.415022850036621, + "rewards/rejected": -16.480854034423828, + "step": 17259 + }, + { + "epoch": 2.68, + "learning_rate": 1.4881508376990509e-06, + "logits/chosen": -2.6705503463745117, + "logits/rejected": -2.6722824573516846, + "logps/chosen": -248.43881225585938, + "logps/rejected": -323.374267578125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.847793579101562, + "rewards/margins": 8.079254150390625, + "rewards/rejected": -16.927047729492188, + "step": 17260 + }, + { + "epoch": 2.68, + "learning_rate": 1.487417397167903e-06, + "logits/chosen": -1.8281822204589844, + "logits/rejected": -2.6541335582733154, + "logps/chosen": -138.50869750976562, + "logps/rejected": -322.4317626953125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.776870727539062, + "rewards/margins": 6.394861221313477, + "rewards/rejected": -17.17173194885254, + "step": 17261 + }, + { + "epoch": 2.68, + "learning_rate": 1.486683956636755e-06, + "logits/chosen": -2.5532238483428955, + "logits/rejected": -2.879676103591919, + "logps/chosen": -867.3214111328125, + "logps/rejected": -845.2342529296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.98366641998291, + "rewards/margins": 10.12592601776123, + "rewards/rejected": -20.10959243774414, + "step": 17262 + }, + { + "epoch": 2.68, + "learning_rate": 1.4859505161056071e-06, + "logits/chosen": -1.5290857553482056, + "logits/rejected": -2.8782601356506348, + "logps/chosen": -116.35355377197266, + "logps/rejected": -370.8853759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.735899925231934, + "rewards/margins": 10.654495239257812, + "rewards/rejected": -18.390396118164062, + "step": 17263 + }, + { + "epoch": 2.68, + "learning_rate": 1.4852170755744592e-06, + "logits/chosen": -2.184314012527466, + "logits/rejected": -2.645128011703491, + "logps/chosen": -149.50936889648438, + "logps/rejected": -356.1111755371094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.31811809539795, + "rewards/margins": 12.327938079833984, + "rewards/rejected": -21.64605712890625, + "step": 17264 + }, + { + "epoch": 2.69, + "learning_rate": 1.4844836350433115e-06, + "logits/chosen": -2.304215669631958, + "logits/rejected": -2.7150466442108154, + "logps/chosen": -158.33486938476562, + "logps/rejected": -394.8203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.4644775390625, + "rewards/margins": 10.668773651123047, + "rewards/rejected": -20.133251190185547, + "step": 17265 + }, + { + "epoch": 2.69, + "learning_rate": 1.4837501945121636e-06, + "logits/chosen": -2.612781047821045, + "logits/rejected": -2.8932809829711914, + "logps/chosen": -160.87400817871094, + "logps/rejected": -347.03253173828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.284201622009277, + "rewards/margins": 9.742305755615234, + "rewards/rejected": -17.026506423950195, + "step": 17266 + }, + { + "epoch": 2.69, + "learning_rate": 1.4830167539810155e-06, + "logits/chosen": -2.515624523162842, + "logits/rejected": -2.4963107109069824, + "logps/chosen": -230.81558227539062, + "logps/rejected": -264.4595947265625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.032770156860352, + "rewards/margins": 6.8694682121276855, + "rewards/rejected": -16.902238845825195, + "step": 17267 + }, + { + "epoch": 2.69, + "learning_rate": 1.4822833134498676e-06, + "logits/chosen": -1.3575632572174072, + "logits/rejected": -2.594665765762329, + "logps/chosen": -290.404052734375, + "logps/rejected": -415.7292175292969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.937405586242676, + "rewards/margins": 9.010868072509766, + "rewards/rejected": -19.948272705078125, + "step": 17268 + }, + { + "epoch": 2.69, + "learning_rate": 1.4815498729187199e-06, + "logits/chosen": -2.641484260559082, + "logits/rejected": -2.9289116859436035, + "logps/chosen": -291.6146240234375, + "logps/rejected": -494.53607177734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.816783905029297, + "rewards/margins": 9.833304405212402, + "rewards/rejected": -20.650089263916016, + "step": 17269 + }, + { + "epoch": 2.69, + "learning_rate": 1.480816432387572e-06, + "logits/chosen": -2.6705517768859863, + "logits/rejected": -2.5233871936798096, + "logps/chosen": -200.79251098632812, + "logps/rejected": -374.16400146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.269855976104736, + "rewards/margins": 12.741901397705078, + "rewards/rejected": -20.011756896972656, + "step": 17270 + }, + { + "epoch": 2.69, + "learning_rate": 1.480082991856424e-06, + "logits/chosen": -2.8796730041503906, + "logits/rejected": -2.5848212242126465, + "logps/chosen": -457.37744140625, + "logps/rejected": -654.2531127929688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.159256935119629, + "rewards/margins": 10.641518592834473, + "rewards/rejected": -18.8007755279541, + "step": 17271 + }, + { + "epoch": 2.69, + "learning_rate": 1.4793495513252761e-06, + "logits/chosen": -1.0764756202697754, + "logits/rejected": -2.475588798522949, + "logps/chosen": -169.51898193359375, + "logps/rejected": -407.77569580078125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.24892520904541, + "rewards/margins": 12.324495315551758, + "rewards/rejected": -22.573421478271484, + "step": 17272 + }, + { + "epoch": 2.69, + "learning_rate": 1.4786161107941284e-06, + "logits/chosen": -2.2578067779541016, + "logits/rejected": -2.8227007389068604, + "logps/chosen": -132.6544189453125, + "logps/rejected": -443.8183898925781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.07210922241211, + "rewards/margins": 12.477675437927246, + "rewards/rejected": -20.549785614013672, + "step": 17273 + }, + { + "epoch": 2.69, + "learning_rate": 1.4778826702629805e-06, + "logits/chosen": -2.357149839401245, + "logits/rejected": -2.8192458152770996, + "logps/chosen": -170.1901397705078, + "logps/rejected": -456.294921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.67139720916748, + "rewards/margins": 10.254205703735352, + "rewards/rejected": -19.92560386657715, + "step": 17274 + }, + { + "epoch": 2.69, + "learning_rate": 1.4771492297318326e-06, + "logits/chosen": -2.791374921798706, + "logits/rejected": -2.6597394943237305, + "logps/chosen": -614.4552001953125, + "logps/rejected": -401.4075012207031, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.312545776367188, + "rewards/margins": 7.702296733856201, + "rewards/rejected": -19.014842987060547, + "step": 17275 + }, + { + "epoch": 2.69, + "learning_rate": 1.4764157892006847e-06, + "logits/chosen": -2.3497073650360107, + "logits/rejected": -2.46659779548645, + "logps/chosen": -156.03497314453125, + "logps/rejected": -372.7758483886719, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.923114776611328, + "rewards/margins": 7.90728759765625, + "rewards/rejected": -15.830402374267578, + "step": 17276 + }, + { + "epoch": 2.69, + "learning_rate": 1.4756823486695368e-06, + "logits/chosen": -1.648950219154358, + "logits/rejected": -2.797849178314209, + "logps/chosen": -151.48211669921875, + "logps/rejected": -253.0830535888672, + "loss": 0.029, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.394790649414062, + "rewards/margins": 5.846871376037598, + "rewards/rejected": -16.241662979125977, + "step": 17277 + }, + { + "epoch": 2.69, + "learning_rate": 1.474948908138389e-06, + "logits/chosen": -2.4350545406341553, + "logits/rejected": -2.5696542263031006, + "logps/chosen": -144.4069366455078, + "logps/rejected": -310.44708251953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5867414474487305, + "rewards/margins": 8.934581756591797, + "rewards/rejected": -16.521324157714844, + "step": 17278 + }, + { + "epoch": 2.69, + "learning_rate": 1.474215467607241e-06, + "logits/chosen": -2.7573647499084473, + "logits/rejected": -2.064897060394287, + "logps/chosen": -293.3070068359375, + "logps/rejected": -225.32980346679688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9673309326171875, + "rewards/margins": 9.257858276367188, + "rewards/rejected": -15.225189208984375, + "step": 17279 + }, + { + "epoch": 2.69, + "learning_rate": 1.473482027076093e-06, + "logits/chosen": -1.4983880519866943, + "logits/rejected": -2.0925843715667725, + "logps/chosen": -173.44558715820312, + "logps/rejected": -649.8568725585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.91684341430664, + "rewards/margins": 15.13763427734375, + "rewards/rejected": -24.05447769165039, + "step": 17280 + }, + { + "epoch": 2.69, + "learning_rate": 1.4727485865449454e-06, + "logits/chosen": -2.785861015319824, + "logits/rejected": -2.4438400268554688, + "logps/chosen": -214.82391357421875, + "logps/rejected": -295.0636901855469, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.867119789123535, + "rewards/margins": 7.4580793380737305, + "rewards/rejected": -15.325199127197266, + "step": 17281 + }, + { + "epoch": 2.69, + "learning_rate": 1.4720151460137975e-06, + "logits/chosen": -1.1614240407943726, + "logits/rejected": -2.3832216262817383, + "logps/chosen": -152.00161743164062, + "logps/rejected": -429.054443359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8021697998046875, + "rewards/margins": 9.499573707580566, + "rewards/rejected": -16.301742553710938, + "step": 17282 + }, + { + "epoch": 2.69, + "learning_rate": 1.4712817054826496e-06, + "logits/chosen": -1.7157410383224487, + "logits/rejected": -2.00276517868042, + "logps/chosen": -242.72952270507812, + "logps/rejected": -332.8305358886719, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.342047691345215, + "rewards/margins": 6.280104637145996, + "rewards/rejected": -19.62215232849121, + "step": 17283 + }, + { + "epoch": 2.69, + "learning_rate": 1.4705482649515016e-06, + "logits/chosen": -2.56292462348938, + "logits/rejected": -2.7157070636749268, + "logps/chosen": -841.1298217773438, + "logps/rejected": -683.4527587890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.72411823272705, + "rewards/margins": 11.229877471923828, + "rewards/rejected": -20.953994750976562, + "step": 17284 + }, + { + "epoch": 2.69, + "learning_rate": 1.4698148244203537e-06, + "logits/chosen": -2.5586905479431152, + "logits/rejected": -2.8586349487304688, + "logps/chosen": -69.9813232421875, + "logps/rejected": -240.82135009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5059661865234375, + "rewards/margins": 10.65316390991211, + "rewards/rejected": -16.159130096435547, + "step": 17285 + }, + { + "epoch": 2.69, + "learning_rate": 1.469081383889206e-06, + "logits/chosen": -1.149910569190979, + "logits/rejected": -1.8470990657806396, + "logps/chosen": -309.124755859375, + "logps/rejected": -374.1025390625, + "loss": 0.9231, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.293638229370117, + "rewards/margins": 6.839818000793457, + "rewards/rejected": -18.133455276489258, + "step": 17286 + }, + { + "epoch": 2.69, + "learning_rate": 1.468347943358058e-06, + "logits/chosen": -2.6274991035461426, + "logits/rejected": -2.057199239730835, + "logps/chosen": -292.3863525390625, + "logps/rejected": -360.70623779296875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.583337783813477, + "rewards/margins": 7.800279140472412, + "rewards/rejected": -19.383617401123047, + "step": 17287 + }, + { + "epoch": 2.69, + "learning_rate": 1.46761450282691e-06, + "logits/chosen": -2.1471192836761475, + "logits/rejected": -2.8111276626586914, + "logps/chosen": -511.4263610839844, + "logps/rejected": -564.0476684570312, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.343511581420898, + "rewards/margins": 8.339885711669922, + "rewards/rejected": -16.68339729309082, + "step": 17288 + }, + { + "epoch": 2.69, + "learning_rate": 1.466881062295762e-06, + "logits/chosen": -1.7760151624679565, + "logits/rejected": -2.5795745849609375, + "logps/chosen": -248.70948791503906, + "logps/rejected": -525.4880981445312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.787910461425781, + "rewards/margins": 13.540648460388184, + "rewards/rejected": -23.32855987548828, + "step": 17289 + }, + { + "epoch": 2.69, + "learning_rate": 1.4661476217646144e-06, + "logits/chosen": -1.3006751537322998, + "logits/rejected": -2.172713041305542, + "logps/chosen": -88.60406494140625, + "logps/rejected": -295.86407470703125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.842166900634766, + "rewards/margins": 7.168011665344238, + "rewards/rejected": -14.010178565979004, + "step": 17290 + }, + { + "epoch": 2.69, + "learning_rate": 1.4654141812334665e-06, + "logits/chosen": -2.3311331272125244, + "logits/rejected": -2.7214763164520264, + "logps/chosen": -221.49456787109375, + "logps/rejected": -508.21075439453125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.419715881347656, + "rewards/margins": 11.035234451293945, + "rewards/rejected": -21.4549503326416, + "step": 17291 + }, + { + "epoch": 2.69, + "learning_rate": 1.4646807407023186e-06, + "logits/chosen": -1.7073113918304443, + "logits/rejected": -2.574016809463501, + "logps/chosen": -259.0401306152344, + "logps/rejected": -472.50457763671875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.378385543823242, + "rewards/margins": 9.526887893676758, + "rewards/rejected": -19.9052734375, + "step": 17292 + }, + { + "epoch": 2.69, + "learning_rate": 1.4639473001711707e-06, + "logits/chosen": -1.5987167358398438, + "logits/rejected": -2.40372633934021, + "logps/chosen": -183.79261779785156, + "logps/rejected": -423.69769287109375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.303422927856445, + "rewards/margins": 7.795600414276123, + "rewards/rejected": -18.099023818969727, + "step": 17293 + }, + { + "epoch": 2.69, + "learning_rate": 1.463213859640023e-06, + "logits/chosen": -2.74141263961792, + "logits/rejected": -1.8929338455200195, + "logps/chosen": -268.885498046875, + "logps/rejected": -379.35333251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.683504104614258, + "rewards/margins": 11.364059448242188, + "rewards/rejected": -21.047561645507812, + "step": 17294 + }, + { + "epoch": 2.69, + "learning_rate": 1.462480419108875e-06, + "logits/chosen": -2.15824556350708, + "logits/rejected": -2.9459495544433594, + "logps/chosen": -114.03057098388672, + "logps/rejected": -370.9447937011719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.975561618804932, + "rewards/margins": 11.707021713256836, + "rewards/rejected": -18.68258285522461, + "step": 17295 + }, + { + "epoch": 2.69, + "learning_rate": 1.4617469785777272e-06, + "logits/chosen": -1.1790056228637695, + "logits/rejected": -1.8241546154022217, + "logps/chosen": -228.11573791503906, + "logps/rejected": -484.16131591796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.714960098266602, + "rewards/margins": 11.221817016601562, + "rewards/rejected": -22.936777114868164, + "step": 17296 + }, + { + "epoch": 2.69, + "learning_rate": 1.461013538046579e-06, + "logits/chosen": -2.69089674949646, + "logits/rejected": -1.8801449537277222, + "logps/chosen": -513.6228637695312, + "logps/rejected": -408.7270812988281, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.477718353271484, + "rewards/margins": 9.352161407470703, + "rewards/rejected": -19.829879760742188, + "step": 17297 + }, + { + "epoch": 2.69, + "learning_rate": 1.4602800975154313e-06, + "logits/chosen": -2.77982234954834, + "logits/rejected": -2.322263717651367, + "logps/chosen": -1003.2137451171875, + "logps/rejected": -935.5480346679688, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.631993293762207, + "rewards/margins": 10.512826919555664, + "rewards/rejected": -22.144821166992188, + "step": 17298 + }, + { + "epoch": 2.69, + "learning_rate": 1.4595466569842834e-06, + "logits/chosen": -1.9370746612548828, + "logits/rejected": -2.1627230644226074, + "logps/chosen": -203.35244750976562, + "logps/rejected": -274.9932556152344, + "loss": 0.1427, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.627152919769287, + "rewards/margins": 6.646839618682861, + "rewards/rejected": -14.273992538452148, + "step": 17299 + }, + { + "epoch": 2.69, + "learning_rate": 1.4588132164531355e-06, + "logits/chosen": -2.121335029602051, + "logits/rejected": -2.4493556022644043, + "logps/chosen": -215.6224365234375, + "logps/rejected": -236.87181091308594, + "loss": 0.4137, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.343717575073242, + "rewards/margins": 4.0649003982543945, + "rewards/rejected": -16.40861701965332, + "step": 17300 + }, + { + "epoch": 2.69, + "learning_rate": 1.4580797759219876e-06, + "logits/chosen": -2.938096761703491, + "logits/rejected": -1.7341920137405396, + "logps/chosen": -338.641357421875, + "logps/rejected": -270.9794616699219, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.950712203979492, + "rewards/margins": 9.787428855895996, + "rewards/rejected": -19.738140106201172, + "step": 17301 + }, + { + "epoch": 2.69, + "learning_rate": 1.45734633539084e-06, + "logits/chosen": -2.6965651512145996, + "logits/rejected": -1.6532078981399536, + "logps/chosen": -548.6207275390625, + "logps/rejected": -360.3243408203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.335217475891113, + "rewards/margins": 12.015237808227539, + "rewards/rejected": -16.35045623779297, + "step": 17302 + }, + { + "epoch": 2.69, + "learning_rate": 1.456612894859692e-06, + "logits/chosen": -2.2601187229156494, + "logits/rejected": -2.7944321632385254, + "logps/chosen": -570.0855712890625, + "logps/rejected": -761.8078002929688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.994180679321289, + "rewards/margins": 14.964280128479004, + "rewards/rejected": -21.95846176147461, + "step": 17303 + }, + { + "epoch": 2.69, + "learning_rate": 1.455879454328544e-06, + "logits/chosen": -2.6361334323883057, + "logits/rejected": -2.8953914642333984, + "logps/chosen": -80.23768615722656, + "logps/rejected": -205.63275146484375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.843989372253418, + "rewards/margins": 7.4280900955200195, + "rewards/rejected": -13.272079467773438, + "step": 17304 + }, + { + "epoch": 2.69, + "learning_rate": 1.4551460137973962e-06, + "logits/chosen": -2.7194244861602783, + "logits/rejected": -2.71380352973938, + "logps/chosen": -194.28054809570312, + "logps/rejected": -207.23629760742188, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.0297212600708, + "rewards/margins": 7.611464500427246, + "rewards/rejected": -16.641185760498047, + "step": 17305 + }, + { + "epoch": 2.69, + "learning_rate": 1.4544125732662483e-06, + "logits/chosen": -2.668715238571167, + "logits/rejected": -2.133554458618164, + "logps/chosen": -384.7786865234375, + "logps/rejected": -445.5819396972656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.513925552368164, + "rewards/margins": 9.821142196655273, + "rewards/rejected": -18.335067749023438, + "step": 17306 + }, + { + "epoch": 2.69, + "learning_rate": 1.4536791327351004e-06, + "logits/chosen": -1.9574296474456787, + "logits/rejected": -2.193058967590332, + "logps/chosen": -178.03570556640625, + "logps/rejected": -414.3614501953125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.748018264770508, + "rewards/margins": 8.751567840576172, + "rewards/rejected": -19.49958610534668, + "step": 17307 + }, + { + "epoch": 2.69, + "learning_rate": 1.4529456922039524e-06, + "logits/chosen": -2.576516628265381, + "logits/rejected": -2.9169695377349854, + "logps/chosen": -442.48199462890625, + "logps/rejected": -586.4730224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.833317756652832, + "rewards/margins": 14.1191987991333, + "rewards/rejected": -19.952516555786133, + "step": 17308 + }, + { + "epoch": 2.69, + "learning_rate": 1.4522122516728045e-06, + "logits/chosen": -2.7361867427825928, + "logits/rejected": -1.2099252939224243, + "logps/chosen": -961.408447265625, + "logps/rejected": -505.1269836425781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.9513521194458, + "rewards/margins": 14.31556224822998, + "rewards/rejected": -24.26691436767578, + "step": 17309 + }, + { + "epoch": 2.69, + "learning_rate": 1.4514788111416566e-06, + "logits/chosen": -2.3450801372528076, + "logits/rejected": -2.8792052268981934, + "logps/chosen": -89.23590087890625, + "logps/rejected": -228.21694946289062, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.804527282714844, + "rewards/margins": 6.193171501159668, + "rewards/rejected": -12.997697830200195, + "step": 17310 + }, + { + "epoch": 2.69, + "learning_rate": 1.450745370610509e-06, + "logits/chosen": -2.698472023010254, + "logits/rejected": -2.8307578563690186, + "logps/chosen": -238.68934631347656, + "logps/rejected": -357.6536865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.152498245239258, + "rewards/margins": 10.927483558654785, + "rewards/rejected": -19.07998275756836, + "step": 17311 + }, + { + "epoch": 2.69, + "learning_rate": 1.450011930079361e-06, + "logits/chosen": -2.814896583557129, + "logits/rejected": -2.475648880004883, + "logps/chosen": -619.4378051757812, + "logps/rejected": -527.9599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.017570495605469, + "rewards/margins": 14.351696014404297, + "rewards/rejected": -19.369266510009766, + "step": 17312 + }, + { + "epoch": 2.69, + "learning_rate": 1.4492784895482131e-06, + "logits/chosen": -2.536856174468994, + "logits/rejected": -2.6711232662200928, + "logps/chosen": -198.85455322265625, + "logps/rejected": -382.1683349609375, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.100790023803711, + "rewards/margins": 9.487685203552246, + "rewards/rejected": -20.588476181030273, + "step": 17313 + }, + { + "epoch": 2.69, + "learning_rate": 1.4485450490170652e-06, + "logits/chosen": -2.3728158473968506, + "logits/rejected": -2.7157230377197266, + "logps/chosen": -462.2818298339844, + "logps/rejected": -863.0914306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.160636901855469, + "rewards/margins": 19.959758758544922, + "rewards/rejected": -27.12039566040039, + "step": 17314 + }, + { + "epoch": 2.69, + "learning_rate": 1.4478116084859175e-06, + "logits/chosen": -1.5986248254776, + "logits/rejected": -2.7708592414855957, + "logps/chosen": -257.5826416015625, + "logps/rejected": -454.19354248046875, + "loss": 0.0569, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.887760639190674, + "rewards/margins": 7.796810150146484, + "rewards/rejected": -15.6845703125, + "step": 17315 + }, + { + "epoch": 2.69, + "learning_rate": 1.4470781679547694e-06, + "logits/chosen": -1.8358345031738281, + "logits/rejected": -2.984097719192505, + "logps/chosen": -220.81106567382812, + "logps/rejected": -473.09039306640625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.328944206237793, + "rewards/margins": 9.0990629196167, + "rewards/rejected": -21.428007125854492, + "step": 17316 + }, + { + "epoch": 2.69, + "learning_rate": 1.4463447274236215e-06, + "logits/chosen": -2.5129029750823975, + "logits/rejected": -2.6281425952911377, + "logps/chosen": -646.1135864257812, + "logps/rejected": -749.934814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.098885536193848, + "rewards/margins": 14.193338394165039, + "rewards/rejected": -22.29222297668457, + "step": 17317 + }, + { + "epoch": 2.69, + "learning_rate": 1.4456112868924736e-06, + "logits/chosen": -1.496678352355957, + "logits/rejected": -2.3372912406921387, + "logps/chosen": -262.44256591796875, + "logps/rejected": -464.1365051269531, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.753975868225098, + "rewards/margins": 7.941042423248291, + "rewards/rejected": -17.695018768310547, + "step": 17318 + }, + { + "epoch": 2.69, + "learning_rate": 1.4448778463613259e-06, + "logits/chosen": -2.5598807334899902, + "logits/rejected": -2.616457939147949, + "logps/chosen": -222.4458465576172, + "logps/rejected": -329.477294921875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.528493881225586, + "rewards/margins": 10.909442901611328, + "rewards/rejected": -21.437938690185547, + "step": 17319 + }, + { + "epoch": 2.69, + "learning_rate": 1.444144405830178e-06, + "logits/chosen": -2.869741678237915, + "logits/rejected": -1.732858419418335, + "logps/chosen": -311.9334411621094, + "logps/rejected": -296.1086120605469, + "loss": 0.1616, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.164799690246582, + "rewards/margins": 3.3619184494018555, + "rewards/rejected": -15.526718139648438, + "step": 17320 + }, + { + "epoch": 2.69, + "learning_rate": 1.44341096529903e-06, + "logits/chosen": -1.8125438690185547, + "logits/rejected": -2.928069591522217, + "logps/chosen": -212.28823852539062, + "logps/rejected": -376.4320068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.997148513793945, + "rewards/margins": 10.983905792236328, + "rewards/rejected": -15.981054306030273, + "step": 17321 + }, + { + "epoch": 2.69, + "learning_rate": 1.4426775247678821e-06, + "logits/chosen": -2.0947108268737793, + "logits/rejected": -2.6063098907470703, + "logps/chosen": -253.3086395263672, + "logps/rejected": -490.737548828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.392795562744141, + "rewards/margins": 11.953105926513672, + "rewards/rejected": -19.345901489257812, + "step": 17322 + }, + { + "epoch": 2.69, + "learning_rate": 1.4419440842367344e-06, + "logits/chosen": -2.469475507736206, + "logits/rejected": -2.8169052600860596, + "logps/chosen": -216.99539184570312, + "logps/rejected": -331.84637451171875, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.756344318389893, + "rewards/margins": 5.4502458572387695, + "rewards/rejected": -13.20659065246582, + "step": 17323 + }, + { + "epoch": 2.69, + "learning_rate": 1.4412106437055865e-06, + "logits/chosen": -2.8651881217956543, + "logits/rejected": -2.9921817779541016, + "logps/chosen": -275.8644104003906, + "logps/rejected": -252.37823486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.117175817489624, + "rewards/margins": 12.819270133972168, + "rewards/rejected": -14.936445236206055, + "step": 17324 + }, + { + "epoch": 2.69, + "learning_rate": 1.4404772031744386e-06, + "logits/chosen": -2.138820171356201, + "logits/rejected": -2.7470171451568604, + "logps/chosen": -228.02685546875, + "logps/rejected": -314.5445861816406, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.172101020812988, + "rewards/margins": 8.534664154052734, + "rewards/rejected": -16.70676612854004, + "step": 17325 + }, + { + "epoch": 2.69, + "learning_rate": 1.4397437626432905e-06, + "logits/chosen": -2.971571445465088, + "logits/rejected": -2.6067793369293213, + "logps/chosen": -223.0619659423828, + "logps/rejected": -383.22283935546875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.958274841308594, + "rewards/margins": 13.830645561218262, + "rewards/rejected": -19.788921356201172, + "step": 17326 + }, + { + "epoch": 2.69, + "learning_rate": 1.4390103221121428e-06, + "logits/chosen": -2.348381996154785, + "logits/rejected": -2.063354253768921, + "logps/chosen": -571.8529663085938, + "logps/rejected": -572.083251953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.055421829223633, + "rewards/margins": 11.221576690673828, + "rewards/rejected": -21.27699851989746, + "step": 17327 + }, + { + "epoch": 2.69, + "learning_rate": 1.4382768815809949e-06, + "logits/chosen": -2.2287003993988037, + "logits/rejected": -2.665653705596924, + "logps/chosen": -467.59393310546875, + "logps/rejected": -651.6973876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.606328964233398, + "rewards/margins": 11.471502304077148, + "rewards/rejected": -23.077831268310547, + "step": 17328 + }, + { + "epoch": 2.7, + "learning_rate": 1.437543441049847e-06, + "logits/chosen": -2.705972194671631, + "logits/rejected": -1.762721061706543, + "logps/chosen": -427.98095703125, + "logps/rejected": -515.556884765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0961737632751465, + "rewards/margins": 11.642393112182617, + "rewards/rejected": -18.738567352294922, + "step": 17329 + }, + { + "epoch": 2.7, + "learning_rate": 1.436810000518699e-06, + "logits/chosen": -2.5176570415496826, + "logits/rejected": -2.687561511993408, + "logps/chosen": -184.52273559570312, + "logps/rejected": -234.46006774902344, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.039319038391113, + "rewards/margins": 8.29842758178711, + "rewards/rejected": -19.337745666503906, + "step": 17330 + }, + { + "epoch": 2.7, + "learning_rate": 1.4360765599875512e-06, + "logits/chosen": -2.5835013389587402, + "logits/rejected": -2.542248487472534, + "logps/chosen": -429.4010009765625, + "logps/rejected": -504.54443359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.989608764648438, + "rewards/margins": 9.830158233642578, + "rewards/rejected": -23.819766998291016, + "step": 17331 + }, + { + "epoch": 2.7, + "learning_rate": 1.4353431194564035e-06, + "logits/chosen": -1.5807888507843018, + "logits/rejected": -2.526487350463867, + "logps/chosen": -239.1234130859375, + "logps/rejected": -591.0712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.167802810668945, + "rewards/margins": 14.653328895568848, + "rewards/rejected": -22.82113265991211, + "step": 17332 + }, + { + "epoch": 2.7, + "learning_rate": 1.4346096789252555e-06, + "logits/chosen": -2.540179967880249, + "logits/rejected": -1.1063220500946045, + "logps/chosen": -213.28338623046875, + "logps/rejected": -209.79571533203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.53933048248291, + "rewards/margins": 11.297345161437988, + "rewards/rejected": -16.8366756439209, + "step": 17333 + }, + { + "epoch": 2.7, + "learning_rate": 1.4338762383941076e-06, + "logits/chosen": -2.7420477867126465, + "logits/rejected": -2.0687665939331055, + "logps/chosen": -266.7193298339844, + "logps/rejected": -331.3260498046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.574548721313477, + "rewards/margins": 10.94169807434082, + "rewards/rejected": -23.516246795654297, + "step": 17334 + }, + { + "epoch": 2.7, + "learning_rate": 1.4331427978629597e-06, + "logits/chosen": -2.2123942375183105, + "logits/rejected": -2.6717007160186768, + "logps/chosen": -230.26144409179688, + "logps/rejected": -432.9322509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.785393714904785, + "rewards/margins": 10.594396591186523, + "rewards/rejected": -18.379789352416992, + "step": 17335 + }, + { + "epoch": 2.7, + "learning_rate": 1.4324093573318118e-06, + "logits/chosen": -2.361999273300171, + "logits/rejected": -3.0028576850891113, + "logps/chosen": -142.1826171875, + "logps/rejected": -281.319091796875, + "loss": 0.5207, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.749805450439453, + "rewards/margins": 4.908761978149414, + "rewards/rejected": -15.658567428588867, + "step": 17336 + }, + { + "epoch": 2.7, + "learning_rate": 1.431675916800664e-06, + "logits/chosen": -1.1159381866455078, + "logits/rejected": -2.759439706802368, + "logps/chosen": -212.76492309570312, + "logps/rejected": -420.17474365234375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.129546165466309, + "rewards/margins": 7.408645153045654, + "rewards/rejected": -18.538190841674805, + "step": 17337 + }, + { + "epoch": 2.7, + "learning_rate": 1.430942476269516e-06, + "logits/chosen": -2.9588775634765625, + "logits/rejected": -2.990182399749756, + "logps/chosen": -501.8068542480469, + "logps/rejected": -528.1622924804688, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.566385269165039, + "rewards/margins": 7.593278408050537, + "rewards/rejected": -17.159664154052734, + "step": 17338 + }, + { + "epoch": 2.7, + "learning_rate": 1.430209035738368e-06, + "logits/chosen": -2.9391660690307617, + "logits/rejected": -2.9155330657958984, + "logps/chosen": -390.6751708984375, + "logps/rejected": -545.1636962890625, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.272075653076172, + "rewards/margins": 5.626527786254883, + "rewards/rejected": -16.898603439331055, + "step": 17339 + }, + { + "epoch": 2.7, + "learning_rate": 1.4294755952072204e-06, + "logits/chosen": -0.6852167844772339, + "logits/rejected": -1.816726565361023, + "logps/chosen": -153.06141662597656, + "logps/rejected": -673.5407104492188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.795352935791016, + "rewards/margins": 15.64345932006836, + "rewards/rejected": -26.438812255859375, + "step": 17340 + }, + { + "epoch": 2.7, + "learning_rate": 1.4287421546760725e-06, + "logits/chosen": -1.564231514930725, + "logits/rejected": -2.649868965148926, + "logps/chosen": -386.2740478515625, + "logps/rejected": -472.3846435546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.434216499328613, + "rewards/margins": 9.981829643249512, + "rewards/rejected": -17.416046142578125, + "step": 17341 + }, + { + "epoch": 2.7, + "learning_rate": 1.4280087141449246e-06, + "logits/chosen": -2.285892963409424, + "logits/rejected": -2.33171010017395, + "logps/chosen": -195.14044189453125, + "logps/rejected": -221.05130004882812, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.348976135253906, + "rewards/margins": 4.79090690612793, + "rewards/rejected": -12.139883041381836, + "step": 17342 + }, + { + "epoch": 2.7, + "learning_rate": 1.4272752736137767e-06, + "logits/chosen": -1.4260609149932861, + "logits/rejected": -2.58994460105896, + "logps/chosen": -223.92636108398438, + "logps/rejected": -445.790283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.422773361206055, + "rewards/margins": 10.716655731201172, + "rewards/rejected": -23.139429092407227, + "step": 17343 + }, + { + "epoch": 2.7, + "learning_rate": 1.426541833082629e-06, + "logits/chosen": -2.6711220741271973, + "logits/rejected": -2.8476462364196777, + "logps/chosen": -297.2917785644531, + "logps/rejected": -270.96197509765625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.129153251647949, + "rewards/margins": 7.520333290100098, + "rewards/rejected": -14.649486541748047, + "step": 17344 + }, + { + "epoch": 2.7, + "learning_rate": 1.425808392551481e-06, + "logits/chosen": -1.346070408821106, + "logits/rejected": -2.327395439147949, + "logps/chosen": -215.51296997070312, + "logps/rejected": -386.7477722167969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.055231094360352, + "rewards/margins": 13.207612991333008, + "rewards/rejected": -22.26284408569336, + "step": 17345 + }, + { + "epoch": 2.7, + "learning_rate": 1.425074952020333e-06, + "logits/chosen": -2.0805201530456543, + "logits/rejected": -2.4849159717559814, + "logps/chosen": -344.6396789550781, + "logps/rejected": -491.6630554199219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.072296142578125, + "rewards/margins": 9.097376823425293, + "rewards/rejected": -20.169673919677734, + "step": 17346 + }, + { + "epoch": 2.7, + "learning_rate": 1.424341511489185e-06, + "logits/chosen": -2.8097450733184814, + "logits/rejected": -2.723947048187256, + "logps/chosen": -768.0525512695312, + "logps/rejected": -1117.705078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.480409622192383, + "rewards/margins": 11.118505477905273, + "rewards/rejected": -21.598915100097656, + "step": 17347 + }, + { + "epoch": 2.7, + "learning_rate": 1.4236080709580373e-06, + "logits/chosen": -1.7632323503494263, + "logits/rejected": -2.438309907913208, + "logps/chosen": -338.00933837890625, + "logps/rejected": -506.42291259765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.684269905090332, + "rewards/margins": 10.143714904785156, + "rewards/rejected": -20.827983856201172, + "step": 17348 + }, + { + "epoch": 2.7, + "learning_rate": 1.4228746304268894e-06, + "logits/chosen": -1.7712277173995972, + "logits/rejected": -2.905914068222046, + "logps/chosen": -668.4735107421875, + "logps/rejected": -626.0152587890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.158174514770508, + "rewards/margins": 13.791199684143066, + "rewards/rejected": -23.94937515258789, + "step": 17349 + }, + { + "epoch": 2.7, + "learning_rate": 1.4221411898957415e-06, + "logits/chosen": -2.8475449085235596, + "logits/rejected": -2.2017369270324707, + "logps/chosen": -499.3554382324219, + "logps/rejected": -597.424560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.315744400024414, + "rewards/margins": 11.342265129089355, + "rewards/rejected": -21.658008575439453, + "step": 17350 + }, + { + "epoch": 2.7, + "learning_rate": 1.4214077493645936e-06, + "logits/chosen": -2.691117763519287, + "logits/rejected": -2.7038002014160156, + "logps/chosen": -168.46658325195312, + "logps/rejected": -306.7503967285156, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.467812538146973, + "rewards/margins": 7.52956485748291, + "rewards/rejected": -16.997377395629883, + "step": 17351 + }, + { + "epoch": 2.7, + "learning_rate": 1.4206743088334457e-06, + "logits/chosen": -2.7083094120025635, + "logits/rejected": -2.108666181564331, + "logps/chosen": -430.1783142089844, + "logps/rejected": -401.27197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.832452774047852, + "rewards/margins": 13.28366756439209, + "rewards/rejected": -21.116119384765625, + "step": 17352 + }, + { + "epoch": 2.7, + "learning_rate": 1.419940868302298e-06, + "logits/chosen": -2.877955198287964, + "logits/rejected": -2.205700159072876, + "logps/chosen": -635.534423828125, + "logps/rejected": -518.6821899414062, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.604262351989746, + "rewards/margins": 8.746781349182129, + "rewards/rejected": -17.351043701171875, + "step": 17353 + }, + { + "epoch": 2.7, + "learning_rate": 1.41920742777115e-06, + "logits/chosen": -2.520289659500122, + "logits/rejected": -2.5802741050720215, + "logps/chosen": -247.30325317382812, + "logps/rejected": -366.0888671875, + "loss": 0.6449, + "rewards/accuracies": 0.5, + "rewards/chosen": -15.530744552612305, + "rewards/margins": 4.661440849304199, + "rewards/rejected": -20.192184448242188, + "step": 17354 + }, + { + "epoch": 2.7, + "learning_rate": 1.4184739872400022e-06, + "logits/chosen": -2.6088790893554688, + "logits/rejected": -2.034245014190674, + "logps/chosen": -196.30593872070312, + "logps/rejected": -225.98831176757812, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.302879333496094, + "rewards/margins": 6.1658220291137695, + "rewards/rejected": -15.468701362609863, + "step": 17355 + }, + { + "epoch": 2.7, + "learning_rate": 1.417740546708854e-06, + "logits/chosen": -1.8670475482940674, + "logits/rejected": -2.821237802505493, + "logps/chosen": -178.4248809814453, + "logps/rejected": -713.8203735351562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.504276275634766, + "rewards/margins": 10.398918151855469, + "rewards/rejected": -24.903194427490234, + "step": 17356 + }, + { + "epoch": 2.7, + "learning_rate": 1.4170071061777063e-06, + "logits/chosen": -2.5491116046905518, + "logits/rejected": -2.028672456741333, + "logps/chosen": -527.4578857421875, + "logps/rejected": -472.99468994140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.03792953491211, + "rewards/margins": 11.642129898071289, + "rewards/rejected": -22.680057525634766, + "step": 17357 + }, + { + "epoch": 2.7, + "learning_rate": 1.4162736656465584e-06, + "logits/chosen": -2.3586814403533936, + "logits/rejected": -2.6982665061950684, + "logps/chosen": -216.24972534179688, + "logps/rejected": -373.0753173828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.567981719970703, + "rewards/margins": 12.324338912963867, + "rewards/rejected": -21.892318725585938, + "step": 17358 + }, + { + "epoch": 2.7, + "learning_rate": 1.4155402251154105e-06, + "logits/chosen": -2.7913873195648193, + "logits/rejected": -2.60337495803833, + "logps/chosen": -623.7599487304688, + "logps/rejected": -619.4776611328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.810340881347656, + "rewards/margins": 10.031024932861328, + "rewards/rejected": -18.841365814208984, + "step": 17359 + }, + { + "epoch": 2.7, + "learning_rate": 1.4148067845842626e-06, + "logits/chosen": -1.8441276550292969, + "logits/rejected": -2.7408599853515625, + "logps/chosen": -270.70068359375, + "logps/rejected": -394.90631103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.252737522125244, + "rewards/margins": 11.905050277709961, + "rewards/rejected": -18.157787322998047, + "step": 17360 + }, + { + "epoch": 2.7, + "learning_rate": 1.414073344053115e-06, + "logits/chosen": -2.5282604694366455, + "logits/rejected": -2.4142239093780518, + "logps/chosen": -215.92684936523438, + "logps/rejected": -238.1942138671875, + "loss": 0.043, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.7025785446167, + "rewards/margins": 5.148858070373535, + "rewards/rejected": -18.851436614990234, + "step": 17361 + }, + { + "epoch": 2.7, + "learning_rate": 1.413339903521967e-06, + "logits/chosen": -2.466627597808838, + "logits/rejected": -2.230130672454834, + "logps/chosen": -352.3084716796875, + "logps/rejected": -450.6820068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.134857177734375, + "rewards/margins": 12.74263858795166, + "rewards/rejected": -21.87749481201172, + "step": 17362 + }, + { + "epoch": 2.7, + "learning_rate": 1.412606462990819e-06, + "logits/chosen": -2.4443726539611816, + "logits/rejected": -2.3650994300842285, + "logps/chosen": -351.60333251953125, + "logps/rejected": -424.09130859375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.134794235229492, + "rewards/margins": 7.877830505371094, + "rewards/rejected": -17.012624740600586, + "step": 17363 + }, + { + "epoch": 2.7, + "learning_rate": 1.4118730224596712e-06, + "logits/chosen": -2.0436301231384277, + "logits/rejected": -2.1694135665893555, + "logps/chosen": -117.62262725830078, + "logps/rejected": -365.5490417480469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.12287712097168, + "rewards/margins": 16.33048439025879, + "rewards/rejected": -21.45336151123047, + "step": 17364 + }, + { + "epoch": 2.7, + "learning_rate": 1.4111395819285233e-06, + "logits/chosen": -2.7993366718292236, + "logits/rejected": -2.6624038219451904, + "logps/chosen": -581.7239379882812, + "logps/rejected": -742.42236328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.234090805053711, + "rewards/margins": 10.24520492553711, + "rewards/rejected": -19.47929573059082, + "step": 17365 + }, + { + "epoch": 2.7, + "learning_rate": 1.4104061413973754e-06, + "logits/chosen": -2.6502439975738525, + "logits/rejected": -2.902099847793579, + "logps/chosen": -377.60345458984375, + "logps/rejected": -734.9304809570312, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.706889152526855, + "rewards/margins": 7.809429168701172, + "rewards/rejected": -22.516319274902344, + "step": 17366 + }, + { + "epoch": 2.7, + "learning_rate": 1.4096727008662275e-06, + "logits/chosen": -2.1861162185668945, + "logits/rejected": -2.6408281326293945, + "logps/chosen": -265.88421630859375, + "logps/rejected": -513.046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.53628158569336, + "rewards/margins": 10.885648727416992, + "rewards/rejected": -23.42193031311035, + "step": 17367 + }, + { + "epoch": 2.7, + "learning_rate": 1.4089392603350795e-06, + "logits/chosen": -1.9316794872283936, + "logits/rejected": -2.4381160736083984, + "logps/chosen": -896.1864624023438, + "logps/rejected": -681.9846801757812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.400511741638184, + "rewards/margins": 17.85372543334961, + "rewards/rejected": -23.25423812866211, + "step": 17368 + }, + { + "epoch": 2.7, + "learning_rate": 1.4082058198039316e-06, + "logits/chosen": -2.1912474632263184, + "logits/rejected": -2.393700361251831, + "logps/chosen": -138.11328125, + "logps/rejected": -360.34954833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.608331680297852, + "rewards/margins": 13.426082611083984, + "rewards/rejected": -25.034414291381836, + "step": 17369 + }, + { + "epoch": 2.7, + "learning_rate": 1.407472379272784e-06, + "logits/chosen": -2.249976396560669, + "logits/rejected": -2.6562907695770264, + "logps/chosen": -217.52340698242188, + "logps/rejected": -415.7689208984375, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.261127471923828, + "rewards/margins": 12.224693298339844, + "rewards/rejected": -23.485820770263672, + "step": 17370 + }, + { + "epoch": 2.7, + "learning_rate": 1.406738938741636e-06, + "logits/chosen": -2.201641798019409, + "logits/rejected": -2.9078598022460938, + "logps/chosen": -170.57562255859375, + "logps/rejected": -421.7511901855469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.779609203338623, + "rewards/margins": 12.233278274536133, + "rewards/rejected": -18.012887954711914, + "step": 17371 + }, + { + "epoch": 2.7, + "learning_rate": 1.4060054982104881e-06, + "logits/chosen": -2.8678431510925293, + "logits/rejected": -2.217461109161377, + "logps/chosen": -329.78424072265625, + "logps/rejected": -285.74359130859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.281103610992432, + "rewards/margins": 8.463275909423828, + "rewards/rejected": -15.744379043579102, + "step": 17372 + }, + { + "epoch": 2.7, + "learning_rate": 1.4052720576793402e-06, + "logits/chosen": -2.6585280895233154, + "logits/rejected": -2.0838100910186768, + "logps/chosen": -205.92689514160156, + "logps/rejected": -346.6217041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8182692527771, + "rewards/margins": 10.617592811584473, + "rewards/rejected": -18.435861587524414, + "step": 17373 + }, + { + "epoch": 2.7, + "learning_rate": 1.4045386171481925e-06, + "logits/chosen": -2.8123300075531006, + "logits/rejected": -2.2200403213500977, + "logps/chosen": -515.6065673828125, + "logps/rejected": -491.6476745605469, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.892595291137695, + "rewards/margins": 11.224899291992188, + "rewards/rejected": -25.117494583129883, + "step": 17374 + }, + { + "epoch": 2.7, + "learning_rate": 1.4038051766170444e-06, + "logits/chosen": -2.7256646156311035, + "logits/rejected": -2.387596368789673, + "logps/chosen": -281.4835205078125, + "logps/rejected": -448.41180419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.029485702514648, + "rewards/margins": 11.135435104370117, + "rewards/rejected": -24.164920806884766, + "step": 17375 + }, + { + "epoch": 2.7, + "learning_rate": 1.4030717360858965e-06, + "logits/chosen": -2.760462760925293, + "logits/rejected": -2.4685964584350586, + "logps/chosen": -487.135009765625, + "logps/rejected": -611.29296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4989166259765625, + "rewards/margins": 15.15032958984375, + "rewards/rejected": -20.649246215820312, + "step": 17376 + }, + { + "epoch": 2.7, + "learning_rate": 1.4023382955547486e-06, + "logits/chosen": -2.2292866706848145, + "logits/rejected": -2.47268009185791, + "logps/chosen": -253.28958129882812, + "logps/rejected": -378.1024169921875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.916879653930664, + "rewards/margins": 8.520100593566895, + "rewards/rejected": -16.436981201171875, + "step": 17377 + }, + { + "epoch": 2.7, + "learning_rate": 1.4016048550236009e-06, + "logits/chosen": -2.7389445304870605, + "logits/rejected": -1.9352166652679443, + "logps/chosen": -538.9290161132812, + "logps/rejected": -595.3995361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.628828048706055, + "rewards/margins": 14.599815368652344, + "rewards/rejected": -24.2286434173584, + "step": 17378 + }, + { + "epoch": 2.7, + "learning_rate": 1.400871414492453e-06, + "logits/chosen": -1.3599302768707275, + "logits/rejected": -2.3540191650390625, + "logps/chosen": -135.4307403564453, + "logps/rejected": -537.1265869140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.323456764221191, + "rewards/margins": 14.106834411621094, + "rewards/rejected": -24.4302921295166, + "step": 17379 + }, + { + "epoch": 2.7, + "learning_rate": 1.400137973961305e-06, + "logits/chosen": -2.67982816696167, + "logits/rejected": -2.7941551208496094, + "logps/chosen": -478.8496398925781, + "logps/rejected": -564.563720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.341910362243652, + "rewards/margins": 12.276479721069336, + "rewards/rejected": -18.618389129638672, + "step": 17380 + }, + { + "epoch": 2.7, + "learning_rate": 1.3994045334301571e-06, + "logits/chosen": -2.430915117263794, + "logits/rejected": -2.8914315700531006, + "logps/chosen": -390.01971435546875, + "logps/rejected": -475.09521484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.067021369934082, + "rewards/margins": 10.813322067260742, + "rewards/rejected": -19.88034439086914, + "step": 17381 + }, + { + "epoch": 2.7, + "learning_rate": 1.3986710928990094e-06, + "logits/chosen": -2.7840468883514404, + "logits/rejected": -2.973893404006958, + "logps/chosen": -124.05270385742188, + "logps/rejected": -214.67962646484375, + "loss": 0.501, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.319124221801758, + "rewards/margins": 3.701890468597412, + "rewards/rejected": -15.021014213562012, + "step": 17382 + }, + { + "epoch": 2.7, + "learning_rate": 1.3979376523678615e-06, + "logits/chosen": -2.502347230911255, + "logits/rejected": -1.614844560623169, + "logps/chosen": -242.17111206054688, + "logps/rejected": -357.42877197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.859185218811035, + "rewards/margins": 11.121015548706055, + "rewards/rejected": -19.980201721191406, + "step": 17383 + }, + { + "epoch": 2.7, + "learning_rate": 1.3972042118367136e-06, + "logits/chosen": -1.3783842325210571, + "logits/rejected": -2.4973793029785156, + "logps/chosen": -119.91716003417969, + "logps/rejected": -456.12628173828125, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.630061149597168, + "rewards/margins": 6.728731155395508, + "rewards/rejected": -16.35879135131836, + "step": 17384 + }, + { + "epoch": 2.7, + "learning_rate": 1.3964707713055655e-06, + "logits/chosen": -2.265434503555298, + "logits/rejected": -1.5459349155426025, + "logps/chosen": -268.46661376953125, + "logps/rejected": -374.2069396972656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.979510307312012, + "rewards/margins": 13.03274154663086, + "rewards/rejected": -25.012252807617188, + "step": 17385 + }, + { + "epoch": 2.7, + "learning_rate": 1.3957373307744178e-06, + "logits/chosen": -2.6813642978668213, + "logits/rejected": -0.9939974546432495, + "logps/chosen": -604.8231201171875, + "logps/rejected": -339.1331481933594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1830644607543945, + "rewards/margins": 13.465093612670898, + "rewards/rejected": -19.64815902709961, + "step": 17386 + }, + { + "epoch": 2.7, + "learning_rate": 1.3950038902432699e-06, + "logits/chosen": -1.6978152990341187, + "logits/rejected": -2.5342516899108887, + "logps/chosen": -291.7496643066406, + "logps/rejected": -612.6691284179688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.249544143676758, + "rewards/margins": 9.80482292175293, + "rewards/rejected": -21.054367065429688, + "step": 17387 + }, + { + "epoch": 2.7, + "learning_rate": 1.394270449712122e-06, + "logits/chosen": -1.1458488702774048, + "logits/rejected": -2.82878041267395, + "logps/chosen": -196.25865173339844, + "logps/rejected": -564.595703125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.617326736450195, + "rewards/margins": 8.08981704711914, + "rewards/rejected": -23.707141876220703, + "step": 17388 + }, + { + "epoch": 2.7, + "learning_rate": 1.393537009180974e-06, + "logits/chosen": -1.155311942100525, + "logits/rejected": -2.7017786502838135, + "logps/chosen": -320.37847900390625, + "logps/rejected": -804.0340576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.728343963623047, + "rewards/margins": 12.06355094909668, + "rewards/rejected": -21.791894912719727, + "step": 17389 + }, + { + "epoch": 2.7, + "learning_rate": 1.3928035686498262e-06, + "logits/chosen": -2.2946999073028564, + "logits/rejected": -2.787421703338623, + "logps/chosen": -191.1121063232422, + "logps/rejected": -379.61798095703125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.238131523132324, + "rewards/margins": 6.951183319091797, + "rewards/rejected": -19.189315795898438, + "step": 17390 + }, + { + "epoch": 2.7, + "learning_rate": 1.3920701281186785e-06, + "logits/chosen": -2.047420024871826, + "logits/rejected": -2.482590675354004, + "logps/chosen": -408.8733825683594, + "logps/rejected": -451.034423828125, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.531245231628418, + "rewards/margins": 6.938361167907715, + "rewards/rejected": -20.469606399536133, + "step": 17391 + }, + { + "epoch": 2.7, + "learning_rate": 1.3913366875875305e-06, + "logits/chosen": -2.5203697681427, + "logits/rejected": -2.662353515625, + "logps/chosen": -438.05902099609375, + "logps/rejected": -557.50927734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.406913757324219, + "rewards/margins": 10.119827270507812, + "rewards/rejected": -19.52674102783203, + "step": 17392 + }, + { + "epoch": 2.7, + "learning_rate": 1.3906032470563826e-06, + "logits/chosen": -2.4729607105255127, + "logits/rejected": -1.9403923749923706, + "logps/chosen": -350.62677001953125, + "logps/rejected": -310.4314880371094, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.851702690124512, + "rewards/margins": 6.01987361907959, + "rewards/rejected": -18.8715763092041, + "step": 17393 + }, + { + "epoch": 2.71, + "learning_rate": 1.3898698065252347e-06, + "logits/chosen": -2.16645884513855, + "logits/rejected": -2.557131052017212, + "logps/chosen": -204.51023864746094, + "logps/rejected": -539.0427856445312, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.397624015808105, + "rewards/margins": 10.9890718460083, + "rewards/rejected": -23.386695861816406, + "step": 17394 + }, + { + "epoch": 2.71, + "learning_rate": 1.3891363659940868e-06, + "logits/chosen": -2.663300037384033, + "logits/rejected": -2.220137357711792, + "logps/chosen": -351.2885437011719, + "logps/rejected": -292.40350341796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.279985427856445, + "rewards/margins": 8.476247787475586, + "rewards/rejected": -18.75623321533203, + "step": 17395 + }, + { + "epoch": 2.71, + "learning_rate": 1.388402925462939e-06, + "logits/chosen": -2.713592529296875, + "logits/rejected": -2.8848767280578613, + "logps/chosen": -617.96533203125, + "logps/rejected": -612.4312744140625, + "loss": 0.2535, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.459512710571289, + "rewards/margins": 7.200132369995117, + "rewards/rejected": -17.659645080566406, + "step": 17396 + }, + { + "epoch": 2.71, + "learning_rate": 1.387669484931791e-06, + "logits/chosen": -2.694150686264038, + "logits/rejected": -2.189636468887329, + "logps/chosen": -673.5264892578125, + "logps/rejected": -588.5106811523438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.185409545898438, + "rewards/margins": 10.970911979675293, + "rewards/rejected": -22.156322479248047, + "step": 17397 + }, + { + "epoch": 2.71, + "learning_rate": 1.386936044400643e-06, + "logits/chosen": -2.386698007583618, + "logits/rejected": -1.326537847518921, + "logps/chosen": -234.8690643310547, + "logps/rejected": -240.4885711669922, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.763375282287598, + "rewards/margins": 6.486081600189209, + "rewards/rejected": -16.24945640563965, + "step": 17398 + }, + { + "epoch": 2.71, + "learning_rate": 1.3862026038694954e-06, + "logits/chosen": -2.48690128326416, + "logits/rejected": -2.685391664505005, + "logps/chosen": -319.2331237792969, + "logps/rejected": -529.5005493164062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.418513298034668, + "rewards/margins": 16.72787857055664, + "rewards/rejected": -29.146392822265625, + "step": 17399 + }, + { + "epoch": 2.71, + "learning_rate": 1.3854691633383475e-06, + "logits/chosen": -2.2784671783447266, + "logits/rejected": -2.604602575302124, + "logps/chosen": -276.626953125, + "logps/rejected": -436.562255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.244565963745117, + "rewards/margins": 10.504106521606445, + "rewards/rejected": -20.748672485351562, + "step": 17400 + }, + { + "epoch": 2.71, + "learning_rate": 1.3847357228071996e-06, + "logits/chosen": -1.2398605346679688, + "logits/rejected": -2.2245922088623047, + "logps/chosen": -303.160400390625, + "logps/rejected": -477.7503662109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.835201263427734, + "rewards/margins": 11.284507751464844, + "rewards/rejected": -23.119709014892578, + "step": 17401 + }, + { + "epoch": 2.71, + "learning_rate": 1.3840022822760517e-06, + "logits/chosen": -2.499373435974121, + "logits/rejected": -2.909282922744751, + "logps/chosen": -656.6921997070312, + "logps/rejected": -718.1309814453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.436561584472656, + "rewards/margins": 9.235422134399414, + "rewards/rejected": -17.67198371887207, + "step": 17402 + }, + { + "epoch": 2.71, + "learning_rate": 1.383268841744904e-06, + "logits/chosen": -1.6927273273468018, + "logits/rejected": -2.7338132858276367, + "logps/chosen": -214.05709838867188, + "logps/rejected": -409.3857727050781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.544157981872559, + "rewards/margins": 11.144043922424316, + "rewards/rejected": -23.688201904296875, + "step": 17403 + }, + { + "epoch": 2.71, + "learning_rate": 1.382535401213756e-06, + "logits/chosen": -1.8005197048187256, + "logits/rejected": -2.3102188110351562, + "logps/chosen": -240.7355194091797, + "logps/rejected": -491.782958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.631284713745117, + "rewards/margins": 16.766700744628906, + "rewards/rejected": -23.39798355102539, + "step": 17404 + }, + { + "epoch": 2.71, + "learning_rate": 1.381801960682608e-06, + "logits/chosen": -2.976278781890869, + "logits/rejected": -2.894054889678955, + "logps/chosen": -203.75369262695312, + "logps/rejected": -438.60931396484375, + "loss": 0.2786, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.567126274108887, + "rewards/margins": 7.014766693115234, + "rewards/rejected": -19.581893920898438, + "step": 17405 + }, + { + "epoch": 2.71, + "learning_rate": 1.38106852015146e-06, + "logits/chosen": -2.7541439533233643, + "logits/rejected": -2.8224434852600098, + "logps/chosen": -144.845458984375, + "logps/rejected": -505.87060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.39530086517334, + "rewards/margins": 13.649084091186523, + "rewards/rejected": -22.044384002685547, + "step": 17406 + }, + { + "epoch": 2.71, + "learning_rate": 1.3803350796203123e-06, + "logits/chosen": -1.5601887702941895, + "logits/rejected": -2.4114463329315186, + "logps/chosen": -241.95138549804688, + "logps/rejected": -425.3011474609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.178814888000488, + "rewards/margins": 9.684592247009277, + "rewards/rejected": -22.863407135009766, + "step": 17407 + }, + { + "epoch": 2.71, + "learning_rate": 1.3796016390891644e-06, + "logits/chosen": -2.544687271118164, + "logits/rejected": -2.440215587615967, + "logps/chosen": -281.6412048339844, + "logps/rejected": -292.84942626953125, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.492839813232422, + "rewards/margins": 6.005155086517334, + "rewards/rejected": -17.49799346923828, + "step": 17408 + }, + { + "epoch": 2.71, + "learning_rate": 1.3788681985580165e-06, + "logits/chosen": -2.1023192405700684, + "logits/rejected": -2.6501169204711914, + "logps/chosen": -356.15509033203125, + "logps/rejected": -687.4708251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.205453872680664, + "rewards/margins": 12.586727142333984, + "rewards/rejected": -22.79218101501465, + "step": 17409 + }, + { + "epoch": 2.71, + "learning_rate": 1.3781347580268686e-06, + "logits/chosen": -1.5827414989471436, + "logits/rejected": -2.4597597122192383, + "logps/chosen": -292.904052734375, + "logps/rejected": -615.4449462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -16.323772430419922, + "rewards/margins": 11.326565742492676, + "rewards/rejected": -27.650339126586914, + "step": 17410 + }, + { + "epoch": 2.71, + "learning_rate": 1.3774013174957207e-06, + "logits/chosen": -2.25400447845459, + "logits/rejected": -2.8639354705810547, + "logps/chosen": -233.10305786132812, + "logps/rejected": -453.9714050292969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.874879837036133, + "rewards/margins": 11.250358581542969, + "rewards/rejected": -20.125240325927734, + "step": 17411 + }, + { + "epoch": 2.71, + "learning_rate": 1.376667876964573e-06, + "logits/chosen": -1.9169970750808716, + "logits/rejected": -2.4268550872802734, + "logps/chosen": -188.81051635742188, + "logps/rejected": -355.02838134765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.869604110717773, + "rewards/margins": 12.26423454284668, + "rewards/rejected": -21.133838653564453, + "step": 17412 + }, + { + "epoch": 2.71, + "learning_rate": 1.375934436433425e-06, + "logits/chosen": -1.8343331813812256, + "logits/rejected": -2.370804786682129, + "logps/chosen": -215.33157348632812, + "logps/rejected": -266.8558349609375, + "loss": 0.1134, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.581830024719238, + "rewards/margins": 6.038022994995117, + "rewards/rejected": -17.619853973388672, + "step": 17413 + }, + { + "epoch": 2.71, + "learning_rate": 1.3752009959022772e-06, + "logits/chosen": -2.4674391746520996, + "logits/rejected": -1.0943039655685425, + "logps/chosen": -260.2166442871094, + "logps/rejected": -217.77801513671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.066770553588867, + "rewards/margins": 10.545965194702148, + "rewards/rejected": -17.612735748291016, + "step": 17414 + }, + { + "epoch": 2.71, + "learning_rate": 1.374467555371129e-06, + "logits/chosen": -2.5804860591888428, + "logits/rejected": -2.0880303382873535, + "logps/chosen": -311.534423828125, + "logps/rejected": -408.50640869140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.619449615478516, + "rewards/margins": 8.36640739440918, + "rewards/rejected": -21.985857009887695, + "step": 17415 + }, + { + "epoch": 2.71, + "learning_rate": 1.3737341148399813e-06, + "logits/chosen": -2.217473268508911, + "logits/rejected": -2.3806633949279785, + "logps/chosen": -116.4020004272461, + "logps/rejected": -436.37646484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.347573280334473, + "rewards/margins": 13.764535903930664, + "rewards/rejected": -24.112110137939453, + "step": 17416 + }, + { + "epoch": 2.71, + "learning_rate": 1.3730006743088334e-06, + "logits/chosen": -2.7111735343933105, + "logits/rejected": -1.5641509294509888, + "logps/chosen": -448.1311950683594, + "logps/rejected": -433.42529296875, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.473428726196289, + "rewards/margins": 9.844627380371094, + "rewards/rejected": -21.318058013916016, + "step": 17417 + }, + { + "epoch": 2.71, + "learning_rate": 1.3722672337776855e-06, + "logits/chosen": -2.203805446624756, + "logits/rejected": -2.7748773097991943, + "logps/chosen": -307.57525634765625, + "logps/rejected": -463.8377380371094, + "loss": 0.0978, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.742866516113281, + "rewards/margins": 5.801219940185547, + "rewards/rejected": -18.544086456298828, + "step": 17418 + }, + { + "epoch": 2.71, + "learning_rate": 1.3715337932465376e-06, + "logits/chosen": -2.8009541034698486, + "logits/rejected": -2.698127508163452, + "logps/chosen": -241.75924682617188, + "logps/rejected": -450.24658203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.19592571258545, + "rewards/margins": 10.768502235412598, + "rewards/rejected": -18.964427947998047, + "step": 17419 + }, + { + "epoch": 2.71, + "learning_rate": 1.37080035271539e-06, + "logits/chosen": -1.9169219732284546, + "logits/rejected": -2.8784096240997314, + "logps/chosen": -206.2644500732422, + "logps/rejected": -588.248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.348283767700195, + "rewards/margins": 9.335697174072266, + "rewards/rejected": -21.68398094177246, + "step": 17420 + }, + { + "epoch": 2.71, + "learning_rate": 1.370066912184242e-06, + "logits/chosen": -2.862548828125, + "logits/rejected": -1.721015453338623, + "logps/chosen": -371.44525146484375, + "logps/rejected": -160.75296020507812, + "loss": 0.828, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.029857635498047, + "rewards/margins": 3.1150901317596436, + "rewards/rejected": -14.14494800567627, + "step": 17421 + }, + { + "epoch": 2.71, + "learning_rate": 1.369333471653094e-06, + "logits/chosen": -1.461169719696045, + "logits/rejected": -2.71809458732605, + "logps/chosen": -317.35968017578125, + "logps/rejected": -566.9063720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.108365058898926, + "rewards/margins": 10.52943229675293, + "rewards/rejected": -20.637798309326172, + "step": 17422 + }, + { + "epoch": 2.71, + "learning_rate": 1.3686000311219462e-06, + "logits/chosen": -2.204582452774048, + "logits/rejected": -2.7011210918426514, + "logps/chosen": -233.8234100341797, + "logps/rejected": -362.7677917480469, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.914909362792969, + "rewards/margins": 9.353418350219727, + "rewards/rejected": -18.268327713012695, + "step": 17423 + }, + { + "epoch": 2.71, + "learning_rate": 1.3678665905907985e-06, + "logits/chosen": -1.6464028358459473, + "logits/rejected": -2.273627996444702, + "logps/chosen": -153.975830078125, + "logps/rejected": -527.1121826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.424179077148438, + "rewards/margins": 10.787861824035645, + "rewards/rejected": -21.212039947509766, + "step": 17424 + }, + { + "epoch": 2.71, + "learning_rate": 1.3671331500596504e-06, + "logits/chosen": -2.4677720069885254, + "logits/rejected": -1.732352614402771, + "logps/chosen": -163.60484313964844, + "logps/rejected": -266.2252197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.190242767333984, + "rewards/margins": 10.585674285888672, + "rewards/rejected": -17.775917053222656, + "step": 17425 + }, + { + "epoch": 2.71, + "learning_rate": 1.3663997095285025e-06, + "logits/chosen": -2.642099618911743, + "logits/rejected": -2.8460915088653564, + "logps/chosen": -144.8545684814453, + "logps/rejected": -407.98095703125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.411827087402344, + "rewards/margins": 12.234526634216309, + "rewards/rejected": -18.64635467529297, + "step": 17426 + }, + { + "epoch": 2.71, + "learning_rate": 1.3656662689973545e-06, + "logits/chosen": -2.918952226638794, + "logits/rejected": -2.9811813831329346, + "logps/chosen": -143.22637939453125, + "logps/rejected": -311.88995361328125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.903181076049805, + "rewards/margins": 10.6810302734375, + "rewards/rejected": -19.584211349487305, + "step": 17427 + }, + { + "epoch": 2.71, + "learning_rate": 1.3649328284662069e-06, + "logits/chosen": -2.0303473472595215, + "logits/rejected": -2.822967290878296, + "logps/chosen": -249.8355712890625, + "logps/rejected": -436.9776306152344, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.129548072814941, + "rewards/margins": 8.083487510681152, + "rewards/rejected": -15.213035583496094, + "step": 17428 + }, + { + "epoch": 2.71, + "learning_rate": 1.364199387935059e-06, + "logits/chosen": -1.7110241651535034, + "logits/rejected": -2.6658530235290527, + "logps/chosen": -368.5650634765625, + "logps/rejected": -577.1621704101562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.836389541625977, + "rewards/margins": 10.876228332519531, + "rewards/rejected": -21.712617874145508, + "step": 17429 + }, + { + "epoch": 2.71, + "learning_rate": 1.363465947403911e-06, + "logits/chosen": -1.7096590995788574, + "logits/rejected": -2.6642708778381348, + "logps/chosen": -206.06573486328125, + "logps/rejected": -537.6087646484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.548303604125977, + "rewards/margins": 16.482425689697266, + "rewards/rejected": -25.030731201171875, + "step": 17430 + }, + { + "epoch": 2.71, + "learning_rate": 1.3627325068727631e-06, + "logits/chosen": -2.313664436340332, + "logits/rejected": -2.1651484966278076, + "logps/chosen": -607.553955078125, + "logps/rejected": -499.804443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.550076484680176, + "rewards/margins": 12.541557312011719, + "rewards/rejected": -25.091632843017578, + "step": 17431 + }, + { + "epoch": 2.71, + "learning_rate": 1.3619990663416152e-06, + "logits/chosen": -1.628243088722229, + "logits/rejected": -1.7641026973724365, + "logps/chosen": -368.1075439453125, + "logps/rejected": -696.0467529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.525178909301758, + "rewards/margins": 13.746681213378906, + "rewards/rejected": -27.271860122680664, + "step": 17432 + }, + { + "epoch": 2.71, + "learning_rate": 1.3612656258104675e-06, + "logits/chosen": -1.8121801614761353, + "logits/rejected": -2.087028741836548, + "logps/chosen": -145.92550659179688, + "logps/rejected": -257.4174499511719, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.955679893493652, + "rewards/margins": 7.671239852905273, + "rewards/rejected": -19.62691879272461, + "step": 17433 + }, + { + "epoch": 2.71, + "learning_rate": 1.3605321852793194e-06, + "logits/chosen": -1.3934218883514404, + "logits/rejected": -2.246694564819336, + "logps/chosen": -217.2486114501953, + "logps/rejected": -393.95904541015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.13555908203125, + "rewards/margins": 9.642021179199219, + "rewards/rejected": -20.77758026123047, + "step": 17434 + }, + { + "epoch": 2.71, + "learning_rate": 1.3597987447481715e-06, + "logits/chosen": -2.7631731033325195, + "logits/rejected": -2.6350274085998535, + "logps/chosen": -430.5169677734375, + "logps/rejected": -632.31005859375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.13184642791748, + "rewards/margins": 18.10235595703125, + "rewards/rejected": -30.234203338623047, + "step": 17435 + }, + { + "epoch": 2.71, + "learning_rate": 1.3590653042170236e-06, + "logits/chosen": -2.6950221061706543, + "logits/rejected": -2.8729753494262695, + "logps/chosen": -144.66160583496094, + "logps/rejected": -200.33358764648438, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.370759010314941, + "rewards/margins": 8.280861854553223, + "rewards/rejected": -16.651620864868164, + "step": 17436 + }, + { + "epoch": 2.71, + "learning_rate": 1.3583318636858759e-06, + "logits/chosen": -2.4638822078704834, + "logits/rejected": -2.599973440170288, + "logps/chosen": -117.58988952636719, + "logps/rejected": -381.41375732421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.248562812805176, + "rewards/margins": 9.168336868286133, + "rewards/rejected": -18.416900634765625, + "step": 17437 + }, + { + "epoch": 2.71, + "learning_rate": 1.357598423154728e-06, + "logits/chosen": -2.7853493690490723, + "logits/rejected": -2.638643503189087, + "logps/chosen": -518.4842529296875, + "logps/rejected": -780.1290283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.504983901977539, + "rewards/margins": 12.86937141418457, + "rewards/rejected": -22.37435531616211, + "step": 17438 + }, + { + "epoch": 2.71, + "learning_rate": 1.35686498262358e-06, + "logits/chosen": -2.596376895904541, + "logits/rejected": -2.8718292713165283, + "logps/chosen": -210.32363891601562, + "logps/rejected": -503.57696533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2011308670043945, + "rewards/margins": 13.710051536560059, + "rewards/rejected": -20.911182403564453, + "step": 17439 + }, + { + "epoch": 2.71, + "learning_rate": 1.3561315420924321e-06, + "logits/chosen": -1.5402498245239258, + "logits/rejected": -2.638353109359741, + "logps/chosen": -345.3079833984375, + "logps/rejected": -535.3948364257812, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.999005317687988, + "rewards/margins": 11.00346565246582, + "rewards/rejected": -21.002470016479492, + "step": 17440 + }, + { + "epoch": 2.71, + "learning_rate": 1.3553981015612844e-06, + "logits/chosen": -1.3388737440109253, + "logits/rejected": -2.4124631881713867, + "logps/chosen": -143.9296417236328, + "logps/rejected": -645.3262329101562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.1185302734375, + "rewards/margins": 19.208271026611328, + "rewards/rejected": -27.326801300048828, + "step": 17441 + }, + { + "epoch": 2.71, + "learning_rate": 1.3546646610301365e-06, + "logits/chosen": -2.45491361618042, + "logits/rejected": -2.687617063522339, + "logps/chosen": -500.4817199707031, + "logps/rejected": -651.3967895507812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.371261596679688, + "rewards/margins": 9.616342544555664, + "rewards/rejected": -20.98760414123535, + "step": 17442 + }, + { + "epoch": 2.71, + "learning_rate": 1.3539312204989886e-06, + "logits/chosen": -1.594034194946289, + "logits/rejected": -2.613739013671875, + "logps/chosen": -113.36415100097656, + "logps/rejected": -478.15985107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.247771263122559, + "rewards/margins": 14.664520263671875, + "rewards/rejected": -22.91229248046875, + "step": 17443 + }, + { + "epoch": 2.71, + "learning_rate": 1.3531977799678405e-06, + "logits/chosen": -2.3107759952545166, + "logits/rejected": -1.1731007099151611, + "logps/chosen": -312.3455505371094, + "logps/rejected": -299.65399169921875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.297772407531738, + "rewards/margins": 8.631448745727539, + "rewards/rejected": -18.92922019958496, + "step": 17444 + }, + { + "epoch": 2.71, + "learning_rate": 1.3524643394366928e-06, + "logits/chosen": -2.7715861797332764, + "logits/rejected": -2.8467798233032227, + "logps/chosen": -169.1034393310547, + "logps/rejected": -369.7384033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.447681427001953, + "rewards/margins": 10.942384719848633, + "rewards/rejected": -20.390066146850586, + "step": 17445 + }, + { + "epoch": 2.71, + "learning_rate": 1.351730898905545e-06, + "logits/chosen": -2.5654594898223877, + "logits/rejected": -2.1486666202545166, + "logps/chosen": -221.51858520507812, + "logps/rejected": -325.87762451171875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.952962875366211, + "rewards/margins": 9.901183128356934, + "rewards/rejected": -18.854145050048828, + "step": 17446 + }, + { + "epoch": 2.71, + "learning_rate": 1.350997458374397e-06, + "logits/chosen": -2.817227840423584, + "logits/rejected": -2.5828194618225098, + "logps/chosen": -434.3437194824219, + "logps/rejected": -615.9049072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.752252578735352, + "rewards/margins": 12.60313606262207, + "rewards/rejected": -24.355388641357422, + "step": 17447 + }, + { + "epoch": 2.71, + "learning_rate": 1.350264017843249e-06, + "logits/chosen": -2.1846938133239746, + "logits/rejected": -2.299196720123291, + "logps/chosen": -272.3819274902344, + "logps/rejected": -411.19708251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.569864273071289, + "rewards/margins": 12.505754470825195, + "rewards/rejected": -22.075618743896484, + "step": 17448 + }, + { + "epoch": 2.71, + "learning_rate": 1.3495305773121012e-06, + "logits/chosen": -1.4262861013412476, + "logits/rejected": -1.943946123123169, + "logps/chosen": -183.21182250976562, + "logps/rejected": -260.17889404296875, + "loss": 0.2576, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.25033950805664, + "rewards/margins": 4.827645778656006, + "rewards/rejected": -19.077983856201172, + "step": 17449 + }, + { + "epoch": 2.71, + "learning_rate": 1.3487971367809535e-06, + "logits/chosen": -1.11518394947052, + "logits/rejected": -1.9697299003601074, + "logps/chosen": -144.95968627929688, + "logps/rejected": -525.05029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.522783279418945, + "rewards/margins": 12.812299728393555, + "rewards/rejected": -21.3350830078125, + "step": 17450 + }, + { + "epoch": 2.71, + "learning_rate": 1.3480636962498056e-06, + "logits/chosen": -1.5895618200302124, + "logits/rejected": -2.355842113494873, + "logps/chosen": -168.74270629882812, + "logps/rejected": -517.9139404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.78870964050293, + "rewards/margins": 13.51749324798584, + "rewards/rejected": -22.306201934814453, + "step": 17451 + }, + { + "epoch": 2.71, + "learning_rate": 1.3473302557186576e-06, + "logits/chosen": -2.9463181495666504, + "logits/rejected": -2.954677104949951, + "logps/chosen": -383.36029052734375, + "logps/rejected": -330.1417236328125, + "loss": 0.0287, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.3750638961792, + "rewards/margins": 9.586762428283691, + "rewards/rejected": -17.96182632446289, + "step": 17452 + }, + { + "epoch": 2.71, + "learning_rate": 1.3465968151875097e-06, + "logits/chosen": -0.9540519714355469, + "logits/rejected": -2.2690415382385254, + "logps/chosen": -143.86788940429688, + "logps/rejected": -516.0208740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.710625648498535, + "rewards/margins": 17.97694206237793, + "rewards/rejected": -27.68756866455078, + "step": 17453 + }, + { + "epoch": 2.71, + "learning_rate": 1.3458633746563618e-06, + "logits/chosen": -2.3519344329833984, + "logits/rejected": -2.780993938446045, + "logps/chosen": -166.1998291015625, + "logps/rejected": -312.51776123046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.470071792602539, + "rewards/margins": 8.521356582641602, + "rewards/rejected": -18.99142837524414, + "step": 17454 + }, + { + "epoch": 2.71, + "learning_rate": 1.345129934125214e-06, + "logits/chosen": -2.594813823699951, + "logits/rejected": -2.1089935302734375, + "logps/chosen": -516.2700805664062, + "logps/rejected": -617.03662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.39069938659668, + "rewards/margins": 13.768453598022461, + "rewards/rejected": -25.15915298461914, + "step": 17455 + }, + { + "epoch": 2.71, + "learning_rate": 1.344396493594066e-06, + "logits/chosen": -2.3699111938476562, + "logits/rejected": -2.6162543296813965, + "logps/chosen": -274.293212890625, + "logps/rejected": -456.034423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.685952186584473, + "rewards/margins": 12.768671035766602, + "rewards/rejected": -21.45462417602539, + "step": 17456 + }, + { + "epoch": 2.71, + "learning_rate": 1.343663053062918e-06, + "logits/chosen": -2.1894538402557373, + "logits/rejected": -2.572856903076172, + "logps/chosen": -127.05400085449219, + "logps/rejected": -320.9365234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.432056427001953, + "rewards/margins": 9.559121131896973, + "rewards/rejected": -17.991178512573242, + "step": 17457 + }, + { + "epoch": 2.72, + "learning_rate": 1.3429296125317704e-06, + "logits/chosen": -1.8884135484695435, + "logits/rejected": -2.177030086517334, + "logps/chosen": -271.86785888671875, + "logps/rejected": -396.2237548828125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.311660766601562, + "rewards/margins": 7.526253700256348, + "rewards/rejected": -19.837913513183594, + "step": 17458 + }, + { + "epoch": 2.72, + "learning_rate": 1.3421961720006225e-06, + "logits/chosen": -2.6911160945892334, + "logits/rejected": -2.845646381378174, + "logps/chosen": -169.41847229003906, + "logps/rejected": -483.49560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.604223251342773, + "rewards/margins": 13.994994163513184, + "rewards/rejected": -22.59921646118164, + "step": 17459 + }, + { + "epoch": 2.72, + "learning_rate": 1.3414627314694746e-06, + "logits/chosen": -2.2079050540924072, + "logits/rejected": -2.78371524810791, + "logps/chosen": -304.2767333984375, + "logps/rejected": -547.7833251953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.924017906188965, + "rewards/margins": 12.95862102508545, + "rewards/rejected": -21.882638931274414, + "step": 17460 + }, + { + "epoch": 2.72, + "learning_rate": 1.3407292909383267e-06, + "logits/chosen": -2.6431217193603516, + "logits/rejected": -2.576348304748535, + "logps/chosen": -502.03436279296875, + "logps/rejected": -622.9835205078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.239742279052734, + "rewards/margins": 10.106033325195312, + "rewards/rejected": -20.345775604248047, + "step": 17461 + }, + { + "epoch": 2.72, + "learning_rate": 1.339995850407179e-06, + "logits/chosen": -2.6163790225982666, + "logits/rejected": -2.803178310394287, + "logps/chosen": -307.0946044921875, + "logps/rejected": -264.5375671386719, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.122499465942383, + "rewards/margins": 6.841829299926758, + "rewards/rejected": -17.96432876586914, + "step": 17462 + }, + { + "epoch": 2.72, + "learning_rate": 1.339262409876031e-06, + "logits/chosen": -1.4755220413208008, + "logits/rejected": -2.4477012157440186, + "logps/chosen": -169.54354858398438, + "logps/rejected": -388.0016784667969, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.82217025756836, + "rewards/margins": 7.390457630157471, + "rewards/rejected": -17.212627410888672, + "step": 17463 + }, + { + "epoch": 2.72, + "learning_rate": 1.338528969344883e-06, + "logits/chosen": -1.530031681060791, + "logits/rejected": -2.583035945892334, + "logps/chosen": -270.2980651855469, + "logps/rejected": -487.45074462890625, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.611764907836914, + "rewards/margins": 10.295672416687012, + "rewards/rejected": -19.90743637084961, + "step": 17464 + }, + { + "epoch": 2.72, + "learning_rate": 1.337795528813735e-06, + "logits/chosen": -2.362870931625366, + "logits/rejected": -1.8916752338409424, + "logps/chosen": -237.20272827148438, + "logps/rejected": -457.48944091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.08096694946289, + "rewards/margins": 12.865394592285156, + "rewards/rejected": -22.946361541748047, + "step": 17465 + }, + { + "epoch": 2.72, + "learning_rate": 1.3370620882825873e-06, + "logits/chosen": -2.164663076400757, + "logits/rejected": -2.8325188159942627, + "logps/chosen": -364.6397399902344, + "logps/rejected": -847.7111206054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.97096061706543, + "rewards/margins": 17.986698150634766, + "rewards/rejected": -25.957656860351562, + "step": 17466 + }, + { + "epoch": 2.72, + "learning_rate": 1.3363286477514394e-06, + "logits/chosen": -2.1242494583129883, + "logits/rejected": -2.866987943649292, + "logps/chosen": -197.66323852539062, + "logps/rejected": -606.3355712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.366960525512695, + "rewards/margins": 12.932445526123047, + "rewards/rejected": -21.299406051635742, + "step": 17467 + }, + { + "epoch": 2.72, + "learning_rate": 1.3355952072202915e-06, + "logits/chosen": -1.6431361436843872, + "logits/rejected": -2.6256680488586426, + "logps/chosen": -151.5859375, + "logps/rejected": -546.4207153320312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.322198867797852, + "rewards/margins": 12.244464874267578, + "rewards/rejected": -21.56666374206543, + "step": 17468 + }, + { + "epoch": 2.72, + "learning_rate": 1.3348617666891436e-06, + "logits/chosen": -2.171356439590454, + "logits/rejected": -2.9054982662200928, + "logps/chosen": -178.237548828125, + "logps/rejected": -426.197265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.033689498901367, + "rewards/margins": 9.550368309020996, + "rewards/rejected": -20.584056854248047, + "step": 17469 + }, + { + "epoch": 2.72, + "learning_rate": 1.3341283261579957e-06, + "logits/chosen": -2.859715461730957, + "logits/rejected": -1.807277798652649, + "logps/chosen": -435.176513671875, + "logps/rejected": -187.94107055664062, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.920478343963623, + "rewards/margins": 7.6032233238220215, + "rewards/rejected": -15.523701667785645, + "step": 17470 + }, + { + "epoch": 2.72, + "learning_rate": 1.333394885626848e-06, + "logits/chosen": -1.4789719581604004, + "logits/rejected": -2.2139968872070312, + "logps/chosen": -282.0257568359375, + "logps/rejected": -448.88525390625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.488845825195312, + "rewards/margins": 9.694662094116211, + "rewards/rejected": -21.183507919311523, + "step": 17471 + }, + { + "epoch": 2.72, + "learning_rate": 1.3326614450957e-06, + "logits/chosen": -2.633653402328491, + "logits/rejected": -1.6611449718475342, + "logps/chosen": -505.25677490234375, + "logps/rejected": -346.6051025390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.611930847167969, + "rewards/margins": 11.036781311035156, + "rewards/rejected": -20.648712158203125, + "step": 17472 + }, + { + "epoch": 2.72, + "learning_rate": 1.3319280045645522e-06, + "logits/chosen": -2.2009928226470947, + "logits/rejected": -2.4219746589660645, + "logps/chosen": -158.54666137695312, + "logps/rejected": -300.15594482421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.406017303466797, + "rewards/margins": 8.566333770751953, + "rewards/rejected": -19.97235107421875, + "step": 17473 + }, + { + "epoch": 2.72, + "learning_rate": 1.331194564033404e-06, + "logits/chosen": -2.925153970718384, + "logits/rejected": -1.638365626335144, + "logps/chosen": -468.0701599121094, + "logps/rejected": -387.3329772949219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.010679721832275, + "rewards/margins": 10.147146224975586, + "rewards/rejected": -15.15782642364502, + "step": 17474 + }, + { + "epoch": 2.72, + "learning_rate": 1.3304611235022564e-06, + "logits/chosen": -2.694380521774292, + "logits/rejected": -2.6913137435913086, + "logps/chosen": -621.2085571289062, + "logps/rejected": -509.3345642089844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.744949340820312, + "rewards/margins": 9.981925964355469, + "rewards/rejected": -19.72687530517578, + "step": 17475 + }, + { + "epoch": 2.72, + "learning_rate": 1.3297276829711084e-06, + "logits/chosen": -2.7144429683685303, + "logits/rejected": -2.394788980484009, + "logps/chosen": -1047.24951171875, + "logps/rejected": -864.9020385742188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.036186218261719, + "rewards/margins": 10.972898483276367, + "rewards/rejected": -22.00908660888672, + "step": 17476 + }, + { + "epoch": 2.72, + "learning_rate": 1.3289942424399605e-06, + "logits/chosen": -2.6243927478790283, + "logits/rejected": -2.9825940132141113, + "logps/chosen": -470.766357421875, + "logps/rejected": -619.8228759765625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.694738388061523, + "rewards/margins": 9.278257369995117, + "rewards/rejected": -17.97299575805664, + "step": 17477 + }, + { + "epoch": 2.72, + "learning_rate": 1.3282608019088126e-06, + "logits/chosen": -1.3082900047302246, + "logits/rejected": -2.5535643100738525, + "logps/chosen": -292.286376953125, + "logps/rejected": -574.03515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.937049865722656, + "rewards/margins": 12.108280181884766, + "rewards/rejected": -21.045330047607422, + "step": 17478 + }, + { + "epoch": 2.72, + "learning_rate": 1.327527361377665e-06, + "logits/chosen": -2.413490056991577, + "logits/rejected": -2.4244186878204346, + "logps/chosen": -350.48480224609375, + "logps/rejected": -436.3724365234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.853939056396484, + "rewards/margins": 8.327587127685547, + "rewards/rejected": -20.18152618408203, + "step": 17479 + }, + { + "epoch": 2.72, + "learning_rate": 1.326793920846517e-06, + "logits/chosen": -2.0053322315216064, + "logits/rejected": -2.810119152069092, + "logps/chosen": -103.29722595214844, + "logps/rejected": -500.5559997558594, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.876847267150879, + "rewards/margins": 13.58092212677002, + "rewards/rejected": -22.4577693939209, + "step": 17480 + }, + { + "epoch": 2.72, + "learning_rate": 1.326060480315369e-06, + "logits/chosen": -1.569732427597046, + "logits/rejected": -1.9034295082092285, + "logps/chosen": -307.3697814941406, + "logps/rejected": -390.20361328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.242798805236816, + "rewards/margins": 10.70551872253418, + "rewards/rejected": -17.948318481445312, + "step": 17481 + }, + { + "epoch": 2.72, + "learning_rate": 1.3253270397842212e-06, + "logits/chosen": -2.435326337814331, + "logits/rejected": -2.8084957599639893, + "logps/chosen": -115.08428192138672, + "logps/rejected": -276.6080322265625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.394107818603516, + "rewards/margins": 8.977828979492188, + "rewards/rejected": -17.371936798095703, + "step": 17482 + }, + { + "epoch": 2.72, + "learning_rate": 1.3245935992530735e-06, + "logits/chosen": -1.8852647542953491, + "logits/rejected": -2.983590602874756, + "logps/chosen": -307.07489013671875, + "logps/rejected": -504.6178283691406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.148829460144043, + "rewards/margins": 8.35912036895752, + "rewards/rejected": -20.507949829101562, + "step": 17483 + }, + { + "epoch": 2.72, + "learning_rate": 1.3238601587219254e-06, + "logits/chosen": -2.578702688217163, + "logits/rejected": -1.4500707387924194, + "logps/chosen": -401.16229248046875, + "logps/rejected": -402.9443359375, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.844149589538574, + "rewards/margins": 7.9163007736206055, + "rewards/rejected": -21.76045036315918, + "step": 17484 + }, + { + "epoch": 2.72, + "learning_rate": 1.3231267181907775e-06, + "logits/chosen": -1.288308024406433, + "logits/rejected": -2.427583932876587, + "logps/chosen": -173.78463745117188, + "logps/rejected": -558.3883056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.197412490844727, + "rewards/margins": 14.082353591918945, + "rewards/rejected": -23.279766082763672, + "step": 17485 + }, + { + "epoch": 2.72, + "learning_rate": 1.3223932776596296e-06, + "logits/chosen": -2.0754289627075195, + "logits/rejected": -2.770764112472534, + "logps/chosen": -500.3844909667969, + "logps/rejected": -838.68994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.291505336761475, + "rewards/margins": 14.963809967041016, + "rewards/rejected": -22.25531578063965, + "step": 17486 + }, + { + "epoch": 2.72, + "learning_rate": 1.3216598371284819e-06, + "logits/chosen": -1.7583948373794556, + "logits/rejected": -2.2694268226623535, + "logps/chosen": -266.14990234375, + "logps/rejected": -388.26666259765625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.112278938293457, + "rewards/margins": 9.752544403076172, + "rewards/rejected": -21.864822387695312, + "step": 17487 + }, + { + "epoch": 2.72, + "learning_rate": 1.320926396597334e-06, + "logits/chosen": -2.666922092437744, + "logits/rejected": -2.4823153018951416, + "logps/chosen": -467.18798828125, + "logps/rejected": -687.026611328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.321224212646484, + "rewards/margins": 12.997581481933594, + "rewards/rejected": -22.318805694580078, + "step": 17488 + }, + { + "epoch": 2.72, + "learning_rate": 1.320192956066186e-06, + "logits/chosen": -1.459765911102295, + "logits/rejected": -2.3304283618927, + "logps/chosen": -289.2394104003906, + "logps/rejected": -490.95538330078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.40107536315918, + "rewards/margins": 11.92605972290039, + "rewards/rejected": -21.32713508605957, + "step": 17489 + }, + { + "epoch": 2.72, + "learning_rate": 1.3194595155350381e-06, + "logits/chosen": -2.26662278175354, + "logits/rejected": -2.4324686527252197, + "logps/chosen": -214.26211547851562, + "logps/rejected": -292.73834228515625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.341267585754395, + "rewards/margins": 7.100660800933838, + "rewards/rejected": -18.44192886352539, + "step": 17490 + }, + { + "epoch": 2.72, + "learning_rate": 1.3187260750038902e-06, + "logits/chosen": -2.619957447052002, + "logits/rejected": -2.8575246334075928, + "logps/chosen": -378.8355712890625, + "logps/rejected": -437.788330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4401140213012695, + "rewards/margins": 13.444232940673828, + "rewards/rejected": -18.884347915649414, + "step": 17491 + }, + { + "epoch": 2.72, + "learning_rate": 1.3179926344727425e-06, + "logits/chosen": -0.8458006978034973, + "logits/rejected": -2.2047111988067627, + "logps/chosen": -331.6548767089844, + "logps/rejected": -781.941162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.737009048461914, + "rewards/margins": 15.560113906860352, + "rewards/rejected": -25.297122955322266, + "step": 17492 + }, + { + "epoch": 2.72, + "learning_rate": 1.3172591939415944e-06, + "logits/chosen": -2.063368320465088, + "logits/rejected": -2.2501237392425537, + "logps/chosen": -219.3507537841797, + "logps/rejected": -394.397216796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.594364166259766, + "rewards/margins": 11.48123550415039, + "rewards/rejected": -20.075599670410156, + "step": 17493 + }, + { + "epoch": 2.72, + "learning_rate": 1.3165257534104465e-06, + "logits/chosen": -2.1336331367492676, + "logits/rejected": -2.619490623474121, + "logps/chosen": -108.43589782714844, + "logps/rejected": -522.8701171875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.699118614196777, + "rewards/margins": 7.6382551193237305, + "rewards/rejected": -17.337373733520508, + "step": 17494 + }, + { + "epoch": 2.72, + "learning_rate": 1.3157923128792986e-06, + "logits/chosen": -1.890791654586792, + "logits/rejected": -2.909641742706299, + "logps/chosen": -114.09603881835938, + "logps/rejected": -308.4005126953125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.172136306762695, + "rewards/margins": 7.200531005859375, + "rewards/rejected": -15.37266731262207, + "step": 17495 + }, + { + "epoch": 2.72, + "learning_rate": 1.3150588723481509e-06, + "logits/chosen": -2.574906587600708, + "logits/rejected": -1.6365182399749756, + "logps/chosen": -594.9161987304688, + "logps/rejected": -480.83843994140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.664288520812988, + "rewards/margins": 7.727092742919922, + "rewards/rejected": -20.391380310058594, + "step": 17496 + }, + { + "epoch": 2.72, + "learning_rate": 1.314325431817003e-06, + "logits/chosen": -2.7329704761505127, + "logits/rejected": -2.5755293369293213, + "logps/chosen": -245.33477783203125, + "logps/rejected": -333.3798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.129393577575684, + "rewards/margins": 10.63338565826416, + "rewards/rejected": -16.762779235839844, + "step": 17497 + }, + { + "epoch": 2.72, + "learning_rate": 1.313591991285855e-06, + "logits/chosen": -1.643838882446289, + "logits/rejected": -2.81101393699646, + "logps/chosen": -234.38714599609375, + "logps/rejected": -499.49267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.337994575500488, + "rewards/margins": 16.33611297607422, + "rewards/rejected": -23.67410659790039, + "step": 17498 + }, + { + "epoch": 2.72, + "learning_rate": 1.3128585507547071e-06, + "logits/chosen": -2.085087537765503, + "logits/rejected": -2.371035575866699, + "logps/chosen": -226.2919158935547, + "logps/rejected": -445.4407043457031, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.018978118896484, + "rewards/margins": 12.425771713256836, + "rewards/rejected": -24.444747924804688, + "step": 17499 + }, + { + "epoch": 2.72, + "learning_rate": 1.3121251102235595e-06, + "logits/chosen": -2.097494125366211, + "logits/rejected": -2.6295430660247803, + "logps/chosen": -181.48983764648438, + "logps/rejected": -501.8622741699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.308321952819824, + "rewards/margins": 13.786401748657227, + "rewards/rejected": -21.094722747802734, + "step": 17500 + }, + { + "epoch": 2.72, + "learning_rate": 1.3113916696924115e-06, + "logits/chosen": -2.6370623111724854, + "logits/rejected": -2.836855888366699, + "logps/chosen": -923.6417846679688, + "logps/rejected": -696.39013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.210654258728027, + "rewards/margins": 9.679226875305176, + "rewards/rejected": -19.889881134033203, + "step": 17501 + }, + { + "epoch": 2.72, + "learning_rate": 1.3106582291612636e-06, + "logits/chosen": -1.6394766569137573, + "logits/rejected": -2.6196799278259277, + "logps/chosen": -244.71017456054688, + "logps/rejected": -528.8761596679688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.62967586517334, + "rewards/margins": 15.132617950439453, + "rewards/rejected": -25.76229476928711, + "step": 17502 + }, + { + "epoch": 2.72, + "learning_rate": 1.3099247886301155e-06, + "logits/chosen": -1.1201951503753662, + "logits/rejected": -2.5640101432800293, + "logps/chosen": -262.623779296875, + "logps/rejected": -533.764892578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.429084777832031, + "rewards/margins": 9.372852325439453, + "rewards/rejected": -19.801937103271484, + "step": 17503 + }, + { + "epoch": 2.72, + "learning_rate": 1.3091913480989678e-06, + "logits/chosen": -2.27555251121521, + "logits/rejected": -2.5006906986236572, + "logps/chosen": -202.41207885742188, + "logps/rejected": -381.0373840332031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.336174011230469, + "rewards/margins": 14.221490859985352, + "rewards/rejected": -23.557666778564453, + "step": 17504 + }, + { + "epoch": 2.72, + "learning_rate": 1.30845790756782e-06, + "logits/chosen": -2.1693809032440186, + "logits/rejected": -1.561030626296997, + "logps/chosen": -528.249755859375, + "logps/rejected": -458.8739929199219, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.94231128692627, + "rewards/margins": 8.861218452453613, + "rewards/rejected": -20.803529739379883, + "step": 17505 + }, + { + "epoch": 2.72, + "learning_rate": 1.307724467036672e-06, + "logits/chosen": -2.415619373321533, + "logits/rejected": -2.947272539138794, + "logps/chosen": -132.93624877929688, + "logps/rejected": -584.1268310546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.532625198364258, + "rewards/margins": 11.301246643066406, + "rewards/rejected": -21.833871841430664, + "step": 17506 + }, + { + "epoch": 2.72, + "learning_rate": 1.306991026505524e-06, + "logits/chosen": -2.973573923110962, + "logits/rejected": -2.7650766372680664, + "logps/chosen": -181.48875427246094, + "logps/rejected": -234.19163513183594, + "loss": 2.3596, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.315608978271484, + "rewards/margins": 2.61647367477417, + "rewards/rejected": -14.932082176208496, + "step": 17507 + }, + { + "epoch": 2.72, + "learning_rate": 1.3062575859743764e-06, + "logits/chosen": -2.54763126373291, + "logits/rejected": -1.6565425395965576, + "logps/chosen": -308.586181640625, + "logps/rejected": -410.60284423828125, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.230152130126953, + "rewards/margins": 7.1914167404174805, + "rewards/rejected": -19.42156982421875, + "step": 17508 + }, + { + "epoch": 2.72, + "learning_rate": 1.3055241454432285e-06, + "logits/chosen": -2.4031589031219482, + "logits/rejected": -2.4423635005950928, + "logps/chosen": -414.197998046875, + "logps/rejected": -661.224853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.113475799560547, + "rewards/margins": 13.304397583007812, + "rewards/rejected": -24.41787338256836, + "step": 17509 + }, + { + "epoch": 2.72, + "learning_rate": 1.3047907049120806e-06, + "logits/chosen": -2.8942809104919434, + "logits/rejected": -2.992708683013916, + "logps/chosen": -154.108642578125, + "logps/rejected": -249.99136352539062, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7395243644714355, + "rewards/margins": 8.433145523071289, + "rewards/rejected": -15.172669410705566, + "step": 17510 + }, + { + "epoch": 2.72, + "learning_rate": 1.3040572643809327e-06, + "logits/chosen": -2.365119457244873, + "logits/rejected": -2.8001880645751953, + "logps/chosen": -203.27244567871094, + "logps/rejected": -532.1517944335938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.09844970703125, + "rewards/margins": 13.730412483215332, + "rewards/rejected": -25.828861236572266, + "step": 17511 + }, + { + "epoch": 2.72, + "learning_rate": 1.3033238238497847e-06, + "logits/chosen": -2.555711507797241, + "logits/rejected": -1.9785735607147217, + "logps/chosen": -369.7054748535156, + "logps/rejected": -401.1358642578125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.50440788269043, + "rewards/margins": 9.33418083190918, + "rewards/rejected": -24.83858871459961, + "step": 17512 + }, + { + "epoch": 2.72, + "learning_rate": 1.3025903833186368e-06, + "logits/chosen": -2.0710818767547607, + "logits/rejected": -2.445996046066284, + "logps/chosen": -137.758544921875, + "logps/rejected": -258.40240478515625, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.04385757446289, + "rewards/margins": 7.5495381355285645, + "rewards/rejected": -16.593395233154297, + "step": 17513 + }, + { + "epoch": 2.72, + "learning_rate": 1.301856942787489e-06, + "logits/chosen": -2.7435975074768066, + "logits/rejected": -1.9208488464355469, + "logps/chosen": -696.2699584960938, + "logps/rejected": -565.20849609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.212751388549805, + "rewards/margins": 14.474644660949707, + "rewards/rejected": -22.687397003173828, + "step": 17514 + }, + { + "epoch": 2.72, + "learning_rate": 1.301123502256341e-06, + "logits/chosen": -2.6397931575775146, + "logits/rejected": -2.8389716148376465, + "logps/chosen": -337.38275146484375, + "logps/rejected": -322.59259033203125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.701665878295898, + "rewards/margins": 6.609979152679443, + "rewards/rejected": -16.3116455078125, + "step": 17515 + }, + { + "epoch": 2.72, + "learning_rate": 1.300390061725193e-06, + "logits/chosen": -1.5804755687713623, + "logits/rejected": -2.5440895557403564, + "logps/chosen": -437.78070068359375, + "logps/rejected": -561.0515747070312, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.941500663757324, + "rewards/margins": 8.123186111450195, + "rewards/rejected": -18.064687728881836, + "step": 17516 + }, + { + "epoch": 2.72, + "learning_rate": 1.2996566211940454e-06, + "logits/chosen": -2.0379199981689453, + "logits/rejected": -2.4595725536346436, + "logps/chosen": -341.41632080078125, + "logps/rejected": -516.7698974609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.336313247680664, + "rewards/margins": 12.350113868713379, + "rewards/rejected": -22.68642807006836, + "step": 17517 + }, + { + "epoch": 2.72, + "learning_rate": 1.2989231806628975e-06, + "logits/chosen": -2.394918441772461, + "logits/rejected": -2.4904706478118896, + "logps/chosen": -199.63128662109375, + "logps/rejected": -247.79220581054688, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.785799026489258, + "rewards/margins": 7.808398723602295, + "rewards/rejected": -16.59419822692871, + "step": 17518 + }, + { + "epoch": 2.72, + "learning_rate": 1.2981897401317496e-06, + "logits/chosen": -2.6939754486083984, + "logits/rejected": -1.5244371891021729, + "logps/chosen": -498.50091552734375, + "logps/rejected": -386.89544677734375, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.800448417663574, + "rewards/margins": 6.785818099975586, + "rewards/rejected": -15.586267471313477, + "step": 17519 + }, + { + "epoch": 2.72, + "learning_rate": 1.2974562996006017e-06, + "logits/chosen": -1.902782678604126, + "logits/rejected": -2.6507575511932373, + "logps/chosen": -362.91387939453125, + "logps/rejected": -645.10107421875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.191781044006348, + "rewards/margins": 8.1785249710083, + "rewards/rejected": -16.37030601501465, + "step": 17520 + }, + { + "epoch": 2.72, + "learning_rate": 1.296722859069454e-06, + "logits/chosen": -2.927347183227539, + "logits/rejected": -2.806037187576294, + "logps/chosen": -256.5263366699219, + "logps/rejected": -261.8077087402344, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.100592136383057, + "rewards/margins": 7.7052388191223145, + "rewards/rejected": -14.805830955505371, + "step": 17521 + }, + { + "epoch": 2.73, + "learning_rate": 1.295989418538306e-06, + "logits/chosen": -1.9848203659057617, + "logits/rejected": -2.272141695022583, + "logps/chosen": -229.250244140625, + "logps/rejected": -426.547607421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.94604206085205, + "rewards/margins": 10.798515319824219, + "rewards/rejected": -19.744556427001953, + "step": 17522 + }, + { + "epoch": 2.73, + "learning_rate": 1.295255978007158e-06, + "logits/chosen": -1.4446663856506348, + "logits/rejected": -2.4809861183166504, + "logps/chosen": -209.30694580078125, + "logps/rejected": -330.2900390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.572393417358398, + "rewards/margins": 8.61855697631836, + "rewards/rejected": -16.190950393676758, + "step": 17523 + }, + { + "epoch": 2.73, + "learning_rate": 1.29452253747601e-06, + "logits/chosen": -2.6404244899749756, + "logits/rejected": -2.9712765216827393, + "logps/chosen": -151.81796264648438, + "logps/rejected": -230.05996704101562, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.930909156799316, + "rewards/margins": 8.509660720825195, + "rewards/rejected": -15.440570831298828, + "step": 17524 + }, + { + "epoch": 2.73, + "learning_rate": 1.2937890969448623e-06, + "logits/chosen": -0.3968689441680908, + "logits/rejected": -2.1384387016296387, + "logps/chosen": -161.590087890625, + "logps/rejected": -618.5520629882812, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.145156860351562, + "rewards/margins": 12.971137046813965, + "rewards/rejected": -24.116294860839844, + "step": 17525 + }, + { + "epoch": 2.73, + "learning_rate": 1.2930556564137144e-06, + "logits/chosen": -2.704922914505005, + "logits/rejected": -2.6155526638031006, + "logps/chosen": -164.1351318359375, + "logps/rejected": -406.8672180175781, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.500829696655273, + "rewards/margins": 6.435155391693115, + "rewards/rejected": -16.935985565185547, + "step": 17526 + }, + { + "epoch": 2.73, + "learning_rate": 1.2923222158825665e-06, + "logits/chosen": -2.3467836380004883, + "logits/rejected": -2.6089673042297363, + "logps/chosen": -156.23211669921875, + "logps/rejected": -219.1936798095703, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.452555656433105, + "rewards/margins": 9.571605682373047, + "rewards/rejected": -19.02416229248047, + "step": 17527 + }, + { + "epoch": 2.73, + "learning_rate": 1.2915887753514186e-06, + "logits/chosen": -2.7124183177948, + "logits/rejected": -2.6372416019439697, + "logps/chosen": -318.2237548828125, + "logps/rejected": -427.89794921875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.103527069091797, + "rewards/margins": 8.434022903442383, + "rewards/rejected": -17.53754997253418, + "step": 17528 + }, + { + "epoch": 2.73, + "learning_rate": 1.290855334820271e-06, + "logits/chosen": -2.7908666133880615, + "logits/rejected": -2.891648530960083, + "logps/chosen": -148.96075439453125, + "logps/rejected": -226.09835815429688, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.346708297729492, + "rewards/margins": 6.3456196784973145, + "rewards/rejected": -17.69232749938965, + "step": 17529 + }, + { + "epoch": 2.73, + "learning_rate": 1.290121894289123e-06, + "logits/chosen": -2.7436745166778564, + "logits/rejected": -2.8459601402282715, + "logps/chosen": -578.2888793945312, + "logps/rejected": -610.77490234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.635034561157227, + "rewards/margins": 9.6576566696167, + "rewards/rejected": -19.29269027709961, + "step": 17530 + }, + { + "epoch": 2.73, + "learning_rate": 1.289388453757975e-06, + "logits/chosen": -2.074599027633667, + "logits/rejected": -1.9426307678222656, + "logps/chosen": -263.80072021484375, + "logps/rejected": -294.28216552734375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.513901710510254, + "rewards/margins": 8.035176277160645, + "rewards/rejected": -21.5490779876709, + "step": 17531 + }, + { + "epoch": 2.73, + "learning_rate": 1.2886550132268272e-06, + "logits/chosen": -2.7146544456481934, + "logits/rejected": -2.8719124794006348, + "logps/chosen": -118.66724395751953, + "logps/rejected": -213.96278381347656, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.365558624267578, + "rewards/margins": 7.389174938201904, + "rewards/rejected": -16.75473403930664, + "step": 17532 + }, + { + "epoch": 2.73, + "learning_rate": 1.2879215726956793e-06, + "logits/chosen": -1.5799819231033325, + "logits/rejected": -2.6917855739593506, + "logps/chosen": -199.25265502929688, + "logps/rejected": -302.4078674316406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.960561752319336, + "rewards/margins": 8.621942520141602, + "rewards/rejected": -21.582504272460938, + "step": 17533 + }, + { + "epoch": 2.73, + "learning_rate": 1.2871881321645314e-06, + "logits/chosen": -2.3357789516448975, + "logits/rejected": -2.0268917083740234, + "logps/chosen": -215.0522918701172, + "logps/rejected": -296.44140625, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.494852066040039, + "rewards/margins": 9.193588256835938, + "rewards/rejected": -20.688440322875977, + "step": 17534 + }, + { + "epoch": 2.73, + "learning_rate": 1.2864546916333834e-06, + "logits/chosen": -2.9032466411590576, + "logits/rejected": -2.662721633911133, + "logps/chosen": -463.26708984375, + "logps/rejected": -688.2982177734375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.752134323120117, + "rewards/margins": 7.195767879486084, + "rewards/rejected": -16.94790267944336, + "step": 17535 + }, + { + "epoch": 2.73, + "learning_rate": 1.2857212511022355e-06, + "logits/chosen": -2.7689874172210693, + "logits/rejected": -2.827927350997925, + "logps/chosen": -261.976806640625, + "logps/rejected": -341.27410888671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.264152526855469, + "rewards/margins": 8.185857772827148, + "rewards/rejected": -17.450010299682617, + "step": 17536 + }, + { + "epoch": 2.73, + "learning_rate": 1.2849878105710876e-06, + "logits/chosen": -2.437025785446167, + "logits/rejected": -2.64996600151062, + "logps/chosen": -204.75836181640625, + "logps/rejected": -322.9121398925781, + "loss": 0.8021, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.154937744140625, + "rewards/margins": 5.156226634979248, + "rewards/rejected": -17.31116485595703, + "step": 17537 + }, + { + "epoch": 2.73, + "learning_rate": 1.28425437003994e-06, + "logits/chosen": -2.702749490737915, + "logits/rejected": -1.9008710384368896, + "logps/chosen": -208.23947143554688, + "logps/rejected": -358.2536926269531, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.648144721984863, + "rewards/margins": 9.070508003234863, + "rewards/rejected": -21.718652725219727, + "step": 17538 + }, + { + "epoch": 2.73, + "learning_rate": 1.283520929508792e-06, + "logits/chosen": -2.7182228565216064, + "logits/rejected": -2.021846294403076, + "logps/chosen": -803.5421752929688, + "logps/rejected": -516.4262084960938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.754086494445801, + "rewards/margins": 12.800826072692871, + "rewards/rejected": -18.554912567138672, + "step": 17539 + }, + { + "epoch": 2.73, + "learning_rate": 1.2827874889776441e-06, + "logits/chosen": -2.11356258392334, + "logits/rejected": -2.8862273693084717, + "logps/chosen": -285.4830017089844, + "logps/rejected": -313.39593505859375, + "loss": 0.6109, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.677593231201172, + "rewards/margins": 5.046233177185059, + "rewards/rejected": -12.72382640838623, + "step": 17540 + }, + { + "epoch": 2.73, + "learning_rate": 1.2820540484464962e-06, + "logits/chosen": -2.4912657737731934, + "logits/rejected": -2.528015613555908, + "logps/chosen": -473.55950927734375, + "logps/rejected": -521.3077392578125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.775087356567383, + "rewards/margins": 10.861026763916016, + "rewards/rejected": -19.63611602783203, + "step": 17541 + }, + { + "epoch": 2.73, + "learning_rate": 1.2813206079153485e-06, + "logits/chosen": -1.9065964221954346, + "logits/rejected": -2.7679433822631836, + "logps/chosen": -176.9250946044922, + "logps/rejected": -265.00469970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.47164249420166, + "rewards/margins": 10.987933158874512, + "rewards/rejected": -19.459575653076172, + "step": 17542 + }, + { + "epoch": 2.73, + "learning_rate": 1.2805871673842004e-06, + "logits/chosen": -2.5447804927825928, + "logits/rejected": -2.6119608879089355, + "logps/chosen": -135.44992065429688, + "logps/rejected": -449.5648193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.039361953735352, + "rewards/margins": 15.410184860229492, + "rewards/rejected": -22.449546813964844, + "step": 17543 + }, + { + "epoch": 2.73, + "learning_rate": 1.2798537268530525e-06, + "logits/chosen": -1.742465615272522, + "logits/rejected": -2.6140806674957275, + "logps/chosen": -209.80392456054688, + "logps/rejected": -775.0533447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.617477416992188, + "rewards/margins": 21.302980422973633, + "rewards/rejected": -29.920455932617188, + "step": 17544 + }, + { + "epoch": 2.73, + "learning_rate": 1.2791202863219046e-06, + "logits/chosen": -2.9173853397369385, + "logits/rejected": -2.2867066860198975, + "logps/chosen": -400.04345703125, + "logps/rejected": -356.3492736816406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.718364238739014, + "rewards/margins": 9.359419822692871, + "rewards/rejected": -15.077783584594727, + "step": 17545 + }, + { + "epoch": 2.73, + "learning_rate": 1.2783868457907569e-06, + "logits/chosen": -2.292539596557617, + "logits/rejected": -2.935065269470215, + "logps/chosen": -222.23631286621094, + "logps/rejected": -604.8115844726562, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.681727409362793, + "rewards/margins": 8.860189437866211, + "rewards/rejected": -21.541915893554688, + "step": 17546 + }, + { + "epoch": 2.73, + "learning_rate": 1.277653405259609e-06, + "logits/chosen": -1.8019256591796875, + "logits/rejected": -2.4170432090759277, + "logps/chosen": -221.45765686035156, + "logps/rejected": -480.46490478515625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.016511917114258, + "rewards/margins": 12.777839660644531, + "rewards/rejected": -22.79435157775879, + "step": 17547 + }, + { + "epoch": 2.73, + "learning_rate": 1.276919964728461e-06, + "logits/chosen": -2.8678834438323975, + "logits/rejected": -2.9438910484313965, + "logps/chosen": -167.07257080078125, + "logps/rejected": -148.3161163330078, + "loss": 0.0739, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.885906219482422, + "rewards/margins": 5.931280136108398, + "rewards/rejected": -10.81718635559082, + "step": 17548 + }, + { + "epoch": 2.73, + "learning_rate": 1.2761865241973131e-06, + "logits/chosen": -2.5223610401153564, + "logits/rejected": -1.6746222972869873, + "logps/chosen": -427.558349609375, + "logps/rejected": -397.978271484375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.073646545410156, + "rewards/margins": 11.784876823425293, + "rewards/rejected": -18.858524322509766, + "step": 17549 + }, + { + "epoch": 2.73, + "learning_rate": 1.2754530836661652e-06, + "logits/chosen": -2.8468751907348633, + "logits/rejected": -2.622018337249756, + "logps/chosen": -335.5330810546875, + "logps/rejected": -472.7101745605469, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.455164909362793, + "rewards/margins": 8.330049514770508, + "rewards/rejected": -21.785213470458984, + "step": 17550 + }, + { + "epoch": 2.73, + "learning_rate": 1.2747196431350175e-06, + "logits/chosen": -2.5399389266967773, + "logits/rejected": -2.9617836475372314, + "logps/chosen": -147.1171875, + "logps/rejected": -339.34442138671875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.344419479370117, + "rewards/margins": 7.827615261077881, + "rewards/rejected": -20.172035217285156, + "step": 17551 + }, + { + "epoch": 2.73, + "learning_rate": 1.2739862026038696e-06, + "logits/chosen": -2.391890287399292, + "logits/rejected": -2.6094186305999756, + "logps/chosen": -294.3242492675781, + "logps/rejected": -561.759765625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.26124382019043, + "rewards/margins": 12.43736457824707, + "rewards/rejected": -25.6986083984375, + "step": 17552 + }, + { + "epoch": 2.73, + "learning_rate": 1.2732527620727215e-06, + "logits/chosen": -2.819441556930542, + "logits/rejected": -2.4869821071624756, + "logps/chosen": -249.73519897460938, + "logps/rejected": -429.9422302246094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.604616165161133, + "rewards/margins": 15.496217727661133, + "rewards/rejected": -22.100833892822266, + "step": 17553 + }, + { + "epoch": 2.73, + "learning_rate": 1.2725193215415736e-06, + "logits/chosen": -1.3002526760101318, + "logits/rejected": -2.195995569229126, + "logps/chosen": -256.4316711425781, + "logps/rejected": -497.87457275390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.613656997680664, + "rewards/margins": 8.953998565673828, + "rewards/rejected": -20.56765365600586, + "step": 17554 + }, + { + "epoch": 2.73, + "learning_rate": 1.2717858810104259e-06, + "logits/chosen": -0.3431309163570404, + "logits/rejected": -2.1434526443481445, + "logps/chosen": -189.08493041992188, + "logps/rejected": -845.0858154296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.801253318786621, + "rewards/margins": 18.71688461303711, + "rewards/rejected": -26.518138885498047, + "step": 17555 + }, + { + "epoch": 2.73, + "learning_rate": 1.271052440479278e-06, + "logits/chosen": -1.8814291954040527, + "logits/rejected": -2.6950788497924805, + "logps/chosen": -504.2437744140625, + "logps/rejected": -677.01318359375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.802316665649414, + "rewards/margins": 10.59517765045166, + "rewards/rejected": -21.39749526977539, + "step": 17556 + }, + { + "epoch": 2.73, + "learning_rate": 1.27031899994813e-06, + "logits/chosen": -2.1285526752471924, + "logits/rejected": -2.417938232421875, + "logps/chosen": -365.3174743652344, + "logps/rejected": -375.26837158203125, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.892751693725586, + "rewards/margins": 6.16212797164917, + "rewards/rejected": -20.054880142211914, + "step": 17557 + }, + { + "epoch": 2.73, + "learning_rate": 1.2695855594169822e-06, + "logits/chosen": -2.8591156005859375, + "logits/rejected": -2.8633408546447754, + "logps/chosen": -130.9341583251953, + "logps/rejected": -288.79888916015625, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.205025672912598, + "rewards/margins": 7.53228235244751, + "rewards/rejected": -16.737308502197266, + "step": 17558 + }, + { + "epoch": 2.73, + "learning_rate": 1.2688521188858345e-06, + "logits/chosen": -1.8359891176223755, + "logits/rejected": -2.8657076358795166, + "logps/chosen": -332.4783020019531, + "logps/rejected": -576.2966918945312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.744490623474121, + "rewards/margins": 13.39825439453125, + "rewards/rejected": -21.142745971679688, + "step": 17559 + }, + { + "epoch": 2.73, + "learning_rate": 1.2681186783546865e-06, + "logits/chosen": -2.679431438446045, + "logits/rejected": -2.851632833480835, + "logps/chosen": -670.5233764648438, + "logps/rejected": -568.482666015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5111541748046875, + "rewards/margins": 9.532890319824219, + "rewards/rejected": -17.044044494628906, + "step": 17560 + }, + { + "epoch": 2.73, + "learning_rate": 1.2673852378235386e-06, + "logits/chosen": -2.6108336448669434, + "logits/rejected": -2.0378267765045166, + "logps/chosen": -321.2581787109375, + "logps/rejected": -395.62237548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.62506103515625, + "rewards/margins": 12.807968139648438, + "rewards/rejected": -21.433029174804688, + "step": 17561 + }, + { + "epoch": 2.73, + "learning_rate": 1.2666517972923905e-06, + "logits/chosen": -2.6179888248443604, + "logits/rejected": -2.9646286964416504, + "logps/chosen": -712.9938354492188, + "logps/rejected": -877.9038696289062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.237600326538086, + "rewards/margins": 14.718771934509277, + "rewards/rejected": -23.956371307373047, + "step": 17562 + }, + { + "epoch": 2.73, + "learning_rate": 1.2659183567612428e-06, + "logits/chosen": -1.7311513423919678, + "logits/rejected": -2.881558656692505, + "logps/chosen": -193.29371643066406, + "logps/rejected": -455.586669921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.174830436706543, + "rewards/margins": 9.435977935791016, + "rewards/rejected": -17.610809326171875, + "step": 17563 + }, + { + "epoch": 2.73, + "learning_rate": 1.265184916230095e-06, + "logits/chosen": -2.36458158493042, + "logits/rejected": -2.1819522380828857, + "logps/chosen": -224.95726013183594, + "logps/rejected": -374.3110046386719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.09339714050293, + "rewards/margins": 11.230106353759766, + "rewards/rejected": -24.323505401611328, + "step": 17564 + }, + { + "epoch": 2.73, + "learning_rate": 1.264451475698947e-06, + "logits/chosen": -2.4775569438934326, + "logits/rejected": -2.8766930103302, + "logps/chosen": -480.570556640625, + "logps/rejected": -494.72540283203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.467019081115723, + "rewards/margins": 11.567471504211426, + "rewards/rejected": -19.03449058532715, + "step": 17565 + }, + { + "epoch": 2.73, + "learning_rate": 1.263718035167799e-06, + "logits/chosen": -2.1211531162261963, + "logits/rejected": -2.5544252395629883, + "logps/chosen": -242.54901123046875, + "logps/rejected": -371.15118408203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.838662147521973, + "rewards/margins": 8.166313171386719, + "rewards/rejected": -18.004974365234375, + "step": 17566 + }, + { + "epoch": 2.73, + "learning_rate": 1.2629845946366514e-06, + "logits/chosen": -2.3218507766723633, + "logits/rejected": -2.5109758377075195, + "logps/chosen": -170.01670837402344, + "logps/rejected": -324.79437255859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.931788921356201, + "rewards/margins": 9.638526916503906, + "rewards/rejected": -17.570316314697266, + "step": 17567 + }, + { + "epoch": 2.73, + "learning_rate": 1.2622511541055035e-06, + "logits/chosen": -1.4111820459365845, + "logits/rejected": -2.0797619819641113, + "logps/chosen": -397.4521484375, + "logps/rejected": -534.449951171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.86782693862915, + "rewards/margins": 10.877575874328613, + "rewards/rejected": -17.745403289794922, + "step": 17568 + }, + { + "epoch": 2.73, + "learning_rate": 1.2615177135743556e-06, + "logits/chosen": -2.042360305786133, + "logits/rejected": -2.508181571960449, + "logps/chosen": -400.1816101074219, + "logps/rejected": -449.5943603515625, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.971334457397461, + "rewards/margins": 5.603464603424072, + "rewards/rejected": -15.574799537658691, + "step": 17569 + }, + { + "epoch": 2.73, + "learning_rate": 1.2607842730432077e-06, + "logits/chosen": -2.8300833702087402, + "logits/rejected": -2.2540252208709717, + "logps/chosen": -206.18971252441406, + "logps/rejected": -215.36831665039062, + "loss": 0.3887, + "rewards/accuracies": 0.5, + "rewards/chosen": -7.55073881149292, + "rewards/margins": 4.938327789306641, + "rewards/rejected": -12.489066123962402, + "step": 17570 + }, + { + "epoch": 2.73, + "learning_rate": 1.2600508325120598e-06, + "logits/chosen": -2.3923709392547607, + "logits/rejected": -2.856745958328247, + "logps/chosen": -287.22198486328125, + "logps/rejected": -416.4847412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1786322593688965, + "rewards/margins": 13.13052749633789, + "rewards/rejected": -18.309160232543945, + "step": 17571 + }, + { + "epoch": 2.73, + "learning_rate": 1.2593173919809118e-06, + "logits/chosen": -2.8932178020477295, + "logits/rejected": -2.7515547275543213, + "logps/chosen": -810.724365234375, + "logps/rejected": -580.528564453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.29201602935791, + "rewards/margins": 9.796960830688477, + "rewards/rejected": -17.08897590637207, + "step": 17572 + }, + { + "epoch": 2.73, + "learning_rate": 1.258583951449764e-06, + "logits/chosen": -2.5398004055023193, + "logits/rejected": -2.4362118244171143, + "logps/chosen": -451.4093017578125, + "logps/rejected": -474.1910095214844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.548208236694336, + "rewards/margins": 11.67608642578125, + "rewards/rejected": -20.224294662475586, + "step": 17573 + }, + { + "epoch": 2.73, + "learning_rate": 1.257850510918616e-06, + "logits/chosen": -2.450563430786133, + "logits/rejected": -2.067807674407959, + "logps/chosen": -323.09027099609375, + "logps/rejected": -374.4156188964844, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.178072929382324, + "rewards/margins": 6.751143455505371, + "rewards/rejected": -15.929216384887695, + "step": 17574 + }, + { + "epoch": 2.73, + "learning_rate": 1.2571170703874681e-06, + "logits/chosen": -2.707160234451294, + "logits/rejected": -2.9196937084198, + "logps/chosen": -208.798828125, + "logps/rejected": -483.1301574707031, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.973858833312988, + "rewards/margins": 8.014830589294434, + "rewards/rejected": -17.988689422607422, + "step": 17575 + }, + { + "epoch": 2.73, + "learning_rate": 1.2563836298563204e-06, + "logits/chosen": -2.891514778137207, + "logits/rejected": -2.174555540084839, + "logps/chosen": -406.7769775390625, + "logps/rejected": -419.0067138671875, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.319818496704102, + "rewards/margins": 5.221254825592041, + "rewards/rejected": -20.541072845458984, + "step": 17576 + }, + { + "epoch": 2.73, + "learning_rate": 1.2556501893251725e-06, + "logits/chosen": -1.5434569120407104, + "logits/rejected": -2.601654529571533, + "logps/chosen": -181.83602905273438, + "logps/rejected": -393.564697265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.449567794799805, + "rewards/margins": 10.701604843139648, + "rewards/rejected": -18.151172637939453, + "step": 17577 + }, + { + "epoch": 2.73, + "learning_rate": 1.2549167487940246e-06, + "logits/chosen": -2.375028133392334, + "logits/rejected": -2.9210009574890137, + "logps/chosen": -513.1297607421875, + "logps/rejected": -701.914306640625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.447542190551758, + "rewards/margins": 9.180465698242188, + "rewards/rejected": -19.628005981445312, + "step": 17578 + }, + { + "epoch": 2.73, + "learning_rate": 1.2541833082628767e-06, + "logits/chosen": -2.5696663856506348, + "logits/rejected": -2.608233690261841, + "logps/chosen": -204.86155700683594, + "logps/rejected": -364.0977783203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.722848892211914, + "rewards/margins": 10.878037452697754, + "rewards/rejected": -17.60088539123535, + "step": 17579 + }, + { + "epoch": 2.73, + "learning_rate": 1.253449867731729e-06, + "logits/chosen": -2.6137521266937256, + "logits/rejected": -1.3168286085128784, + "logps/chosen": -291.1562194824219, + "logps/rejected": -306.8609619140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.647410869598389, + "rewards/margins": 10.15255355834961, + "rewards/rejected": -16.799964904785156, + "step": 17580 + }, + { + "epoch": 2.73, + "learning_rate": 1.252716427200581e-06, + "logits/chosen": -1.9298992156982422, + "logits/rejected": -2.5063178539276123, + "logps/chosen": -278.66455078125, + "logps/rejected": -463.22406005859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.581077575683594, + "rewards/margins": 8.80967903137207, + "rewards/rejected": -17.390756607055664, + "step": 17581 + }, + { + "epoch": 2.73, + "learning_rate": 1.251982986669433e-06, + "logits/chosen": -2.2503886222839355, + "logits/rejected": -2.442514181137085, + "logps/chosen": -473.9124755859375, + "logps/rejected": -418.8350830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6658782958984375, + "rewards/margins": 11.736507415771484, + "rewards/rejected": -19.402385711669922, + "step": 17582 + }, + { + "epoch": 2.73, + "learning_rate": 1.251249546138285e-06, + "logits/chosen": -2.215970516204834, + "logits/rejected": -2.5142481327056885, + "logps/chosen": -213.8260498046875, + "logps/rejected": -401.2830810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.266767501831055, + "rewards/margins": 10.425100326538086, + "rewards/rejected": -20.69186782836914, + "step": 17583 + }, + { + "epoch": 2.73, + "learning_rate": 1.2505161056071373e-06, + "logits/chosen": -2.5900392532348633, + "logits/rejected": -2.8539652824401855, + "logps/chosen": -126.85172271728516, + "logps/rejected": -298.04876708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.38044548034668, + "rewards/margins": 11.33055305480957, + "rewards/rejected": -17.71099853515625, + "step": 17584 + }, + { + "epoch": 2.73, + "learning_rate": 1.2497826650759894e-06, + "logits/chosen": -1.4364197254180908, + "logits/rejected": -2.2470550537109375, + "logps/chosen": -342.34539794921875, + "logps/rejected": -656.541015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.966506958007812, + "rewards/margins": 11.372636795043945, + "rewards/rejected": -26.339143753051758, + "step": 17585 + }, + { + "epoch": 2.73, + "learning_rate": 1.2490492245448415e-06, + "logits/chosen": -3.1467769145965576, + "logits/rejected": -2.7851953506469727, + "logps/chosen": -267.9580383300781, + "logps/rejected": -262.70465087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.585131645202637, + "rewards/margins": 13.565203666687012, + "rewards/rejected": -19.15033531188965, + "step": 17586 + }, + { + "epoch": 2.74, + "learning_rate": 1.2483157840136936e-06, + "logits/chosen": -2.672473907470703, + "logits/rejected": -2.851511001586914, + "logps/chosen": -150.97755432128906, + "logps/rejected": -258.7645568847656, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.219820976257324, + "rewards/margins": 7.290163993835449, + "rewards/rejected": -18.509984970092773, + "step": 17587 + }, + { + "epoch": 2.74, + "learning_rate": 1.247582343482546e-06, + "logits/chosen": -2.525360107421875, + "logits/rejected": -2.911505937576294, + "logps/chosen": -84.24971008300781, + "logps/rejected": -404.335205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.585175514221191, + "rewards/margins": 15.184052467346191, + "rewards/rejected": -22.769227981567383, + "step": 17588 + }, + { + "epoch": 2.74, + "learning_rate": 1.246848902951398e-06, + "logits/chosen": -2.0321855545043945, + "logits/rejected": -2.728065013885498, + "logps/chosen": -713.4072875976562, + "logps/rejected": -753.6495971679688, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.19014835357666, + "rewards/margins": 8.70036792755127, + "rewards/rejected": -16.89051628112793, + "step": 17589 + }, + { + "epoch": 2.74, + "learning_rate": 1.24611546242025e-06, + "logits/chosen": -2.811939001083374, + "logits/rejected": -2.266587734222412, + "logps/chosen": -754.7781982421875, + "logps/rejected": -749.0040283203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.213018417358398, + "rewards/margins": 9.900346755981445, + "rewards/rejected": -18.113365173339844, + "step": 17590 + }, + { + "epoch": 2.74, + "learning_rate": 1.2453820218891022e-06, + "logits/chosen": -1.8612672090530396, + "logits/rejected": -2.3915538787841797, + "logps/chosen": -362.78778076171875, + "logps/rejected": -562.7078857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.065349578857422, + "rewards/margins": 15.97821044921875, + "rewards/rejected": -30.043560028076172, + "step": 17591 + }, + { + "epoch": 2.74, + "learning_rate": 1.2446485813579543e-06, + "logits/chosen": -2.59892201423645, + "logits/rejected": -2.0023624897003174, + "logps/chosen": -458.2723388671875, + "logps/rejected": -465.02752685546875, + "loss": 1.6426, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.940752029418945, + "rewards/margins": 5.119803428649902, + "rewards/rejected": -17.060556411743164, + "step": 17592 + }, + { + "epoch": 2.74, + "learning_rate": 1.2439151408268064e-06, + "logits/chosen": -2.6538801193237305, + "logits/rejected": -2.286827564239502, + "logps/chosen": -268.6000061035156, + "logps/rejected": -420.3401794433594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.27677059173584, + "rewards/margins": 11.721881866455078, + "rewards/rejected": -18.998653411865234, + "step": 17593 + }, + { + "epoch": 2.74, + "learning_rate": 1.2431817002956585e-06, + "logits/chosen": -2.508700370788574, + "logits/rejected": -2.1108667850494385, + "logps/chosen": -382.42828369140625, + "logps/rejected": -403.5476379394531, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.628955841064453, + "rewards/margins": 6.494970321655273, + "rewards/rejected": -19.123926162719727, + "step": 17594 + }, + { + "epoch": 2.74, + "learning_rate": 1.2424482597645105e-06, + "logits/chosen": -3.0498507022857666, + "logits/rejected": -2.3419365882873535, + "logps/chosen": -226.76046752929688, + "logps/rejected": -216.20352172851562, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.906949996948242, + "rewards/margins": 6.700262069702148, + "rewards/rejected": -13.60721206665039, + "step": 17595 + }, + { + "epoch": 2.74, + "learning_rate": 1.2417148192333626e-06, + "logits/chosen": -2.74892258644104, + "logits/rejected": -2.778939723968506, + "logps/chosen": -233.37820434570312, + "logps/rejected": -538.3302001953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.588844299316406, + "rewards/margins": 9.429844856262207, + "rewards/rejected": -19.018688201904297, + "step": 17596 + }, + { + "epoch": 2.74, + "learning_rate": 1.240981378702215e-06, + "logits/chosen": -2.60414981842041, + "logits/rejected": -1.3208608627319336, + "logps/chosen": -753.6552734375, + "logps/rejected": -582.3814086914062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.708694458007812, + "rewards/margins": 13.427284240722656, + "rewards/rejected": -23.13597869873047, + "step": 17597 + }, + { + "epoch": 2.74, + "learning_rate": 1.240247938171067e-06, + "logits/chosen": -2.6771719455718994, + "logits/rejected": -2.0580272674560547, + "logps/chosen": -211.89785766601562, + "logps/rejected": -222.74586486816406, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.704890251159668, + "rewards/margins": 6.804410457611084, + "rewards/rejected": -15.509300231933594, + "step": 17598 + }, + { + "epoch": 2.74, + "learning_rate": 1.2395144976399191e-06, + "logits/chosen": -1.4405152797698975, + "logits/rejected": -2.4607532024383545, + "logps/chosen": -413.14337158203125, + "logps/rejected": -488.20587158203125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.809466361999512, + "rewards/margins": 8.687840461730957, + "rewards/rejected": -19.49730682373047, + "step": 17599 + }, + { + "epoch": 2.74, + "learning_rate": 1.2387810571087712e-06, + "logits/chosen": -2.6974289417266846, + "logits/rejected": -0.9481030106544495, + "logps/chosen": -534.380126953125, + "logps/rejected": -381.91973876953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.511526107788086, + "rewards/margins": 10.909319877624512, + "rewards/rejected": -22.420846939086914, + "step": 17600 + }, + { + "epoch": 2.74, + "learning_rate": 1.2380476165776235e-06, + "logits/chosen": -2.7106950283050537, + "logits/rejected": -2.2102229595184326, + "logps/chosen": -467.20233154296875, + "logps/rejected": -475.0187683105469, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.362673282623291, + "rewards/margins": 9.393509864807129, + "rewards/rejected": -14.756183624267578, + "step": 17601 + }, + { + "epoch": 2.74, + "learning_rate": 1.2373141760464754e-06, + "logits/chosen": -2.6846768856048584, + "logits/rejected": -2.0577311515808105, + "logps/chosen": -170.13125610351562, + "logps/rejected": -365.1895751953125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.065792083740234, + "rewards/margins": 11.206988334655762, + "rewards/rejected": -19.272781372070312, + "step": 17602 + }, + { + "epoch": 2.74, + "learning_rate": 1.2365807355153275e-06, + "logits/chosen": -2.789066791534424, + "logits/rejected": -2.890638828277588, + "logps/chosen": -175.96449279785156, + "logps/rejected": -261.65716552734375, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.750106811523438, + "rewards/margins": 3.9006567001342773, + "rewards/rejected": -13.650763511657715, + "step": 17603 + }, + { + "epoch": 2.74, + "learning_rate": 1.2358472949841796e-06, + "logits/chosen": -2.7011687755584717, + "logits/rejected": -2.8392655849456787, + "logps/chosen": -192.60324096679688, + "logps/rejected": -251.8286590576172, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.687291145324707, + "rewards/margins": 4.621652126312256, + "rewards/rejected": -13.308942794799805, + "step": 17604 + }, + { + "epoch": 2.74, + "learning_rate": 1.2351138544530319e-06, + "logits/chosen": -2.5936169624328613, + "logits/rejected": -2.583238363265991, + "logps/chosen": -422.0147399902344, + "logps/rejected": -714.643798828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.204255104064941, + "rewards/margins": 10.79731273651123, + "rewards/rejected": -22.001567840576172, + "step": 17605 + }, + { + "epoch": 2.74, + "learning_rate": 1.234380413921884e-06, + "logits/chosen": -2.6524360179901123, + "logits/rejected": -2.6883456707000732, + "logps/chosen": -130.66082763671875, + "logps/rejected": -237.21372985839844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.792701244354248, + "rewards/margins": 9.523063659667969, + "rewards/rejected": -16.315765380859375, + "step": 17606 + }, + { + "epoch": 2.74, + "learning_rate": 1.233646973390736e-06, + "logits/chosen": -2.4951798915863037, + "logits/rejected": -1.9809051752090454, + "logps/chosen": -412.4964904785156, + "logps/rejected": -402.35614013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.74542236328125, + "rewards/margins": 9.404887199401855, + "rewards/rejected": -13.150309562683105, + "step": 17607 + }, + { + "epoch": 2.74, + "learning_rate": 1.2329135328595881e-06, + "logits/chosen": -1.3106017112731934, + "logits/rejected": -2.349454402923584, + "logps/chosen": -215.67283630371094, + "logps/rejected": -460.7010192871094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.062424659729004, + "rewards/margins": 11.435643196105957, + "rewards/rejected": -18.49806785583496, + "step": 17608 + }, + { + "epoch": 2.74, + "learning_rate": 1.2321800923284404e-06, + "logits/chosen": -2.295020341873169, + "logits/rejected": -2.5912792682647705, + "logps/chosen": -230.65634155273438, + "logps/rejected": -442.473388671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.032508850097656, + "rewards/margins": 14.832561492919922, + "rewards/rejected": -25.865070343017578, + "step": 17609 + }, + { + "epoch": 2.74, + "learning_rate": 1.2314466517972925e-06, + "logits/chosen": -1.411861777305603, + "logits/rejected": -2.5497918128967285, + "logps/chosen": -146.14419555664062, + "logps/rejected": -472.4298095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.726909637451172, + "rewards/margins": 12.457612991333008, + "rewards/rejected": -20.18452262878418, + "step": 17610 + }, + { + "epoch": 2.74, + "learning_rate": 1.2307132112661446e-06, + "logits/chosen": -2.787259817123413, + "logits/rejected": -2.4811480045318604, + "logps/chosen": -863.3075561523438, + "logps/rejected": -831.9039306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.241216659545898, + "rewards/margins": 13.417022705078125, + "rewards/rejected": -24.658241271972656, + "step": 17611 + }, + { + "epoch": 2.74, + "learning_rate": 1.2299797707349965e-06, + "logits/chosen": -1.0781304836273193, + "logits/rejected": -2.759477376937866, + "logps/chosen": -196.0565185546875, + "logps/rejected": -702.2772216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.383567810058594, + "rewards/margins": 20.009170532226562, + "rewards/rejected": -28.392738342285156, + "step": 17612 + }, + { + "epoch": 2.74, + "learning_rate": 1.2292463302038488e-06, + "logits/chosen": -2.2422592639923096, + "logits/rejected": -2.853405237197876, + "logps/chosen": -204.61270141601562, + "logps/rejected": -433.296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.809273719787598, + "rewards/margins": 13.500530242919922, + "rewards/rejected": -23.309803009033203, + "step": 17613 + }, + { + "epoch": 2.74, + "learning_rate": 1.2285128896727009e-06, + "logits/chosen": -2.3583059310913086, + "logits/rejected": -2.3796801567077637, + "logps/chosen": -202.6256103515625, + "logps/rejected": -294.88775634765625, + "loss": 0.2588, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.553138732910156, + "rewards/margins": 9.347136497497559, + "rewards/rejected": -18.90027618408203, + "step": 17614 + }, + { + "epoch": 2.74, + "learning_rate": 1.227779449141553e-06, + "logits/chosen": -1.7977486848831177, + "logits/rejected": -2.3213932514190674, + "logps/chosen": -282.33135986328125, + "logps/rejected": -343.68927001953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.643754959106445, + "rewards/margins": 9.00250244140625, + "rewards/rejected": -20.646257400512695, + "step": 17615 + }, + { + "epoch": 2.74, + "learning_rate": 1.227046008610405e-06, + "logits/chosen": -2.8746511936187744, + "logits/rejected": -2.4972422122955322, + "logps/chosen": -292.2445983886719, + "logps/rejected": -190.80650329589844, + "loss": 1.6864, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.183530807495117, + "rewards/margins": 1.3741419315338135, + "rewards/rejected": -12.557672500610352, + "step": 17616 + }, + { + "epoch": 2.74, + "learning_rate": 1.2263125680792572e-06, + "logits/chosen": -2.4335570335388184, + "logits/rejected": -2.56815505027771, + "logps/chosen": -172.808837890625, + "logps/rejected": -379.06396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.553171157836914, + "rewards/margins": 13.036327362060547, + "rewards/rejected": -22.58949851989746, + "step": 17617 + }, + { + "epoch": 2.74, + "learning_rate": 1.2255791275481095e-06, + "logits/chosen": -1.1942493915557861, + "logits/rejected": -1.9550062417984009, + "logps/chosen": -159.6366424560547, + "logps/rejected": -488.49658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.620536804199219, + "rewards/margins": 12.735149383544922, + "rewards/rejected": -21.35568618774414, + "step": 17618 + }, + { + "epoch": 2.74, + "learning_rate": 1.2248456870169616e-06, + "logits/chosen": -1.8155080080032349, + "logits/rejected": -2.656397581100464, + "logps/chosen": -211.59298706054688, + "logps/rejected": -551.2177734375, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.51049518585205, + "rewards/margins": 8.478058815002441, + "rewards/rejected": -20.988554000854492, + "step": 17619 + }, + { + "epoch": 2.74, + "learning_rate": 1.2241122464858136e-06, + "logits/chosen": -2.1339075565338135, + "logits/rejected": -2.407792091369629, + "logps/chosen": -308.15643310546875, + "logps/rejected": -492.7687683105469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.767963409423828, + "rewards/margins": 11.469306945800781, + "rewards/rejected": -20.23727035522461, + "step": 17620 + }, + { + "epoch": 2.74, + "learning_rate": 1.2233788059546655e-06, + "logits/chosen": -1.5864118337631226, + "logits/rejected": -2.7076079845428467, + "logps/chosen": -231.32763671875, + "logps/rejected": -532.0120849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.35277795791626, + "rewards/margins": 18.84767723083496, + "rewards/rejected": -24.200454711914062, + "step": 17621 + }, + { + "epoch": 2.74, + "learning_rate": 1.2226453654235178e-06, + "logits/chosen": -2.8622732162475586, + "logits/rejected": -2.6529159545898438, + "logps/chosen": -204.97787475585938, + "logps/rejected": -379.57244873046875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.45339584350586, + "rewards/margins": 8.77026081085205, + "rewards/rejected": -17.223657608032227, + "step": 17622 + }, + { + "epoch": 2.74, + "learning_rate": 1.22191192489237e-06, + "logits/chosen": -1.5928024053573608, + "logits/rejected": -2.3182501792907715, + "logps/chosen": -176.8486328125, + "logps/rejected": -403.94970703125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.38760757446289, + "rewards/margins": 9.019643783569336, + "rewards/rejected": -22.407251358032227, + "step": 17623 + }, + { + "epoch": 2.74, + "learning_rate": 1.221178484361222e-06, + "logits/chosen": -2.183509588241577, + "logits/rejected": -2.991337537765503, + "logps/chosen": -194.69430541992188, + "logps/rejected": -501.9255065917969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.575812339782715, + "rewards/margins": 12.310487747192383, + "rewards/rejected": -21.88629913330078, + "step": 17624 + }, + { + "epoch": 2.74, + "learning_rate": 1.220445043830074e-06, + "logits/chosen": -2.9779694080352783, + "logits/rejected": -2.9733426570892334, + "logps/chosen": -158.59458923339844, + "logps/rejected": -226.89990234375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.50990104675293, + "rewards/margins": 7.548374176025391, + "rewards/rejected": -16.05827522277832, + "step": 17625 + }, + { + "epoch": 2.74, + "learning_rate": 1.2197116032989264e-06, + "logits/chosen": -1.6937384605407715, + "logits/rejected": -1.6049772500991821, + "logps/chosen": -388.3924865722656, + "logps/rejected": -551.8267211914062, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.516260147094727, + "rewards/margins": 7.538488864898682, + "rewards/rejected": -19.05474853515625, + "step": 17626 + }, + { + "epoch": 2.74, + "learning_rate": 1.2189781627677785e-06, + "logits/chosen": -2.840329647064209, + "logits/rejected": -2.184429883956909, + "logps/chosen": -221.45831298828125, + "logps/rejected": -324.869384765625, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.032116889953613, + "rewards/margins": 5.543740272521973, + "rewards/rejected": -13.575857162475586, + "step": 17627 + }, + { + "epoch": 2.74, + "learning_rate": 1.2182447222366306e-06, + "logits/chosen": -2.345043182373047, + "logits/rejected": -2.7246289253234863, + "logps/chosen": -182.06854248046875, + "logps/rejected": -436.9748840332031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.956277847290039, + "rewards/margins": 9.43167495727539, + "rewards/rejected": -18.38795280456543, + "step": 17628 + }, + { + "epoch": 2.74, + "learning_rate": 1.2175112817054827e-06, + "logits/chosen": -1.917097568511963, + "logits/rejected": -2.5427865982055664, + "logps/chosen": -169.92254638671875, + "logps/rejected": -300.5105285644531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.597192764282227, + "rewards/margins": 8.563787460327148, + "rewards/rejected": -20.160980224609375, + "step": 17629 + }, + { + "epoch": 2.74, + "learning_rate": 1.216777841174335e-06, + "logits/chosen": -2.0644350051879883, + "logits/rejected": -2.868105888366699, + "logps/chosen": -357.4000244140625, + "logps/rejected": -468.22113037109375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.612154006958008, + "rewards/margins": 9.741443634033203, + "rewards/rejected": -22.35359764099121, + "step": 17630 + }, + { + "epoch": 2.74, + "learning_rate": 1.2160444006431868e-06, + "logits/chosen": -2.117722511291504, + "logits/rejected": -2.2064671516418457, + "logps/chosen": -293.28912353515625, + "logps/rejected": -324.47088623046875, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.289994239807129, + "rewards/margins": 8.736263275146484, + "rewards/rejected": -18.02625846862793, + "step": 17631 + }, + { + "epoch": 2.74, + "learning_rate": 1.215310960112039e-06, + "logits/chosen": -2.2860677242279053, + "logits/rejected": -2.7693302631378174, + "logps/chosen": -108.67034149169922, + "logps/rejected": -302.8859558105469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.802457809448242, + "rewards/margins": 10.610326766967773, + "rewards/rejected": -19.412784576416016, + "step": 17632 + }, + { + "epoch": 2.74, + "learning_rate": 1.214577519580891e-06, + "logits/chosen": -2.427891492843628, + "logits/rejected": -1.347488522529602, + "logps/chosen": -331.2381591796875, + "logps/rejected": -274.14959716796875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.500938415527344, + "rewards/margins": 6.889901161193848, + "rewards/rejected": -14.390839576721191, + "step": 17633 + }, + { + "epoch": 2.74, + "learning_rate": 1.2138440790497433e-06, + "logits/chosen": -2.4946300983428955, + "logits/rejected": -2.2990379333496094, + "logps/chosen": -257.2580261230469, + "logps/rejected": -448.4854736328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.675675392150879, + "rewards/margins": 11.16020393371582, + "rewards/rejected": -19.835880279541016, + "step": 17634 + }, + { + "epoch": 2.74, + "learning_rate": 1.2131106385185954e-06, + "logits/chosen": -2.3202366828918457, + "logits/rejected": -2.7563397884368896, + "logps/chosen": -818.04296875, + "logps/rejected": -759.001708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.575525283813477, + "rewards/margins": 11.18444538116455, + "rewards/rejected": -23.759971618652344, + "step": 17635 + }, + { + "epoch": 2.74, + "learning_rate": 1.2123771979874475e-06, + "logits/chosen": -2.7628748416900635, + "logits/rejected": -2.8801217079162598, + "logps/chosen": -380.8977966308594, + "logps/rejected": -383.1671142578125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.023758888244629, + "rewards/margins": 8.066591262817383, + "rewards/rejected": -16.090351104736328, + "step": 17636 + }, + { + "epoch": 2.74, + "learning_rate": 1.2116437574562996e-06, + "logits/chosen": -2.6168668270111084, + "logits/rejected": -2.1450579166412354, + "logps/chosen": -330.9886474609375, + "logps/rejected": -493.3658447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.708763122558594, + "rewards/margins": 15.000487327575684, + "rewards/rejected": -24.709251403808594, + "step": 17637 + }, + { + "epoch": 2.74, + "learning_rate": 1.2109103169251517e-06, + "logits/chosen": -2.513380527496338, + "logits/rejected": -2.7586913108825684, + "logps/chosen": -385.0326232910156, + "logps/rejected": -505.990478515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.752485275268555, + "rewards/margins": 9.965733528137207, + "rewards/rejected": -18.718219757080078, + "step": 17638 + }, + { + "epoch": 2.74, + "learning_rate": 1.210176876394004e-06, + "logits/chosen": -1.6893011331558228, + "logits/rejected": -2.6905839443206787, + "logps/chosen": -390.2856750488281, + "logps/rejected": -630.7371826171875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.14792537689209, + "rewards/margins": 12.235297203063965, + "rewards/rejected": -21.383222579956055, + "step": 17639 + }, + { + "epoch": 2.74, + "learning_rate": 1.209443435862856e-06, + "logits/chosen": -2.450824737548828, + "logits/rejected": -2.9393887519836426, + "logps/chosen": -190.8983917236328, + "logps/rejected": -349.4803466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.091076374053955, + "rewards/margins": 11.347105979919434, + "rewards/rejected": -14.438182830810547, + "step": 17640 + }, + { + "epoch": 2.74, + "learning_rate": 1.208709995331708e-06, + "logits/chosen": -2.6198766231536865, + "logits/rejected": -2.992039680480957, + "logps/chosen": -463.40478515625, + "logps/rejected": -398.2353820800781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.536849021911621, + "rewards/margins": 9.75251579284668, + "rewards/rejected": -19.289363861083984, + "step": 17641 + }, + { + "epoch": 2.74, + "learning_rate": 1.20797655480056e-06, + "logits/chosen": -2.52793025970459, + "logits/rejected": -1.991303563117981, + "logps/chosen": -400.7063293457031, + "logps/rejected": -271.2159423828125, + "loss": 0.4194, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.161190032958984, + "rewards/margins": 2.8464794158935547, + "rewards/rejected": -16.00766944885254, + "step": 17642 + }, + { + "epoch": 2.74, + "learning_rate": 1.2072431142694124e-06, + "logits/chosen": -2.630509614944458, + "logits/rejected": -2.9912421703338623, + "logps/chosen": -156.8200225830078, + "logps/rejected": -326.5932922363281, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.234330654144287, + "rewards/margins": 6.817347526550293, + "rewards/rejected": -13.051677703857422, + "step": 17643 + }, + { + "epoch": 2.74, + "learning_rate": 1.2065096737382644e-06, + "logits/chosen": -0.9367415308952332, + "logits/rejected": -2.2044553756713867, + "logps/chosen": -174.56564331054688, + "logps/rejected": -622.734130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.875677108764648, + "rewards/margins": 14.38888168334961, + "rewards/rejected": -25.264558792114258, + "step": 17644 + }, + { + "epoch": 2.74, + "learning_rate": 1.2057762332071165e-06, + "logits/chosen": -2.745162010192871, + "logits/rejected": -2.8910038471221924, + "logps/chosen": -202.95199584960938, + "logps/rejected": -217.2644500732422, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.964885711669922, + "rewards/margins": 7.498965740203857, + "rewards/rejected": -14.463851928710938, + "step": 17645 + }, + { + "epoch": 2.74, + "learning_rate": 1.2050427926759686e-06, + "logits/chosen": -2.660569190979004, + "logits/rejected": -2.9273390769958496, + "logps/chosen": -167.8106689453125, + "logps/rejected": -333.62396240234375, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.285422325134277, + "rewards/margins": 9.22120475769043, + "rewards/rejected": -20.506628036499023, + "step": 17646 + }, + { + "epoch": 2.74, + "learning_rate": 1.204309352144821e-06, + "logits/chosen": -2.607706069946289, + "logits/rejected": -2.812469482421875, + "logps/chosen": -509.53350830078125, + "logps/rejected": -578.4864501953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.71673583984375, + "rewards/margins": 9.255414009094238, + "rewards/rejected": -17.972148895263672, + "step": 17647 + }, + { + "epoch": 2.74, + "learning_rate": 1.203575911613673e-06, + "logits/chosen": -2.550351858139038, + "logits/rejected": -2.140866279602051, + "logps/chosen": -386.96795654296875, + "logps/rejected": -505.77813720703125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.422967910766602, + "rewards/margins": 5.825699329376221, + "rewards/rejected": -17.248668670654297, + "step": 17648 + }, + { + "epoch": 2.74, + "learning_rate": 1.202842471082525e-06, + "logits/chosen": -2.609041213989258, + "logits/rejected": -2.1427128314971924, + "logps/chosen": -333.39483642578125, + "logps/rejected": -332.6944580078125, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.491732597351074, + "rewards/margins": 6.569672584533691, + "rewards/rejected": -14.061405181884766, + "step": 17649 + }, + { + "epoch": 2.74, + "learning_rate": 1.2021090305513772e-06, + "logits/chosen": -2.701902389526367, + "logits/rejected": -1.8929574489593506, + "logps/chosen": -363.26031494140625, + "logps/rejected": -385.9990234375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.865228652954102, + "rewards/margins": 7.944915771484375, + "rewards/rejected": -17.810144424438477, + "step": 17650 + }, + { + "epoch": 2.75, + "learning_rate": 1.2013755900202293e-06, + "logits/chosen": -1.584100604057312, + "logits/rejected": -2.6342670917510986, + "logps/chosen": -165.5425262451172, + "logps/rejected": -338.1527099609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.845767021179199, + "rewards/margins": 10.514415740966797, + "rewards/rejected": -18.360183715820312, + "step": 17651 + }, + { + "epoch": 2.75, + "learning_rate": 1.2006421494890814e-06, + "logits/chosen": -1.5191844701766968, + "logits/rejected": -2.354402780532837, + "logps/chosen": -220.44342041015625, + "logps/rejected": -407.8389892578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.21085262298584, + "rewards/margins": 10.665491104125977, + "rewards/rejected": -21.8763427734375, + "step": 17652 + }, + { + "epoch": 2.75, + "learning_rate": 1.1999087089579335e-06, + "logits/chosen": -2.6301259994506836, + "logits/rejected": -2.6867356300354004, + "logps/chosen": -165.97222900390625, + "logps/rejected": -186.11973571777344, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.522497177124023, + "rewards/margins": 7.527317523956299, + "rewards/rejected": -17.049814224243164, + "step": 17653 + }, + { + "epoch": 2.75, + "learning_rate": 1.1991752684267856e-06, + "logits/chosen": -2.8065428733825684, + "logits/rejected": -2.9360764026641846, + "logps/chosen": -370.57537841796875, + "logps/rejected": -345.1262512207031, + "loss": 0.1711, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.55362319946289, + "rewards/margins": 8.705059051513672, + "rewards/rejected": -18.258682250976562, + "step": 17654 + }, + { + "epoch": 2.75, + "learning_rate": 1.1984418278956376e-06, + "logits/chosen": -2.8689959049224854, + "logits/rejected": -2.7023496627807617, + "logps/chosen": -296.1143493652344, + "logps/rejected": -507.5362548828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.16495132446289, + "rewards/margins": 10.606252670288086, + "rewards/rejected": -21.77120590209961, + "step": 17655 + }, + { + "epoch": 2.75, + "learning_rate": 1.19770838736449e-06, + "logits/chosen": -2.687328577041626, + "logits/rejected": -2.282822847366333, + "logps/chosen": -218.61544799804688, + "logps/rejected": -237.44094848632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6142778396606445, + "rewards/margins": 12.632657051086426, + "rewards/rejected": -20.24693489074707, + "step": 17656 + }, + { + "epoch": 2.75, + "learning_rate": 1.196974946833342e-06, + "logits/chosen": -2.2501752376556396, + "logits/rejected": -2.697470188140869, + "logps/chosen": -425.96490478515625, + "logps/rejected": -414.0570983886719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.133836269378662, + "rewards/margins": 9.460794448852539, + "rewards/rejected": -16.59463119506836, + "step": 17657 + }, + { + "epoch": 2.75, + "learning_rate": 1.1962415063021941e-06, + "logits/chosen": -2.4702799320220947, + "logits/rejected": -2.6850619316101074, + "logps/chosen": -175.72738647460938, + "logps/rejected": -277.4744567871094, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.798486709594727, + "rewards/margins": 6.999060153961182, + "rewards/rejected": -16.79754638671875, + "step": 17658 + }, + { + "epoch": 2.75, + "learning_rate": 1.1955080657710462e-06, + "logits/chosen": -2.935683012008667, + "logits/rejected": -2.1580471992492676, + "logps/chosen": -614.7109375, + "logps/rejected": -350.50244140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.71940803527832, + "rewards/margins": 9.942800521850586, + "rewards/rejected": -15.662208557128906, + "step": 17659 + }, + { + "epoch": 2.75, + "learning_rate": 1.1947746252398985e-06, + "logits/chosen": -2.804842710494995, + "logits/rejected": -2.910104274749756, + "logps/chosen": -133.8974151611328, + "logps/rejected": -374.31158447265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.374794006347656, + "rewards/margins": 8.717551231384277, + "rewards/rejected": -18.09234619140625, + "step": 17660 + }, + { + "epoch": 2.75, + "learning_rate": 1.1940411847087504e-06, + "logits/chosen": -2.0487632751464844, + "logits/rejected": -2.755005121231079, + "logps/chosen": -297.95721435546875, + "logps/rejected": -295.6635437011719, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.736725807189941, + "rewards/margins": 7.099063873291016, + "rewards/rejected": -15.83578872680664, + "step": 17661 + }, + { + "epoch": 2.75, + "learning_rate": 1.1933077441776025e-06, + "logits/chosen": -2.438261032104492, + "logits/rejected": -1.5112721920013428, + "logps/chosen": -292.71075439453125, + "logps/rejected": -350.4395446777344, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.581401824951172, + "rewards/margins": 9.435636520385742, + "rewards/rejected": -23.017038345336914, + "step": 17662 + }, + { + "epoch": 2.75, + "learning_rate": 1.1925743036464546e-06, + "logits/chosen": -2.9219584465026855, + "logits/rejected": -1.8009576797485352, + "logps/chosen": -279.8036193847656, + "logps/rejected": -191.8020782470703, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.976675033569336, + "rewards/margins": 12.702747344970703, + "rewards/rejected": -16.67942237854004, + "step": 17663 + }, + { + "epoch": 2.75, + "learning_rate": 1.1918408631153069e-06, + "logits/chosen": -2.442258358001709, + "logits/rejected": -2.3258650302886963, + "logps/chosen": -137.068115234375, + "logps/rejected": -356.5100402832031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.897611618041992, + "rewards/margins": 9.586225509643555, + "rewards/rejected": -19.483837127685547, + "step": 17664 + }, + { + "epoch": 2.75, + "learning_rate": 1.191107422584159e-06, + "logits/chosen": -1.8722989559173584, + "logits/rejected": -2.683753490447998, + "logps/chosen": -244.94967651367188, + "logps/rejected": -420.470947265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.935302734375, + "rewards/margins": 9.912997245788574, + "rewards/rejected": -16.84830093383789, + "step": 17665 + }, + { + "epoch": 2.75, + "learning_rate": 1.190373982053011e-06, + "logits/chosen": -2.816642999649048, + "logits/rejected": -1.913322925567627, + "logps/chosen": -391.42767333984375, + "logps/rejected": -548.958251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.59636116027832, + "rewards/margins": 11.039926528930664, + "rewards/rejected": -17.636287689208984, + "step": 17666 + }, + { + "epoch": 2.75, + "learning_rate": 1.1896405415218631e-06, + "logits/chosen": -1.810496211051941, + "logits/rejected": -2.738865852355957, + "logps/chosen": -228.4487762451172, + "logps/rejected": -461.9450378417969, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.627902030944824, + "rewards/margins": 7.841010570526123, + "rewards/rejected": -16.46891212463379, + "step": 17667 + }, + { + "epoch": 2.75, + "learning_rate": 1.1889071009907154e-06, + "logits/chosen": -2.387409210205078, + "logits/rejected": -2.608103036880493, + "logps/chosen": -166.45755004882812, + "logps/rejected": -270.74261474609375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.802173614501953, + "rewards/margins": 9.200328826904297, + "rewards/rejected": -17.00250244140625, + "step": 17668 + }, + { + "epoch": 2.75, + "learning_rate": 1.1881736604595675e-06, + "logits/chosen": -1.442115068435669, + "logits/rejected": -2.5708746910095215, + "logps/chosen": -203.26611328125, + "logps/rejected": -440.3973388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.67194652557373, + "rewards/margins": 11.45875358581543, + "rewards/rejected": -21.130699157714844, + "step": 17669 + }, + { + "epoch": 2.75, + "learning_rate": 1.1874402199284196e-06, + "logits/chosen": -2.689638614654541, + "logits/rejected": -1.9684170484542847, + "logps/chosen": -402.6589660644531, + "logps/rejected": -323.41168212890625, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.900785446166992, + "rewards/margins": 5.96087121963501, + "rewards/rejected": -14.86165714263916, + "step": 17670 + }, + { + "epoch": 2.75, + "learning_rate": 1.1867067793972715e-06, + "logits/chosen": -2.8633809089660645, + "logits/rejected": -1.5994253158569336, + "logps/chosen": -924.913330078125, + "logps/rejected": -535.109375, + "loss": 0.0587, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.448426246643066, + "rewards/margins": 6.086171627044678, + "rewards/rejected": -16.534597396850586, + "step": 17671 + }, + { + "epoch": 2.75, + "learning_rate": 1.1859733388661238e-06, + "logits/chosen": -1.262865424156189, + "logits/rejected": -2.3922152519226074, + "logps/chosen": -214.51275634765625, + "logps/rejected": -581.8443603515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.81428050994873, + "rewards/margins": 10.674306869506836, + "rewards/rejected": -21.48858642578125, + "step": 17672 + }, + { + "epoch": 2.75, + "learning_rate": 1.185239898334976e-06, + "logits/chosen": -1.3564493656158447, + "logits/rejected": -2.3836958408355713, + "logps/chosen": -167.975830078125, + "logps/rejected": -447.68048095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.650142669677734, + "rewards/margins": 15.187768936157227, + "rewards/rejected": -23.837913513183594, + "step": 17673 + }, + { + "epoch": 2.75, + "learning_rate": 1.184506457803828e-06, + "logits/chosen": -2.858635425567627, + "logits/rejected": -2.5706639289855957, + "logps/chosen": -572.89306640625, + "logps/rejected": -848.36572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.9115571975708, + "rewards/margins": 15.645074844360352, + "rewards/rejected": -24.55663299560547, + "step": 17674 + }, + { + "epoch": 2.75, + "learning_rate": 1.18377301727268e-06, + "logits/chosen": -1.1599748134613037, + "logits/rejected": -2.544719934463501, + "logps/chosen": -174.62562561035156, + "logps/rejected": -526.5416870117188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.964948654174805, + "rewards/margins": 9.926712036132812, + "rewards/rejected": -22.89166259765625, + "step": 17675 + }, + { + "epoch": 2.75, + "learning_rate": 1.1830395767415322e-06, + "logits/chosen": -1.3120512962341309, + "logits/rejected": -2.6066009998321533, + "logps/chosen": -204.22482299804688, + "logps/rejected": -444.3306884765625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.283162117004395, + "rewards/margins": 6.488069534301758, + "rewards/rejected": -19.77123260498047, + "step": 17676 + }, + { + "epoch": 2.75, + "learning_rate": 1.1823061362103845e-06, + "logits/chosen": -2.825914144515991, + "logits/rejected": -2.845224380493164, + "logps/chosen": -145.38368225097656, + "logps/rejected": -291.3045654296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.89879035949707, + "rewards/margins": 8.872802734375, + "rewards/rejected": -20.771591186523438, + "step": 17677 + }, + { + "epoch": 2.75, + "learning_rate": 1.1815726956792366e-06, + "logits/chosen": -2.231095552444458, + "logits/rejected": -1.9313987493515015, + "logps/chosen": -318.5765075683594, + "logps/rejected": -658.3970947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.328092575073242, + "rewards/margins": 13.228496551513672, + "rewards/rejected": -24.55658721923828, + "step": 17678 + }, + { + "epoch": 2.75, + "learning_rate": 1.1808392551480887e-06, + "logits/chosen": -2.7307920455932617, + "logits/rejected": -2.3384125232696533, + "logps/chosen": -606.73583984375, + "logps/rejected": -534.2100830078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.339923858642578, + "rewards/margins": 11.19057846069336, + "rewards/rejected": -17.530502319335938, + "step": 17679 + }, + { + "epoch": 2.75, + "learning_rate": 1.1801058146169405e-06, + "logits/chosen": -1.7415212392807007, + "logits/rejected": -2.415691614151001, + "logps/chosen": -255.2068634033203, + "logps/rejected": -430.02093505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.958654403686523, + "rewards/margins": 11.582853317260742, + "rewards/rejected": -19.541507720947266, + "step": 17680 + }, + { + "epoch": 2.75, + "learning_rate": 1.1793723740857928e-06, + "logits/chosen": -1.3234657049179077, + "logits/rejected": -2.8461709022521973, + "logps/chosen": -274.87432861328125, + "logps/rejected": -425.2334899902344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.028861045837402, + "rewards/margins": 9.875478744506836, + "rewards/rejected": -15.904338836669922, + "step": 17681 + }, + { + "epoch": 2.75, + "learning_rate": 1.178638933554645e-06, + "logits/chosen": -2.5099663734436035, + "logits/rejected": -2.7429349422454834, + "logps/chosen": -471.65313720703125, + "logps/rejected": -553.632568359375, + "loss": 0.7479, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.638885498046875, + "rewards/margins": 7.455917835235596, + "rewards/rejected": -19.094802856445312, + "step": 17682 + }, + { + "epoch": 2.75, + "learning_rate": 1.177905493023497e-06, + "logits/chosen": -1.5989470481872559, + "logits/rejected": -1.9114516973495483, + "logps/chosen": -321.20892333984375, + "logps/rejected": -782.3924560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.159530639648438, + "rewards/margins": 18.91753578186035, + "rewards/rejected": -27.07706642150879, + "step": 17683 + }, + { + "epoch": 2.75, + "learning_rate": 1.177172052492349e-06, + "logits/chosen": -0.908176839351654, + "logits/rejected": -2.3897531032562256, + "logps/chosen": -183.35635375976562, + "logps/rejected": -415.0929260253906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.883734226226807, + "rewards/margins": 10.573579788208008, + "rewards/rejected": -18.457313537597656, + "step": 17684 + }, + { + "epoch": 2.75, + "learning_rate": 1.1764386119612014e-06, + "logits/chosen": -2.885260581970215, + "logits/rejected": -1.826338529586792, + "logps/chosen": -676.8074951171875, + "logps/rejected": -417.8374328613281, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.09886646270752, + "rewards/margins": 6.224527359008789, + "rewards/rejected": -16.323394775390625, + "step": 17685 + }, + { + "epoch": 2.75, + "learning_rate": 1.1757051714300535e-06, + "logits/chosen": -2.7274463176727295, + "logits/rejected": -2.7969141006469727, + "logps/chosen": -466.2287902832031, + "logps/rejected": -451.6531066894531, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.217659950256348, + "rewards/margins": 10.06941032409668, + "rewards/rejected": -19.28706932067871, + "step": 17686 + }, + { + "epoch": 2.75, + "learning_rate": 1.1749717308989056e-06, + "logits/chosen": -2.7127580642700195, + "logits/rejected": -2.5206873416900635, + "logps/chosen": -493.97686767578125, + "logps/rejected": -475.3023376464844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9770492315292358, + "rewards/margins": 15.320089340209961, + "rewards/rejected": -17.297138214111328, + "step": 17687 + }, + { + "epoch": 2.75, + "learning_rate": 1.1742382903677577e-06, + "logits/chosen": -1.0072276592254639, + "logits/rejected": -1.8784749507904053, + "logps/chosen": -167.52783203125, + "logps/rejected": -317.5797119140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.423553466796875, + "rewards/margins": 7.777538776397705, + "rewards/rejected": -19.201091766357422, + "step": 17688 + }, + { + "epoch": 2.75, + "learning_rate": 1.17350484983661e-06, + "logits/chosen": -2.8674352169036865, + "logits/rejected": -1.4853792190551758, + "logps/chosen": -431.7439270019531, + "logps/rejected": -341.8067321777344, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.345571517944336, + "rewards/margins": 5.266260623931885, + "rewards/rejected": -13.611831665039062, + "step": 17689 + }, + { + "epoch": 2.75, + "learning_rate": 1.1727714093054619e-06, + "logits/chosen": -2.4891974925994873, + "logits/rejected": -2.5917551517486572, + "logps/chosen": -464.03253173828125, + "logps/rejected": -368.6291809082031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.065227508544922, + "rewards/margins": 11.568607330322266, + "rewards/rejected": -19.633834838867188, + "step": 17690 + }, + { + "epoch": 2.75, + "learning_rate": 1.172037968774314e-06, + "logits/chosen": -1.1348496675491333, + "logits/rejected": -2.4587552547454834, + "logps/chosen": -276.6947937011719, + "logps/rejected": -422.6856689453125, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.82524299621582, + "rewards/margins": 6.78980827331543, + "rewards/rejected": -16.61505126953125, + "step": 17691 + }, + { + "epoch": 2.75, + "learning_rate": 1.171304528243166e-06, + "logits/chosen": -1.1831973791122437, + "logits/rejected": -2.081717014312744, + "logps/chosen": -123.04570007324219, + "logps/rejected": -375.268798828125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.310728073120117, + "rewards/margins": 10.734413146972656, + "rewards/rejected": -20.045141220092773, + "step": 17692 + }, + { + "epoch": 2.75, + "learning_rate": 1.1705710877120183e-06, + "logits/chosen": -2.418860912322998, + "logits/rejected": -2.812621593475342, + "logps/chosen": -157.623291015625, + "logps/rejected": -392.90478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.769116401672363, + "rewards/margins": 12.646871566772461, + "rewards/rejected": -21.41598892211914, + "step": 17693 + }, + { + "epoch": 2.75, + "learning_rate": 1.1698376471808704e-06, + "logits/chosen": -2.631899118423462, + "logits/rejected": -2.4207704067230225, + "logps/chosen": -197.228759765625, + "logps/rejected": -287.59515380859375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.529217720031738, + "rewards/margins": 9.468278884887695, + "rewards/rejected": -18.99749755859375, + "step": 17694 + }, + { + "epoch": 2.75, + "learning_rate": 1.1691042066497225e-06, + "logits/chosen": -2.3483939170837402, + "logits/rejected": -1.2028638124465942, + "logps/chosen": -343.2901306152344, + "logps/rejected": -279.6676940917969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.129188537597656, + "rewards/margins": 9.882766723632812, + "rewards/rejected": -19.01195526123047, + "step": 17695 + }, + { + "epoch": 2.75, + "learning_rate": 1.1683707661185746e-06, + "logits/chosen": -2.0911149978637695, + "logits/rejected": -2.5765092372894287, + "logps/chosen": -147.1923370361328, + "logps/rejected": -385.0901794433594, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.894747734069824, + "rewards/margins": 11.045761108398438, + "rewards/rejected": -18.940509796142578, + "step": 17696 + }, + { + "epoch": 2.75, + "learning_rate": 1.1676373255874267e-06, + "logits/chosen": -1.605848789215088, + "logits/rejected": -2.6603469848632812, + "logps/chosen": -394.36419677734375, + "logps/rejected": -701.578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.996973037719727, + "rewards/margins": 9.272340774536133, + "rewards/rejected": -21.26931381225586, + "step": 17697 + }, + { + "epoch": 2.75, + "learning_rate": 1.166903885056279e-06, + "logits/chosen": -1.503605604171753, + "logits/rejected": -2.397008180618286, + "logps/chosen": -205.41925048828125, + "logps/rejected": -419.9088134765625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.971268653869629, + "rewards/margins": 8.687172889709473, + "rewards/rejected": -19.6584415435791, + "step": 17698 + }, + { + "epoch": 2.75, + "learning_rate": 1.166170444525131e-06, + "logits/chosen": -2.176396608352661, + "logits/rejected": -2.4340991973876953, + "logps/chosen": -248.72900390625, + "logps/rejected": -305.9859619140625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.629024505615234, + "rewards/margins": 7.66369104385376, + "rewards/rejected": -19.292715072631836, + "step": 17699 + }, + { + "epoch": 2.75, + "learning_rate": 1.165437003993983e-06, + "logits/chosen": -1.7455899715423584, + "logits/rejected": -2.7850258350372314, + "logps/chosen": -162.45285034179688, + "logps/rejected": -462.66619873046875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.935445785522461, + "rewards/margins": 8.619799613952637, + "rewards/rejected": -20.55524444580078, + "step": 17700 + }, + { + "epoch": 2.75, + "learning_rate": 1.164703563462835e-06, + "logits/chosen": -2.73732590675354, + "logits/rejected": -2.3542134761810303, + "logps/chosen": -383.7445373535156, + "logps/rejected": -523.08935546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.457317352294922, + "rewards/margins": 10.431116104125977, + "rewards/rejected": -21.888431549072266, + "step": 17701 + }, + { + "epoch": 2.75, + "learning_rate": 1.1639701229316874e-06, + "logits/chosen": -1.5108534097671509, + "logits/rejected": -2.0027260780334473, + "logps/chosen": -422.721923828125, + "logps/rejected": -559.0435791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.512706756591797, + "rewards/margins": 10.76276969909668, + "rewards/rejected": -21.275476455688477, + "step": 17702 + }, + { + "epoch": 2.75, + "learning_rate": 1.1632366824005394e-06, + "logits/chosen": -2.2341530323028564, + "logits/rejected": -2.547508716583252, + "logps/chosen": -485.96221923828125, + "logps/rejected": -631.00830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.611772537231445, + "rewards/margins": 12.733901023864746, + "rewards/rejected": -23.345672607421875, + "step": 17703 + }, + { + "epoch": 2.75, + "learning_rate": 1.1625032418693915e-06, + "logits/chosen": -2.6978371143341064, + "logits/rejected": -1.6846357583999634, + "logps/chosen": -353.9742431640625, + "logps/rejected": -390.25531005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.286463260650635, + "rewards/margins": 15.39457893371582, + "rewards/rejected": -22.681041717529297, + "step": 17704 + }, + { + "epoch": 2.75, + "learning_rate": 1.1617698013382436e-06, + "logits/chosen": -1.9520413875579834, + "logits/rejected": -2.5388200283050537, + "logps/chosen": -275.8447570800781, + "logps/rejected": -643.7758178710938, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.843826293945312, + "rewards/margins": 8.766342163085938, + "rewards/rejected": -21.61016845703125, + "step": 17705 + }, + { + "epoch": 2.75, + "learning_rate": 1.161036360807096e-06, + "logits/chosen": -2.2689695358276367, + "logits/rejected": -2.4490764141082764, + "logps/chosen": -362.6177062988281, + "logps/rejected": -407.0672607421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.809383392333984, + "rewards/margins": 9.139360427856445, + "rewards/rejected": -17.948741912841797, + "step": 17706 + }, + { + "epoch": 2.75, + "learning_rate": 1.160302920275948e-06, + "logits/chosen": -1.805402398109436, + "logits/rejected": -2.515641689300537, + "logps/chosen": -94.52477264404297, + "logps/rejected": -391.1945495605469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.116891384124756, + "rewards/margins": 14.443625450134277, + "rewards/rejected": -20.560516357421875, + "step": 17707 + }, + { + "epoch": 2.75, + "learning_rate": 1.1595694797448001e-06, + "logits/chosen": -2.5768988132476807, + "logits/rejected": -2.545231342315674, + "logps/chosen": -447.69293212890625, + "logps/rejected": -492.7498474121094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.375797271728516, + "rewards/margins": 11.370342254638672, + "rewards/rejected": -22.746139526367188, + "step": 17708 + }, + { + "epoch": 2.75, + "learning_rate": 1.1588360392136522e-06, + "logits/chosen": -2.5932679176330566, + "logits/rejected": -2.5639822483062744, + "logps/chosen": -745.4047241210938, + "logps/rejected": -853.6117553710938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.568702697753906, + "rewards/margins": 13.632890701293945, + "rewards/rejected": -21.20159339904785, + "step": 17709 + }, + { + "epoch": 2.75, + "learning_rate": 1.1581025986825043e-06, + "logits/chosen": -1.9383724927902222, + "logits/rejected": -2.5164291858673096, + "logps/chosen": -193.19276428222656, + "logps/rejected": -327.245361328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.446878433227539, + "rewards/margins": 8.565082550048828, + "rewards/rejected": -20.011960983276367, + "step": 17710 + }, + { + "epoch": 2.75, + "learning_rate": 1.1573691581513564e-06, + "logits/chosen": -2.6531617641448975, + "logits/rejected": -2.1419179439544678, + "logps/chosen": -426.44586181640625, + "logps/rejected": -400.98651123046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.704610824584961, + "rewards/margins": 8.584471702575684, + "rewards/rejected": -18.289081573486328, + "step": 17711 + }, + { + "epoch": 2.75, + "learning_rate": 1.1566357176202085e-06, + "logits/chosen": -2.3685078620910645, + "logits/rejected": -2.8025903701782227, + "logps/chosen": -196.37539672851562, + "logps/rejected": -358.586181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.298545837402344, + "rewards/margins": 13.467243194580078, + "rewards/rejected": -22.765789031982422, + "step": 17712 + }, + { + "epoch": 2.75, + "learning_rate": 1.1559022770890606e-06, + "logits/chosen": -1.3587682247161865, + "logits/rejected": -2.6987922191619873, + "logps/chosen": -232.67730712890625, + "logps/rejected": -421.81915283203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.550134658813477, + "rewards/margins": 11.997875213623047, + "rewards/rejected": -26.548009872436523, + "step": 17713 + }, + { + "epoch": 2.75, + "learning_rate": 1.1551688365579129e-06, + "logits/chosen": -2.7205450534820557, + "logits/rejected": -2.615562677383423, + "logps/chosen": -281.7947082519531, + "logps/rejected": -246.6566162109375, + "loss": 0.8354, + "rewards/accuracies": 0.0, + "rewards/chosen": -12.746566772460938, + "rewards/margins": -0.26578712463378906, + "rewards/rejected": -12.480779647827148, + "step": 17714 + }, + { + "epoch": 2.76, + "learning_rate": 1.154435396026765e-06, + "logits/chosen": -2.6174123287200928, + "logits/rejected": -1.8809329271316528, + "logps/chosen": -838.6292724609375, + "logps/rejected": -433.6304016113281, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.621524810791016, + "rewards/margins": 10.465509414672852, + "rewards/rejected": -22.087034225463867, + "step": 17715 + }, + { + "epoch": 2.76, + "learning_rate": 1.153701955495617e-06, + "logits/chosen": -2.734125852584839, + "logits/rejected": -2.5587921142578125, + "logps/chosen": -270.79718017578125, + "logps/rejected": -612.9725341796875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.641757011413574, + "rewards/margins": 14.200393676757812, + "rewards/rejected": -22.842151641845703, + "step": 17716 + }, + { + "epoch": 2.76, + "learning_rate": 1.1529685149644691e-06, + "logits/chosen": -2.1887590885162354, + "logits/rejected": -2.7421042919158936, + "logps/chosen": -737.6624145507812, + "logps/rejected": -796.1993408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.851738929748535, + "rewards/margins": 10.517351150512695, + "rewards/rejected": -20.369091033935547, + "step": 17717 + }, + { + "epoch": 2.76, + "learning_rate": 1.1522350744333212e-06, + "logits/chosen": -2.40259051322937, + "logits/rejected": -2.5439493656158447, + "logps/chosen": -106.99032592773438, + "logps/rejected": -214.97987365722656, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.652728080749512, + "rewards/margins": 7.414974212646484, + "rewards/rejected": -15.06770133972168, + "step": 17718 + }, + { + "epoch": 2.76, + "learning_rate": 1.1515016339021735e-06, + "logits/chosen": -2.4803431034088135, + "logits/rejected": -2.370333433151245, + "logps/chosen": -479.2206726074219, + "logps/rejected": -485.4840087890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.833157539367676, + "rewards/margins": 10.99748706817627, + "rewards/rejected": -18.830644607543945, + "step": 17719 + }, + { + "epoch": 2.76, + "learning_rate": 1.1507681933710254e-06, + "logits/chosen": -2.5886380672454834, + "logits/rejected": -2.8932275772094727, + "logps/chosen": -195.3060302734375, + "logps/rejected": -331.0635070800781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.114027500152588, + "rewards/margins": 8.951151847839355, + "rewards/rejected": -16.0651798248291, + "step": 17720 + }, + { + "epoch": 2.76, + "learning_rate": 1.1500347528398775e-06, + "logits/chosen": -1.719887375831604, + "logits/rejected": -2.4597699642181396, + "logps/chosen": -257.09075927734375, + "logps/rejected": -472.5742492675781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.541528701782227, + "rewards/margins": 11.410722732543945, + "rewards/rejected": -24.952251434326172, + "step": 17721 + }, + { + "epoch": 2.76, + "learning_rate": 1.1493013123087296e-06, + "logits/chosen": -1.4681535959243774, + "logits/rejected": -2.4669220447540283, + "logps/chosen": -350.08209228515625, + "logps/rejected": -619.3427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.442159652709961, + "rewards/margins": 11.722200393676758, + "rewards/rejected": -23.16436004638672, + "step": 17722 + }, + { + "epoch": 2.76, + "learning_rate": 1.1485678717775819e-06, + "logits/chosen": -2.7449138164520264, + "logits/rejected": -1.978513240814209, + "logps/chosen": -197.57913208007812, + "logps/rejected": -245.20657348632812, + "loss": 0.0352, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.090155601501465, + "rewards/margins": 3.820420980453491, + "rewards/rejected": -12.910576820373535, + "step": 17723 + }, + { + "epoch": 2.76, + "learning_rate": 1.147834431246434e-06, + "logits/chosen": -2.8427939414978027, + "logits/rejected": -1.5963584184646606, + "logps/chosen": -556.901123046875, + "logps/rejected": -585.6720581054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.752138137817383, + "rewards/margins": 11.8211669921875, + "rewards/rejected": -26.573305130004883, + "step": 17724 + }, + { + "epoch": 2.76, + "learning_rate": 1.147100990715286e-06, + "logits/chosen": -2.454538583755493, + "logits/rejected": -2.6315290927886963, + "logps/chosen": -501.02032470703125, + "logps/rejected": -755.9508056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.39223861694336, + "rewards/margins": 11.037818908691406, + "rewards/rejected": -22.430057525634766, + "step": 17725 + }, + { + "epoch": 2.76, + "learning_rate": 1.1463675501841382e-06, + "logits/chosen": -0.7239775657653809, + "logits/rejected": -2.4651923179626465, + "logps/chosen": -120.51893615722656, + "logps/rejected": -503.6132507324219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.775490760803223, + "rewards/margins": 20.010074615478516, + "rewards/rejected": -28.785564422607422, + "step": 17726 + }, + { + "epoch": 2.76, + "learning_rate": 1.1456341096529905e-06, + "logits/chosen": -2.641279935836792, + "logits/rejected": -2.4564146995544434, + "logps/chosen": -287.19085693359375, + "logps/rejected": -375.7548522949219, + "loss": 0.0668, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.065427780151367, + "rewards/margins": 6.161733627319336, + "rewards/rejected": -14.227161407470703, + "step": 17727 + }, + { + "epoch": 2.76, + "learning_rate": 1.1449006691218425e-06, + "logits/chosen": -1.290488362312317, + "logits/rejected": -2.442146062850952, + "logps/chosen": -156.05267333984375, + "logps/rejected": -505.2171325683594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.241545677185059, + "rewards/margins": 11.096017837524414, + "rewards/rejected": -19.33756446838379, + "step": 17728 + }, + { + "epoch": 2.76, + "learning_rate": 1.1441672285906946e-06, + "logits/chosen": -2.6993865966796875, + "logits/rejected": -2.7241270542144775, + "logps/chosen": -236.3859405517578, + "logps/rejected": -493.1656188964844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.81215763092041, + "rewards/margins": 17.015392303466797, + "rewards/rejected": -22.82754898071289, + "step": 17729 + }, + { + "epoch": 2.76, + "learning_rate": 1.1434337880595465e-06, + "logits/chosen": -2.8110742568969727, + "logits/rejected": -2.431199312210083, + "logps/chosen": -604.5284423828125, + "logps/rejected": -600.5914306640625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.106331825256348, + "rewards/margins": 8.409642219543457, + "rewards/rejected": -19.515974044799805, + "step": 17730 + }, + { + "epoch": 2.76, + "learning_rate": 1.1427003475283988e-06, + "logits/chosen": -2.956636905670166, + "logits/rejected": -2.9025092124938965, + "logps/chosen": -247.84214782714844, + "logps/rejected": -173.18844604492188, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.878589630126953, + "rewards/margins": 4.901694297790527, + "rewards/rejected": -13.780282974243164, + "step": 17731 + }, + { + "epoch": 2.76, + "learning_rate": 1.141966906997251e-06, + "logits/chosen": -2.7050135135650635, + "logits/rejected": -2.360271453857422, + "logps/chosen": -189.29971313476562, + "logps/rejected": -235.37625122070312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.127552032470703, + "rewards/margins": 8.938302993774414, + "rewards/rejected": -17.065855026245117, + "step": 17732 + }, + { + "epoch": 2.76, + "learning_rate": 1.141233466466103e-06, + "logits/chosen": -2.799060344696045, + "logits/rejected": -2.6618926525115967, + "logps/chosen": -874.6318359375, + "logps/rejected": -1255.0106201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.480218410491943, + "rewards/margins": 13.840856552124023, + "rewards/rejected": -19.321075439453125, + "step": 17733 + }, + { + "epoch": 2.76, + "learning_rate": 1.140500025934955e-06, + "logits/chosen": -2.6478137969970703, + "logits/rejected": -3.0348265171051025, + "logps/chosen": -135.93528747558594, + "logps/rejected": -193.98916625976562, + "loss": 0.5144, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.056001663208008, + "rewards/margins": 2.5674712657928467, + "rewards/rejected": -11.623472213745117, + "step": 17734 + }, + { + "epoch": 2.76, + "learning_rate": 1.1397665854038072e-06, + "logits/chosen": -2.9979803562164307, + "logits/rejected": -2.577301502227783, + "logps/chosen": -322.7455749511719, + "logps/rejected": -327.4641418457031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.202754974365234, + "rewards/margins": 12.795003890991211, + "rewards/rejected": -20.997758865356445, + "step": 17735 + }, + { + "epoch": 2.76, + "learning_rate": 1.1390331448726595e-06, + "logits/chosen": -2.9416091442108154, + "logits/rejected": -2.387683868408203, + "logps/chosen": -217.78244018554688, + "logps/rejected": -234.5062255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.911639451980591, + "rewards/margins": 10.039834976196289, + "rewards/rejected": -13.951475143432617, + "step": 17736 + }, + { + "epoch": 2.76, + "learning_rate": 1.1382997043415116e-06, + "logits/chosen": -1.8370873928070068, + "logits/rejected": -2.588472843170166, + "logps/chosen": -251.82362365722656, + "logps/rejected": -456.83282470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.137005805969238, + "rewards/margins": 12.64721393585205, + "rewards/rejected": -19.78421974182129, + "step": 17737 + }, + { + "epoch": 2.76, + "learning_rate": 1.1375662638103637e-06, + "logits/chosen": -2.6126773357391357, + "logits/rejected": -2.7168054580688477, + "logps/chosen": -180.2982177734375, + "logps/rejected": -397.55780029296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.921017646789551, + "rewards/margins": 10.807479858398438, + "rewards/rejected": -18.728496551513672, + "step": 17738 + }, + { + "epoch": 2.76, + "learning_rate": 1.1368328232792157e-06, + "logits/chosen": -2.5148279666900635, + "logits/rejected": -2.809610366821289, + "logps/chosen": -159.20166015625, + "logps/rejected": -456.677490234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2629075050354, + "rewards/margins": 11.933876037597656, + "rewards/rejected": -17.1967830657959, + "step": 17739 + }, + { + "epoch": 2.76, + "learning_rate": 1.1360993827480678e-06, + "logits/chosen": -2.888709306716919, + "logits/rejected": -1.4471535682678223, + "logps/chosen": -651.0450439453125, + "logps/rejected": -229.26998901367188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.863845825195312, + "rewards/margins": 8.658828735351562, + "rewards/rejected": -17.522674560546875, + "step": 17740 + }, + { + "epoch": 2.76, + "learning_rate": 1.13536594221692e-06, + "logits/chosen": -2.7584192752838135, + "logits/rejected": -2.635925769805908, + "logps/chosen": -506.3974609375, + "logps/rejected": -546.5032958984375, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.410932540893555, + "rewards/margins": 9.523277282714844, + "rewards/rejected": -19.9342098236084, + "step": 17741 + }, + { + "epoch": 2.76, + "learning_rate": 1.134632501685772e-06, + "logits/chosen": -2.833876609802246, + "logits/rejected": -2.222517728805542, + "logps/chosen": -261.41900634765625, + "logps/rejected": -440.642578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.214014053344727, + "rewards/margins": 10.78660774230957, + "rewards/rejected": -19.000621795654297, + "step": 17742 + }, + { + "epoch": 2.76, + "learning_rate": 1.1338990611546241e-06, + "logits/chosen": -2.2921504974365234, + "logits/rejected": -2.1486456394195557, + "logps/chosen": -576.4788818359375, + "logps/rejected": -804.112060546875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.884907722473145, + "rewards/margins": 13.339634895324707, + "rewards/rejected": -25.22454261779785, + "step": 17743 + }, + { + "epoch": 2.76, + "learning_rate": 1.1331656206234764e-06, + "logits/chosen": -2.4952635765075684, + "logits/rejected": -2.6807610988616943, + "logps/chosen": -174.57095336914062, + "logps/rejected": -503.1238708496094, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.300827026367188, + "rewards/margins": 7.213133811950684, + "rewards/rejected": -18.513961791992188, + "step": 17744 + }, + { + "epoch": 2.76, + "learning_rate": 1.1324321800923285e-06, + "logits/chosen": -2.4630603790283203, + "logits/rejected": -3.011596441268921, + "logps/chosen": -257.9721984863281, + "logps/rejected": -441.3905029296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.971146583557129, + "rewards/margins": 10.197733879089355, + "rewards/rejected": -18.168880462646484, + "step": 17745 + }, + { + "epoch": 2.76, + "learning_rate": 1.1316987395611806e-06, + "logits/chosen": -1.8396419286727905, + "logits/rejected": -2.685466766357422, + "logps/chosen": -606.712890625, + "logps/rejected": -735.6986083984375, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.590802192687988, + "rewards/margins": 12.864137649536133, + "rewards/rejected": -20.454938888549805, + "step": 17746 + }, + { + "epoch": 2.76, + "learning_rate": 1.1309652990300327e-06, + "logits/chosen": -2.945385694503784, + "logits/rejected": -2.1248974800109863, + "logps/chosen": -309.50177001953125, + "logps/rejected": -266.321044921875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.442079544067383, + "rewards/margins": 11.127509117126465, + "rewards/rejected": -17.56958770751953, + "step": 17747 + }, + { + "epoch": 2.76, + "learning_rate": 1.130231858498885e-06, + "logits/chosen": -1.8156957626342773, + "logits/rejected": -2.3450818061828613, + "logps/chosen": -174.8875732421875, + "logps/rejected": -313.2774658203125, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.413540840148926, + "rewards/margins": 7.418007850646973, + "rewards/rejected": -18.8315486907959, + "step": 17748 + }, + { + "epoch": 2.76, + "learning_rate": 1.1294984179677369e-06, + "logits/chosen": -1.9885647296905518, + "logits/rejected": -2.5669913291931152, + "logps/chosen": -187.97372436523438, + "logps/rejected": -384.17791748046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.05721664428711, + "rewards/margins": 10.150520324707031, + "rewards/rejected": -19.20773696899414, + "step": 17749 + }, + { + "epoch": 2.76, + "learning_rate": 1.128764977436589e-06, + "logits/chosen": -2.0110881328582764, + "logits/rejected": -2.516878128051758, + "logps/chosen": -193.66270446777344, + "logps/rejected": -394.3924560546875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.234617233276367, + "rewards/margins": 7.546144485473633, + "rewards/rejected": -18.78076171875, + "step": 17750 + }, + { + "epoch": 2.76, + "learning_rate": 1.128031536905441e-06, + "logits/chosen": -2.259920835494995, + "logits/rejected": -2.064361810684204, + "logps/chosen": -248.96127319335938, + "logps/rejected": -538.522705078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.423792839050293, + "rewards/margins": 11.489049911499023, + "rewards/rejected": -20.912841796875, + "step": 17751 + }, + { + "epoch": 2.76, + "learning_rate": 1.1272980963742933e-06, + "logits/chosen": -2.7554352283477783, + "logits/rejected": -2.6276512145996094, + "logps/chosen": -251.0706787109375, + "logps/rejected": -314.0635986328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.543878555297852, + "rewards/margins": 9.573850631713867, + "rewards/rejected": -18.11772918701172, + "step": 17752 + }, + { + "epoch": 2.76, + "learning_rate": 1.1265646558431454e-06, + "logits/chosen": -1.9854416847229004, + "logits/rejected": -2.245753049850464, + "logps/chosen": -225.13877868652344, + "logps/rejected": -563.56982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.475120544433594, + "rewards/margins": 16.61066436767578, + "rewards/rejected": -25.085784912109375, + "step": 17753 + }, + { + "epoch": 2.76, + "learning_rate": 1.1258312153119975e-06, + "logits/chosen": -1.4364734888076782, + "logits/rejected": -1.8838841915130615, + "logps/chosen": -181.88461303710938, + "logps/rejected": -519.2218017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.977153778076172, + "rewards/margins": 14.538904190063477, + "rewards/rejected": -25.51605796813965, + "step": 17754 + }, + { + "epoch": 2.76, + "learning_rate": 1.1250977747808496e-06, + "logits/chosen": -2.574598789215088, + "logits/rejected": -2.938838005065918, + "logps/chosen": -174.096923828125, + "logps/rejected": -418.9915466308594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.650386810302734, + "rewards/margins": 10.858011245727539, + "rewards/rejected": -19.508398056030273, + "step": 17755 + }, + { + "epoch": 2.76, + "learning_rate": 1.1243643342497017e-06, + "logits/chosen": -2.7876853942871094, + "logits/rejected": -2.3307456970214844, + "logps/chosen": -539.5675659179688, + "logps/rejected": -576.1624145507812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7173686027526855, + "rewards/margins": 16.44564437866211, + "rewards/rejected": -22.163013458251953, + "step": 17756 + }, + { + "epoch": 2.76, + "learning_rate": 1.123630893718554e-06, + "logits/chosen": -2.062002658843994, + "logits/rejected": -2.4529707431793213, + "logps/chosen": -191.7377471923828, + "logps/rejected": -441.40960693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.022516250610352, + "rewards/margins": 14.551462173461914, + "rewards/rejected": -23.573978424072266, + "step": 17757 + }, + { + "epoch": 2.76, + "learning_rate": 1.122897453187406e-06, + "logits/chosen": -2.6183457374572754, + "logits/rejected": -2.7714836597442627, + "logps/chosen": -698.8937377929688, + "logps/rejected": -642.9655151367188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.117130279541016, + "rewards/margins": 11.759382247924805, + "rewards/rejected": -20.876510620117188, + "step": 17758 + }, + { + "epoch": 2.76, + "learning_rate": 1.122164012656258e-06, + "logits/chosen": -1.4647445678710938, + "logits/rejected": -2.664656639099121, + "logps/chosen": -229.7332000732422, + "logps/rejected": -365.3945617675781, + "loss": 0.0216, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.811529159545898, + "rewards/margins": 6.566667556762695, + "rewards/rejected": -20.378196716308594, + "step": 17759 + }, + { + "epoch": 2.76, + "learning_rate": 1.12143057212511e-06, + "logits/chosen": -1.9708757400512695, + "logits/rejected": -2.681300163269043, + "logps/chosen": -304.15545654296875, + "logps/rejected": -343.5079040527344, + "loss": 0.249, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.773194313049316, + "rewards/margins": 5.7034149169921875, + "rewards/rejected": -15.476609230041504, + "step": 17760 + }, + { + "epoch": 2.76, + "learning_rate": 1.1206971315939624e-06, + "logits/chosen": -1.451602816581726, + "logits/rejected": -2.0713717937469482, + "logps/chosen": -272.3086853027344, + "logps/rejected": -438.1796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.799408912658691, + "rewards/margins": 10.901006698608398, + "rewards/rejected": -21.700416564941406, + "step": 17761 + }, + { + "epoch": 2.76, + "learning_rate": 1.1199636910628145e-06, + "logits/chosen": -2.75234055519104, + "logits/rejected": -2.6388306617736816, + "logps/chosen": -172.0491943359375, + "logps/rejected": -362.2196044921875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.725845336914062, + "rewards/margins": 10.72647476196289, + "rewards/rejected": -19.452320098876953, + "step": 17762 + }, + { + "epoch": 2.76, + "learning_rate": 1.1192302505316665e-06, + "logits/chosen": -2.1583309173583984, + "logits/rejected": -2.972080707550049, + "logps/chosen": -205.92872619628906, + "logps/rejected": -301.8698425292969, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.395477294921875, + "rewards/margins": 7.6103291511535645, + "rewards/rejected": -18.00580596923828, + "step": 17763 + }, + { + "epoch": 2.76, + "learning_rate": 1.1184968100005186e-06, + "logits/chosen": -2.808384895324707, + "logits/rejected": -2.2017982006073, + "logps/chosen": -198.2696533203125, + "logps/rejected": -292.347412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.044751167297363, + "rewards/margins": 12.666440963745117, + "rewards/rejected": -20.711193084716797, + "step": 17764 + }, + { + "epoch": 2.76, + "learning_rate": 1.117763369469371e-06, + "logits/chosen": -1.4733070135116577, + "logits/rejected": -1.8134231567382812, + "logps/chosen": -219.3931884765625, + "logps/rejected": -354.2452697753906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.256049633026123, + "rewards/margins": 9.152985572814941, + "rewards/rejected": -16.409034729003906, + "step": 17765 + }, + { + "epoch": 2.76, + "learning_rate": 1.117029928938223e-06, + "logits/chosen": -1.7226293087005615, + "logits/rejected": -2.5751688480377197, + "logps/chosen": -287.4914855957031, + "logps/rejected": -498.6862487792969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.530067443847656, + "rewards/margins": 10.569086074829102, + "rewards/rejected": -22.099153518676758, + "step": 17766 + }, + { + "epoch": 2.76, + "learning_rate": 1.1162964884070751e-06, + "logits/chosen": -1.901935338973999, + "logits/rejected": -2.611880302429199, + "logps/chosen": -440.02386474609375, + "logps/rejected": -557.1337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.318007469177246, + "rewards/margins": 13.243831634521484, + "rewards/rejected": -23.561840057373047, + "step": 17767 + }, + { + "epoch": 2.76, + "learning_rate": 1.1155630478759272e-06, + "logits/chosen": -0.5573887228965759, + "logits/rejected": -1.6294031143188477, + "logps/chosen": -243.22003173828125, + "logps/rejected": -364.376220703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7115397453308105, + "rewards/margins": 10.524200439453125, + "rewards/rejected": -17.235740661621094, + "step": 17768 + }, + { + "epoch": 2.76, + "learning_rate": 1.1148296073447793e-06, + "logits/chosen": -2.8666179180145264, + "logits/rejected": -1.932279109954834, + "logps/chosen": -622.9890747070312, + "logps/rejected": -331.4102783203125, + "loss": 0.0308, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.387504577636719, + "rewards/margins": 7.365760803222656, + "rewards/rejected": -18.753265380859375, + "step": 17769 + }, + { + "epoch": 2.76, + "learning_rate": 1.1140961668136314e-06, + "logits/chosen": -2.22949481010437, + "logits/rejected": -2.8017327785491943, + "logps/chosen": -367.07269287109375, + "logps/rejected": -379.793701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.399618148803711, + "rewards/margins": 15.761232376098633, + "rewards/rejected": -23.160850524902344, + "step": 17770 + }, + { + "epoch": 2.76, + "learning_rate": 1.1133627262824835e-06, + "logits/chosen": -1.50138258934021, + "logits/rejected": -2.247438907623291, + "logps/chosen": -253.25074768066406, + "logps/rejected": -490.5203552246094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.8862886428833, + "rewards/margins": 13.736135482788086, + "rewards/rejected": -23.622425079345703, + "step": 17771 + }, + { + "epoch": 2.76, + "learning_rate": 1.1126292857513356e-06, + "logits/chosen": -2.1324543952941895, + "logits/rejected": -2.733639717102051, + "logps/chosen": -146.50326538085938, + "logps/rejected": -365.72216796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.941137313842773, + "rewards/margins": 9.306285858154297, + "rewards/rejected": -19.247421264648438, + "step": 17772 + }, + { + "epoch": 2.76, + "learning_rate": 1.1118958452201879e-06, + "logits/chosen": -2.3742077350616455, + "logits/rejected": -2.426373243331909, + "logps/chosen": -211.65194702148438, + "logps/rejected": -422.8017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.267710208892822, + "rewards/margins": 11.990089416503906, + "rewards/rejected": -19.25779914855957, + "step": 17773 + }, + { + "epoch": 2.76, + "learning_rate": 1.11116240468904e-06, + "logits/chosen": -1.1689374446868896, + "logits/rejected": -2.619533061981201, + "logps/chosen": -213.97042846679688, + "logps/rejected": -505.6393737792969, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.997864723205566, + "rewards/margins": 10.986629486083984, + "rewards/rejected": -19.984495162963867, + "step": 17774 + }, + { + "epoch": 2.76, + "learning_rate": 1.110428964157892e-06, + "logits/chosen": -2.6946723461151123, + "logits/rejected": -1.9106272459030151, + "logps/chosen": -307.81024169921875, + "logps/rejected": -285.86328125, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1386284828186035, + "rewards/margins": 7.49148416519165, + "rewards/rejected": -14.630112648010254, + "step": 17775 + }, + { + "epoch": 2.76, + "learning_rate": 1.1096955236267441e-06, + "logits/chosen": -2.646394729614258, + "logits/rejected": -2.698117256164551, + "logps/chosen": -220.5916748046875, + "logps/rejected": -348.7516784667969, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.431320190429688, + "rewards/margins": 7.553928852081299, + "rewards/rejected": -18.985248565673828, + "step": 17776 + }, + { + "epoch": 2.76, + "learning_rate": 1.1089620830955962e-06, + "logits/chosen": -2.9116885662078857, + "logits/rejected": -2.8184680938720703, + "logps/chosen": -92.18433380126953, + "logps/rejected": -313.12445068359375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.831086158752441, + "rewards/margins": 6.724240303039551, + "rewards/rejected": -12.555326461791992, + "step": 17777 + }, + { + "epoch": 2.76, + "learning_rate": 1.1082286425644485e-06, + "logits/chosen": -2.8702447414398193, + "logits/rejected": -1.3056623935699463, + "logps/chosen": -595.4108276367188, + "logps/rejected": -472.7481994628906, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.48228645324707, + "rewards/margins": 7.951861381530762, + "rewards/rejected": -13.434147834777832, + "step": 17778 + }, + { + "epoch": 2.77, + "learning_rate": 1.1074952020333004e-06, + "logits/chosen": -1.827020525932312, + "logits/rejected": -2.715869188308716, + "logps/chosen": -200.36695861816406, + "logps/rejected": -322.13140869140625, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.873406410217285, + "rewards/margins": 6.39390754699707, + "rewards/rejected": -18.267314910888672, + "step": 17779 + }, + { + "epoch": 2.77, + "learning_rate": 1.1067617615021525e-06, + "logits/chosen": -2.562286615371704, + "logits/rejected": -2.853350877761841, + "logps/chosen": -193.54908752441406, + "logps/rejected": -314.8782653808594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.970951080322266, + "rewards/margins": 8.683847427368164, + "rewards/rejected": -18.65479850769043, + "step": 17780 + }, + { + "epoch": 2.77, + "learning_rate": 1.1060283209710046e-06, + "logits/chosen": -1.234973669052124, + "logits/rejected": -2.569892406463623, + "logps/chosen": -182.72055053710938, + "logps/rejected": -353.8375244140625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.827016830444336, + "rewards/margins": 7.213186264038086, + "rewards/rejected": -19.040203094482422, + "step": 17781 + }, + { + "epoch": 2.77, + "learning_rate": 1.1052948804398569e-06, + "logits/chosen": -2.7165017127990723, + "logits/rejected": -2.3040874004364014, + "logps/chosen": -476.12518310546875, + "logps/rejected": -410.5986022949219, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.539684295654297, + "rewards/margins": 8.48514175415039, + "rewards/rejected": -18.024826049804688, + "step": 17782 + }, + { + "epoch": 2.77, + "learning_rate": 1.104561439908709e-06, + "logits/chosen": -2.7291054725646973, + "logits/rejected": -2.3832032680511475, + "logps/chosen": -767.3857421875, + "logps/rejected": -653.355712890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.586515426635742, + "rewards/margins": 8.515769004821777, + "rewards/rejected": -19.102283477783203, + "step": 17783 + }, + { + "epoch": 2.77, + "learning_rate": 1.103827999377561e-06, + "logits/chosen": -2.518031358718872, + "logits/rejected": -2.8707382678985596, + "logps/chosen": -515.7355346679688, + "logps/rejected": -594.8823852539062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.825664520263672, + "rewards/margins": 10.457660675048828, + "rewards/rejected": -17.2833251953125, + "step": 17784 + }, + { + "epoch": 2.77, + "learning_rate": 1.1030945588464132e-06, + "logits/chosen": -1.8430688381195068, + "logits/rejected": -2.752323865890503, + "logps/chosen": -140.61766052246094, + "logps/rejected": -376.5097351074219, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.576488494873047, + "rewards/margins": 6.936235427856445, + "rewards/rejected": -16.512723922729492, + "step": 17785 + }, + { + "epoch": 2.77, + "learning_rate": 1.1023611183152655e-06, + "logits/chosen": -2.522967576980591, + "logits/rejected": -2.0131278038024902, + "logps/chosen": -577.8567504882812, + "logps/rejected": -668.9798583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.208227157592773, + "rewards/margins": 15.995952606201172, + "rewards/rejected": -27.204181671142578, + "step": 17786 + }, + { + "epoch": 2.77, + "learning_rate": 1.1016276777841176e-06, + "logits/chosen": -2.905390739440918, + "logits/rejected": -2.8014132976531982, + "logps/chosen": -525.7675170898438, + "logps/rejected": -241.33212280273438, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.514123916625977, + "rewards/margins": 6.8218488693237305, + "rewards/rejected": -18.33597183227539, + "step": 17787 + }, + { + "epoch": 2.77, + "learning_rate": 1.1008942372529696e-06, + "logits/chosen": -2.677743673324585, + "logits/rejected": -2.8309597969055176, + "logps/chosen": -191.02865600585938, + "logps/rejected": -298.475341796875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.197842597961426, + "rewards/margins": 6.572502136230469, + "rewards/rejected": -14.770345687866211, + "step": 17788 + }, + { + "epoch": 2.77, + "learning_rate": 1.1001607967218215e-06, + "logits/chosen": -2.334630012512207, + "logits/rejected": -2.3901381492614746, + "logps/chosen": -391.14483642578125, + "logps/rejected": -593.5521850585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.382875442504883, + "rewards/margins": 12.761180877685547, + "rewards/rejected": -19.144054412841797, + "step": 17789 + }, + { + "epoch": 2.77, + "learning_rate": 1.0994273561906738e-06, + "logits/chosen": -2.9461519718170166, + "logits/rejected": -2.9812824726104736, + "logps/chosen": -114.45182037353516, + "logps/rejected": -254.28079223632812, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.71459674835205, + "rewards/margins": 8.087133407592773, + "rewards/rejected": -16.80173110961914, + "step": 17790 + }, + { + "epoch": 2.77, + "learning_rate": 1.098693915659526e-06, + "logits/chosen": -2.930567502975464, + "logits/rejected": -2.7422609329223633, + "logps/chosen": -603.3237915039062, + "logps/rejected": -550.8275756835938, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.579689025878906, + "rewards/margins": 7.206810474395752, + "rewards/rejected": -16.7864990234375, + "step": 17791 + }, + { + "epoch": 2.77, + "learning_rate": 1.097960475128378e-06, + "logits/chosen": -2.9352781772613525, + "logits/rejected": -2.8058454990386963, + "logps/chosen": -566.1802978515625, + "logps/rejected": -1021.224365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.997491359710693, + "rewards/margins": 15.90785026550293, + "rewards/rejected": -20.90534210205078, + "step": 17792 + }, + { + "epoch": 2.77, + "learning_rate": 1.09722703459723e-06, + "logits/chosen": -2.0377349853515625, + "logits/rejected": -2.6705586910247803, + "logps/chosen": -281.58038330078125, + "logps/rejected": -486.6578369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.631484031677246, + "rewards/margins": 15.153505325317383, + "rewards/rejected": -22.784988403320312, + "step": 17793 + }, + { + "epoch": 2.77, + "learning_rate": 1.0964935940660824e-06, + "logits/chosen": -2.890861749649048, + "logits/rejected": -2.9420084953308105, + "logps/chosen": -118.10604095458984, + "logps/rejected": -216.2052001953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.187492370605469, + "rewards/margins": 7.655505657196045, + "rewards/rejected": -15.842998504638672, + "step": 17794 + }, + { + "epoch": 2.77, + "learning_rate": 1.0957601535349345e-06, + "logits/chosen": -1.308479905128479, + "logits/rejected": -2.576972484588623, + "logps/chosen": -242.86656188964844, + "logps/rejected": -795.9533081054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.393710136413574, + "rewards/margins": 14.341228485107422, + "rewards/rejected": -25.734939575195312, + "step": 17795 + }, + { + "epoch": 2.77, + "learning_rate": 1.0950267130037866e-06, + "logits/chosen": -2.54793381690979, + "logits/rejected": -2.9749181270599365, + "logps/chosen": -151.68954467773438, + "logps/rejected": -539.974365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.639646530151367, + "rewards/margins": 14.643606185913086, + "rewards/rejected": -27.283252716064453, + "step": 17796 + }, + { + "epoch": 2.77, + "learning_rate": 1.0942932724726387e-06, + "logits/chosen": -1.2786085605621338, + "logits/rejected": -2.3611998558044434, + "logps/chosen": -267.5033264160156, + "logps/rejected": -550.807373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7205810546875, + "rewards/margins": 14.834575653076172, + "rewards/rejected": -22.555156707763672, + "step": 17797 + }, + { + "epoch": 2.77, + "learning_rate": 1.0935598319414908e-06, + "logits/chosen": -1.4900100231170654, + "logits/rejected": -2.8252413272857666, + "logps/chosen": -402.8959045410156, + "logps/rejected": -533.0504150390625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.574750900268555, + "rewards/margins": 9.078424453735352, + "rewards/rejected": -15.653175354003906, + "step": 17798 + }, + { + "epoch": 2.77, + "learning_rate": 1.0928263914103428e-06, + "logits/chosen": -2.6986734867095947, + "logits/rejected": -2.762364387512207, + "logps/chosen": -505.4649658203125, + "logps/rejected": -790.4208984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.35041332244873, + "rewards/margins": 11.694561958312988, + "rewards/rejected": -21.04497528076172, + "step": 17799 + }, + { + "epoch": 2.77, + "learning_rate": 1.092092950879195e-06, + "logits/chosen": -2.6101274490356445, + "logits/rejected": -2.725139856338501, + "logps/chosen": -103.56608581542969, + "logps/rejected": -338.60595703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.909933090209961, + "rewards/margins": 13.406457901000977, + "rewards/rejected": -19.316390991210938, + "step": 17800 + }, + { + "epoch": 2.77, + "learning_rate": 1.091359510348047e-06, + "logits/chosen": -1.4561644792556763, + "logits/rejected": -2.626499652862549, + "logps/chosen": -227.9886932373047, + "logps/rejected": -461.08551025390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.027710914611816, + "rewards/margins": 11.386260032653809, + "rewards/rejected": -20.413970947265625, + "step": 17801 + }, + { + "epoch": 2.77, + "learning_rate": 1.0906260698168991e-06, + "logits/chosen": -2.4129745960235596, + "logits/rejected": -2.1379446983337402, + "logps/chosen": -147.2179412841797, + "logps/rejected": -300.6286926269531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.775733947753906, + "rewards/margins": 8.890175819396973, + "rewards/rejected": -14.665909767150879, + "step": 17802 + }, + { + "epoch": 2.77, + "learning_rate": 1.0898926292857514e-06, + "logits/chosen": -1.0264846086502075, + "logits/rejected": -2.358466148376465, + "logps/chosen": -169.50555419921875, + "logps/rejected": -568.896728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.942171096801758, + "rewards/margins": 13.704733848571777, + "rewards/rejected": -24.64690399169922, + "step": 17803 + }, + { + "epoch": 2.77, + "learning_rate": 1.0891591887546035e-06, + "logits/chosen": -1.3631093502044678, + "logits/rejected": -2.7860381603240967, + "logps/chosen": -226.46197509765625, + "logps/rejected": -517.423583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.733120918273926, + "rewards/margins": 13.81141471862793, + "rewards/rejected": -21.54453468322754, + "step": 17804 + }, + { + "epoch": 2.77, + "learning_rate": 1.0884257482234556e-06, + "logits/chosen": -2.762531280517578, + "logits/rejected": -2.9796133041381836, + "logps/chosen": -131.30918884277344, + "logps/rejected": -208.94444274902344, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.550835609436035, + "rewards/margins": 7.977143287658691, + "rewards/rejected": -15.527978897094727, + "step": 17805 + }, + { + "epoch": 2.77, + "learning_rate": 1.0876923076923077e-06, + "logits/chosen": -2.104259967803955, + "logits/rejected": -1.6888457536697388, + "logps/chosen": -355.6390380859375, + "logps/rejected": -343.446044921875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.228433609008789, + "rewards/margins": 7.223784446716309, + "rewards/rejected": -17.45221710205078, + "step": 17806 + }, + { + "epoch": 2.77, + "learning_rate": 1.08695886716116e-06, + "logits/chosen": -2.0967557430267334, + "logits/rejected": -2.741671085357666, + "logps/chosen": -220.8725128173828, + "logps/rejected": -261.36077880859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.95106315612793, + "rewards/margins": 7.799300193786621, + "rewards/rejected": -16.750364303588867, + "step": 17807 + }, + { + "epoch": 2.77, + "learning_rate": 1.0862254266300119e-06, + "logits/chosen": -2.6145713329315186, + "logits/rejected": -2.88559627532959, + "logps/chosen": -75.78063201904297, + "logps/rejected": -282.560546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.678421974182129, + "rewards/margins": 9.741589546203613, + "rewards/rejected": -16.420011520385742, + "step": 17808 + }, + { + "epoch": 2.77, + "learning_rate": 1.085491986098864e-06, + "logits/chosen": -2.775723695755005, + "logits/rejected": -2.678971529006958, + "logps/chosen": -195.15419006347656, + "logps/rejected": -310.1572265625, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.729597091674805, + "rewards/margins": 10.008338928222656, + "rewards/rejected": -16.73793601989746, + "step": 17809 + }, + { + "epoch": 2.77, + "learning_rate": 1.084758545567716e-06, + "logits/chosen": -2.5675883293151855, + "logits/rejected": -2.9952542781829834, + "logps/chosen": -162.05960083007812, + "logps/rejected": -458.234619140625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.573739051818848, + "rewards/margins": 8.964479446411133, + "rewards/rejected": -16.538219451904297, + "step": 17810 + }, + { + "epoch": 2.77, + "learning_rate": 1.0840251050365683e-06, + "logits/chosen": -2.1531929969787598, + "logits/rejected": -2.4298477172851562, + "logps/chosen": -261.2565612792969, + "logps/rejected": -337.22381591796875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.895370006561279, + "rewards/margins": 9.283266067504883, + "rewards/rejected": -17.17863655090332, + "step": 17811 + }, + { + "epoch": 2.77, + "learning_rate": 1.0832916645054204e-06, + "logits/chosen": -1.4451062679290771, + "logits/rejected": -2.3092868328094482, + "logps/chosen": -285.93768310546875, + "logps/rejected": -631.1513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6514387130737305, + "rewards/margins": 14.835471153259277, + "rewards/rejected": -21.486909866333008, + "step": 17812 + }, + { + "epoch": 2.77, + "learning_rate": 1.0825582239742725e-06, + "logits/chosen": -2.5281622409820557, + "logits/rejected": -2.420997142791748, + "logps/chosen": -885.8484497070312, + "logps/rejected": -692.0439453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.197827339172363, + "rewards/margins": 9.674150466918945, + "rewards/rejected": -17.871976852416992, + "step": 17813 + }, + { + "epoch": 2.77, + "learning_rate": 1.0818247834431246e-06, + "logits/chosen": -2.7594034671783447, + "logits/rejected": -2.0005745887756348, + "logps/chosen": -606.1026000976562, + "logps/rejected": -530.9887084960938, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.589937210083008, + "rewards/margins": 9.692366600036621, + "rewards/rejected": -20.282304763793945, + "step": 17814 + }, + { + "epoch": 2.77, + "learning_rate": 1.081091342911977e-06, + "logits/chosen": -2.177436590194702, + "logits/rejected": -2.243253707885742, + "logps/chosen": -318.7071228027344, + "logps/rejected": -402.8886413574219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.618627548217773, + "rewards/margins": 12.26133918762207, + "rewards/rejected": -21.879966735839844, + "step": 17815 + }, + { + "epoch": 2.77, + "learning_rate": 1.080357902380829e-06, + "logits/chosen": -1.3713524341583252, + "logits/rejected": -2.072582721710205, + "logps/chosen": -225.67630004882812, + "logps/rejected": -523.6463623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.995792388916016, + "rewards/margins": 12.879558563232422, + "rewards/rejected": -23.875350952148438, + "step": 17816 + }, + { + "epoch": 2.77, + "learning_rate": 1.079624461849681e-06, + "logits/chosen": -2.917618989944458, + "logits/rejected": -2.4436306953430176, + "logps/chosen": -122.45248413085938, + "logps/rejected": -166.12823486328125, + "loss": 0.3334, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.110639572143555, + "rewards/margins": 2.8635590076446533, + "rewards/rejected": -9.974199295043945, + "step": 17817 + }, + { + "epoch": 2.77, + "learning_rate": 1.078891021318533e-06, + "logits/chosen": -1.2801804542541504, + "logits/rejected": -2.4800753593444824, + "logps/chosen": -155.1202392578125, + "logps/rejected": -544.9490356445312, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.583976745605469, + "rewards/margins": 8.957812309265137, + "rewards/rejected": -16.541790008544922, + "step": 17818 + }, + { + "epoch": 2.77, + "learning_rate": 1.0781575807873853e-06, + "logits/chosen": -2.885105609893799, + "logits/rejected": -2.987159490585327, + "logps/chosen": -188.58572387695312, + "logps/rejected": -399.3023681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.571831226348877, + "rewards/margins": 11.070455551147461, + "rewards/rejected": -18.642288208007812, + "step": 17819 + }, + { + "epoch": 2.77, + "learning_rate": 1.0774241402562374e-06, + "logits/chosen": -1.4222276210784912, + "logits/rejected": -2.389556884765625, + "logps/chosen": -254.83547973632812, + "logps/rejected": -578.1402587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.34980583190918, + "rewards/margins": 18.234342575073242, + "rewards/rejected": -26.584148406982422, + "step": 17820 + }, + { + "epoch": 2.77, + "learning_rate": 1.0766906997250895e-06, + "logits/chosen": -1.349717378616333, + "logits/rejected": -2.490868091583252, + "logps/chosen": -148.9843292236328, + "logps/rejected": -405.5616760253906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.99914026260376, + "rewards/margins": 11.382575988769531, + "rewards/rejected": -19.381717681884766, + "step": 17821 + }, + { + "epoch": 2.77, + "learning_rate": 1.0759572591939416e-06, + "logits/chosen": -1.6985009908676147, + "logits/rejected": -2.5444345474243164, + "logps/chosen": -168.69635009765625, + "logps/rejected": -356.75604248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.643970489501953, + "rewards/margins": 10.921274185180664, + "rewards/rejected": -18.56524658203125, + "step": 17822 + }, + { + "epoch": 2.77, + "learning_rate": 1.0752238186627936e-06, + "logits/chosen": -1.424523949623108, + "logits/rejected": -2.4827382564544678, + "logps/chosen": -157.203857421875, + "logps/rejected": -369.9254150390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.521763801574707, + "rewards/margins": 9.9754638671875, + "rewards/rejected": -20.49722671508789, + "step": 17823 + }, + { + "epoch": 2.77, + "learning_rate": 1.074490378131646e-06, + "logits/chosen": -2.166365385055542, + "logits/rejected": -2.658997058868408, + "logps/chosen": -128.51513671875, + "logps/rejected": -376.07086181640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.384358406066895, + "rewards/margins": 11.777460098266602, + "rewards/rejected": -20.16181755065918, + "step": 17824 + }, + { + "epoch": 2.77, + "learning_rate": 1.073756937600498e-06, + "logits/chosen": -2.9280543327331543, + "logits/rejected": -1.8739393949508667, + "logps/chosen": -802.5679321289062, + "logps/rejected": -562.45947265625, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.522613525390625, + "rewards/margins": 8.919301986694336, + "rewards/rejected": -17.441917419433594, + "step": 17825 + }, + { + "epoch": 2.77, + "learning_rate": 1.0730234970693501e-06, + "logits/chosen": -2.45658802986145, + "logits/rejected": -2.2865521907806396, + "logps/chosen": -215.97808837890625, + "logps/rejected": -217.66455078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1384124755859375, + "rewards/margins": 8.726364135742188, + "rewards/rejected": -12.864776611328125, + "step": 17826 + }, + { + "epoch": 2.77, + "learning_rate": 1.0722900565382022e-06, + "logits/chosen": -2.949361801147461, + "logits/rejected": -2.979860544204712, + "logps/chosen": -115.5553207397461, + "logps/rejected": -299.50079345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.020622253417969, + "rewards/margins": 12.944343566894531, + "rewards/rejected": -18.9649658203125, + "step": 17827 + }, + { + "epoch": 2.77, + "learning_rate": 1.0715566160070543e-06, + "logits/chosen": -2.4709365367889404, + "logits/rejected": -2.7116048336029053, + "logps/chosen": -155.14674377441406, + "logps/rejected": -326.06561279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.285552978515625, + "rewards/margins": 10.942489624023438, + "rewards/rejected": -19.228042602539062, + "step": 17828 + }, + { + "epoch": 2.77, + "learning_rate": 1.0708231754759064e-06, + "logits/chosen": -2.5691728591918945, + "logits/rejected": -2.8954641819000244, + "logps/chosen": -168.3135528564453, + "logps/rejected": -501.81622314453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.879447937011719, + "rewards/margins": 10.624689102172852, + "rewards/rejected": -18.50413703918457, + "step": 17829 + }, + { + "epoch": 2.77, + "learning_rate": 1.0700897349447585e-06, + "logits/chosen": -2.866102933883667, + "logits/rejected": -2.735806465148926, + "logps/chosen": -187.57225036621094, + "logps/rejected": -269.278076171875, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.67235279083252, + "rewards/margins": 6.028985023498535, + "rewards/rejected": -14.701337814331055, + "step": 17830 + }, + { + "epoch": 2.77, + "learning_rate": 1.0693562944136106e-06, + "logits/chosen": -2.190774917602539, + "logits/rejected": -2.642332077026367, + "logps/chosen": -758.2349243164062, + "logps/rejected": -960.3350830078125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.695154190063477, + "rewards/margins": 16.11370849609375, + "rewards/rejected": -28.808862686157227, + "step": 17831 + }, + { + "epoch": 2.77, + "learning_rate": 1.0686228538824629e-06, + "logits/chosen": -2.695617198944092, + "logits/rejected": -2.4376680850982666, + "logps/chosen": -231.9783172607422, + "logps/rejected": -239.46331787109375, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.33469009399414, + "rewards/margins": 4.46830415725708, + "rewards/rejected": -15.802993774414062, + "step": 17832 + }, + { + "epoch": 2.77, + "learning_rate": 1.067889413351315e-06, + "logits/chosen": -2.4631853103637695, + "logits/rejected": -1.460408329963684, + "logps/chosen": -426.6890869140625, + "logps/rejected": -296.13006591796875, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.779248237609863, + "rewards/margins": 4.586321830749512, + "rewards/rejected": -13.365570068359375, + "step": 17833 + }, + { + "epoch": 2.77, + "learning_rate": 1.067155972820167e-06, + "logits/chosen": -2.865488052368164, + "logits/rejected": -2.6611597537994385, + "logps/chosen": -150.9746856689453, + "logps/rejected": -302.6829833984375, + "loss": 0.1758, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.591606140136719, + "rewards/margins": 6.17638635635376, + "rewards/rejected": -17.76799201965332, + "step": 17834 + }, + { + "epoch": 2.77, + "learning_rate": 1.0664225322890191e-06, + "logits/chosen": -2.954704761505127, + "logits/rejected": -2.6342263221740723, + "logps/chosen": -251.9551239013672, + "logps/rejected": -326.5888366699219, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.632984161376953, + "rewards/margins": 6.517590045928955, + "rewards/rejected": -16.15057373046875, + "step": 17835 + }, + { + "epoch": 2.77, + "learning_rate": 1.0656890917578712e-06, + "logits/chosen": -2.0890212059020996, + "logits/rejected": -2.5046181678771973, + "logps/chosen": -541.1812133789062, + "logps/rejected": -406.2249755859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8328962326049805, + "rewards/margins": 9.060383796691895, + "rewards/rejected": -15.893280029296875, + "step": 17836 + }, + { + "epoch": 2.77, + "learning_rate": 1.0649556512267235e-06, + "logits/chosen": -2.562089681625366, + "logits/rejected": -2.369579553604126, + "logps/chosen": -183.62203979492188, + "logps/rejected": -157.7710723876953, + "loss": 0.0724, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.217594623565674, + "rewards/margins": 4.596188545227051, + "rewards/rejected": -11.813783645629883, + "step": 17837 + }, + { + "epoch": 2.77, + "learning_rate": 1.0642222106955754e-06, + "logits/chosen": -2.465808153152466, + "logits/rejected": -2.633286237716675, + "logps/chosen": -428.1192321777344, + "logps/rejected": -502.4394836425781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.086255073547363, + "rewards/margins": 10.541015625, + "rewards/rejected": -19.627269744873047, + "step": 17838 + }, + { + "epoch": 2.77, + "learning_rate": 1.0634887701644275e-06, + "logits/chosen": -2.139066696166992, + "logits/rejected": -2.7084484100341797, + "logps/chosen": -133.38348388671875, + "logps/rejected": -312.09832763671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.539938926696777, + "rewards/margins": 8.375197410583496, + "rewards/rejected": -18.915136337280273, + "step": 17839 + }, + { + "epoch": 2.77, + "learning_rate": 1.0627553296332796e-06, + "logits/chosen": -2.504255771636963, + "logits/rejected": -1.7343186140060425, + "logps/chosen": -214.5568084716797, + "logps/rejected": -292.5548400878906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.40835952758789, + "rewards/margins": 10.427322387695312, + "rewards/rejected": -18.835681915283203, + "step": 17840 + }, + { + "epoch": 2.77, + "learning_rate": 1.062021889102132e-06, + "logits/chosen": -2.1073076725006104, + "logits/rejected": -2.859173059463501, + "logps/chosen": -232.70220947265625, + "logps/rejected": -509.16876220703125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.877065658569336, + "rewards/margins": 8.233333587646484, + "rewards/rejected": -17.11039924621582, + "step": 17841 + }, + { + "epoch": 2.77, + "learning_rate": 1.061288448570984e-06, + "logits/chosen": -2.7353646755218506, + "logits/rejected": -2.9763808250427246, + "logps/chosen": -252.0404815673828, + "logps/rejected": -470.4326171875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.990962982177734, + "rewards/margins": 7.02821683883667, + "rewards/rejected": -18.019180297851562, + "step": 17842 + }, + { + "epoch": 2.77, + "learning_rate": 1.060555008039836e-06, + "logits/chosen": -1.869175672531128, + "logits/rejected": -2.5177645683288574, + "logps/chosen": -138.1734619140625, + "logps/rejected": -411.4913024902344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.197187423706055, + "rewards/margins": 11.85627555847168, + "rewards/rejected": -18.053462982177734, + "step": 17843 + }, + { + "epoch": 2.78, + "learning_rate": 1.0598215675086882e-06, + "logits/chosen": -2.7482261657714844, + "logits/rejected": -1.211952567100525, + "logps/chosen": -580.517822265625, + "logps/rejected": -476.6817932128906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.746706008911133, + "rewards/margins": 12.0263671875, + "rewards/rejected": -19.7730712890625, + "step": 17844 + }, + { + "epoch": 2.78, + "learning_rate": 1.0590881269775405e-06, + "logits/chosen": -2.821104049682617, + "logits/rejected": -2.705937623977661, + "logps/chosen": -150.49075317382812, + "logps/rejected": -231.4077606201172, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.904182434082031, + "rewards/margins": 6.089648723602295, + "rewards/rejected": -16.993831634521484, + "step": 17845 + }, + { + "epoch": 2.78, + "learning_rate": 1.0583546864463926e-06, + "logits/chosen": -1.4632617235183716, + "logits/rejected": -2.5953640937805176, + "logps/chosen": -244.266357421875, + "logps/rejected": -452.1068115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.298099517822266, + "rewards/margins": 13.0614013671875, + "rewards/rejected": -23.359500885009766, + "step": 17846 + }, + { + "epoch": 2.78, + "learning_rate": 1.0576212459152446e-06, + "logits/chosen": -2.258711576461792, + "logits/rejected": -2.16679048538208, + "logps/chosen": -258.7106628417969, + "logps/rejected": -353.9927062988281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.872499465942383, + "rewards/margins": 10.603391647338867, + "rewards/rejected": -20.47589111328125, + "step": 17847 + }, + { + "epoch": 2.78, + "learning_rate": 1.0568878053840965e-06, + "logits/chosen": -1.7396318912506104, + "logits/rejected": -2.5948970317840576, + "logps/chosen": -194.6492462158203, + "logps/rejected": -398.6458435058594, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.188577651977539, + "rewards/margins": 10.278980255126953, + "rewards/rejected": -21.467557907104492, + "step": 17848 + }, + { + "epoch": 2.78, + "learning_rate": 1.0561543648529488e-06, + "logits/chosen": -1.4418506622314453, + "logits/rejected": -2.413241147994995, + "logps/chosen": -288.47930908203125, + "logps/rejected": -500.3658447265625, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.592460632324219, + "rewards/margins": 6.980709552764893, + "rewards/rejected": -17.573169708251953, + "step": 17849 + }, + { + "epoch": 2.78, + "learning_rate": 1.055420924321801e-06, + "logits/chosen": -2.788942337036133, + "logits/rejected": -2.151280164718628, + "logps/chosen": -392.85107421875, + "logps/rejected": -345.90362548828125, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.452842712402344, + "rewards/margins": 5.681953430175781, + "rewards/rejected": -14.134796142578125, + "step": 17850 + }, + { + "epoch": 2.78, + "learning_rate": 1.054687483790653e-06, + "logits/chosen": -2.8912971019744873, + "logits/rejected": -2.585155487060547, + "logps/chosen": -375.9714660644531, + "logps/rejected": -450.9072570800781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.49499797821045, + "rewards/margins": 12.962099075317383, + "rewards/rejected": -25.457096099853516, + "step": 17851 + }, + { + "epoch": 2.78, + "learning_rate": 1.053954043259505e-06, + "logits/chosen": -1.5335701704025269, + "logits/rejected": -2.244882345199585, + "logps/chosen": -487.17364501953125, + "logps/rejected": -791.7528076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.88353157043457, + "rewards/margins": 25.367258071899414, + "rewards/rejected": -37.250789642333984, + "step": 17852 + }, + { + "epoch": 2.78, + "learning_rate": 1.0532206027283574e-06, + "logits/chosen": -2.5108540058135986, + "logits/rejected": -1.2256492376327515, + "logps/chosen": -178.30484008789062, + "logps/rejected": -223.5391845703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.122715950012207, + "rewards/margins": 10.011123657226562, + "rewards/rejected": -19.133838653564453, + "step": 17853 + }, + { + "epoch": 2.78, + "learning_rate": 1.0524871621972095e-06, + "logits/chosen": -2.1142091751098633, + "logits/rejected": -2.54327392578125, + "logps/chosen": -257.74176025390625, + "logps/rejected": -355.37335205078125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.292943000793457, + "rewards/margins": 7.636476039886475, + "rewards/rejected": -16.929418563842773, + "step": 17854 + }, + { + "epoch": 2.78, + "learning_rate": 1.0517537216660616e-06, + "logits/chosen": -1.9497898817062378, + "logits/rejected": -2.903179883956909, + "logps/chosen": -467.3262939453125, + "logps/rejected": -464.85595703125, + "loss": 0.2343, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.609642028808594, + "rewards/margins": 5.673755645751953, + "rewards/rejected": -18.283397674560547, + "step": 17855 + }, + { + "epoch": 2.78, + "learning_rate": 1.0510202811349137e-06, + "logits/chosen": -1.952074646949768, + "logits/rejected": -2.3570644855499268, + "logps/chosen": -334.78167724609375, + "logps/rejected": -383.0145263671875, + "loss": 0.0825, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.713260650634766, + "rewards/margins": 3.861276149749756, + "rewards/rejected": -17.57453727722168, + "step": 17856 + }, + { + "epoch": 2.78, + "learning_rate": 1.0502868406037658e-06, + "logits/chosen": -2.4858477115631104, + "logits/rejected": -2.4583470821380615, + "logps/chosen": -139.08734130859375, + "logps/rejected": -214.43338012695312, + "loss": 0.0648, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.282127380371094, + "rewards/margins": 4.392194747924805, + "rewards/rejected": -14.674322128295898, + "step": 17857 + }, + { + "epoch": 2.78, + "learning_rate": 1.0495534000726179e-06, + "logits/chosen": -2.2951087951660156, + "logits/rejected": -2.769594669342041, + "logps/chosen": -226.21458435058594, + "logps/rejected": -403.060302734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.30604076385498, + "rewards/margins": 15.395528793334961, + "rewards/rejected": -23.701570510864258, + "step": 17858 + }, + { + "epoch": 2.78, + "learning_rate": 1.04881995954147e-06, + "logits/chosen": -1.1327540874481201, + "logits/rejected": -2.7142865657806396, + "logps/chosen": -182.662353515625, + "logps/rejected": -709.89794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.923531532287598, + "rewards/margins": 13.832438468933105, + "rewards/rejected": -22.755970001220703, + "step": 17859 + }, + { + "epoch": 2.78, + "learning_rate": 1.048086519010322e-06, + "logits/chosen": -2.9278697967529297, + "logits/rejected": -2.9801836013793945, + "logps/chosen": -148.8838653564453, + "logps/rejected": -237.45648193359375, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.482536315917969, + "rewards/margins": 5.257114887237549, + "rewards/rejected": -17.73965072631836, + "step": 17860 + }, + { + "epoch": 2.78, + "learning_rate": 1.0473530784791741e-06, + "logits/chosen": -2.405078649520874, + "logits/rejected": -1.8216580152511597, + "logps/chosen": -231.8946075439453, + "logps/rejected": -226.48789978027344, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.74022388458252, + "rewards/margins": 7.665561199188232, + "rewards/rejected": -17.405784606933594, + "step": 17861 + }, + { + "epoch": 2.78, + "learning_rate": 1.0466196379480264e-06, + "logits/chosen": -2.609466791152954, + "logits/rejected": -2.9710798263549805, + "logps/chosen": -102.38148498535156, + "logps/rejected": -255.20608520507812, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.915827751159668, + "rewards/margins": 9.842001914978027, + "rewards/rejected": -15.757829666137695, + "step": 17862 + }, + { + "epoch": 2.78, + "learning_rate": 1.0458861974168785e-06, + "logits/chosen": -2.111900568008423, + "logits/rejected": -2.653521776199341, + "logps/chosen": -343.0074157714844, + "logps/rejected": -559.3794555664062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.021883010864258, + "rewards/margins": 11.961254119873047, + "rewards/rejected": -24.983137130737305, + "step": 17863 + }, + { + "epoch": 2.78, + "learning_rate": 1.0451527568857306e-06, + "logits/chosen": -2.5836853981018066, + "logits/rejected": -2.9585421085357666, + "logps/chosen": -126.85714721679688, + "logps/rejected": -281.2828369140625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.312356948852539, + "rewards/margins": 8.108907699584961, + "rewards/rejected": -14.4212646484375, + "step": 17864 + }, + { + "epoch": 2.78, + "learning_rate": 1.0444193163545827e-06, + "logits/chosen": -1.747690200805664, + "logits/rejected": -2.6297285556793213, + "logps/chosen": -330.5975341796875, + "logps/rejected": -400.01690673828125, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.787084579467773, + "rewards/margins": 9.013174057006836, + "rewards/rejected": -18.80025863647461, + "step": 17865 + }, + { + "epoch": 2.78, + "learning_rate": 1.043685875823435e-06, + "logits/chosen": -2.617751359939575, + "logits/rejected": -1.9980504512786865, + "logps/chosen": -416.9049072265625, + "logps/rejected": -365.66015625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.075326919555664, + "rewards/margins": 8.79922103881836, + "rewards/rejected": -20.874549865722656, + "step": 17866 + }, + { + "epoch": 2.78, + "learning_rate": 1.0429524352922869e-06, + "logits/chosen": -2.452572822570801, + "logits/rejected": -2.3486762046813965, + "logps/chosen": -274.7571105957031, + "logps/rejected": -495.7574462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.075425148010254, + "rewards/margins": 14.6941499710083, + "rewards/rejected": -24.769575119018555, + "step": 17867 + }, + { + "epoch": 2.78, + "learning_rate": 1.042218994761139e-06, + "logits/chosen": -1.0140999555587769, + "logits/rejected": -2.569859743118286, + "logps/chosen": -335.16253662109375, + "logps/rejected": -634.6676635742188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.131011962890625, + "rewards/margins": 14.705612182617188, + "rewards/rejected": -23.836624145507812, + "step": 17868 + }, + { + "epoch": 2.78, + "learning_rate": 1.041485554229991e-06, + "logits/chosen": -2.648987054824829, + "logits/rejected": -2.273498773574829, + "logps/chosen": -510.2880859375, + "logps/rejected": -563.770751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.9255952835083, + "rewards/margins": 13.833921432495117, + "rewards/rejected": -23.7595157623291, + "step": 17869 + }, + { + "epoch": 2.78, + "learning_rate": 1.0407521136988434e-06, + "logits/chosen": -1.1876163482666016, + "logits/rejected": -2.9255237579345703, + "logps/chosen": -280.18890380859375, + "logps/rejected": -677.3359985351562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.365817070007324, + "rewards/margins": 12.510075569152832, + "rewards/rejected": -18.875892639160156, + "step": 17870 + }, + { + "epoch": 2.78, + "learning_rate": 1.0400186731676954e-06, + "logits/chosen": -2.9000537395477295, + "logits/rejected": -2.1991798877716064, + "logps/chosen": -593.7587890625, + "logps/rejected": -610.0032958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.106245994567871, + "rewards/margins": 11.178914070129395, + "rewards/rejected": -19.285160064697266, + "step": 17871 + }, + { + "epoch": 2.78, + "learning_rate": 1.0392852326365475e-06, + "logits/chosen": -2.663398265838623, + "logits/rejected": -2.4917008876800537, + "logps/chosen": -637.9953002929688, + "logps/rejected": -866.549072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.822291374206543, + "rewards/margins": 15.90748119354248, + "rewards/rejected": -24.729772567749023, + "step": 17872 + }, + { + "epoch": 2.78, + "learning_rate": 1.0385517921053996e-06, + "logits/chosen": -2.6907265186309814, + "logits/rejected": -2.9578325748443604, + "logps/chosen": -106.96369934082031, + "logps/rejected": -185.20416259765625, + "loss": 0.0384, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.967574119567871, + "rewards/margins": 4.173313140869141, + "rewards/rejected": -12.140887260437012, + "step": 17873 + }, + { + "epoch": 2.78, + "learning_rate": 1.037818351574252e-06, + "logits/chosen": -2.039163827896118, + "logits/rejected": -2.609415292739868, + "logps/chosen": -194.82936096191406, + "logps/rejected": -395.3510437011719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.983154296875, + "rewards/margins": 12.407768249511719, + "rewards/rejected": -22.39092254638672, + "step": 17874 + }, + { + "epoch": 2.78, + "learning_rate": 1.037084911043104e-06, + "logits/chosen": -2.5815186500549316, + "logits/rejected": -2.797769069671631, + "logps/chosen": -436.8271789550781, + "logps/rejected": -389.2765197753906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.808163642883301, + "rewards/margins": 13.546500205993652, + "rewards/rejected": -20.354663848876953, + "step": 17875 + }, + { + "epoch": 2.78, + "learning_rate": 1.0363514705119561e-06, + "logits/chosen": -2.8195242881774902, + "logits/rejected": -1.8267171382904053, + "logps/chosen": -938.277587890625, + "logps/rejected": -408.5238952636719, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.157503128051758, + "rewards/margins": 9.633033752441406, + "rewards/rejected": -18.790536880493164, + "step": 17876 + }, + { + "epoch": 2.78, + "learning_rate": 1.035618029980808e-06, + "logits/chosen": -2.817521572113037, + "logits/rejected": -2.8677213191986084, + "logps/chosen": -360.7037048339844, + "logps/rejected": -638.1103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.090518951416016, + "rewards/margins": 12.203125953674316, + "rewards/rejected": -19.293643951416016, + "step": 17877 + }, + { + "epoch": 2.78, + "learning_rate": 1.0348845894496603e-06, + "logits/chosen": -2.902493476867676, + "logits/rejected": -2.6275899410247803, + "logps/chosen": -240.11990356445312, + "logps/rejected": -378.94329833984375, + "loss": 2.4571, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.213525772094727, + "rewards/margins": 4.023958206176758, + "rewards/rejected": -13.237483978271484, + "step": 17878 + }, + { + "epoch": 2.78, + "learning_rate": 1.0341511489185124e-06, + "logits/chosen": -2.569125175476074, + "logits/rejected": -1.5350761413574219, + "logps/chosen": -196.22372436523438, + "logps/rejected": -237.34750366210938, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.322429656982422, + "rewards/margins": 9.409845352172852, + "rewards/rejected": -17.732275009155273, + "step": 17879 + }, + { + "epoch": 2.78, + "learning_rate": 1.0334177083873645e-06, + "logits/chosen": -2.9268136024475098, + "logits/rejected": -2.5276169776916504, + "logps/chosen": -388.00054931640625, + "logps/rejected": -262.91339111328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.54538631439209, + "rewards/margins": 8.715194702148438, + "rewards/rejected": -16.26058006286621, + "step": 17880 + }, + { + "epoch": 2.78, + "learning_rate": 1.0326842678562166e-06, + "logits/chosen": -2.311455011367798, + "logits/rejected": -2.729727268218994, + "logps/chosen": -184.7518310546875, + "logps/rejected": -341.70892333984375, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.753742218017578, + "rewards/margins": 6.570134162902832, + "rewards/rejected": -18.323877334594727, + "step": 17881 + }, + { + "epoch": 2.78, + "learning_rate": 1.0319508273250686e-06, + "logits/chosen": -2.175919771194458, + "logits/rejected": -1.9163535833358765, + "logps/chosen": -263.8536682128906, + "logps/rejected": -360.2327575683594, + "loss": 0.5112, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.048794746398926, + "rewards/margins": 4.749052047729492, + "rewards/rejected": -16.797847747802734, + "step": 17882 + }, + { + "epoch": 2.78, + "learning_rate": 1.031217386793921e-06, + "logits/chosen": -2.6797354221343994, + "logits/rejected": -1.8829761743545532, + "logps/chosen": -291.0102233886719, + "logps/rejected": -384.1134033203125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.47055721282959, + "rewards/margins": 6.85176944732666, + "rewards/rejected": -13.32232666015625, + "step": 17883 + }, + { + "epoch": 2.78, + "learning_rate": 1.030483946262773e-06, + "logits/chosen": -2.3882131576538086, + "logits/rejected": -2.8028438091278076, + "logps/chosen": -272.2791748046875, + "logps/rejected": -404.68658447265625, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.860498428344727, + "rewards/margins": 6.090034484863281, + "rewards/rejected": -17.950532913208008, + "step": 17884 + }, + { + "epoch": 2.78, + "learning_rate": 1.0297505057316251e-06, + "logits/chosen": -1.1606539487838745, + "logits/rejected": -2.373670816421509, + "logps/chosen": -151.6732940673828, + "logps/rejected": -501.30322265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.854516983032227, + "rewards/margins": 9.462300300598145, + "rewards/rejected": -18.316818237304688, + "step": 17885 + }, + { + "epoch": 2.78, + "learning_rate": 1.0290170652004772e-06, + "logits/chosen": -1.3561649322509766, + "logits/rejected": -2.324308395385742, + "logps/chosen": -253.81182861328125, + "logps/rejected": -677.2523193359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.027034759521484, + "rewards/margins": 12.080965042114258, + "rewards/rejected": -21.107999801635742, + "step": 17886 + }, + { + "epoch": 2.78, + "learning_rate": 1.0282836246693293e-06, + "logits/chosen": -2.701007843017578, + "logits/rejected": -2.4576265811920166, + "logps/chosen": -104.66049194335938, + "logps/rejected": -355.98779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.447710037231445, + "rewards/margins": 10.978440284729004, + "rewards/rejected": -17.426151275634766, + "step": 17887 + }, + { + "epoch": 2.78, + "learning_rate": 1.0275501841381814e-06, + "logits/chosen": -2.624770164489746, + "logits/rejected": -2.471278190612793, + "logps/chosen": -426.03607177734375, + "logps/rejected": -355.8656005859375, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.451053619384766, + "rewards/margins": 6.305803298950195, + "rewards/rejected": -18.75685691833496, + "step": 17888 + }, + { + "epoch": 2.78, + "learning_rate": 1.0268167436070335e-06, + "logits/chosen": -2.3683888912200928, + "logits/rejected": -2.520646572113037, + "logps/chosen": -283.22344970703125, + "logps/rejected": -678.5331420898438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.070691108703613, + "rewards/margins": 10.884474754333496, + "rewards/rejected": -21.95516586303711, + "step": 17889 + }, + { + "epoch": 2.78, + "learning_rate": 1.0260833030758856e-06, + "logits/chosen": -2.0471994876861572, + "logits/rejected": -3.034517526626587, + "logps/chosen": -157.88592529296875, + "logps/rejected": -417.7831726074219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.78469181060791, + "rewards/margins": 11.136810302734375, + "rewards/rejected": -19.9215030670166, + "step": 17890 + }, + { + "epoch": 2.78, + "learning_rate": 1.0253498625447379e-06, + "logits/chosen": -2.12429141998291, + "logits/rejected": -2.7461376190185547, + "logps/chosen": -181.25814819335938, + "logps/rejected": -376.3826904296875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.128607749938965, + "rewards/margins": 11.410158157348633, + "rewards/rejected": -19.538766860961914, + "step": 17891 + }, + { + "epoch": 2.78, + "learning_rate": 1.02461642201359e-06, + "logits/chosen": -2.685767889022827, + "logits/rejected": -1.5941457748413086, + "logps/chosen": -262.31402587890625, + "logps/rejected": -386.0141296386719, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.983651161193848, + "rewards/margins": 7.3908162117004395, + "rewards/rejected": -17.374467849731445, + "step": 17892 + }, + { + "epoch": 2.78, + "learning_rate": 1.023882981482442e-06, + "logits/chosen": -1.2427783012390137, + "logits/rejected": -2.8348937034606934, + "logps/chosen": -567.9822998046875, + "logps/rejected": -762.0501098632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.193839073181152, + "rewards/margins": 15.191729545593262, + "rewards/rejected": -24.385568618774414, + "step": 17893 + }, + { + "epoch": 2.78, + "learning_rate": 1.0231495409512942e-06, + "logits/chosen": -2.3569228649139404, + "logits/rejected": -2.8722083568573, + "logps/chosen": -342.2558898925781, + "logps/rejected": -552.2398681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.136160850524902, + "rewards/margins": 11.3297119140625, + "rewards/rejected": -20.46587371826172, + "step": 17894 + }, + { + "epoch": 2.78, + "learning_rate": 1.0224161004201465e-06, + "logits/chosen": -0.9050878882408142, + "logits/rejected": -2.3474836349487305, + "logps/chosen": -184.82237243652344, + "logps/rejected": -354.0369567871094, + "loss": 0.5604, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.187949180603027, + "rewards/margins": 8.723661422729492, + "rewards/rejected": -19.911611557006836, + "step": 17895 + }, + { + "epoch": 2.78, + "learning_rate": 1.0216826598889985e-06, + "logits/chosen": -2.734267234802246, + "logits/rejected": -2.6167404651641846, + "logps/chosen": -368.0770263671875, + "logps/rejected": -496.94671630859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.840350151062012, + "rewards/margins": 12.133143424987793, + "rewards/rejected": -16.973493576049805, + "step": 17896 + }, + { + "epoch": 2.78, + "learning_rate": 1.0209492193578504e-06, + "logits/chosen": -2.0781426429748535, + "logits/rejected": -2.770634412765503, + "logps/chosen": -151.20626831054688, + "logps/rejected": -317.1488342285156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.733280181884766, + "rewards/margins": 9.225958824157715, + "rewards/rejected": -16.959239959716797, + "step": 17897 + }, + { + "epoch": 2.78, + "learning_rate": 1.0202157788267025e-06, + "logits/chosen": -1.6639208793640137, + "logits/rejected": -2.832050323486328, + "logps/chosen": -229.98631286621094, + "logps/rejected": -525.1594848632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.517482757568359, + "rewards/margins": 15.536224365234375, + "rewards/rejected": -22.053707122802734, + "step": 17898 + }, + { + "epoch": 2.78, + "learning_rate": 1.0194823382955548e-06, + "logits/chosen": -2.3575456142425537, + "logits/rejected": -2.3547935485839844, + "logps/chosen": -120.62368774414062, + "logps/rejected": -354.58502197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.126848220825195, + "rewards/margins": 10.372669219970703, + "rewards/rejected": -18.4995174407959, + "step": 17899 + }, + { + "epoch": 2.78, + "learning_rate": 1.018748897764407e-06, + "logits/chosen": -1.7544821500778198, + "logits/rejected": -2.5734505653381348, + "logps/chosen": -487.0518798828125, + "logps/rejected": -511.08868408203125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.490957260131836, + "rewards/margins": 6.656885623931885, + "rewards/rejected": -18.147842407226562, + "step": 17900 + }, + { + "epoch": 2.78, + "learning_rate": 1.018015457233259e-06, + "logits/chosen": -1.6915987730026245, + "logits/rejected": -2.5094311237335205, + "logps/chosen": -350.12835693359375, + "logps/rejected": -781.8958740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.288354873657227, + "rewards/margins": 14.143403053283691, + "rewards/rejected": -22.4317569732666, + "step": 17901 + }, + { + "epoch": 2.78, + "learning_rate": 1.017282016702111e-06, + "logits/chosen": -2.3531081676483154, + "logits/rejected": -2.866553783416748, + "logps/chosen": -388.02691650390625, + "logps/rejected": -321.5013122558594, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.421271324157715, + "rewards/margins": 7.351897239685059, + "rewards/rejected": -15.773168563842773, + "step": 17902 + }, + { + "epoch": 2.78, + "learning_rate": 1.0165485761709632e-06, + "logits/chosen": -2.3932480812072754, + "logits/rejected": -2.7333319187164307, + "logps/chosen": -342.4823303222656, + "logps/rejected": -488.2060241699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.827857971191406, + "rewards/margins": 10.792478561401367, + "rewards/rejected": -19.62033462524414, + "step": 17903 + }, + { + "epoch": 2.78, + "learning_rate": 1.0158151356398155e-06, + "logits/chosen": -2.7025742530822754, + "logits/rejected": -2.294645309448242, + "logps/chosen": -214.6318817138672, + "logps/rejected": -173.0899200439453, + "loss": 0.0458, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.622693061828613, + "rewards/margins": 7.241732597351074, + "rewards/rejected": -13.864425659179688, + "step": 17904 + }, + { + "epoch": 2.78, + "learning_rate": 1.0150816951086676e-06, + "logits/chosen": -2.2653534412384033, + "logits/rejected": -2.8631300926208496, + "logps/chosen": -305.0137023925781, + "logps/rejected": -457.659912109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.402912139892578, + "rewards/margins": 8.502095222473145, + "rewards/rejected": -19.905006408691406, + "step": 17905 + }, + { + "epoch": 2.78, + "learning_rate": 1.0143482545775197e-06, + "logits/chosen": -1.8161952495574951, + "logits/rejected": -2.414868116378784, + "logps/chosen": -334.1881103515625, + "logps/rejected": -447.7344665527344, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.526592254638672, + "rewards/margins": 9.629650115966797, + "rewards/rejected": -18.15624237060547, + "step": 17906 + }, + { + "epoch": 2.78, + "learning_rate": 1.0136148140463715e-06, + "logits/chosen": -2.892637014389038, + "logits/rejected": -1.9050953388214111, + "logps/chosen": -468.5028381347656, + "logps/rejected": -384.90911865234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2794647216796875, + "rewards/margins": 10.371585845947266, + "rewards/rejected": -16.651050567626953, + "step": 17907 + }, + { + "epoch": 2.79, + "learning_rate": 1.0128813735152238e-06, + "logits/chosen": -1.830361008644104, + "logits/rejected": -2.2393107414245605, + "logps/chosen": -281.03656005859375, + "logps/rejected": -390.5570068359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.738651752471924, + "rewards/margins": 8.418478012084961, + "rewards/rejected": -14.157129287719727, + "step": 17908 + }, + { + "epoch": 2.79, + "learning_rate": 1.012147932984076e-06, + "logits/chosen": -2.542776584625244, + "logits/rejected": -1.4211857318878174, + "logps/chosen": -563.059814453125, + "logps/rejected": -432.1380920410156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.018342971801758, + "rewards/margins": 12.240093231201172, + "rewards/rejected": -22.258434295654297, + "step": 17909 + }, + { + "epoch": 2.79, + "learning_rate": 1.011414492452928e-06, + "logits/chosen": -2.968520164489746, + "logits/rejected": -2.845799446105957, + "logps/chosen": -140.871337890625, + "logps/rejected": -264.276123046875, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.881952285766602, + "rewards/margins": 6.629378318786621, + "rewards/rejected": -14.511330604553223, + "step": 17910 + }, + { + "epoch": 2.79, + "learning_rate": 1.0106810519217801e-06, + "logits/chosen": -2.820206642150879, + "logits/rejected": -2.8051702976226807, + "logps/chosen": -534.2449340820312, + "logps/rejected": -411.6653137207031, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.47068977355957, + "rewards/margins": 7.629489898681641, + "rewards/rejected": -19.10017967224121, + "step": 17911 + }, + { + "epoch": 2.79, + "learning_rate": 1.0099476113906324e-06, + "logits/chosen": -3.069254159927368, + "logits/rejected": -2.967273473739624, + "logps/chosen": -385.2510986328125, + "logps/rejected": -376.4229736328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.024331092834473, + "rewards/margins": 8.907442092895508, + "rewards/rejected": -15.931774139404297, + "step": 17912 + }, + { + "epoch": 2.79, + "learning_rate": 1.0092141708594845e-06, + "logits/chosen": -2.2003912925720215, + "logits/rejected": -2.620492696762085, + "logps/chosen": -277.637451171875, + "logps/rejected": -382.3623352050781, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.188446044921875, + "rewards/margins": 6.842010498046875, + "rewards/rejected": -15.03045654296875, + "step": 17913 + }, + { + "epoch": 2.79, + "learning_rate": 1.0084807303283366e-06, + "logits/chosen": -2.4191970825195312, + "logits/rejected": -2.9212775230407715, + "logps/chosen": -430.6876525878906, + "logps/rejected": -476.2718505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.994583129882812, + "rewards/margins": 12.285826683044434, + "rewards/rejected": -24.280410766601562, + "step": 17914 + }, + { + "epoch": 2.79, + "learning_rate": 1.0077472897971887e-06, + "logits/chosen": -2.561743974685669, + "logits/rejected": -2.3502166271209717, + "logps/chosen": -300.03759765625, + "logps/rejected": -298.74700927734375, + "loss": 0.079, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.506767272949219, + "rewards/margins": 6.344702243804932, + "rewards/rejected": -13.851469993591309, + "step": 17915 + }, + { + "epoch": 2.79, + "learning_rate": 1.007013849266041e-06, + "logits/chosen": -3.0038647651672363, + "logits/rejected": -2.9155337810516357, + "logps/chosen": -232.23654174804688, + "logps/rejected": -243.78622436523438, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.796212196350098, + "rewards/margins": 5.676030158996582, + "rewards/rejected": -16.47224235534668, + "step": 17916 + }, + { + "epoch": 2.79, + "learning_rate": 1.0062804087348929e-06, + "logits/chosen": -1.2839930057525635, + "logits/rejected": -2.7046985626220703, + "logps/chosen": -165.36843872070312, + "logps/rejected": -373.8221130371094, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.130075454711914, + "rewards/margins": 8.303126335144043, + "rewards/rejected": -19.433202743530273, + "step": 17917 + }, + { + "epoch": 2.79, + "learning_rate": 1.005546968203745e-06, + "logits/chosen": -2.7628331184387207, + "logits/rejected": -1.7028220891952515, + "logps/chosen": -259.56951904296875, + "logps/rejected": -282.09942626953125, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.629044532775879, + "rewards/margins": 5.2969818115234375, + "rewards/rejected": -14.926026344299316, + "step": 17918 + }, + { + "epoch": 2.79, + "learning_rate": 1.004813527672597e-06, + "logits/chosen": -1.6117995977401733, + "logits/rejected": -2.7063751220703125, + "logps/chosen": -208.5064697265625, + "logps/rejected": -285.7164001464844, + "loss": 1.4202, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.318105697631836, + "rewards/margins": 3.949502468109131, + "rewards/rejected": -16.267608642578125, + "step": 17919 + }, + { + "epoch": 2.79, + "learning_rate": 1.0040800871414491e-06, + "logits/chosen": -2.9165966510772705, + "logits/rejected": -1.274120569229126, + "logps/chosen": -636.3851318359375, + "logps/rejected": -461.384521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.950189590454102, + "rewards/margins": 10.840035438537598, + "rewards/rejected": -16.790225982666016, + "step": 17920 + }, + { + "epoch": 2.79, + "learning_rate": 1.0033466466103014e-06, + "logits/chosen": -2.9031822681427, + "logits/rejected": -2.322406768798828, + "logps/chosen": -204.5029754638672, + "logps/rejected": -378.8423767089844, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.021678924560547, + "rewards/margins": 6.244812488555908, + "rewards/rejected": -17.266490936279297, + "step": 17921 + }, + { + "epoch": 2.79, + "learning_rate": 1.0026132060791535e-06, + "logits/chosen": -2.060826539993286, + "logits/rejected": -2.857051372528076, + "logps/chosen": -545.3541870117188, + "logps/rejected": -808.8424072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.039244651794434, + "rewards/margins": 14.41386890411377, + "rewards/rejected": -22.453113555908203, + "step": 17922 + }, + { + "epoch": 2.79, + "learning_rate": 1.0018797655480056e-06, + "logits/chosen": -2.512485980987549, + "logits/rejected": -2.6818325519561768, + "logps/chosen": -194.8377227783203, + "logps/rejected": -445.13525390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.3523530960083, + "rewards/margins": 9.146001815795898, + "rewards/rejected": -18.498355865478516, + "step": 17923 + }, + { + "epoch": 2.79, + "learning_rate": 1.0011463250168577e-06, + "logits/chosen": -1.6911542415618896, + "logits/rejected": -2.929506301879883, + "logps/chosen": -376.7449645996094, + "logps/rejected": -669.4219970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.754098892211914, + "rewards/margins": 13.183666229248047, + "rewards/rejected": -21.937767028808594, + "step": 17924 + }, + { + "epoch": 2.79, + "learning_rate": 1.00041288448571e-06, + "logits/chosen": -2.5400876998901367, + "logits/rejected": -1.923416256904602, + "logps/chosen": -296.05157470703125, + "logps/rejected": -442.070556640625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.106427192687988, + "rewards/margins": 9.538026809692383, + "rewards/rejected": -17.644454956054688, + "step": 17925 + }, + { + "epoch": 2.79, + "learning_rate": 9.996794439545619e-07, + "logits/chosen": -1.306219220161438, + "logits/rejected": -2.8119890689849854, + "logps/chosen": -167.7094268798828, + "logps/rejected": -464.36151123046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.165955543518066, + "rewards/margins": 9.534696578979492, + "rewards/rejected": -17.700651168823242, + "step": 17926 + }, + { + "epoch": 2.79, + "learning_rate": 9.98946003423414e-07, + "logits/chosen": -2.7842962741851807, + "logits/rejected": -2.3487460613250732, + "logps/chosen": -595.8124389648438, + "logps/rejected": -710.1351318359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.411259174346924, + "rewards/margins": 12.122838973999023, + "rewards/rejected": -19.53409767150879, + "step": 17927 + }, + { + "epoch": 2.79, + "learning_rate": 9.98212562892266e-07, + "logits/chosen": -2.466550350189209, + "logits/rejected": -2.863835096359253, + "logps/chosen": -372.0450439453125, + "logps/rejected": -267.3134765625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.714859008789062, + "rewards/margins": 7.4238176345825195, + "rewards/rejected": -17.138675689697266, + "step": 17928 + }, + { + "epoch": 2.79, + "learning_rate": 9.974791223611184e-07, + "logits/chosen": -2.6697843074798584, + "logits/rejected": -2.9952359199523926, + "logps/chosen": -395.913330078125, + "logps/rejected": -514.49560546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5352783203125, + "rewards/margins": 8.708568572998047, + "rewards/rejected": -15.243846893310547, + "step": 17929 + }, + { + "epoch": 2.79, + "learning_rate": 9.967456818299705e-07, + "logits/chosen": -2.6422202587127686, + "logits/rejected": -2.141770839691162, + "logps/chosen": -225.1732635498047, + "logps/rejected": -330.7861633300781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.133920669555664, + "rewards/margins": 13.210962295532227, + "rewards/rejected": -18.34488296508789, + "step": 17930 + }, + { + "epoch": 2.79, + "learning_rate": 9.960122412988225e-07, + "logits/chosen": -2.8999361991882324, + "logits/rejected": -2.517132043838501, + "logps/chosen": -243.72772216796875, + "logps/rejected": -659.7452392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.474510192871094, + "rewards/margins": 13.939362525939941, + "rewards/rejected": -20.41387176513672, + "step": 17931 + }, + { + "epoch": 2.79, + "learning_rate": 9.952788007676746e-07, + "logits/chosen": -2.7049503326416016, + "logits/rejected": -1.9594142436981201, + "logps/chosen": -347.05413818359375, + "logps/rejected": -459.427001953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.020990371704102, + "rewards/margins": 10.844707489013672, + "rewards/rejected": -19.865699768066406, + "step": 17932 + }, + { + "epoch": 2.79, + "learning_rate": 9.94545360236527e-07, + "logits/chosen": -2.2110142707824707, + "logits/rejected": -2.848249912261963, + "logps/chosen": -96.55857849121094, + "logps/rejected": -235.342529296875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.073857307434082, + "rewards/margins": 7.348638534545898, + "rewards/rejected": -14.422496795654297, + "step": 17933 + }, + { + "epoch": 2.79, + "learning_rate": 9.93811919705379e-07, + "logits/chosen": -2.6425397396087646, + "logits/rejected": -2.379755973815918, + "logps/chosen": -264.0007629394531, + "logps/rejected": -286.84490966796875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.351604461669922, + "rewards/margins": 9.296369552612305, + "rewards/rejected": -16.647974014282227, + "step": 17934 + }, + { + "epoch": 2.79, + "learning_rate": 9.930784791742311e-07, + "logits/chosen": -2.344090700149536, + "logits/rejected": -2.7638583183288574, + "logps/chosen": -157.67884826660156, + "logps/rejected": -401.73223876953125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.10859489440918, + "rewards/margins": 9.714366912841797, + "rewards/rejected": -16.822961807250977, + "step": 17935 + }, + { + "epoch": 2.79, + "learning_rate": 9.92345038643083e-07, + "logits/chosen": -1.94355046749115, + "logits/rejected": -2.51576566696167, + "logps/chosen": -203.68978881835938, + "logps/rejected": -601.3004150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.22558879852295, + "rewards/margins": 16.697696685791016, + "rewards/rejected": -26.92328643798828, + "step": 17936 + }, + { + "epoch": 2.79, + "learning_rate": 9.916115981119353e-07, + "logits/chosen": -2.615102767944336, + "logits/rejected": -2.4900577068328857, + "logps/chosen": -242.68045043945312, + "logps/rejected": -434.3837890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.716888427734375, + "rewards/margins": 10.788938522338867, + "rewards/rejected": -18.505828857421875, + "step": 17937 + }, + { + "epoch": 2.79, + "learning_rate": 9.908781575807874e-07, + "logits/chosen": -2.706186056137085, + "logits/rejected": -2.902925491333008, + "logps/chosen": -376.4104309082031, + "logps/rejected": -700.919189453125, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.312536239624023, + "rewards/margins": 8.432256698608398, + "rewards/rejected": -18.744792938232422, + "step": 17938 + }, + { + "epoch": 2.79, + "learning_rate": 9.901447170496395e-07, + "logits/chosen": -2.564429521560669, + "logits/rejected": -2.730905532836914, + "logps/chosen": -314.0662536621094, + "logps/rejected": -441.9878234863281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.673526763916016, + "rewards/margins": 10.741765975952148, + "rewards/rejected": -21.415292739868164, + "step": 17939 + }, + { + "epoch": 2.79, + "learning_rate": 9.894112765184916e-07, + "logits/chosen": -1.3689626455307007, + "logits/rejected": -2.852318048477173, + "logps/chosen": -244.6669921875, + "logps/rejected": -414.77667236328125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.529245376586914, + "rewards/margins": 7.410929203033447, + "rewards/rejected": -16.940174102783203, + "step": 17940 + }, + { + "epoch": 2.79, + "learning_rate": 9.886778359873437e-07, + "logits/chosen": -2.5856895446777344, + "logits/rejected": -2.6530463695526123, + "logps/chosen": -432.07275390625, + "logps/rejected": -419.40576171875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.558734893798828, + "rewards/margins": 7.993457317352295, + "rewards/rejected": -18.55219268798828, + "step": 17941 + }, + { + "epoch": 2.79, + "learning_rate": 9.87944395456196e-07, + "logits/chosen": -1.1948574781417847, + "logits/rejected": -2.1655704975128174, + "logps/chosen": -273.557373046875, + "logps/rejected": -595.00048828125, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.8927001953125, + "rewards/margins": 14.911467552185059, + "rewards/rejected": -25.804168701171875, + "step": 17942 + }, + { + "epoch": 2.79, + "learning_rate": 9.87210954925048e-07, + "logits/chosen": -1.9050135612487793, + "logits/rejected": -2.744615316390991, + "logps/chosen": -399.1789245605469, + "logps/rejected": -478.8179016113281, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.008272171020508, + "rewards/margins": 10.99288558959961, + "rewards/rejected": -20.001157760620117, + "step": 17943 + }, + { + "epoch": 2.79, + "learning_rate": 9.864775143939001e-07, + "logits/chosen": -2.8538098335266113, + "logits/rejected": -2.5812387466430664, + "logps/chosen": -237.58450317382812, + "logps/rejected": -452.9874267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.08005142211914, + "rewards/margins": 13.554162979125977, + "rewards/rejected": -22.634214401245117, + "step": 17944 + }, + { + "epoch": 2.79, + "learning_rate": 9.857440738627522e-07, + "logits/chosen": -2.2949814796447754, + "logits/rejected": -2.869274854660034, + "logps/chosen": -151.45541381835938, + "logps/rejected": -318.2373046875, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.20687484741211, + "rewards/margins": 5.358438491821289, + "rewards/rejected": -15.565313339233398, + "step": 17945 + }, + { + "epoch": 2.79, + "learning_rate": 9.850106333316043e-07, + "logits/chosen": -2.545936346054077, + "logits/rejected": -2.669468641281128, + "logps/chosen": -294.3115539550781, + "logps/rejected": -405.97735595703125, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.634886741638184, + "rewards/margins": 7.47132682800293, + "rewards/rejected": -16.106212615966797, + "step": 17946 + }, + { + "epoch": 2.79, + "learning_rate": 9.842771928004564e-07, + "logits/chosen": -2.6930956840515137, + "logits/rejected": -2.127204656600952, + "logps/chosen": -313.19732666015625, + "logps/rejected": -418.3066711425781, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.830001831054688, + "rewards/margins": 7.523999214172363, + "rewards/rejected": -17.354000091552734, + "step": 17947 + }, + { + "epoch": 2.79, + "learning_rate": 9.835437522693085e-07, + "logits/chosen": -2.54107403755188, + "logits/rejected": -2.642005205154419, + "logps/chosen": -241.2647705078125, + "logps/rejected": -395.4654541015625, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.080994606018066, + "rewards/margins": 6.730841636657715, + "rewards/rejected": -17.81183624267578, + "step": 17948 + }, + { + "epoch": 2.79, + "learning_rate": 9.828103117381606e-07, + "logits/chosen": -2.1215076446533203, + "logits/rejected": -2.6008169651031494, + "logps/chosen": -339.0779113769531, + "logps/rejected": -562.57470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.476590156555176, + "rewards/margins": 11.072644233703613, + "rewards/rejected": -16.54923439025879, + "step": 17949 + }, + { + "epoch": 2.79, + "learning_rate": 9.820768712070129e-07, + "logits/chosen": -2.731398105621338, + "logits/rejected": -2.0462377071380615, + "logps/chosen": -271.8811950683594, + "logps/rejected": -360.0792236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.994010925292969, + "rewards/margins": 10.49747085571289, + "rewards/rejected": -20.49148178100586, + "step": 17950 + }, + { + "epoch": 2.79, + "learning_rate": 9.81343430675865e-07, + "logits/chosen": -2.238419771194458, + "logits/rejected": -1.9106676578521729, + "logps/chosen": -179.2320556640625, + "logps/rejected": -310.8622741699219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.177031517028809, + "rewards/margins": 11.310546875, + "rewards/rejected": -21.487579345703125, + "step": 17951 + }, + { + "epoch": 2.79, + "learning_rate": 9.80609990144717e-07, + "logits/chosen": -2.714944839477539, + "logits/rejected": -2.310410976409912, + "logps/chosen": -410.04620361328125, + "logps/rejected": -196.79241943359375, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.549818515777588, + "rewards/margins": 7.504764080047607, + "rewards/rejected": -13.054582595825195, + "step": 17952 + }, + { + "epoch": 2.79, + "learning_rate": 9.798765496135692e-07, + "logits/chosen": -2.577333450317383, + "logits/rejected": -1.6521075963974, + "logps/chosen": -218.89781188964844, + "logps/rejected": -400.4483337402344, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.527227401733398, + "rewards/margins": 8.729368209838867, + "rewards/rejected": -18.256595611572266, + "step": 17953 + }, + { + "epoch": 2.79, + "learning_rate": 9.791431090824215e-07, + "logits/chosen": -2.0449254512786865, + "logits/rejected": -2.3206825256347656, + "logps/chosen": -260.20989990234375, + "logps/rejected": -535.2296752929688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.247221946716309, + "rewards/margins": 13.244062423706055, + "rewards/rejected": -25.491283416748047, + "step": 17954 + }, + { + "epoch": 2.79, + "learning_rate": 9.784096685512736e-07, + "logits/chosen": -2.616485357284546, + "logits/rejected": -2.515001058578491, + "logps/chosen": -166.88357543945312, + "logps/rejected": -326.0327453613281, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.373590469360352, + "rewards/margins": 7.26319694519043, + "rewards/rejected": -19.63678741455078, + "step": 17955 + }, + { + "epoch": 2.79, + "learning_rate": 9.776762280201254e-07, + "logits/chosen": -2.6025032997131348, + "logits/rejected": -2.6912078857421875, + "logps/chosen": -315.5488586425781, + "logps/rejected": -574.758544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.889063835144043, + "rewards/margins": 15.618012428283691, + "rewards/rejected": -23.507076263427734, + "step": 17956 + }, + { + "epoch": 2.79, + "learning_rate": 9.769427874889775e-07, + "logits/chosen": -2.0185365676879883, + "logits/rejected": -1.660963773727417, + "logps/chosen": -196.63865661621094, + "logps/rejected": -387.58782958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.685773849487305, + "rewards/margins": 11.25781536102295, + "rewards/rejected": -20.943588256835938, + "step": 17957 + }, + { + "epoch": 2.79, + "learning_rate": 9.762093469578298e-07, + "logits/chosen": -2.34456729888916, + "logits/rejected": -2.687145233154297, + "logps/chosen": -182.877197265625, + "logps/rejected": -499.1310119628906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.532594680786133, + "rewards/margins": 11.26797103881836, + "rewards/rejected": -20.800565719604492, + "step": 17958 + }, + { + "epoch": 2.79, + "learning_rate": 9.75475906426682e-07, + "logits/chosen": -2.6058521270751953, + "logits/rejected": -2.589496612548828, + "logps/chosen": -617.907470703125, + "logps/rejected": -582.108154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.081667900085449, + "rewards/margins": 12.068008422851562, + "rewards/rejected": -17.149677276611328, + "step": 17959 + }, + { + "epoch": 2.79, + "learning_rate": 9.74742465895534e-07, + "logits/chosen": -2.600004196166992, + "logits/rejected": -2.7912757396698, + "logps/chosen": -564.0848388671875, + "logps/rejected": -689.3924560546875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.429895401000977, + "rewards/margins": 9.192055702209473, + "rewards/rejected": -17.621952056884766, + "step": 17960 + }, + { + "epoch": 2.79, + "learning_rate": 9.74009025364386e-07, + "logits/chosen": -2.980104684829712, + "logits/rejected": -2.850377082824707, + "logps/chosen": -149.13748168945312, + "logps/rejected": -249.36532592773438, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.19621467590332, + "rewards/margins": 8.03741455078125, + "rewards/rejected": -16.23362922668457, + "step": 17961 + }, + { + "epoch": 2.79, + "learning_rate": 9.732755848332382e-07, + "logits/chosen": -1.8113127946853638, + "logits/rejected": -2.3888068199157715, + "logps/chosen": -502.9280700683594, + "logps/rejected": -1099.7081298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.19780445098877, + "rewards/margins": 23.683029174804688, + "rewards/rejected": -31.88083267211914, + "step": 17962 + }, + { + "epoch": 2.79, + "learning_rate": 9.725421443020905e-07, + "logits/chosen": -2.7369680404663086, + "logits/rejected": -2.8150947093963623, + "logps/chosen": -215.00474548339844, + "logps/rejected": -359.77203369140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.809903144836426, + "rewards/margins": 9.193153381347656, + "rewards/rejected": -17.003055572509766, + "step": 17963 + }, + { + "epoch": 2.79, + "learning_rate": 9.718087037709426e-07, + "logits/chosen": -2.229846477508545, + "logits/rejected": -2.5995028018951416, + "logps/chosen": -819.8298950195312, + "logps/rejected": -774.969970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.736373901367188, + "rewards/margins": 12.053762435913086, + "rewards/rejected": -20.790138244628906, + "step": 17964 + }, + { + "epoch": 2.79, + "learning_rate": 9.710752632397947e-07, + "logits/chosen": -2.519216299057007, + "logits/rejected": -2.328942060470581, + "logps/chosen": -228.20724487304688, + "logps/rejected": -422.23486328125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.0740966796875, + "rewards/margins": 7.032096862792969, + "rewards/rejected": -19.10619354248047, + "step": 17965 + }, + { + "epoch": 2.79, + "learning_rate": 9.703418227086465e-07, + "logits/chosen": -2.2382700443267822, + "logits/rejected": -1.8385921716690063, + "logps/chosen": -181.86276245117188, + "logps/rejected": -278.60833740234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.67471694946289, + "rewards/margins": 9.571186065673828, + "rewards/rejected": -22.24590301513672, + "step": 17966 + }, + { + "epoch": 2.79, + "learning_rate": 9.696083821774988e-07, + "logits/chosen": -2.4881463050842285, + "logits/rejected": -2.9473955631256104, + "logps/chosen": -149.18313598632812, + "logps/rejected": -304.3385314941406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.257291793823242, + "rewards/margins": 9.415534973144531, + "rewards/rejected": -18.672828674316406, + "step": 17967 + }, + { + "epoch": 2.79, + "learning_rate": 9.68874941646351e-07, + "logits/chosen": -2.818779945373535, + "logits/rejected": -2.6483023166656494, + "logps/chosen": -264.6043395996094, + "logps/rejected": -336.8042297363281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.17805290222168, + "rewards/margins": 12.353914260864258, + "rewards/rejected": -20.531967163085938, + "step": 17968 + }, + { + "epoch": 2.79, + "learning_rate": 9.68141501115203e-07, + "logits/chosen": -2.1101198196411133, + "logits/rejected": -2.3272578716278076, + "logps/chosen": -174.31298828125, + "logps/rejected": -356.83209228515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.2078275680542, + "rewards/margins": 10.319127082824707, + "rewards/rejected": -19.526954650878906, + "step": 17969 + }, + { + "epoch": 2.79, + "learning_rate": 9.674080605840551e-07, + "logits/chosen": -1.3668769598007202, + "logits/rejected": -2.174098491668701, + "logps/chosen": -241.1427001953125, + "logps/rejected": -545.0145263671875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.950377464294434, + "rewards/margins": 9.241655349731445, + "rewards/rejected": -16.192033767700195, + "step": 17970 + }, + { + "epoch": 2.79, + "learning_rate": 9.666746200529074e-07, + "logits/chosen": -2.4151926040649414, + "logits/rejected": -1.4899526834487915, + "logps/chosen": -264.5552673339844, + "logps/rejected": -355.6837463378906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6941819190979, + "rewards/margins": 13.416019439697266, + "rewards/rejected": -20.110200881958008, + "step": 17971 + }, + { + "epoch": 2.8, + "learning_rate": 9.659411795217595e-07, + "logits/chosen": -0.9356900453567505, + "logits/rejected": -2.0983774662017822, + "logps/chosen": -291.08770751953125, + "logps/rejected": -644.7203369140625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.742313385009766, + "rewards/margins": 18.342670440673828, + "rewards/rejected": -29.084985733032227, + "step": 17972 + }, + { + "epoch": 2.8, + "learning_rate": 9.652077389906116e-07, + "logits/chosen": -2.399075746536255, + "logits/rejected": -2.5297181606292725, + "logps/chosen": -247.07666015625, + "logps/rejected": -410.63580322265625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.839316368103027, + "rewards/margins": 7.685294151306152, + "rewards/rejected": -17.52461051940918, + "step": 17973 + }, + { + "epoch": 2.8, + "learning_rate": 9.644742984594637e-07, + "logits/chosen": -2.8378686904907227, + "logits/rejected": -1.923135757446289, + "logps/chosen": -615.5248413085938, + "logps/rejected": -410.13037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.838523864746094, + "rewards/margins": 9.984186172485352, + "rewards/rejected": -16.822710037231445, + "step": 17974 + }, + { + "epoch": 2.8, + "learning_rate": 9.63740857928316e-07, + "logits/chosen": -2.9079337120056152, + "logits/rejected": -2.891951560974121, + "logps/chosen": -164.850830078125, + "logps/rejected": -311.024169921875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.414901733398438, + "rewards/margins": 7.32404088973999, + "rewards/rejected": -16.738941192626953, + "step": 17975 + }, + { + "epoch": 2.8, + "learning_rate": 9.630074173971679e-07, + "logits/chosen": -2.276384115219116, + "logits/rejected": -2.2954912185668945, + "logps/chosen": -310.41375732421875, + "logps/rejected": -538.386474609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.014131546020508, + "rewards/margins": 10.592140197753906, + "rewards/rejected": -19.606271743774414, + "step": 17976 + }, + { + "epoch": 2.8, + "learning_rate": 9.6227397686602e-07, + "logits/chosen": -1.7718284130096436, + "logits/rejected": -2.2137720584869385, + "logps/chosen": -184.99325561523438, + "logps/rejected": -383.4434814453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.345696449279785, + "rewards/margins": 11.866607666015625, + "rewards/rejected": -23.212303161621094, + "step": 17977 + }, + { + "epoch": 2.8, + "learning_rate": 9.61540536334872e-07, + "logits/chosen": -2.1402337551116943, + "logits/rejected": -2.7846012115478516, + "logps/chosen": -236.29986572265625, + "logps/rejected": -390.273681640625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.563830375671387, + "rewards/margins": 8.040105819702148, + "rewards/rejected": -14.603935241699219, + "step": 17978 + }, + { + "epoch": 2.8, + "learning_rate": 9.608070958037243e-07, + "logits/chosen": -1.785421371459961, + "logits/rejected": -2.25709867477417, + "logps/chosen": -180.34188842773438, + "logps/rejected": -411.43597412109375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.623797416687012, + "rewards/margins": 9.762247085571289, + "rewards/rejected": -20.386043548583984, + "step": 17979 + }, + { + "epoch": 2.8, + "learning_rate": 9.600736552725764e-07, + "logits/chosen": -2.2015647888183594, + "logits/rejected": -2.844695806503296, + "logps/chosen": -239.71426391601562, + "logps/rejected": -306.7723083496094, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.757198333740234, + "rewards/margins": 7.242998123168945, + "rewards/rejected": -16.00019645690918, + "step": 17980 + }, + { + "epoch": 2.8, + "learning_rate": 9.593402147414285e-07, + "logits/chosen": -1.725717306137085, + "logits/rejected": -2.192582130432129, + "logps/chosen": -220.70108032226562, + "logps/rejected": -425.21234130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.78035831451416, + "rewards/margins": 10.788850784301758, + "rewards/rejected": -20.569210052490234, + "step": 17981 + }, + { + "epoch": 2.8, + "learning_rate": 9.586067742102806e-07, + "logits/chosen": -2.811027765274048, + "logits/rejected": -1.4530558586120605, + "logps/chosen": -635.3455200195312, + "logps/rejected": -655.7379150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.56381607055664, + "rewards/margins": 13.486045837402344, + "rewards/rejected": -23.049861907958984, + "step": 17982 + }, + { + "epoch": 2.8, + "learning_rate": 9.578733336791327e-07, + "logits/chosen": -1.074767827987671, + "logits/rejected": -2.7043495178222656, + "logps/chosen": -174.07667541503906, + "logps/rejected": -562.912353515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.980315208435059, + "rewards/margins": 13.505006790161133, + "rewards/rejected": -22.485321044921875, + "step": 17983 + }, + { + "epoch": 2.8, + "learning_rate": 9.57139893147985e-07, + "logits/chosen": -1.8891116380691528, + "logits/rejected": -2.007491111755371, + "logps/chosen": -249.59132385253906, + "logps/rejected": -373.22930908203125, + "loss": 0.2696, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.122879028320312, + "rewards/margins": 9.425565719604492, + "rewards/rejected": -20.548444747924805, + "step": 17984 + }, + { + "epoch": 2.8, + "learning_rate": 9.56406452616837e-07, + "logits/chosen": -2.588411331176758, + "logits/rejected": -1.8360435962677002, + "logps/chosen": -411.27288818359375, + "logps/rejected": -385.9090576171875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.997615814208984, + "rewards/margins": 8.876777648925781, + "rewards/rejected": -17.874393463134766, + "step": 17985 + }, + { + "epoch": 2.8, + "learning_rate": 9.55673012085689e-07, + "logits/chosen": -1.434786081314087, + "logits/rejected": -2.286902666091919, + "logps/chosen": -161.86480712890625, + "logps/rejected": -424.5502014160156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.294794082641602, + "rewards/margins": 13.796182632446289, + "rewards/rejected": -22.09097671508789, + "step": 17986 + }, + { + "epoch": 2.8, + "learning_rate": 9.54939571554541e-07, + "logits/chosen": -2.877058982849121, + "logits/rejected": -2.6423709392547607, + "logps/chosen": -212.29246520996094, + "logps/rejected": -242.51895141601562, + "loss": 0.273, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.27524471282959, + "rewards/margins": 3.4050979614257812, + "rewards/rejected": -14.680342674255371, + "step": 17987 + }, + { + "epoch": 2.8, + "learning_rate": 9.542061310233934e-07, + "logits/chosen": -2.5341708660125732, + "logits/rejected": -2.557363271713257, + "logps/chosen": -258.6500244140625, + "logps/rejected": -253.10418701171875, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.252457618713379, + "rewards/margins": 6.864497661590576, + "rewards/rejected": -17.116954803466797, + "step": 17988 + }, + { + "epoch": 2.8, + "learning_rate": 9.534726904922455e-07, + "logits/chosen": -0.8962772488594055, + "logits/rejected": -2.5272011756896973, + "logps/chosen": -220.2557373046875, + "logps/rejected": -605.5191650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.631591796875, + "rewards/margins": 14.522964477539062, + "rewards/rejected": -25.154556274414062, + "step": 17989 + }, + { + "epoch": 2.8, + "learning_rate": 9.527392499610975e-07, + "logits/chosen": -2.9173319339752197, + "logits/rejected": -2.881077289581299, + "logps/chosen": -210.98011779785156, + "logps/rejected": -404.03125, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.84918212890625, + "rewards/margins": 7.488743305206299, + "rewards/rejected": -18.33792495727539, + "step": 17990 + }, + { + "epoch": 2.8, + "learning_rate": 9.520058094299497e-07, + "logits/chosen": -2.347529649734497, + "logits/rejected": -2.8511247634887695, + "logps/chosen": -175.03404235839844, + "logps/rejected": -448.9425354003906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.736196517944336, + "rewards/margins": 16.10487937927246, + "rewards/rejected": -25.841075897216797, + "step": 17991 + }, + { + "epoch": 2.8, + "learning_rate": 9.512723688988018e-07, + "logits/chosen": -2.7236204147338867, + "logits/rejected": -2.7615699768066406, + "logps/chosen": -93.98905181884766, + "logps/rejected": -300.1799621582031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.917378902435303, + "rewards/margins": 11.261778831481934, + "rewards/rejected": -18.179157257080078, + "step": 17992 + }, + { + "epoch": 2.8, + "learning_rate": 9.50538928367654e-07, + "logits/chosen": -1.9786149263381958, + "logits/rejected": -2.5908894538879395, + "logps/chosen": -127.8010025024414, + "logps/rejected": -324.4827880859375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.500185012817383, + "rewards/margins": 7.765928268432617, + "rewards/rejected": -17.26611328125, + "step": 17993 + }, + { + "epoch": 2.8, + "learning_rate": 9.498054878365061e-07, + "logits/chosen": -2.316460132598877, + "logits/rejected": -2.587064027786255, + "logps/chosen": -115.66909790039062, + "logps/rejected": -344.851806640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.680154800415039, + "rewards/margins": 10.300056457519531, + "rewards/rejected": -20.980213165283203, + "step": 17994 + }, + { + "epoch": 2.8, + "learning_rate": 9.490720473053581e-07, + "logits/chosen": -2.988590717315674, + "logits/rejected": -1.4603770971298218, + "logps/chosen": -364.1720886230469, + "logps/rejected": -202.0572509765625, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.261226654052734, + "rewards/margins": 5.768043041229248, + "rewards/rejected": -14.02927017211914, + "step": 17995 + }, + { + "epoch": 2.8, + "learning_rate": 9.483386067742102e-07, + "logits/chosen": -0.7686055302619934, + "logits/rejected": -2.573240041732788, + "logps/chosen": -162.21922302246094, + "logps/rejected": -618.8676147460938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.358505249023438, + "rewards/margins": 12.042299270629883, + "rewards/rejected": -24.40080451965332, + "step": 17996 + }, + { + "epoch": 2.8, + "learning_rate": 9.476051662430624e-07, + "logits/chosen": -1.7480956315994263, + "logits/rejected": -2.7050936222076416, + "logps/chosen": -303.0838317871094, + "logps/rejected": -430.62371826171875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.031822204589844, + "rewards/margins": 6.710933685302734, + "rewards/rejected": -18.742755889892578, + "step": 17997 + }, + { + "epoch": 2.8, + "learning_rate": 9.468717257119145e-07, + "logits/chosen": -2.767245292663574, + "logits/rejected": -2.633267641067505, + "logps/chosen": -156.81979370117188, + "logps/rejected": -304.12677001953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.752872467041016, + "rewards/margins": 7.942705154418945, + "rewards/rejected": -17.69557762145996, + "step": 17998 + }, + { + "epoch": 2.8, + "learning_rate": 9.461382851807667e-07, + "logits/chosen": -1.1105611324310303, + "logits/rejected": -2.3451504707336426, + "logps/chosen": -286.3665771484375, + "logps/rejected": -637.72265625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -16.235614776611328, + "rewards/margins": 7.886507034301758, + "rewards/rejected": -24.122121810913086, + "step": 17999 + }, + { + "epoch": 2.8, + "learning_rate": 9.454048446496188e-07, + "logits/chosen": -1.9947080612182617, + "logits/rejected": -2.78342866897583, + "logps/chosen": -185.42337036132812, + "logps/rejected": -527.206787109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.647138595581055, + "rewards/margins": 9.405338287353516, + "rewards/rejected": -18.05247688293457, + "step": 18000 + }, + { + "epoch": 2.8, + "learning_rate": 9.446714041184709e-07, + "logits/chosen": -3.0087697505950928, + "logits/rejected": -2.8520407676696777, + "logps/chosen": -304.2268981933594, + "logps/rejected": -545.8073120117188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.407915115356445, + "rewards/margins": 12.561872482299805, + "rewards/rejected": -20.96978759765625, + "step": 18001 + }, + { + "epoch": 2.8, + "learning_rate": 9.439379635873231e-07, + "logits/chosen": -1.4886233806610107, + "logits/rejected": -2.3953166007995605, + "logps/chosen": -198.9901885986328, + "logps/rejected": -390.4723205566406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.846033096313477, + "rewards/margins": 11.406109809875488, + "rewards/rejected": -23.25214385986328, + "step": 18002 + }, + { + "epoch": 2.8, + "learning_rate": 9.432045230561751e-07, + "logits/chosen": -2.8354969024658203, + "logits/rejected": -2.7371156215667725, + "logps/chosen": -431.40521240234375, + "logps/rejected": -827.08935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.721532821655273, + "rewards/margins": 18.874584197998047, + "rewards/rejected": -30.59611701965332, + "step": 18003 + }, + { + "epoch": 2.8, + "learning_rate": 9.424710825250273e-07, + "logits/chosen": -1.8211873769760132, + "logits/rejected": -2.6411681175231934, + "logps/chosen": -478.683837890625, + "logps/rejected": -702.1356811523438, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.156545639038086, + "rewards/margins": 8.722816467285156, + "rewards/rejected": -19.879362106323242, + "step": 18004 + }, + { + "epoch": 2.8, + "learning_rate": 9.417376419938792e-07, + "logits/chosen": -1.885526180267334, + "logits/rejected": -2.5780482292175293, + "logps/chosen": -200.29031372070312, + "logps/rejected": -328.58355712890625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.842355728149414, + "rewards/margins": 8.333503723144531, + "rewards/rejected": -20.175859451293945, + "step": 18005 + }, + { + "epoch": 2.8, + "learning_rate": 9.410042014627314e-07, + "logits/chosen": -2.8834338188171387, + "logits/rejected": -2.9370312690734863, + "logps/chosen": -89.41546630859375, + "logps/rejected": -221.63095092773438, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.720622539520264, + "rewards/margins": 8.108903884887695, + "rewards/rejected": -14.8295259475708, + "step": 18006 + }, + { + "epoch": 2.8, + "learning_rate": 9.402707609315835e-07, + "logits/chosen": -2.3108620643615723, + "logits/rejected": -2.2854154109954834, + "logps/chosen": -454.48065185546875, + "logps/rejected": -484.052001953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.260391235351562, + "rewards/margins": 8.484054565429688, + "rewards/rejected": -21.74444580078125, + "step": 18007 + }, + { + "epoch": 2.8, + "learning_rate": 9.395373204004357e-07, + "logits/chosen": -2.518681764602661, + "logits/rejected": -1.8367481231689453, + "logps/chosen": -509.9787292480469, + "logps/rejected": -482.9580993652344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.728706359863281, + "rewards/margins": 10.596434593200684, + "rewards/rejected": -19.32514190673828, + "step": 18008 + }, + { + "epoch": 2.8, + "learning_rate": 9.388038798692878e-07, + "logits/chosen": -2.3826231956481934, + "logits/rejected": -2.793516159057617, + "logps/chosen": -763.9739990234375, + "logps/rejected": -708.7821044921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.5831298828125, + "rewards/margins": 15.07429313659668, + "rewards/rejected": -26.65742301940918, + "step": 18009 + }, + { + "epoch": 2.8, + "learning_rate": 9.3807043933814e-07, + "logits/chosen": -1.463958978652954, + "logits/rejected": -2.400376796722412, + "logps/chosen": -198.48916625976562, + "logps/rejected": -389.2650146484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.268823623657227, + "rewards/margins": 9.220130920410156, + "rewards/rejected": -21.488956451416016, + "step": 18010 + }, + { + "epoch": 2.8, + "learning_rate": 9.373369988069921e-07, + "logits/chosen": -1.8719993829727173, + "logits/rejected": -2.481231927871704, + "logps/chosen": -287.3067932128906, + "logps/rejected": -342.43829345703125, + "loss": 0.0562, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.856903076171875, + "rewards/margins": 10.553860664367676, + "rewards/rejected": -20.410764694213867, + "step": 18011 + }, + { + "epoch": 2.8, + "learning_rate": 9.366035582758443e-07, + "logits/chosen": -2.7635746002197266, + "logits/rejected": -2.4175617694854736, + "logps/chosen": -234.85662841796875, + "logps/rejected": -207.67672729492188, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.577239990234375, + "rewards/margins": 8.598149299621582, + "rewards/rejected": -16.175390243530273, + "step": 18012 + }, + { + "epoch": 2.8, + "learning_rate": 9.358701177446964e-07, + "logits/chosen": -2.329592704772949, + "logits/rejected": -2.8780016899108887, + "logps/chosen": -242.870361328125, + "logps/rejected": -379.966064453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.978836059570312, + "rewards/margins": 11.519211769104004, + "rewards/rejected": -20.498046875, + "step": 18013 + }, + { + "epoch": 2.8, + "learning_rate": 9.351366772135486e-07, + "logits/chosen": -2.121861696243286, + "logits/rejected": -2.55377459526062, + "logps/chosen": -319.5050964355469, + "logps/rejected": -375.0644836425781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.882678985595703, + "rewards/margins": 9.163239479064941, + "rewards/rejected": -17.045917510986328, + "step": 18014 + }, + { + "epoch": 2.8, + "learning_rate": 9.344032366824004e-07, + "logits/chosen": -2.5478336811065674, + "logits/rejected": -2.9100844860076904, + "logps/chosen": -265.2740173339844, + "logps/rejected": -501.89892578125, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.705323219299316, + "rewards/margins": 8.849056243896484, + "rewards/rejected": -21.554380416870117, + "step": 18015 + }, + { + "epoch": 2.8, + "learning_rate": 9.336697961512526e-07, + "logits/chosen": -2.774714469909668, + "logits/rejected": -2.6575567722320557, + "logps/chosen": -209.1455078125, + "logps/rejected": -284.8194580078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.912701606750488, + "rewards/margins": 9.858597755432129, + "rewards/rejected": -15.771299362182617, + "step": 18016 + }, + { + "epoch": 2.8, + "learning_rate": 9.329363556201047e-07, + "logits/chosen": -2.593559503555298, + "logits/rejected": -2.8074944019317627, + "logps/chosen": -145.15542602539062, + "logps/rejected": -224.05186462402344, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.596345901489258, + "rewards/margins": 5.637956142425537, + "rewards/rejected": -14.234302520751953, + "step": 18017 + }, + { + "epoch": 2.8, + "learning_rate": 9.322029150889569e-07, + "logits/chosen": -2.627920150756836, + "logits/rejected": -3.0144004821777344, + "logps/chosen": -351.2097473144531, + "logps/rejected": -615.3184204101562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.65975284576416, + "rewards/margins": 16.083200454711914, + "rewards/rejected": -23.74295425415039, + "step": 18018 + }, + { + "epoch": 2.8, + "learning_rate": 9.31469474557809e-07, + "logits/chosen": -2.818108558654785, + "logits/rejected": -2.179248332977295, + "logps/chosen": -382.1551818847656, + "logps/rejected": -398.6361083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.90802526473999, + "rewards/margins": 10.332658767700195, + "rewards/rejected": -18.240684509277344, + "step": 18019 + }, + { + "epoch": 2.8, + "learning_rate": 9.307360340266612e-07, + "logits/chosen": -2.0108821392059326, + "logits/rejected": -2.756101608276367, + "logps/chosen": -400.781005859375, + "logps/rejected": -658.05712890625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.1331787109375, + "rewards/margins": 8.80063247680664, + "rewards/rejected": -22.93381118774414, + "step": 18020 + }, + { + "epoch": 2.8, + "learning_rate": 9.300025934955133e-07, + "logits/chosen": -2.8122406005859375, + "logits/rejected": -2.1968371868133545, + "logps/chosen": -459.0757751464844, + "logps/rejected": -446.5057067871094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.471488952636719, + "rewards/margins": 15.430326461791992, + "rewards/rejected": -24.901817321777344, + "step": 18021 + }, + { + "epoch": 2.8, + "learning_rate": 9.292691529643654e-07, + "logits/chosen": -2.942938804626465, + "logits/rejected": -1.4841787815093994, + "logps/chosen": -490.64215087890625, + "logps/rejected": -245.17434692382812, + "loss": 0.0653, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.313742637634277, + "rewards/margins": 4.422706604003906, + "rewards/rejected": -16.7364501953125, + "step": 18022 + }, + { + "epoch": 2.8, + "learning_rate": 9.285357124332176e-07, + "logits/chosen": -2.804753303527832, + "logits/rejected": -2.8167266845703125, + "logps/chosen": -307.9000549316406, + "logps/rejected": -524.1211547851562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.009420394897461, + "rewards/margins": 9.488170623779297, + "rewards/rejected": -20.497591018676758, + "step": 18023 + }, + { + "epoch": 2.8, + "learning_rate": 9.278022719020697e-07, + "logits/chosen": -1.9640480279922485, + "logits/rejected": -2.3668177127838135, + "logps/chosen": -364.8975830078125, + "logps/rejected": -412.8399353027344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.191327095031738, + "rewards/margins": 10.312865257263184, + "rewards/rejected": -18.504192352294922, + "step": 18024 + }, + { + "epoch": 2.8, + "learning_rate": 9.270688313709217e-07, + "logits/chosen": -2.863438367843628, + "logits/rejected": -2.878614664077759, + "logps/chosen": -122.1972427368164, + "logps/rejected": -181.83084106445312, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.67308235168457, + "rewards/margins": 7.879150867462158, + "rewards/rejected": -15.55223274230957, + "step": 18025 + }, + { + "epoch": 2.8, + "learning_rate": 9.263353908397737e-07, + "logits/chosen": -2.8754138946533203, + "logits/rejected": -2.4562530517578125, + "logps/chosen": -279.09088134765625, + "logps/rejected": -451.4775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.145845890045166, + "rewards/margins": 14.99852180480957, + "rewards/rejected": -20.144367218017578, + "step": 18026 + }, + { + "epoch": 2.8, + "learning_rate": 9.256019503086259e-07, + "logits/chosen": -2.663283109664917, + "logits/rejected": -1.7095587253570557, + "logps/chosen": -483.08245849609375, + "logps/rejected": -452.72186279296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.972171783447266, + "rewards/margins": 8.792037010192871, + "rewards/rejected": -18.76420783996582, + "step": 18027 + }, + { + "epoch": 2.8, + "learning_rate": 9.24868509777478e-07, + "logits/chosen": -1.3997050523757935, + "logits/rejected": -2.8394672870635986, + "logps/chosen": -523.7936401367188, + "logps/rejected": -693.5305786132812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.671800136566162, + "rewards/margins": 15.222415924072266, + "rewards/rejected": -21.894214630126953, + "step": 18028 + }, + { + "epoch": 2.8, + "learning_rate": 9.241350692463302e-07, + "logits/chosen": -1.4450106620788574, + "logits/rejected": -2.3436825275421143, + "logps/chosen": -163.28555297851562, + "logps/rejected": -437.9629821777344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.28541374206543, + "rewards/margins": 13.431293487548828, + "rewards/rejected": -23.716707229614258, + "step": 18029 + }, + { + "epoch": 2.8, + "learning_rate": 9.234016287151823e-07, + "logits/chosen": -2.777223587036133, + "logits/rejected": -2.77951979637146, + "logps/chosen": -357.8461608886719, + "logps/rejected": -502.90594482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6896586418151855, + "rewards/margins": 14.742942810058594, + "rewards/rejected": -20.432601928710938, + "step": 18030 + }, + { + "epoch": 2.8, + "learning_rate": 9.226681881840345e-07, + "logits/chosen": -2.8939132690429688, + "logits/rejected": -2.95320463180542, + "logps/chosen": -118.29532623291016, + "logps/rejected": -215.32350158691406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.125881195068359, + "rewards/margins": 8.629606246948242, + "rewards/rejected": -12.755488395690918, + "step": 18031 + }, + { + "epoch": 2.8, + "learning_rate": 9.219347476528866e-07, + "logits/chosen": -2.59148907661438, + "logits/rejected": -2.1840076446533203, + "logps/chosen": -426.92010498046875, + "logps/rejected": -412.3822937011719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.336581230163574, + "rewards/margins": 11.505617141723633, + "rewards/rejected": -19.84219741821289, + "step": 18032 + }, + { + "epoch": 2.8, + "learning_rate": 9.212013071217388e-07, + "logits/chosen": -2.5441505908966064, + "logits/rejected": -1.6628530025482178, + "logps/chosen": -244.804443359375, + "logps/rejected": -309.91864013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.162003517150879, + "rewards/margins": 13.13874626159668, + "rewards/rejected": -20.300750732421875, + "step": 18033 + }, + { + "epoch": 2.8, + "learning_rate": 9.204678665905909e-07, + "logits/chosen": -1.5265556573867798, + "logits/rejected": -2.3335952758789062, + "logps/chosen": -158.5500030517578, + "logps/rejected": -335.78265380859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.303423881530762, + "rewards/margins": 8.99654769897461, + "rewards/rejected": -17.299970626831055, + "step": 18034 + }, + { + "epoch": 2.8, + "learning_rate": 9.197344260594429e-07, + "logits/chosen": -2.412055253982544, + "logits/rejected": -2.387641191482544, + "logps/chosen": -244.01025390625, + "logps/rejected": -292.9796142578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.683929443359375, + "rewards/margins": 8.249776840209961, + "rewards/rejected": -20.933706283569336, + "step": 18035 + }, + { + "epoch": 2.8, + "learning_rate": 9.19000985528295e-07, + "logits/chosen": -2.5882465839385986, + "logits/rejected": -2.7271997928619385, + "logps/chosen": -156.96463012695312, + "logps/rejected": -271.838623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.007560729980469, + "rewards/margins": 10.239574432373047, + "rewards/rejected": -18.247135162353516, + "step": 18036 + }, + { + "epoch": 2.81, + "learning_rate": 9.182675449971472e-07, + "logits/chosen": -2.849335193634033, + "logits/rejected": -2.575605630874634, + "logps/chosen": -475.7730407714844, + "logps/rejected": -578.081787109375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.529823303222656, + "rewards/margins": 11.287530899047852, + "rewards/rejected": -21.817354202270508, + "step": 18037 + }, + { + "epoch": 2.81, + "learning_rate": 9.175341044659992e-07, + "logits/chosen": -2.7092039585113525, + "logits/rejected": -2.029857635498047, + "logps/chosen": -537.2351684570312, + "logps/rejected": -480.897705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.577449798583984, + "rewards/margins": 12.769636154174805, + "rewards/rejected": -21.34708595275879, + "step": 18038 + }, + { + "epoch": 2.81, + "learning_rate": 9.168006639348514e-07, + "logits/chosen": -1.1593694686889648, + "logits/rejected": -2.0877904891967773, + "logps/chosen": -171.98074340820312, + "logps/rejected": -406.57513427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.390106201171875, + "rewards/margins": 10.379617691040039, + "rewards/rejected": -22.76972198486328, + "step": 18039 + }, + { + "epoch": 2.81, + "learning_rate": 9.160672234037035e-07, + "logits/chosen": -2.6736295223236084, + "logits/rejected": -1.45277738571167, + "logps/chosen": -464.09661865234375, + "logps/rejected": -382.6192626953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7637529373168945, + "rewards/margins": 11.174538612365723, + "rewards/rejected": -17.938291549682617, + "step": 18040 + }, + { + "epoch": 2.81, + "learning_rate": 9.153337828725556e-07, + "logits/chosen": -2.975341796875, + "logits/rejected": -2.9064064025878906, + "logps/chosen": -424.42987060546875, + "logps/rejected": -716.4425048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.212068557739258, + "rewards/margins": 13.298264503479004, + "rewards/rejected": -20.510334014892578, + "step": 18041 + }, + { + "epoch": 2.81, + "learning_rate": 9.146003423414078e-07, + "logits/chosen": -2.4683268070220947, + "logits/rejected": -2.5105748176574707, + "logps/chosen": -275.4641418457031, + "logps/rejected": -497.48388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.193173408508301, + "rewards/margins": 18.613807678222656, + "rewards/rejected": -24.806982040405273, + "step": 18042 + }, + { + "epoch": 2.81, + "learning_rate": 9.138669018102599e-07, + "logits/chosen": -2.770249843597412, + "logits/rejected": -2.0163352489471436, + "logps/chosen": -681.0211791992188, + "logps/rejected": -602.3150634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.721737861633301, + "rewards/margins": 13.166520118713379, + "rewards/rejected": -20.88825798034668, + "step": 18043 + }, + { + "epoch": 2.81, + "learning_rate": 9.131334612791121e-07, + "logits/chosen": -1.4641544818878174, + "logits/rejected": -2.309337615966797, + "logps/chosen": -229.362060546875, + "logps/rejected": -516.3526000976562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.556550025939941, + "rewards/margins": 12.247208595275879, + "rewards/rejected": -19.80375862121582, + "step": 18044 + }, + { + "epoch": 2.81, + "learning_rate": 9.12400020747964e-07, + "logits/chosen": -2.6187095642089844, + "logits/rejected": -2.7672438621520996, + "logps/chosen": -110.27568054199219, + "logps/rejected": -388.3221435546875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.118753433227539, + "rewards/margins": 10.385980606079102, + "rewards/rejected": -19.50473403930664, + "step": 18045 + }, + { + "epoch": 2.81, + "learning_rate": 9.116665802168162e-07, + "logits/chosen": -2.1342294216156006, + "logits/rejected": -2.878478765487671, + "logps/chosen": -264.5423583984375, + "logps/rejected": -442.3165588378906, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.245943069458008, + "rewards/margins": 8.316749572753906, + "rewards/rejected": -21.562692642211914, + "step": 18046 + }, + { + "epoch": 2.81, + "learning_rate": 9.109331396856683e-07, + "logits/chosen": -2.8481454849243164, + "logits/rejected": -2.2160253524780273, + "logps/chosen": -646.71044921875, + "logps/rejected": -517.70458984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.915541648864746, + "rewards/margins": 8.974813461303711, + "rewards/rejected": -17.89035415649414, + "step": 18047 + }, + { + "epoch": 2.81, + "learning_rate": 9.101996991545205e-07, + "logits/chosen": -1.911887526512146, + "logits/rejected": -2.5606892108917236, + "logps/chosen": -173.9063720703125, + "logps/rejected": -625.542236328125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.188495635986328, + "rewards/margins": 8.175121307373047, + "rewards/rejected": -21.363616943359375, + "step": 18048 + }, + { + "epoch": 2.81, + "learning_rate": 9.094662586233726e-07, + "logits/chosen": -2.6924643516540527, + "logits/rejected": -1.9568419456481934, + "logps/chosen": -630.0360107421875, + "logps/rejected": -639.0240478515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.441855430603027, + "rewards/margins": 10.1736478805542, + "rewards/rejected": -20.615503311157227, + "step": 18049 + }, + { + "epoch": 2.81, + "learning_rate": 9.087328180922248e-07, + "logits/chosen": -2.83736252784729, + "logits/rejected": -2.3497705459594727, + "logps/chosen": -331.339111328125, + "logps/rejected": -324.3697814941406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3447489738464355, + "rewards/margins": 11.143919944763184, + "rewards/rejected": -18.48866844177246, + "step": 18050 + }, + { + "epoch": 2.81, + "learning_rate": 9.079993775610768e-07, + "logits/chosen": -2.768934965133667, + "logits/rejected": -2.2637462615966797, + "logps/chosen": -349.67230224609375, + "logps/rejected": -450.936767578125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.495960235595703, + "rewards/margins": 8.477945327758789, + "rewards/rejected": -20.973907470703125, + "step": 18051 + }, + { + "epoch": 2.81, + "learning_rate": 9.07265937029929e-07, + "logits/chosen": -2.406069755554199, + "logits/rejected": -2.6719534397125244, + "logps/chosen": -251.87429809570312, + "logps/rejected": -502.4454040527344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.515743255615234, + "rewards/margins": 10.922534942626953, + "rewards/rejected": -21.438278198242188, + "step": 18052 + }, + { + "epoch": 2.81, + "learning_rate": 9.065324964987811e-07, + "logits/chosen": -1.997836709022522, + "logits/rejected": -2.5897765159606934, + "logps/chosen": -403.7361755371094, + "logps/rejected": -737.085693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.281571388244629, + "rewards/margins": 14.747245788574219, + "rewards/rejected": -26.028818130493164, + "step": 18053 + }, + { + "epoch": 2.81, + "learning_rate": 9.057990559676331e-07, + "logits/chosen": -1.1021019220352173, + "logits/rejected": -2.2824971675872803, + "logps/chosen": -219.46771240234375, + "logps/rejected": -357.03509521484375, + "loss": 0.967, + "rewards/accuracies": 0.5, + "rewards/chosen": -16.442001342773438, + "rewards/margins": 3.470728874206543, + "rewards/rejected": -19.912729263305664, + "step": 18054 + }, + { + "epoch": 2.81, + "learning_rate": 9.050656154364852e-07, + "logits/chosen": -2.7143335342407227, + "logits/rejected": -2.5410268306732178, + "logps/chosen": -222.9300537109375, + "logps/rejected": -283.9207763671875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.048992156982422, + "rewards/margins": 9.44925308227539, + "rewards/rejected": -19.498245239257812, + "step": 18055 + }, + { + "epoch": 2.81, + "learning_rate": 9.043321749053374e-07, + "logits/chosen": -2.7594943046569824, + "logits/rejected": -2.741610527038574, + "logps/chosen": -398.129150390625, + "logps/rejected": -522.8970947265625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.371726989746094, + "rewards/margins": 9.999948501586914, + "rewards/rejected": -23.37167739868164, + "step": 18056 + }, + { + "epoch": 2.81, + "learning_rate": 9.035987343741895e-07, + "logits/chosen": -1.554057240486145, + "logits/rejected": -2.4020872116088867, + "logps/chosen": -218.50299072265625, + "logps/rejected": -426.362548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.213318824768066, + "rewards/margins": 13.439790725708008, + "rewards/rejected": -21.65311050415039, + "step": 18057 + }, + { + "epoch": 2.81, + "learning_rate": 9.028652938430417e-07, + "logits/chosen": -2.9974851608276367, + "logits/rejected": -2.224518299102783, + "logps/chosen": -237.34478759765625, + "logps/rejected": -307.2622375488281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.815887451171875, + "rewards/margins": 12.108088493347168, + "rewards/rejected": -17.92397689819336, + "step": 18058 + }, + { + "epoch": 2.81, + "learning_rate": 9.021318533118938e-07, + "logits/chosen": -1.1048495769500732, + "logits/rejected": -2.1817753314971924, + "logps/chosen": -374.1865539550781, + "logps/rejected": -463.4659423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.949545383453369, + "rewards/margins": 12.880485534667969, + "rewards/rejected": -20.830032348632812, + "step": 18059 + }, + { + "epoch": 2.81, + "learning_rate": 9.01398412780746e-07, + "logits/chosen": -2.6182024478912354, + "logits/rejected": -2.8785240650177, + "logps/chosen": -263.53338623046875, + "logps/rejected": -385.0017395019531, + "loss": 0.0331, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.372354507446289, + "rewards/margins": 4.313594341278076, + "rewards/rejected": -15.685949325561523, + "step": 18060 + }, + { + "epoch": 2.81, + "learning_rate": 9.006649722495981e-07, + "logits/chosen": -1.083072304725647, + "logits/rejected": -2.9228527545928955, + "logps/chosen": -207.84918212890625, + "logps/rejected": -589.0748291015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.598966598510742, + "rewards/margins": 9.697637557983398, + "rewards/rejected": -23.29660415649414, + "step": 18061 + }, + { + "epoch": 2.81, + "learning_rate": 8.999315317184502e-07, + "logits/chosen": -2.8443703651428223, + "logits/rejected": -2.608469009399414, + "logps/chosen": -518.0502319335938, + "logps/rejected": -415.57647705078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.11175537109375, + "rewards/margins": 12.85011100769043, + "rewards/rejected": -20.96186637878418, + "step": 18062 + }, + { + "epoch": 2.81, + "learning_rate": 8.991980911873023e-07, + "logits/chosen": -1.6087614297866821, + "logits/rejected": -2.344984769821167, + "logps/chosen": -182.69705200195312, + "logps/rejected": -413.7355041503906, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.451614379882812, + "rewards/margins": 9.250473022460938, + "rewards/rejected": -21.70208740234375, + "step": 18063 + }, + { + "epoch": 2.81, + "learning_rate": 8.984646506561543e-07, + "logits/chosen": -1.5718188285827637, + "logits/rejected": -2.6739418506622314, + "logps/chosen": -402.1446533203125, + "logps/rejected": -555.180419921875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.796738624572754, + "rewards/margins": 13.878227233886719, + "rewards/rejected": -24.67496681213379, + "step": 18064 + }, + { + "epoch": 2.81, + "learning_rate": 8.977312101250064e-07, + "logits/chosen": -2.230318307876587, + "logits/rejected": -2.624159336090088, + "logps/chosen": -201.09669494628906, + "logps/rejected": -430.3103332519531, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.937765121459961, + "rewards/margins": 13.480876922607422, + "rewards/rejected": -22.418642044067383, + "step": 18065 + }, + { + "epoch": 2.81, + "learning_rate": 8.969977695938585e-07, + "logits/chosen": -1.3360228538513184, + "logits/rejected": -2.424696683883667, + "logps/chosen": -201.59336853027344, + "logps/rejected": -369.4962463378906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.546635627746582, + "rewards/margins": 9.632039070129395, + "rewards/rejected": -20.178674697875977, + "step": 18066 + }, + { + "epoch": 2.81, + "learning_rate": 8.962643290627107e-07, + "logits/chosen": -2.6926777362823486, + "logits/rejected": -1.2323728799819946, + "logps/chosen": -404.66656494140625, + "logps/rejected": -289.5009765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.642457008361816, + "rewards/margins": 10.098806381225586, + "rewards/rejected": -18.74126434326172, + "step": 18067 + }, + { + "epoch": 2.81, + "learning_rate": 8.955308885315628e-07, + "logits/chosen": -2.917405366897583, + "logits/rejected": -2.1453981399536133, + "logps/chosen": -358.4837646484375, + "logps/rejected": -353.703369140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.942249298095703, + "rewards/margins": 9.18654727935791, + "rewards/rejected": -16.128795623779297, + "step": 18068 + }, + { + "epoch": 2.81, + "learning_rate": 8.94797448000415e-07, + "logits/chosen": -2.681779384613037, + "logits/rejected": -2.9313509464263916, + "logps/chosen": -222.45755004882812, + "logps/rejected": -458.05712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.594226837158203, + "rewards/margins": 12.013833999633789, + "rewards/rejected": -22.608060836791992, + "step": 18069 + }, + { + "epoch": 2.81, + "learning_rate": 8.940640074692671e-07, + "logits/chosen": -2.6966910362243652, + "logits/rejected": -2.6977779865264893, + "logps/chosen": -290.97637939453125, + "logps/rejected": -206.58026123046875, + "loss": 0.0374, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.385377883911133, + "rewards/margins": 3.9089338779449463, + "rewards/rejected": -12.2943115234375, + "step": 18070 + }, + { + "epoch": 2.81, + "learning_rate": 8.933305669381193e-07, + "logits/chosen": -2.5261099338531494, + "logits/rejected": -2.8225886821746826, + "logps/chosen": -116.90831756591797, + "logps/rejected": -364.1990661621094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.331510543823242, + "rewards/margins": 11.444295883178711, + "rewards/rejected": -19.775806427001953, + "step": 18071 + }, + { + "epoch": 2.81, + "learning_rate": 8.925971264069714e-07, + "logits/chosen": -2.49088716506958, + "logits/rejected": -1.9670299291610718, + "logps/chosen": -1326.9056396484375, + "logps/rejected": -846.1566162109375, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.446222305297852, + "rewards/margins": 8.83935260772705, + "rewards/rejected": -22.28557586669922, + "step": 18072 + }, + { + "epoch": 2.81, + "learning_rate": 8.918636858758236e-07, + "logits/chosen": -2.6617040634155273, + "logits/rejected": -2.677391767501831, + "logps/chosen": -390.3616027832031, + "logps/rejected": -413.35986328125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.241266250610352, + "rewards/margins": 6.305708885192871, + "rewards/rejected": -18.546974182128906, + "step": 18073 + }, + { + "epoch": 2.81, + "learning_rate": 8.911302453446754e-07, + "logits/chosen": -2.703627109527588, + "logits/rejected": -2.661379098892212, + "logps/chosen": -505.5091552734375, + "logps/rejected": -808.0808715820312, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.81715202331543, + "rewards/margins": 10.323467254638672, + "rewards/rejected": -17.1406192779541, + "step": 18074 + }, + { + "epoch": 2.81, + "learning_rate": 8.903968048135276e-07, + "logits/chosen": -2.360076427459717, + "logits/rejected": -2.912421941757202, + "logps/chosen": -194.2542724609375, + "logps/rejected": -785.0928344726562, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.439132690429688, + "rewards/margins": 10.176359176635742, + "rewards/rejected": -19.615489959716797, + "step": 18075 + }, + { + "epoch": 2.81, + "learning_rate": 8.896633642823797e-07, + "logits/chosen": -2.659390926361084, + "logits/rejected": -2.577943801879883, + "logps/chosen": -267.643310546875, + "logps/rejected": -361.825927734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.11534595489502, + "rewards/margins": 8.79810905456543, + "rewards/rejected": -20.913455963134766, + "step": 18076 + }, + { + "epoch": 2.81, + "learning_rate": 8.889299237512319e-07, + "logits/chosen": -2.469888925552368, + "logits/rejected": -2.446401834487915, + "logps/chosen": -251.54165649414062, + "logps/rejected": -451.66802978515625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.266958236694336, + "rewards/margins": 9.642120361328125, + "rewards/rejected": -22.90907859802246, + "step": 18077 + }, + { + "epoch": 2.81, + "learning_rate": 8.88196483220084e-07, + "logits/chosen": -2.4149539470672607, + "logits/rejected": -2.6280503273010254, + "logps/chosen": -571.9259643554688, + "logps/rejected": -433.899169921875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.679041862487793, + "rewards/margins": 6.168827056884766, + "rewards/rejected": -19.847869873046875, + "step": 18078 + }, + { + "epoch": 2.81, + "learning_rate": 8.874630426889362e-07, + "logits/chosen": -1.6450080871582031, + "logits/rejected": -2.145639181137085, + "logps/chosen": -292.205322265625, + "logps/rejected": -393.96405029296875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.021286010742188, + "rewards/margins": 7.348639488220215, + "rewards/rejected": -19.36992645263672, + "step": 18079 + }, + { + "epoch": 2.81, + "learning_rate": 8.867296021577883e-07, + "logits/chosen": -2.6163055896759033, + "logits/rejected": -2.198093891143799, + "logps/chosen": -491.94635009765625, + "logps/rejected": -446.56903076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.430377960205078, + "rewards/margins": 14.386510848999023, + "rewards/rejected": -18.8168888092041, + "step": 18080 + }, + { + "epoch": 2.81, + "learning_rate": 8.859961616266405e-07, + "logits/chosen": -1.2443606853485107, + "logits/rejected": -2.842337131500244, + "logps/chosen": -197.0915985107422, + "logps/rejected": -663.07177734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.968899726867676, + "rewards/margins": 10.124778747558594, + "rewards/rejected": -22.093677520751953, + "step": 18081 + }, + { + "epoch": 2.81, + "learning_rate": 8.852627210954926e-07, + "logits/chosen": -2.788052797317505, + "logits/rejected": -2.7141168117523193, + "logps/chosen": -139.6385498046875, + "logps/rejected": -244.01651000976562, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.676399230957031, + "rewards/margins": 7.293670654296875, + "rewards/rejected": -16.970069885253906, + "step": 18082 + }, + { + "epoch": 2.81, + "learning_rate": 8.845292805643447e-07, + "logits/chosen": -1.1844890117645264, + "logits/rejected": -2.417009115219116, + "logps/chosen": -164.6632843017578, + "logps/rejected": -394.7618103027344, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.44561767578125, + "rewards/margins": 8.108976364135742, + "rewards/rejected": -19.554594039916992, + "step": 18083 + }, + { + "epoch": 2.81, + "learning_rate": 8.837958400331967e-07, + "logits/chosen": -2.200068712234497, + "logits/rejected": -2.803553819656372, + "logps/chosen": -197.4004364013672, + "logps/rejected": -445.126708984375, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.880867958068848, + "rewards/margins": 7.286555290222168, + "rewards/rejected": -19.167423248291016, + "step": 18084 + }, + { + "epoch": 2.81, + "learning_rate": 8.830623995020489e-07, + "logits/chosen": -2.6788864135742188, + "logits/rejected": -0.9870442748069763, + "logps/chosen": -275.27459716796875, + "logps/rejected": -186.82449340820312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.260397911071777, + "rewards/margins": 9.36905288696289, + "rewards/rejected": -17.629451751708984, + "step": 18085 + }, + { + "epoch": 2.81, + "learning_rate": 8.823289589709009e-07, + "logits/chosen": -2.1480157375335693, + "logits/rejected": -3.0344502925872803, + "logps/chosen": -181.83596801757812, + "logps/rejected": -520.4700317382812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.034725189208984, + "rewards/margins": 10.504024505615234, + "rewards/rejected": -20.53874969482422, + "step": 18086 + }, + { + "epoch": 2.81, + "learning_rate": 8.81595518439753e-07, + "logits/chosen": -2.5857582092285156, + "logits/rejected": -2.224087953567505, + "logps/chosen": -275.02484130859375, + "logps/rejected": -321.295654296875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.062698364257812, + "rewards/margins": 8.188665390014648, + "rewards/rejected": -20.251361846923828, + "step": 18087 + }, + { + "epoch": 2.81, + "learning_rate": 8.808620779086052e-07, + "logits/chosen": -2.097076416015625, + "logits/rejected": -2.6105759143829346, + "logps/chosen": -381.9388122558594, + "logps/rejected": -490.6116943359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.121122360229492, + "rewards/margins": 12.603586196899414, + "rewards/rejected": -21.724708557128906, + "step": 18088 + }, + { + "epoch": 2.81, + "learning_rate": 8.801286373774573e-07, + "logits/chosen": -2.5865323543548584, + "logits/rejected": -1.2712877988815308, + "logps/chosen": -225.0726318359375, + "logps/rejected": -302.35101318359375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.735328674316406, + "rewards/margins": 10.50738525390625, + "rewards/rejected": -21.242713928222656, + "step": 18089 + }, + { + "epoch": 2.81, + "learning_rate": 8.793951968463095e-07, + "logits/chosen": -2.3168294429779053, + "logits/rejected": -2.743295192718506, + "logps/chosen": -233.73605346679688, + "logps/rejected": -489.33880615234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.091550827026367, + "rewards/margins": 12.549506187438965, + "rewards/rejected": -23.641056060791016, + "step": 18090 + }, + { + "epoch": 2.81, + "learning_rate": 8.786617563151615e-07, + "logits/chosen": -1.415281057357788, + "logits/rejected": -2.7375776767730713, + "logps/chosen": -405.1728820800781, + "logps/rejected": -630.8595581054688, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.630985260009766, + "rewards/margins": 8.388448715209961, + "rewards/rejected": -16.019433975219727, + "step": 18091 + }, + { + "epoch": 2.81, + "learning_rate": 8.779283157840137e-07, + "logits/chosen": -2.477304220199585, + "logits/rejected": -2.874742031097412, + "logps/chosen": -154.3910675048828, + "logps/rejected": -345.20263671875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.658693313598633, + "rewards/margins": 9.469449996948242, + "rewards/rejected": -19.128143310546875, + "step": 18092 + }, + { + "epoch": 2.81, + "learning_rate": 8.771948752528658e-07, + "logits/chosen": -1.9082145690917969, + "logits/rejected": -2.480259418487549, + "logps/chosen": -299.3447265625, + "logps/rejected": -554.7911376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.55720853805542, + "rewards/margins": 12.398189544677734, + "rewards/rejected": -19.955398559570312, + "step": 18093 + }, + { + "epoch": 2.81, + "learning_rate": 8.76461434721718e-07, + "logits/chosen": -0.6122817397117615, + "logits/rejected": -2.0097384452819824, + "logps/chosen": -167.70928955078125, + "logps/rejected": -378.3310546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.871194839477539, + "rewards/margins": 8.88693618774414, + "rewards/rejected": -19.758132934570312, + "step": 18094 + }, + { + "epoch": 2.81, + "learning_rate": 8.757279941905701e-07, + "logits/chosen": -2.8098113536834717, + "logits/rejected": -3.058622360229492, + "logps/chosen": -134.0067138671875, + "logps/rejected": -253.5784454345703, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.839823246002197, + "rewards/margins": 10.416839599609375, + "rewards/rejected": -17.256662368774414, + "step": 18095 + }, + { + "epoch": 2.81, + "learning_rate": 8.749945536594222e-07, + "logits/chosen": -2.307455062866211, + "logits/rejected": -2.8064773082733154, + "logps/chosen": -434.40081787109375, + "logps/rejected": -386.8106384277344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.867692947387695, + "rewards/margins": 14.033870697021484, + "rewards/rejected": -19.901561737060547, + "step": 18096 + }, + { + "epoch": 2.81, + "learning_rate": 8.742611131282743e-07, + "logits/chosen": -2.3773205280303955, + "logits/rejected": -2.5353872776031494, + "logps/chosen": -386.0062561035156, + "logps/rejected": -432.40875244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.154996871948242, + "rewards/margins": 11.837453842163086, + "rewards/rejected": -19.992450714111328, + "step": 18097 + }, + { + "epoch": 2.81, + "learning_rate": 8.735276725971265e-07, + "logits/chosen": -2.272364854812622, + "logits/rejected": -2.094151496887207, + "logps/chosen": -229.41357421875, + "logps/rejected": -330.0699462890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.442188262939453, + "rewards/margins": 10.026786804199219, + "rewards/rejected": -18.468975067138672, + "step": 18098 + }, + { + "epoch": 2.81, + "learning_rate": 8.727942320659785e-07, + "logits/chosen": -1.0947102308273315, + "logits/rejected": -2.454129934310913, + "logps/chosen": -277.51153564453125, + "logps/rejected": -636.020751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.453133583068848, + "rewards/margins": 16.622257232666016, + "rewards/rejected": -24.07539176940918, + "step": 18099 + }, + { + "epoch": 2.81, + "learning_rate": 8.720607915348307e-07, + "logits/chosen": -1.8837625980377197, + "logits/rejected": -2.6226770877838135, + "logps/chosen": -225.6900634765625, + "logps/rejected": -423.60650634765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.100500106811523, + "rewards/margins": 9.508382797241211, + "rewards/rejected": -21.608882904052734, + "step": 18100 + }, + { + "epoch": 2.82, + "learning_rate": 8.713273510036827e-07, + "logits/chosen": -2.548715353012085, + "logits/rejected": -3.0932974815368652, + "logps/chosen": -170.76177978515625, + "logps/rejected": -549.3953857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.286455154418945, + "rewards/margins": 10.625447273254395, + "rewards/rejected": -18.911903381347656, + "step": 18101 + }, + { + "epoch": 2.82, + "learning_rate": 8.705939104725349e-07, + "logits/chosen": -1.7132856845855713, + "logits/rejected": -2.947787284851074, + "logps/chosen": -190.12966918945312, + "logps/rejected": -673.686767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.858414173126221, + "rewards/margins": 12.088235855102539, + "rewards/rejected": -19.9466495513916, + "step": 18102 + }, + { + "epoch": 2.82, + "learning_rate": 8.69860469941387e-07, + "logits/chosen": -2.77998423576355, + "logits/rejected": -2.1235177516937256, + "logps/chosen": -586.348876953125, + "logps/rejected": -588.3455810546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.891453742980957, + "rewards/margins": 9.687150955200195, + "rewards/rejected": -22.57860565185547, + "step": 18103 + }, + { + "epoch": 2.82, + "learning_rate": 8.691270294102391e-07, + "logits/chosen": -1.4489160776138306, + "logits/rejected": -2.4527876377105713, + "logps/chosen": -186.76202392578125, + "logps/rejected": -564.6802978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.952569007873535, + "rewards/margins": 12.558367729187012, + "rewards/rejected": -20.510936737060547, + "step": 18104 + }, + { + "epoch": 2.82, + "learning_rate": 8.683935888790913e-07, + "logits/chosen": -1.7142858505249023, + "logits/rejected": -2.5643162727355957, + "logps/chosen": -199.13706970214844, + "logps/rejected": -511.802734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.48788833618164, + "rewards/margins": 10.377866744995117, + "rewards/rejected": -20.865755081176758, + "step": 18105 + }, + { + "epoch": 2.82, + "learning_rate": 8.676601483479433e-07, + "logits/chosen": -2.9562618732452393, + "logits/rejected": -2.7362284660339355, + "logps/chosen": -436.88848876953125, + "logps/rejected": -477.7157897949219, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.739644050598145, + "rewards/margins": 8.46117115020752, + "rewards/rejected": -19.200815200805664, + "step": 18106 + }, + { + "epoch": 2.82, + "learning_rate": 8.669267078167955e-07, + "logits/chosen": -1.7620452642440796, + "logits/rejected": -2.3965117931365967, + "logps/chosen": -202.88247680664062, + "logps/rejected": -317.9865417480469, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.589176177978516, + "rewards/margins": 7.875092506408691, + "rewards/rejected": -15.464268684387207, + "step": 18107 + }, + { + "epoch": 2.82, + "learning_rate": 8.661932672856476e-07, + "logits/chosen": -1.8559643030166626, + "logits/rejected": -2.6838574409484863, + "logps/chosen": -195.7458038330078, + "logps/rejected": -466.4346923828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.10451889038086, + "rewards/margins": 9.587722778320312, + "rewards/rejected": -18.692241668701172, + "step": 18108 + }, + { + "epoch": 2.82, + "learning_rate": 8.654598267544998e-07, + "logits/chosen": -1.0176990032196045, + "logits/rejected": -2.420307159423828, + "logps/chosen": -213.85794067382812, + "logps/rejected": -413.88818359375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.160120010375977, + "rewards/margins": 8.01652717590332, + "rewards/rejected": -20.176647186279297, + "step": 18109 + }, + { + "epoch": 2.82, + "learning_rate": 8.647263862233518e-07, + "logits/chosen": -1.3148893117904663, + "logits/rejected": -2.2876365184783936, + "logps/chosen": -154.40328979492188, + "logps/rejected": -326.8277587890625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.701635360717773, + "rewards/margins": 8.53695297241211, + "rewards/rejected": -19.238588333129883, + "step": 18110 + }, + { + "epoch": 2.82, + "learning_rate": 8.639929456922039e-07, + "logits/chosen": -1.788266658782959, + "logits/rejected": -2.6217994689941406, + "logps/chosen": -336.2317810058594, + "logps/rejected": -443.0445556640625, + "loss": 0.0173, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.224076271057129, + "rewards/margins": 9.796924591064453, + "rewards/rejected": -19.020999908447266, + "step": 18111 + }, + { + "epoch": 2.82, + "learning_rate": 8.63259505161056e-07, + "logits/chosen": -2.1840786933898926, + "logits/rejected": -2.727687120437622, + "logps/chosen": -500.91357421875, + "logps/rejected": -563.713623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.019723892211914, + "rewards/margins": 11.793014526367188, + "rewards/rejected": -19.8127384185791, + "step": 18112 + }, + { + "epoch": 2.82, + "learning_rate": 8.625260646299082e-07, + "logits/chosen": -1.4470394849777222, + "logits/rejected": -2.4963185787200928, + "logps/chosen": -154.89425659179688, + "logps/rejected": -478.96197509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.017995834350586, + "rewards/margins": 15.044753074645996, + "rewards/rejected": -24.0627498626709, + "step": 18113 + }, + { + "epoch": 2.82, + "learning_rate": 8.617926240987603e-07, + "logits/chosen": -2.4986653327941895, + "logits/rejected": -2.8188116550445557, + "logps/chosen": -520.6372680664062, + "logps/rejected": -551.2709350585938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.086325645446777, + "rewards/margins": 9.807962417602539, + "rewards/rejected": -16.894287109375, + "step": 18114 + }, + { + "epoch": 2.82, + "learning_rate": 8.610591835676125e-07, + "logits/chosen": -0.6132708191871643, + "logits/rejected": -2.5603830814361572, + "logps/chosen": -171.8215789794922, + "logps/rejected": -702.0414428710938, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.292713165283203, + "rewards/margins": 12.012418746948242, + "rewards/rejected": -23.305131912231445, + "step": 18115 + }, + { + "epoch": 2.82, + "learning_rate": 8.603257430364645e-07, + "logits/chosen": -1.360308289527893, + "logits/rejected": -2.1866722106933594, + "logps/chosen": -252.73341369628906, + "logps/rejected": -368.98345947265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.562097549438477, + "rewards/margins": 9.169214248657227, + "rewards/rejected": -18.731311798095703, + "step": 18116 + }, + { + "epoch": 2.82, + "learning_rate": 8.595923025053167e-07, + "logits/chosen": -2.7393951416015625, + "logits/rejected": -2.0685904026031494, + "logps/chosen": -970.1014404296875, + "logps/rejected": -768.677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.030835151672363, + "rewards/margins": 11.34722900390625, + "rewards/rejected": -20.378063201904297, + "step": 18117 + }, + { + "epoch": 2.82, + "learning_rate": 8.588588619741688e-07, + "logits/chosen": -2.946132183074951, + "logits/rejected": -2.7654128074645996, + "logps/chosen": -148.70010375976562, + "logps/rejected": -282.628173828125, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.338550567626953, + "rewards/margins": 5.554302215576172, + "rewards/rejected": -13.892852783203125, + "step": 18118 + }, + { + "epoch": 2.82, + "learning_rate": 8.58125421443021e-07, + "logits/chosen": -2.8107402324676514, + "logits/rejected": -2.7169017791748047, + "logps/chosen": -228.9083709716797, + "logps/rejected": -300.7113342285156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.196041107177734, + "rewards/margins": 10.692601203918457, + "rewards/rejected": -18.888641357421875, + "step": 18119 + }, + { + "epoch": 2.82, + "learning_rate": 8.573919809118731e-07, + "logits/chosen": -2.7786502838134766, + "logits/rejected": -2.614572286605835, + "logps/chosen": -359.23199462890625, + "logps/rejected": -329.7699279785156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.849113464355469, + "rewards/margins": 8.812097549438477, + "rewards/rejected": -19.661209106445312, + "step": 18120 + }, + { + "epoch": 2.82, + "learning_rate": 8.566585403807252e-07, + "logits/chosen": -1.892380714416504, + "logits/rejected": -2.7406952381134033, + "logps/chosen": -442.93768310546875, + "logps/rejected": -562.71826171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.732964515686035, + "rewards/margins": 10.759048461914062, + "rewards/rejected": -22.49201202392578, + "step": 18121 + }, + { + "epoch": 2.82, + "learning_rate": 8.559250998495772e-07, + "logits/chosen": -3.1075658798217773, + "logits/rejected": -3.1109797954559326, + "logps/chosen": -352.42779541015625, + "logps/rejected": -417.3317565917969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.03365421295166, + "rewards/margins": 13.514871597290039, + "rewards/rejected": -19.548524856567383, + "step": 18122 + }, + { + "epoch": 2.82, + "learning_rate": 8.551916593184294e-07, + "logits/chosen": -2.1625232696533203, + "logits/rejected": -2.377121686935425, + "logps/chosen": -174.5179443359375, + "logps/rejected": -479.2941589355469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.294624328613281, + "rewards/margins": 14.08373737335205, + "rewards/rejected": -21.378360748291016, + "step": 18123 + }, + { + "epoch": 2.82, + "learning_rate": 8.544582187872815e-07, + "logits/chosen": -2.101914644241333, + "logits/rejected": -2.5091450214385986, + "logps/chosen": -327.5283203125, + "logps/rejected": -483.09503173828125, + "loss": 0.1017, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.895463943481445, + "rewards/margins": 9.072854995727539, + "rewards/rejected": -20.968318939208984, + "step": 18124 + }, + { + "epoch": 2.82, + "learning_rate": 8.537247782561336e-07, + "logits/chosen": -2.499176025390625, + "logits/rejected": -2.715902328491211, + "logps/chosen": -436.968505859375, + "logps/rejected": -413.75848388671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.339878082275391, + "rewards/margins": 8.597077369689941, + "rewards/rejected": -14.936955451965332, + "step": 18125 + }, + { + "epoch": 2.82, + "learning_rate": 8.529913377249857e-07, + "logits/chosen": -2.692819118499756, + "logits/rejected": -1.5624068975448608, + "logps/chosen": -478.05810546875, + "logps/rejected": -296.38671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.745787620544434, + "rewards/margins": 9.746783256530762, + "rewards/rejected": -20.492570877075195, + "step": 18126 + }, + { + "epoch": 2.82, + "learning_rate": 8.522578971938378e-07, + "logits/chosen": -2.73553729057312, + "logits/rejected": -1.7636317014694214, + "logps/chosen": -387.484130859375, + "logps/rejected": -258.5018005371094, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.3605375289917, + "rewards/margins": 8.415329933166504, + "rewards/rejected": -17.775867462158203, + "step": 18127 + }, + { + "epoch": 2.82, + "learning_rate": 8.5152445666269e-07, + "logits/chosen": -1.2933573722839355, + "logits/rejected": -2.5827274322509766, + "logps/chosen": -234.67945861816406, + "logps/rejected": -393.8763427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.37939167022705, + "rewards/margins": 10.726202964782715, + "rewards/rejected": -19.105594635009766, + "step": 18128 + }, + { + "epoch": 2.82, + "learning_rate": 8.507910161315421e-07, + "logits/chosen": -2.3467729091644287, + "logits/rejected": -2.562023639678955, + "logps/chosen": -165.96636962890625, + "logps/rejected": -195.78773498535156, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.222293853759766, + "rewards/margins": 5.741039752960205, + "rewards/rejected": -16.963333129882812, + "step": 18129 + }, + { + "epoch": 2.82, + "learning_rate": 8.500575756003943e-07, + "logits/chosen": -2.937270402908325, + "logits/rejected": -2.8129501342773438, + "logps/chosen": -873.9788208007812, + "logps/rejected": -704.1321411132812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.043731689453125, + "rewards/margins": 10.617986679077148, + "rewards/rejected": -20.661718368530273, + "step": 18130 + }, + { + "epoch": 2.82, + "learning_rate": 8.493241350692463e-07, + "logits/chosen": -1.512308955192566, + "logits/rejected": -2.618837356567383, + "logps/chosen": -245.5585479736328, + "logps/rejected": -466.7431640625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.812358856201172, + "rewards/margins": 7.041733741760254, + "rewards/rejected": -19.85409164428711, + "step": 18131 + }, + { + "epoch": 2.82, + "learning_rate": 8.485906945380985e-07, + "logits/chosen": -2.2034411430358887, + "logits/rejected": -2.696763038635254, + "logps/chosen": -171.85659790039062, + "logps/rejected": -295.9228820800781, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.226882934570312, + "rewards/margins": 8.445609092712402, + "rewards/rejected": -16.67249298095703, + "step": 18132 + }, + { + "epoch": 2.82, + "learning_rate": 8.478572540069506e-07, + "logits/chosen": -1.9110400676727295, + "logits/rejected": -2.8224451541900635, + "logps/chosen": -368.66058349609375, + "logps/rejected": -628.6132202148438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.840150833129883, + "rewards/margins": 12.107473373413086, + "rewards/rejected": -20.94762420654297, + "step": 18133 + }, + { + "epoch": 2.82, + "learning_rate": 8.471238134758028e-07, + "logits/chosen": -1.4555771350860596, + "logits/rejected": -2.4675989151000977, + "logps/chosen": -247.9617919921875, + "logps/rejected": -501.79998779296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.689453125, + "rewards/margins": 14.493797302246094, + "rewards/rejected": -21.183250427246094, + "step": 18134 + }, + { + "epoch": 2.82, + "learning_rate": 8.463903729446548e-07, + "logits/chosen": -1.7807623147964478, + "logits/rejected": -2.6179068088531494, + "logps/chosen": -208.08871459960938, + "logps/rejected": -629.5557250976562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.728961944580078, + "rewards/margins": 13.742006301879883, + "rewards/rejected": -22.47096824645996, + "step": 18135 + }, + { + "epoch": 2.82, + "learning_rate": 8.456569324135069e-07, + "logits/chosen": -1.8921443223953247, + "logits/rejected": -2.6907525062561035, + "logps/chosen": -301.89019775390625, + "logps/rejected": -582.3334350585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.393380165100098, + "rewards/margins": 12.860947608947754, + "rewards/rejected": -23.25432777404785, + "step": 18136 + }, + { + "epoch": 2.82, + "learning_rate": 8.44923491882359e-07, + "logits/chosen": -1.3207849264144897, + "logits/rejected": -2.1720387935638428, + "logps/chosen": -327.7472839355469, + "logps/rejected": -607.133544921875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.880502700805664, + "rewards/margins": 9.379191398620605, + "rewards/rejected": -22.259693145751953, + "step": 18137 + }, + { + "epoch": 2.82, + "learning_rate": 8.441900513512112e-07, + "logits/chosen": -2.8442490100860596, + "logits/rejected": -2.4091694355010986, + "logps/chosen": -369.08599853515625, + "logps/rejected": -550.5556030273438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.083309173583984, + "rewards/margins": 11.660810470581055, + "rewards/rejected": -21.744117736816406, + "step": 18138 + }, + { + "epoch": 2.82, + "learning_rate": 8.434566108200633e-07, + "logits/chosen": -1.2845250368118286, + "logits/rejected": -2.393240451812744, + "logps/chosen": -225.60256958007812, + "logps/rejected": -529.3609619140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.457564353942871, + "rewards/margins": 9.853155136108398, + "rewards/rejected": -20.310718536376953, + "step": 18139 + }, + { + "epoch": 2.82, + "learning_rate": 8.427231702889155e-07, + "logits/chosen": -1.0035067796707153, + "logits/rejected": -2.4031901359558105, + "logps/chosen": -195.4537353515625, + "logps/rejected": -571.9034423828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.718921661376953, + "rewards/margins": 12.894342422485352, + "rewards/rejected": -24.613264083862305, + "step": 18140 + }, + { + "epoch": 2.82, + "learning_rate": 8.419897297577675e-07, + "logits/chosen": -2.7390663623809814, + "logits/rejected": -2.793250560760498, + "logps/chosen": -97.34162139892578, + "logps/rejected": -420.92901611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.406040191650391, + "rewards/margins": 14.100305557250977, + "rewards/rejected": -21.506345748901367, + "step": 18141 + }, + { + "epoch": 2.82, + "learning_rate": 8.412562892266197e-07, + "logits/chosen": -2.4252631664276123, + "logits/rejected": -2.4341657161712646, + "logps/chosen": -363.04840087890625, + "logps/rejected": -477.3504943847656, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.547746658325195, + "rewards/margins": 7.914584636688232, + "rewards/rejected": -19.462331771850586, + "step": 18142 + }, + { + "epoch": 2.82, + "learning_rate": 8.405228486954718e-07, + "logits/chosen": -1.6517342329025269, + "logits/rejected": -2.6445720195770264, + "logps/chosen": -215.25836181640625, + "logps/rejected": -356.40179443359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.465386390686035, + "rewards/margins": 8.358606338500977, + "rewards/rejected": -20.823993682861328, + "step": 18143 + }, + { + "epoch": 2.82, + "learning_rate": 8.397894081643239e-07, + "logits/chosen": -2.438274383544922, + "logits/rejected": -1.7574305534362793, + "logps/chosen": -300.6654968261719, + "logps/rejected": -422.1496887207031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.014967918395996, + "rewards/margins": 14.598932266235352, + "rewards/rejected": -24.613901138305664, + "step": 18144 + }, + { + "epoch": 2.82, + "learning_rate": 8.39055967633176e-07, + "logits/chosen": -2.66786789894104, + "logits/rejected": -2.7553672790527344, + "logps/chosen": -217.7589111328125, + "logps/rejected": -293.8131103515625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.607403755187988, + "rewards/margins": 10.325154304504395, + "rewards/rejected": -16.932558059692383, + "step": 18145 + }, + { + "epoch": 2.82, + "learning_rate": 8.38322527102028e-07, + "logits/chosen": -1.8171179294586182, + "logits/rejected": -2.55932354927063, + "logps/chosen": -291.10284423828125, + "logps/rejected": -495.0929870605469, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": -17.38387680053711, + "rewards/margins": 4.491349697113037, + "rewards/rejected": -21.875225067138672, + "step": 18146 + }, + { + "epoch": 2.82, + "learning_rate": 8.375890865708802e-07, + "logits/chosen": -2.32094669342041, + "logits/rejected": -1.602314829826355, + "logps/chosen": -326.790771484375, + "logps/rejected": -328.15576171875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.776418685913086, + "rewards/margins": 7.408627510070801, + "rewards/rejected": -18.185047149658203, + "step": 18147 + }, + { + "epoch": 2.82, + "learning_rate": 8.368556460397323e-07, + "logits/chosen": -2.5410754680633545, + "logits/rejected": -2.894993782043457, + "logps/chosen": -189.3997802734375, + "logps/rejected": -349.0691223144531, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.977052688598633, + "rewards/margins": 8.99612808227539, + "rewards/rejected": -15.973180770874023, + "step": 18148 + }, + { + "epoch": 2.82, + "learning_rate": 8.361222055085845e-07, + "logits/chosen": -2.887180805206299, + "logits/rejected": -2.9479238986968994, + "logps/chosen": -93.79109191894531, + "logps/rejected": -249.95181274414062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.584683895111084, + "rewards/margins": 11.190618515014648, + "rewards/rejected": -17.77530288696289, + "step": 18149 + }, + { + "epoch": 2.82, + "learning_rate": 8.353887649774365e-07, + "logits/chosen": -2.1720597743988037, + "logits/rejected": -2.8786582946777344, + "logps/chosen": -208.35231018066406, + "logps/rejected": -425.61370849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.776112079620361, + "rewards/margins": 11.659305572509766, + "rewards/rejected": -17.43541717529297, + "step": 18150 + }, + { + "epoch": 2.82, + "learning_rate": 8.346553244462887e-07, + "logits/chosen": -2.4256410598754883, + "logits/rejected": -2.0184414386749268, + "logps/chosen": -194.89620971679688, + "logps/rejected": -285.06817626953125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.387225151062012, + "rewards/margins": 8.593761444091797, + "rewards/rejected": -18.980987548828125, + "step": 18151 + }, + { + "epoch": 2.82, + "learning_rate": 8.339218839151408e-07, + "logits/chosen": -2.6984622478485107, + "logits/rejected": -0.9921521544456482, + "logps/chosen": -402.1834411621094, + "logps/rejected": -315.05487060546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.304876327514648, + "rewards/margins": 8.351547241210938, + "rewards/rejected": -15.656423568725586, + "step": 18152 + }, + { + "epoch": 2.82, + "learning_rate": 8.33188443383993e-07, + "logits/chosen": -1.7280291318893433, + "logits/rejected": -2.6474251747131348, + "logps/chosen": -207.48458862304688, + "logps/rejected": -359.86285400390625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.622773170471191, + "rewards/margins": 9.729642868041992, + "rewards/rejected": -19.352415084838867, + "step": 18153 + }, + { + "epoch": 2.82, + "learning_rate": 8.324550028528451e-07, + "logits/chosen": -1.7801132202148438, + "logits/rejected": -2.1730616092681885, + "logps/chosen": -405.8699951171875, + "logps/rejected": -621.002685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.427777290344238, + "rewards/margins": 11.658557891845703, + "rewards/rejected": -22.086334228515625, + "step": 18154 + }, + { + "epoch": 2.82, + "learning_rate": 8.317215623216972e-07, + "logits/chosen": -2.730022430419922, + "logits/rejected": -2.825863838195801, + "logps/chosen": -102.31549072265625, + "logps/rejected": -222.01544189453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.830881118774414, + "rewards/margins": 9.435872077941895, + "rewards/rejected": -17.266754150390625, + "step": 18155 + }, + { + "epoch": 2.82, + "learning_rate": 8.309881217905493e-07, + "logits/chosen": -2.7096874713897705, + "logits/rejected": -3.023052215576172, + "logps/chosen": -158.43228149414062, + "logps/rejected": -822.965576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.46695613861084, + "rewards/margins": 21.63368034362793, + "rewards/rejected": -28.100635528564453, + "step": 18156 + }, + { + "epoch": 2.82, + "learning_rate": 8.302546812594015e-07, + "logits/chosen": -1.3466795682907104, + "logits/rejected": -2.721435308456421, + "logps/chosen": -492.32757568359375, + "logps/rejected": -833.7889404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.455016136169434, + "rewards/margins": 12.289092063903809, + "rewards/rejected": -24.744108200073242, + "step": 18157 + }, + { + "epoch": 2.82, + "learning_rate": 8.295212407282535e-07, + "logits/chosen": -2.850708246231079, + "logits/rejected": -1.076751708984375, + "logps/chosen": -427.4521789550781, + "logps/rejected": -299.0859069824219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.461534023284912, + "rewards/margins": 14.798600196838379, + "rewards/rejected": -18.260135650634766, + "step": 18158 + }, + { + "epoch": 2.82, + "learning_rate": 8.287878001971057e-07, + "logits/chosen": -1.9704463481903076, + "logits/rejected": -2.871676445007324, + "logps/chosen": -353.8266296386719, + "logps/rejected": -689.3624267578125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.401033401489258, + "rewards/margins": 6.663644790649414, + "rewards/rejected": -20.064678192138672, + "step": 18159 + }, + { + "epoch": 2.82, + "learning_rate": 8.280543596659577e-07, + "logits/chosen": -2.356752395629883, + "logits/rejected": -2.688018798828125, + "logps/chosen": -153.8382568359375, + "logps/rejected": -183.78353881835938, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.320183753967285, + "rewards/margins": 6.754698276519775, + "rewards/rejected": -15.074881553649902, + "step": 18160 + }, + { + "epoch": 2.82, + "learning_rate": 8.273209191348099e-07, + "logits/chosen": -2.752078056335449, + "logits/rejected": -2.4360644817352295, + "logps/chosen": -377.99554443359375, + "logps/rejected": -557.7128295898438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.978358745574951, + "rewards/margins": 14.755973815917969, + "rewards/rejected": -22.734333038330078, + "step": 18161 + }, + { + "epoch": 2.82, + "learning_rate": 8.26587478603662e-07, + "logits/chosen": -1.9778931140899658, + "logits/rejected": -2.3449206352233887, + "logps/chosen": -290.5908203125, + "logps/rejected": -530.5352172851562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.149795532226562, + "rewards/margins": 13.333905220031738, + "rewards/rejected": -24.483699798583984, + "step": 18162 + }, + { + "epoch": 2.82, + "learning_rate": 8.258540380725142e-07, + "logits/chosen": -2.6153414249420166, + "logits/rejected": -2.6550543308258057, + "logps/chosen": -468.62591552734375, + "logps/rejected": -514.8264770507812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.545685768127441, + "rewards/margins": 10.635411262512207, + "rewards/rejected": -18.18109703063965, + "step": 18163 + }, + { + "epoch": 2.82, + "learning_rate": 8.251205975413663e-07, + "logits/chosen": -2.8599328994750977, + "logits/rejected": -2.814162015914917, + "logps/chosen": -183.88926696777344, + "logps/rejected": -227.89764404296875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.84083890914917, + "rewards/margins": 9.053424835205078, + "rewards/rejected": -15.89426326751709, + "step": 18164 + }, + { + "epoch": 2.83, + "learning_rate": 8.243871570102184e-07, + "logits/chosen": -2.685396671295166, + "logits/rejected": -2.455075263977051, + "logps/chosen": -166.0738983154297, + "logps/rejected": -281.5768127441406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.827833652496338, + "rewards/margins": 11.328091621398926, + "rewards/rejected": -17.155925750732422, + "step": 18165 + }, + { + "epoch": 2.83, + "learning_rate": 8.236537164790705e-07, + "logits/chosen": -1.7032902240753174, + "logits/rejected": -2.0960826873779297, + "logps/chosen": -242.6773681640625, + "logps/rejected": -412.0372314453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.095499038696289, + "rewards/margins": 9.024978637695312, + "rewards/rejected": -17.1204776763916, + "step": 18166 + }, + { + "epoch": 2.83, + "learning_rate": 8.229202759479226e-07, + "logits/chosen": -1.9926296472549438, + "logits/rejected": -2.8191890716552734, + "logps/chosen": -254.464599609375, + "logps/rejected": -322.67822265625, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.384377479553223, + "rewards/margins": 9.216056823730469, + "rewards/rejected": -15.600433349609375, + "step": 18167 + }, + { + "epoch": 2.83, + "learning_rate": 8.221868354167748e-07, + "logits/chosen": -2.522310733795166, + "logits/rejected": -2.943969488143921, + "logps/chosen": -445.4833984375, + "logps/rejected": -545.3294067382812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.666764259338379, + "rewards/margins": 10.528509140014648, + "rewards/rejected": -21.195274353027344, + "step": 18168 + }, + { + "epoch": 2.83, + "learning_rate": 8.214533948856269e-07, + "logits/chosen": -1.0153672695159912, + "logits/rejected": -2.783245801925659, + "logps/chosen": -166.66543579101562, + "logps/rejected": -724.046142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.53890609741211, + "rewards/margins": 12.163166046142578, + "rewards/rejected": -22.702072143554688, + "step": 18169 + }, + { + "epoch": 2.83, + "learning_rate": 8.207199543544789e-07, + "logits/chosen": -2.7746686935424805, + "logits/rejected": -2.403646230697632, + "logps/chosen": -275.42755126953125, + "logps/rejected": -168.98480224609375, + "loss": 0.5693, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.18358039855957, + "rewards/margins": 0.9767742156982422, + "rewards/rejected": -14.160354614257812, + "step": 18170 + }, + { + "epoch": 2.83, + "learning_rate": 8.19986513823331e-07, + "logits/chosen": -0.5176569223403931, + "logits/rejected": -2.7283647060394287, + "logps/chosen": -133.14321899414062, + "logps/rejected": -589.4805297851562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.696345806121826, + "rewards/margins": 17.226131439208984, + "rewards/rejected": -24.92247772216797, + "step": 18171 + }, + { + "epoch": 2.83, + "learning_rate": 8.192530732921832e-07, + "logits/chosen": -0.8058736324310303, + "logits/rejected": -1.817221999168396, + "logps/chosen": -313.2532958984375, + "logps/rejected": -551.0589599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.811878204345703, + "rewards/margins": 14.118427276611328, + "rewards/rejected": -24.93030548095703, + "step": 18172 + }, + { + "epoch": 2.83, + "learning_rate": 8.185196327610353e-07, + "logits/chosen": -2.1179511547088623, + "logits/rejected": -2.8319456577301025, + "logps/chosen": -401.5815734863281, + "logps/rejected": -538.1636962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.985569953918457, + "rewards/margins": 12.901707649230957, + "rewards/rejected": -20.887277603149414, + "step": 18173 + }, + { + "epoch": 2.83, + "learning_rate": 8.177861922298875e-07, + "logits/chosen": -2.8717474937438965, + "logits/rejected": -2.8350613117218018, + "logps/chosen": -135.31890869140625, + "logps/rejected": -182.00601196289062, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.887423515319824, + "rewards/margins": 8.701117515563965, + "rewards/rejected": -15.588541030883789, + "step": 18174 + }, + { + "epoch": 2.83, + "learning_rate": 8.170527516987395e-07, + "logits/chosen": -1.8783094882965088, + "logits/rejected": -2.995722770690918, + "logps/chosen": -469.33038330078125, + "logps/rejected": -711.7581787109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.907505989074707, + "rewards/margins": 11.778865814208984, + "rewards/rejected": -25.686370849609375, + "step": 18175 + }, + { + "epoch": 2.83, + "learning_rate": 8.163193111675917e-07, + "logits/chosen": -2.2032151222229004, + "logits/rejected": -2.8431859016418457, + "logps/chosen": -306.31744384765625, + "logps/rejected": -498.7556457519531, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.459957122802734, + "rewards/margins": 10.423629760742188, + "rewards/rejected": -21.883586883544922, + "step": 18176 + }, + { + "epoch": 2.83, + "learning_rate": 8.155858706364438e-07, + "logits/chosen": -1.28529691696167, + "logits/rejected": -2.4459190368652344, + "logps/chosen": -289.8272705078125, + "logps/rejected": -571.649658203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.041417121887207, + "rewards/margins": 10.800443649291992, + "rewards/rejected": -21.841861724853516, + "step": 18177 + }, + { + "epoch": 2.83, + "learning_rate": 8.14852430105296e-07, + "logits/chosen": -2.6645944118499756, + "logits/rejected": -2.278393268585205, + "logps/chosen": -258.74853515625, + "logps/rejected": -312.306640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.64576530456543, + "rewards/margins": 8.424714088439941, + "rewards/rejected": -17.070480346679688, + "step": 18178 + }, + { + "epoch": 2.83, + "learning_rate": 8.141189895741481e-07, + "logits/chosen": -1.0116487741470337, + "logits/rejected": -2.762354850769043, + "logps/chosen": -218.0853271484375, + "logps/rejected": -487.3599548339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7744550704956055, + "rewards/margins": 12.274431228637695, + "rewards/rejected": -20.048885345458984, + "step": 18179 + }, + { + "epoch": 2.83, + "learning_rate": 8.133855490430002e-07, + "logits/chosen": -2.732017755508423, + "logits/rejected": -2.327669858932495, + "logps/chosen": -235.8650665283203, + "logps/rejected": -174.07357788085938, + "loss": 0.1842, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.835420608520508, + "rewards/margins": 1.6821844577789307, + "rewards/rejected": -12.51760482788086, + "step": 18180 + }, + { + "epoch": 2.83, + "learning_rate": 8.126521085118523e-07, + "logits/chosen": -2.193537712097168, + "logits/rejected": -2.638338565826416, + "logps/chosen": -355.14337158203125, + "logps/rejected": -421.8041687011719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.874639987945557, + "rewards/margins": 9.99677562713623, + "rewards/rejected": -16.871416091918945, + "step": 18181 + }, + { + "epoch": 2.83, + "learning_rate": 8.119186679807044e-07, + "logits/chosen": -2.8728926181793213, + "logits/rejected": -3.017287015914917, + "logps/chosen": -275.596923828125, + "logps/rejected": -580.02490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.097219467163086, + "rewards/margins": 15.591224670410156, + "rewards/rejected": -23.688444137573242, + "step": 18182 + }, + { + "epoch": 2.83, + "learning_rate": 8.111852274495565e-07, + "logits/chosen": -2.7867112159729004, + "logits/rejected": -1.9731032848358154, + "logps/chosen": -416.303466796875, + "logps/rejected": -256.84002685546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.840282440185547, + "rewards/margins": 7.984218597412109, + "rewards/rejected": -15.824501037597656, + "step": 18183 + }, + { + "epoch": 2.83, + "learning_rate": 8.104517869184086e-07, + "logits/chosen": -2.1798174381256104, + "logits/rejected": -2.1113758087158203, + "logps/chosen": -304.54852294921875, + "logps/rejected": -418.3668518066406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.070538520812988, + "rewards/margins": 9.939804077148438, + "rewards/rejected": -23.01034164428711, + "step": 18184 + }, + { + "epoch": 2.83, + "learning_rate": 8.097183463872607e-07, + "logits/chosen": -2.3447837829589844, + "logits/rejected": -1.58125901222229, + "logps/chosen": -308.069091796875, + "logps/rejected": -393.6540832519531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.607251167297363, + "rewards/margins": 10.976008415222168, + "rewards/rejected": -19.58325958251953, + "step": 18185 + }, + { + "epoch": 2.83, + "learning_rate": 8.089849058561128e-07, + "logits/chosen": -2.686244010925293, + "logits/rejected": -2.224658250808716, + "logps/chosen": -292.09515380859375, + "logps/rejected": -321.6859130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.334493637084961, + "rewards/margins": 13.680383682250977, + "rewards/rejected": -21.014877319335938, + "step": 18186 + }, + { + "epoch": 2.83, + "learning_rate": 8.08251465324965e-07, + "logits/chosen": -2.5300047397613525, + "logits/rejected": -2.7640256881713867, + "logps/chosen": -262.7476806640625, + "logps/rejected": -380.2054443359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.946630477905273, + "rewards/margins": 9.938282012939453, + "rewards/rejected": -19.884912490844727, + "step": 18187 + }, + { + "epoch": 2.83, + "learning_rate": 8.075180247938171e-07, + "logits/chosen": -2.6152243614196777, + "logits/rejected": -1.5318794250488281, + "logps/chosen": -289.09771728515625, + "logps/rejected": -199.46800231933594, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.740617752075195, + "rewards/margins": 4.758483409881592, + "rewards/rejected": -17.499101638793945, + "step": 18188 + }, + { + "epoch": 2.83, + "learning_rate": 8.067845842626693e-07, + "logits/chosen": -1.7441574335098267, + "logits/rejected": -2.8310182094573975, + "logps/chosen": -190.49307250976562, + "logps/rejected": -522.6519775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3290300369262695, + "rewards/margins": 13.811845779418945, + "rewards/rejected": -20.14087677001953, + "step": 18189 + }, + { + "epoch": 2.83, + "learning_rate": 8.060511437315213e-07, + "logits/chosen": -2.612245559692383, + "logits/rejected": -2.076390504837036, + "logps/chosen": -184.66580200195312, + "logps/rejected": -338.996337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.690871238708496, + "rewards/margins": 12.260124206542969, + "rewards/rejected": -20.95099639892578, + "step": 18190 + }, + { + "epoch": 2.83, + "learning_rate": 8.053177032003735e-07, + "logits/chosen": -2.3740384578704834, + "logits/rejected": -2.842689037322998, + "logps/chosen": -101.11002349853516, + "logps/rejected": -360.78997802734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.877357482910156, + "rewards/margins": 12.799527168273926, + "rewards/rejected": -21.676883697509766, + "step": 18191 + }, + { + "epoch": 2.83, + "learning_rate": 8.045842626692256e-07, + "logits/chosen": -1.767822265625, + "logits/rejected": -2.6528918743133545, + "logps/chosen": -429.65863037109375, + "logps/rejected": -633.3363037109375, + "loss": 0.0182, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.951326370239258, + "rewards/margins": 8.76992130279541, + "rewards/rejected": -19.721248626708984, + "step": 18192 + }, + { + "epoch": 2.83, + "learning_rate": 8.038508221380778e-07, + "logits/chosen": -2.9412944316864014, + "logits/rejected": -2.3311285972595215, + "logps/chosen": -909.7703857421875, + "logps/rejected": -760.6278076171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.064835071563721, + "rewards/margins": 13.650190353393555, + "rewards/rejected": -20.71502685546875, + "step": 18193 + }, + { + "epoch": 2.83, + "learning_rate": 8.031173816069298e-07, + "logits/chosen": -1.941420078277588, + "logits/rejected": -1.4674007892608643, + "logps/chosen": -342.6724853515625, + "logps/rejected": -501.96337890625, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.153458595275879, + "rewards/margins": 7.005858421325684, + "rewards/rejected": -22.159317016601562, + "step": 18194 + }, + { + "epoch": 2.83, + "learning_rate": 8.023839410757819e-07, + "logits/chosen": -2.7673399448394775, + "logits/rejected": -2.8924691677093506, + "logps/chosen": -176.22059631347656, + "logps/rejected": -294.24560546875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.386848449707031, + "rewards/margins": 7.276975631713867, + "rewards/rejected": -16.6638240814209, + "step": 18195 + }, + { + "epoch": 2.83, + "learning_rate": 8.01650500544634e-07, + "logits/chosen": -1.434261679649353, + "logits/rejected": -2.603210926055908, + "logps/chosen": -273.8311767578125, + "logps/rejected": -465.0823974609375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.748992919921875, + "rewards/margins": 12.300675392150879, + "rewards/rejected": -17.049667358398438, + "step": 18196 + }, + { + "epoch": 2.83, + "learning_rate": 8.009170600134862e-07, + "logits/chosen": -1.9171624183654785, + "logits/rejected": -2.340648651123047, + "logps/chosen": -161.21905517578125, + "logps/rejected": -244.703369140625, + "loss": 0.0852, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.366497039794922, + "rewards/margins": 5.675261974334717, + "rewards/rejected": -17.041759490966797, + "step": 18197 + }, + { + "epoch": 2.83, + "learning_rate": 8.001836194823383e-07, + "logits/chosen": -0.9262368083000183, + "logits/rejected": -2.202160120010376, + "logps/chosen": -301.09326171875, + "logps/rejected": -560.9412231445312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.263405799865723, + "rewards/margins": 10.116933822631836, + "rewards/rejected": -18.380338668823242, + "step": 18198 + }, + { + "epoch": 2.83, + "learning_rate": 7.994501789511905e-07, + "logits/chosen": -2.949841022491455, + "logits/rejected": -2.055403232574463, + "logps/chosen": -594.7728271484375, + "logps/rejected": -355.8644104003906, + "loss": 0.0423, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.66748046875, + "rewards/margins": 7.67658805847168, + "rewards/rejected": -13.34406852722168, + "step": 18199 + }, + { + "epoch": 2.83, + "learning_rate": 7.987167384200425e-07, + "logits/chosen": -1.980381727218628, + "logits/rejected": -2.338465452194214, + "logps/chosen": -293.2919006347656, + "logps/rejected": -612.2520751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.753620147705078, + "rewards/margins": 13.05990219116211, + "rewards/rejected": -25.813522338867188, + "step": 18200 + }, + { + "epoch": 2.83, + "learning_rate": 7.979832978888947e-07, + "logits/chosen": -2.770745038986206, + "logits/rejected": -2.799984931945801, + "logps/chosen": -125.23426818847656, + "logps/rejected": -231.48367309570312, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.011405944824219, + "rewards/margins": 6.565244674682617, + "rewards/rejected": -15.576650619506836, + "step": 18201 + }, + { + "epoch": 2.83, + "learning_rate": 7.972498573577468e-07, + "logits/chosen": -1.9258328676223755, + "logits/rejected": -2.428298234939575, + "logps/chosen": -165.6337890625, + "logps/rejected": -363.59967041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.112519264221191, + "rewards/margins": 11.663151741027832, + "rewards/rejected": -22.775671005249023, + "step": 18202 + }, + { + "epoch": 2.83, + "learning_rate": 7.96516416826599e-07, + "logits/chosen": -1.740508794784546, + "logits/rejected": -2.615922689437866, + "logps/chosen": -343.15216064453125, + "logps/rejected": -470.17034912109375, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.088964462280273, + "rewards/margins": 6.618884086608887, + "rewards/rejected": -18.707849502563477, + "step": 18203 + }, + { + "epoch": 2.83, + "learning_rate": 7.957829762954511e-07, + "logits/chosen": -2.794677495956421, + "logits/rejected": -2.9349331855773926, + "logps/chosen": -141.7036590576172, + "logps/rejected": -256.33837890625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.961284637451172, + "rewards/margins": 7.909515857696533, + "rewards/rejected": -16.870800018310547, + "step": 18204 + }, + { + "epoch": 2.83, + "learning_rate": 7.950495357643032e-07, + "logits/chosen": -1.6959794759750366, + "logits/rejected": -2.6688098907470703, + "logps/chosen": -184.35012817382812, + "logps/rejected": -483.7995300292969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.592292308807373, + "rewards/margins": 12.315872192382812, + "rewards/rejected": -17.908164978027344, + "step": 18205 + }, + { + "epoch": 2.83, + "learning_rate": 7.943160952331552e-07, + "logits/chosen": -2.7309632301330566, + "logits/rejected": -2.027865409851074, + "logps/chosen": -259.34228515625, + "logps/rejected": -412.4110412597656, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.070433616638184, + "rewards/margins": 12.24664306640625, + "rewards/rejected": -21.31707763671875, + "step": 18206 + }, + { + "epoch": 2.83, + "learning_rate": 7.935826547020073e-07, + "logits/chosen": -2.437777519226074, + "logits/rejected": -2.155672550201416, + "logps/chosen": -413.3344421386719, + "logps/rejected": -364.0500793457031, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.394242286682129, + "rewards/margins": 10.070342063903809, + "rewards/rejected": -20.464584350585938, + "step": 18207 + }, + { + "epoch": 2.83, + "learning_rate": 7.928492141708595e-07, + "logits/chosen": -2.2712831497192383, + "logits/rejected": -2.696349859237671, + "logps/chosen": -319.8738098144531, + "logps/rejected": -417.9944763183594, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6922149658203125, + "rewards/margins": 6.207796573638916, + "rewards/rejected": -13.90001106262207, + "step": 18208 + }, + { + "epoch": 2.83, + "learning_rate": 7.921157736397115e-07, + "logits/chosen": -1.873128056526184, + "logits/rejected": -2.6292765140533447, + "logps/chosen": -268.9467468261719, + "logps/rejected": -483.76190185546875, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.831723690032959, + "rewards/margins": 11.226297378540039, + "rewards/rejected": -17.058021545410156, + "step": 18209 + }, + { + "epoch": 2.83, + "learning_rate": 7.913823331085637e-07, + "logits/chosen": -2.716891288757324, + "logits/rejected": -2.059664011001587, + "logps/chosen": -550.7839965820312, + "logps/rejected": -509.86651611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5982561111450195, + "rewards/margins": 16.206790924072266, + "rewards/rejected": -23.8050479888916, + "step": 18210 + }, + { + "epoch": 2.83, + "learning_rate": 7.906488925774158e-07, + "logits/chosen": -2.746655225753784, + "logits/rejected": -2.351223945617676, + "logps/chosen": -576.331298828125, + "logps/rejected": -589.01904296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.506416320800781, + "rewards/margins": 8.860974311828613, + "rewards/rejected": -22.367389678955078, + "step": 18211 + }, + { + "epoch": 2.83, + "learning_rate": 7.89915452046268e-07, + "logits/chosen": -2.636678457260132, + "logits/rejected": -2.84332537651062, + "logps/chosen": -173.9371795654297, + "logps/rejected": -317.626953125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.31958293914795, + "rewards/margins": 8.016504287719727, + "rewards/rejected": -18.336088180541992, + "step": 18212 + }, + { + "epoch": 2.83, + "learning_rate": 7.891820115151201e-07, + "logits/chosen": -1.8251897096633911, + "logits/rejected": -2.1121890544891357, + "logps/chosen": -180.07119750976562, + "logps/rejected": -371.3483581542969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.65916919708252, + "rewards/margins": 12.552517890930176, + "rewards/rejected": -24.211687088012695, + "step": 18213 + }, + { + "epoch": 2.83, + "learning_rate": 7.884485709839722e-07, + "logits/chosen": -2.0328004360198975, + "logits/rejected": -2.9530673027038574, + "logps/chosen": -204.68426513671875, + "logps/rejected": -308.90875244140625, + "loss": 0.0979, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.737861633300781, + "rewards/margins": 6.269072532653809, + "rewards/rejected": -17.006935119628906, + "step": 18214 + }, + { + "epoch": 2.83, + "learning_rate": 7.877151304528243e-07, + "logits/chosen": -1.9975863695144653, + "logits/rejected": -2.8433566093444824, + "logps/chosen": -348.7566223144531, + "logps/rejected": -533.57958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.23571491241455, + "rewards/margins": 12.882250785827637, + "rewards/rejected": -23.117965698242188, + "step": 18215 + }, + { + "epoch": 2.83, + "learning_rate": 7.869816899216765e-07, + "logits/chosen": -2.5248231887817383, + "logits/rejected": -2.094370126724243, + "logps/chosen": -382.130859375, + "logps/rejected": -454.1572570800781, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.071797370910645, + "rewards/margins": 8.6937255859375, + "rewards/rejected": -19.765522003173828, + "step": 18216 + }, + { + "epoch": 2.83, + "learning_rate": 7.862482493905286e-07, + "logits/chosen": -1.5145574808120728, + "logits/rejected": -2.6046035289764404, + "logps/chosen": -332.0835876464844, + "logps/rejected": -510.87835693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.10342788696289, + "rewards/margins": 10.788372039794922, + "rewards/rejected": -21.891799926757812, + "step": 18217 + }, + { + "epoch": 2.83, + "learning_rate": 7.855148088593808e-07, + "logits/chosen": -2.128115177154541, + "logits/rejected": -2.895225763320923, + "logps/chosen": -176.52305603027344, + "logps/rejected": -415.11236572265625, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.762250900268555, + "rewards/margins": 7.481947898864746, + "rewards/rejected": -18.244197845458984, + "step": 18218 + }, + { + "epoch": 2.83, + "learning_rate": 7.847813683282327e-07, + "logits/chosen": -1.032196283340454, + "logits/rejected": -1.7995258569717407, + "logps/chosen": -321.759521484375, + "logps/rejected": -606.226318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.857301712036133, + "rewards/margins": 16.27944564819336, + "rewards/rejected": -24.136749267578125, + "step": 18219 + }, + { + "epoch": 2.83, + "learning_rate": 7.840479277970849e-07, + "logits/chosen": -2.3120007514953613, + "logits/rejected": -2.5943450927734375, + "logps/chosen": -504.0448303222656, + "logps/rejected": -402.91693115234375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.291337013244629, + "rewards/margins": 13.143976211547852, + "rewards/rejected": -22.435314178466797, + "step": 18220 + }, + { + "epoch": 2.83, + "learning_rate": 7.83314487265937e-07, + "logits/chosen": -2.3110384941101074, + "logits/rejected": -2.6692440509796143, + "logps/chosen": -436.57049560546875, + "logps/rejected": -461.7853088378906, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.801811218261719, + "rewards/margins": 11.440251350402832, + "rewards/rejected": -18.242061614990234, + "step": 18221 + }, + { + "epoch": 2.83, + "learning_rate": 7.825810467347892e-07, + "logits/chosen": -2.7351672649383545, + "logits/rejected": -2.5892953872680664, + "logps/chosen": -563.5043334960938, + "logps/rejected": -795.5108642578125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.077978134155273, + "rewards/margins": 8.626853942871094, + "rewards/rejected": -21.704832077026367, + "step": 18222 + }, + { + "epoch": 2.83, + "learning_rate": 7.818476062036413e-07, + "logits/chosen": -2.9190456867218018, + "logits/rejected": -2.1187050342559814, + "logps/chosen": -266.30596923828125, + "logps/rejected": -439.88946533203125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4630303382873535, + "rewards/margins": 12.72536849975586, + "rewards/rejected": -20.188398361206055, + "step": 18223 + }, + { + "epoch": 2.83, + "learning_rate": 7.811141656724934e-07, + "logits/chosen": -0.9559951424598694, + "logits/rejected": -2.5263564586639404, + "logps/chosen": -251.1666717529297, + "logps/rejected": -559.8228759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.543716430664062, + "rewards/margins": 17.512920379638672, + "rewards/rejected": -26.056636810302734, + "step": 18224 + }, + { + "epoch": 2.83, + "learning_rate": 7.803807251413455e-07, + "logits/chosen": -1.566094994544983, + "logits/rejected": -2.649433135986328, + "logps/chosen": -293.4364929199219, + "logps/rejected": -521.1011962890625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.445262908935547, + "rewards/margins": 8.690754890441895, + "rewards/rejected": -22.136016845703125, + "step": 18225 + }, + { + "epoch": 2.83, + "learning_rate": 7.796472846101976e-07, + "logits/chosen": -2.7663254737854004, + "logits/rejected": -2.030332565307617, + "logps/chosen": -422.2593994140625, + "logps/rejected": -396.59130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.696761131286621, + "rewards/margins": 10.610260963439941, + "rewards/rejected": -18.307022094726562, + "step": 18226 + }, + { + "epoch": 2.83, + "learning_rate": 7.789138440790498e-07, + "logits/chosen": -2.752608060836792, + "logits/rejected": -2.9046101570129395, + "logps/chosen": -219.47500610351562, + "logps/rejected": -378.80694580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.595409393310547, + "rewards/margins": 11.057424545288086, + "rewards/rejected": -17.652833938598633, + "step": 18227 + }, + { + "epoch": 2.83, + "learning_rate": 7.781804035479019e-07, + "logits/chosen": -2.877396821975708, + "logits/rejected": -1.874722957611084, + "logps/chosen": -221.46546936035156, + "logps/rejected": -331.5403747558594, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.760104179382324, + "rewards/margins": 12.496639251708984, + "rewards/rejected": -18.256742477416992, + "step": 18228 + }, + { + "epoch": 2.83, + "learning_rate": 7.77446963016754e-07, + "logits/chosen": -2.938382387161255, + "logits/rejected": -2.6713056564331055, + "logps/chosen": -193.4755096435547, + "logps/rejected": -210.54246520996094, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.474334716796875, + "rewards/margins": 5.541848182678223, + "rewards/rejected": -14.016181945800781, + "step": 18229 + }, + { + "epoch": 2.84, + "learning_rate": 7.76713522485606e-07, + "logits/chosen": -2.578395366668701, + "logits/rejected": -1.7269775867462158, + "logps/chosen": -221.6202850341797, + "logps/rejected": -222.484619140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.026893615722656, + "rewards/margins": 8.811583518981934, + "rewards/rejected": -18.838478088378906, + "step": 18230 + }, + { + "epoch": 2.84, + "learning_rate": 7.759800819544582e-07, + "logits/chosen": -2.7825212478637695, + "logits/rejected": -2.104999303817749, + "logps/chosen": -465.1231994628906, + "logps/rejected": -407.1015319824219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.564715385437012, + "rewards/margins": 8.819759368896484, + "rewards/rejected": -18.384475708007812, + "step": 18231 + }, + { + "epoch": 2.84, + "learning_rate": 7.752466414233103e-07, + "logits/chosen": -2.7284135818481445, + "logits/rejected": -2.1586813926696777, + "logps/chosen": -177.95281982421875, + "logps/rejected": -266.60943603515625, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.25956916809082, + "rewards/margins": 5.071537017822266, + "rewards/rejected": -15.331106185913086, + "step": 18232 + }, + { + "epoch": 2.84, + "learning_rate": 7.745132008921625e-07, + "logits/chosen": -2.493441343307495, + "logits/rejected": -2.888195276260376, + "logps/chosen": -132.46087646484375, + "logps/rejected": -319.30841064453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.273235321044922, + "rewards/margins": 10.296209335327148, + "rewards/rejected": -18.56944465637207, + "step": 18233 + }, + { + "epoch": 2.84, + "learning_rate": 7.737797603610145e-07, + "logits/chosen": -1.7567579746246338, + "logits/rejected": -2.6289820671081543, + "logps/chosen": -132.21591186523438, + "logps/rejected": -432.5871276855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.309812545776367, + "rewards/margins": 12.851784706115723, + "rewards/rejected": -22.161598205566406, + "step": 18234 + }, + { + "epoch": 2.84, + "learning_rate": 7.730463198298667e-07, + "logits/chosen": -2.2762322425842285, + "logits/rejected": -2.272535800933838, + "logps/chosen": -432.4383544921875, + "logps/rejected": -462.2348937988281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.006992340087891, + "rewards/margins": 10.638816833496094, + "rewards/rejected": -17.645809173583984, + "step": 18235 + }, + { + "epoch": 2.84, + "learning_rate": 7.723128792987188e-07, + "logits/chosen": -1.7550840377807617, + "logits/rejected": -2.685534715652466, + "logps/chosen": -232.1135711669922, + "logps/rejected": -475.21295166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.60456657409668, + "rewards/margins": 11.877120971679688, + "rewards/rejected": -20.481687545776367, + "step": 18236 + }, + { + "epoch": 2.84, + "learning_rate": 7.71579438767571e-07, + "logits/chosen": -2.1551194190979004, + "logits/rejected": -2.771658420562744, + "logps/chosen": -128.01820373535156, + "logps/rejected": -378.4214172363281, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.04281997680664, + "rewards/margins": 9.470010757446289, + "rewards/rejected": -20.51283073425293, + "step": 18237 + }, + { + "epoch": 2.84, + "learning_rate": 7.708459982364231e-07, + "logits/chosen": -2.631194829940796, + "logits/rejected": -2.7957518100738525, + "logps/chosen": -331.78204345703125, + "logps/rejected": -410.19927978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.560896873474121, + "rewards/margins": 16.252208709716797, + "rewards/rejected": -24.813106536865234, + "step": 18238 + }, + { + "epoch": 2.84, + "learning_rate": 7.701125577052752e-07, + "logits/chosen": -2.7005064487457275, + "logits/rejected": -2.7563512325286865, + "logps/chosen": -152.0203857421875, + "logps/rejected": -329.63665771484375, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.966951370239258, + "rewards/margins": 5.538695335388184, + "rewards/rejected": -15.505647659301758, + "step": 18239 + }, + { + "epoch": 2.84, + "learning_rate": 7.693791171741273e-07, + "logits/chosen": -2.7397665977478027, + "logits/rejected": -2.860748529434204, + "logps/chosen": -257.2625732421875, + "logps/rejected": -278.24505615234375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.099227428436279, + "rewards/margins": 8.13552474975586, + "rewards/rejected": -15.234752655029297, + "step": 18240 + }, + { + "epoch": 2.84, + "learning_rate": 7.686456766429795e-07, + "logits/chosen": -2.6758410930633545, + "logits/rejected": -1.42287278175354, + "logps/chosen": -569.2280883789062, + "logps/rejected": -406.13055419921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.278908729553223, + "rewards/margins": 10.36061954498291, + "rewards/rejected": -21.639528274536133, + "step": 18241 + }, + { + "epoch": 2.84, + "learning_rate": 7.679122361118315e-07, + "logits/chosen": -1.100499153137207, + "logits/rejected": -2.528566598892212, + "logps/chosen": -129.75796508789062, + "logps/rejected": -497.0332946777344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.55803918838501, + "rewards/margins": 11.648714065551758, + "rewards/rejected": -19.20675277709961, + "step": 18242 + }, + { + "epoch": 2.84, + "learning_rate": 7.671787955806837e-07, + "logits/chosen": -1.2233043909072876, + "logits/rejected": -2.436518907546997, + "logps/chosen": -184.14447021484375, + "logps/rejected": -444.53338623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.783453941345215, + "rewards/margins": 14.395642280578613, + "rewards/rejected": -23.179096221923828, + "step": 18243 + }, + { + "epoch": 2.84, + "learning_rate": 7.664453550495357e-07, + "logits/chosen": -2.5805859565734863, + "logits/rejected": -2.7162861824035645, + "logps/chosen": -170.53948974609375, + "logps/rejected": -349.99755859375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.854592800140381, + "rewards/margins": 11.574650764465332, + "rewards/rejected": -18.429243087768555, + "step": 18244 + }, + { + "epoch": 2.84, + "learning_rate": 7.657119145183879e-07, + "logits/chosen": -2.202915668487549, + "logits/rejected": -2.4410922527313232, + "logps/chosen": -224.73861694335938, + "logps/rejected": -321.21636962890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5965447425842285, + "rewards/margins": 8.627182006835938, + "rewards/rejected": -15.223727226257324, + "step": 18245 + }, + { + "epoch": 2.84, + "learning_rate": 7.6497847398724e-07, + "logits/chosen": -2.259614944458008, + "logits/rejected": -2.794332504272461, + "logps/chosen": -209.34600830078125, + "logps/rejected": -338.3017272949219, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.285340309143066, + "rewards/margins": 6.468033790588379, + "rewards/rejected": -18.753374099731445, + "step": 18246 + }, + { + "epoch": 2.84, + "learning_rate": 7.642450334560921e-07, + "logits/chosen": -2.9409375190734863, + "logits/rejected": -2.9247610569000244, + "logps/chosen": -272.51849365234375, + "logps/rejected": -387.08465576171875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.209573745727539, + "rewards/margins": 7.9808549880981445, + "rewards/rejected": -19.1904296875, + "step": 18247 + }, + { + "epoch": 2.84, + "learning_rate": 7.635115929249443e-07, + "logits/chosen": -1.5007596015930176, + "logits/rejected": -2.384964942932129, + "logps/chosen": -221.85874938964844, + "logps/rejected": -490.88311767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.26133918762207, + "rewards/margins": 12.346050262451172, + "rewards/rejected": -21.60738754272461, + "step": 18248 + }, + { + "epoch": 2.84, + "learning_rate": 7.627781523937963e-07, + "logits/chosen": -2.739269495010376, + "logits/rejected": -2.2457127571105957, + "logps/chosen": -425.5972900390625, + "logps/rejected": -518.7330322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.715550422668457, + "rewards/margins": 10.638487815856934, + "rewards/rejected": -17.35403823852539, + "step": 18249 + }, + { + "epoch": 2.84, + "learning_rate": 7.620447118626485e-07, + "logits/chosen": -1.776756763458252, + "logits/rejected": -2.3522865772247314, + "logps/chosen": -220.229248046875, + "logps/rejected": -269.534912109375, + "loss": 0.116, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.562208652496338, + "rewards/margins": 6.484054088592529, + "rewards/rejected": -14.046262741088867, + "step": 18250 + }, + { + "epoch": 2.84, + "learning_rate": 7.613112713315006e-07, + "logits/chosen": -2.2382829189300537, + "logits/rejected": -2.5757198333740234, + "logps/chosen": -220.1026153564453, + "logps/rejected": -327.7310791015625, + "loss": 0.3575, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.422603607177734, + "rewards/margins": 1.8214945793151855, + "rewards/rejected": -14.244098663330078, + "step": 18251 + }, + { + "epoch": 2.84, + "learning_rate": 7.605778308003528e-07, + "logits/chosen": -2.8062734603881836, + "logits/rejected": -1.316899061203003, + "logps/chosen": -547.8309326171875, + "logps/rejected": -463.54998779296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.484624862670898, + "rewards/margins": 11.169282913208008, + "rewards/rejected": -20.653907775878906, + "step": 18252 + }, + { + "epoch": 2.84, + "learning_rate": 7.598443902692049e-07, + "logits/chosen": -1.1424189805984497, + "logits/rejected": -2.5377252101898193, + "logps/chosen": -138.18478393554688, + "logps/rejected": -479.6999816894531, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.468976020812988, + "rewards/margins": 11.718513488769531, + "rewards/rejected": -19.187488555908203, + "step": 18253 + }, + { + "epoch": 2.84, + "learning_rate": 7.591109497380569e-07, + "logits/chosen": -2.249386787414551, + "logits/rejected": -2.6505846977233887, + "logps/chosen": -452.10595703125, + "logps/rejected": -469.0595703125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.518279075622559, + "rewards/margins": 10.272180557250977, + "rewards/rejected": -19.79045867919922, + "step": 18254 + }, + { + "epoch": 2.84, + "learning_rate": 7.58377509206909e-07, + "logits/chosen": -2.7397382259368896, + "logits/rejected": -2.489349603652954, + "logps/chosen": -262.84490966796875, + "logps/rejected": -393.04986572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.544049263000488, + "rewards/margins": 13.340721130371094, + "rewards/rejected": -22.884769439697266, + "step": 18255 + }, + { + "epoch": 2.84, + "learning_rate": 7.576440686757612e-07, + "logits/chosen": -1.9945050477981567, + "logits/rejected": -2.5001254081726074, + "logps/chosen": -255.57049560546875, + "logps/rejected": -355.4593505859375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.282886505126953, + "rewards/margins": 8.07081413269043, + "rewards/rejected": -21.353700637817383, + "step": 18256 + }, + { + "epoch": 2.84, + "learning_rate": 7.569106281446133e-07, + "logits/chosen": -2.9607343673706055, + "logits/rejected": -2.7274587154388428, + "logps/chosen": -186.66796875, + "logps/rejected": -209.2783966064453, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.708484649658203, + "rewards/margins": 7.259756565093994, + "rewards/rejected": -16.968242645263672, + "step": 18257 + }, + { + "epoch": 2.84, + "learning_rate": 7.561771876134655e-07, + "logits/chosen": -2.1814956665039062, + "logits/rejected": -2.5757062435150146, + "logps/chosen": -215.51272583007812, + "logps/rejected": -522.3877563476562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.074146270751953, + "rewards/margins": 14.612302780151367, + "rewards/rejected": -24.68644905090332, + "step": 18258 + }, + { + "epoch": 2.84, + "learning_rate": 7.554437470823175e-07, + "logits/chosen": -2.739042043685913, + "logits/rejected": -2.8674590587615967, + "logps/chosen": -228.74432373046875, + "logps/rejected": -361.979736328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.600664138793945, + "rewards/margins": 8.303763389587402, + "rewards/rejected": -16.90442657470703, + "step": 18259 + }, + { + "epoch": 2.84, + "learning_rate": 7.547103065511697e-07, + "logits/chosen": -2.4800143241882324, + "logits/rejected": -2.8085408210754395, + "logps/chosen": -175.81948852539062, + "logps/rejected": -407.85211181640625, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.935686111450195, + "rewards/margins": 10.055681228637695, + "rewards/rejected": -20.99136734008789, + "step": 18260 + }, + { + "epoch": 2.84, + "learning_rate": 7.539768660200218e-07, + "logits/chosen": -2.624873399734497, + "logits/rejected": -2.01769757270813, + "logps/chosen": -375.106201171875, + "logps/rejected": -382.69403076171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.723012924194336, + "rewards/margins": 10.292889595031738, + "rewards/rejected": -20.01590347290039, + "step": 18261 + }, + { + "epoch": 2.84, + "learning_rate": 7.53243425488874e-07, + "logits/chosen": -2.78735089302063, + "logits/rejected": -1.193108081817627, + "logps/chosen": -425.0170593261719, + "logps/rejected": -236.42999267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.879290580749512, + "rewards/margins": 13.443805694580078, + "rewards/rejected": -18.323097229003906, + "step": 18262 + }, + { + "epoch": 2.84, + "learning_rate": 7.525099849577261e-07, + "logits/chosen": -2.4186666011810303, + "logits/rejected": -2.5558323860168457, + "logps/chosen": -343.1425476074219, + "logps/rejected": -312.28314208984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.965702056884766, + "rewards/margins": 8.838674545288086, + "rewards/rejected": -18.80437469482422, + "step": 18263 + }, + { + "epoch": 2.84, + "learning_rate": 7.517765444265782e-07, + "logits/chosen": -2.3945062160491943, + "logits/rejected": -2.0944111347198486, + "logps/chosen": -254.80349731445312, + "logps/rejected": -315.00640869140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.198188781738281, + "rewards/margins": 8.634017944335938, + "rewards/rejected": -18.83220672607422, + "step": 18264 + }, + { + "epoch": 2.84, + "learning_rate": 7.510431038954303e-07, + "logits/chosen": -1.8814802169799805, + "logits/rejected": -2.7573704719543457, + "logps/chosen": -225.29791259765625, + "logps/rejected": -504.31353759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.484598159790039, + "rewards/margins": 16.735794067382812, + "rewards/rejected": -27.22039222717285, + "step": 18265 + }, + { + "epoch": 2.84, + "learning_rate": 7.503096633642824e-07, + "logits/chosen": -2.8978347778320312, + "logits/rejected": -2.772465944290161, + "logps/chosen": -203.23696899414062, + "logps/rejected": -246.12289428710938, + "loss": 0.1402, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6332597732543945, + "rewards/margins": 4.127910137176514, + "rewards/rejected": -11.76116943359375, + "step": 18266 + }, + { + "epoch": 2.84, + "learning_rate": 7.495762228331345e-07, + "logits/chosen": -2.780113935470581, + "logits/rejected": -2.747032880783081, + "logps/chosen": -167.9296112060547, + "logps/rejected": -376.52862548828125, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.264676094055176, + "rewards/margins": 6.469010353088379, + "rewards/rejected": -16.733686447143555, + "step": 18267 + }, + { + "epoch": 2.84, + "learning_rate": 7.488427823019866e-07, + "logits/chosen": -1.7540092468261719, + "logits/rejected": -2.8138020038604736, + "logps/chosen": -303.78216552734375, + "logps/rejected": -478.87738037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5108208656311035, + "rewards/margins": 13.008966445922852, + "rewards/rejected": -20.519786834716797, + "step": 18268 + }, + { + "epoch": 2.84, + "learning_rate": 7.481093417708387e-07, + "logits/chosen": -1.9019365310668945, + "logits/rejected": -2.785907506942749, + "logps/chosen": -332.52996826171875, + "logps/rejected": -739.7435913085938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.337958335876465, + "rewards/margins": 12.645078659057617, + "rewards/rejected": -26.983036041259766, + "step": 18269 + }, + { + "epoch": 2.84, + "learning_rate": 7.473759012396908e-07, + "logits/chosen": -2.678696632385254, + "logits/rejected": -2.790470600128174, + "logps/chosen": -183.36013793945312, + "logps/rejected": -450.9517822265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.080260276794434, + "rewards/margins": 10.852323532104492, + "rewards/rejected": -23.93258285522461, + "step": 18270 + }, + { + "epoch": 2.84, + "learning_rate": 7.46642460708543e-07, + "logits/chosen": -2.741753339767456, + "logits/rejected": -1.4877294301986694, + "logps/chosen": -425.5553283691406, + "logps/rejected": -412.4188232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.73464584350586, + "rewards/margins": 13.580450057983398, + "rewards/rejected": -24.315095901489258, + "step": 18271 + }, + { + "epoch": 2.84, + "learning_rate": 7.459090201773951e-07, + "logits/chosen": -2.5870273113250732, + "logits/rejected": -2.516270875930786, + "logps/chosen": -358.8143310546875, + "logps/rejected": -428.2962341308594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.258018493652344, + "rewards/margins": 10.782280921936035, + "rewards/rejected": -20.040298461914062, + "step": 18272 + }, + { + "epoch": 2.84, + "learning_rate": 7.451755796462472e-07, + "logits/chosen": -2.2821602821350098, + "logits/rejected": -2.582596778869629, + "logps/chosen": -438.4753723144531, + "logps/rejected": -602.7159423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.053298950195312, + "rewards/margins": 14.214977264404297, + "rewards/rejected": -24.26827621459961, + "step": 18273 + }, + { + "epoch": 2.84, + "learning_rate": 7.444421391150993e-07, + "logits/chosen": -1.3230425119400024, + "logits/rejected": -2.303513765335083, + "logps/chosen": -196.81692504882812, + "logps/rejected": -483.082275390625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.70671272277832, + "rewards/margins": 10.366350173950195, + "rewards/rejected": -24.073062896728516, + "step": 18274 + }, + { + "epoch": 2.84, + "learning_rate": 7.437086985839515e-07, + "logits/chosen": -2.43811297416687, + "logits/rejected": -2.4182472229003906, + "logps/chosen": -268.2229919433594, + "logps/rejected": -407.6336669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.920182228088379, + "rewards/margins": 13.712459564208984, + "rewards/rejected": -23.632640838623047, + "step": 18275 + }, + { + "epoch": 2.84, + "learning_rate": 7.429752580528036e-07, + "logits/chosen": -2.851273775100708, + "logits/rejected": -2.667670488357544, + "logps/chosen": -300.08636474609375, + "logps/rejected": -581.8572387695312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.448091506958008, + "rewards/margins": 13.432144165039062, + "rewards/rejected": -22.880237579345703, + "step": 18276 + }, + { + "epoch": 2.84, + "learning_rate": 7.422418175216558e-07, + "logits/chosen": -2.4001660346984863, + "logits/rejected": -2.5576977729797363, + "logps/chosen": -313.8145751953125, + "logps/rejected": -418.64422607421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.812580108642578, + "rewards/margins": 10.28646469116211, + "rewards/rejected": -21.099044799804688, + "step": 18277 + }, + { + "epoch": 2.84, + "learning_rate": 7.415083769905077e-07, + "logits/chosen": -2.6430509090423584, + "logits/rejected": -1.3973207473754883, + "logps/chosen": -246.7922821044922, + "logps/rejected": -274.0301818847656, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.053067207336426, + "rewards/margins": 8.326884269714355, + "rewards/rejected": -16.37995147705078, + "step": 18278 + }, + { + "epoch": 2.84, + "learning_rate": 7.407749364593599e-07, + "logits/chosen": -3.0855016708374023, + "logits/rejected": -3.0268256664276123, + "logps/chosen": -473.1636962890625, + "logps/rejected": -609.8775024414062, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.351654052734375, + "rewards/margins": 7.734399795532227, + "rewards/rejected": -21.0860538482666, + "step": 18279 + }, + { + "epoch": 2.84, + "learning_rate": 7.40041495928212e-07, + "logits/chosen": -2.6000640392303467, + "logits/rejected": -2.6491427421569824, + "logps/chosen": -887.0894775390625, + "logps/rejected": -757.42041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.438522338867188, + "rewards/margins": 13.660587310791016, + "rewards/rejected": -25.099109649658203, + "step": 18280 + }, + { + "epoch": 2.84, + "learning_rate": 7.393080553970642e-07, + "logits/chosen": -1.5672820806503296, + "logits/rejected": -2.8510355949401855, + "logps/chosen": -209.32009887695312, + "logps/rejected": -465.6455078125, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.401424407958984, + "rewards/margins": 7.618321895599365, + "rewards/rejected": -19.019746780395508, + "step": 18281 + }, + { + "epoch": 2.84, + "learning_rate": 7.385746148659163e-07, + "logits/chosen": -2.794260263442993, + "logits/rejected": -3.0159082412719727, + "logps/chosen": -275.5809020996094, + "logps/rejected": -281.3458251953125, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.859979152679443, + "rewards/margins": 5.057766437530518, + "rewards/rejected": -12.917745590209961, + "step": 18282 + }, + { + "epoch": 2.84, + "learning_rate": 7.378411743347684e-07, + "logits/chosen": -1.183685541152954, + "logits/rejected": -2.537978410720825, + "logps/chosen": -178.50997924804688, + "logps/rejected": -589.9393310546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.607475280761719, + "rewards/margins": 10.54028606414795, + "rewards/rejected": -19.147762298583984, + "step": 18283 + }, + { + "epoch": 2.84, + "learning_rate": 7.371077338036205e-07, + "logits/chosen": -2.0831639766693115, + "logits/rejected": -2.4221973419189453, + "logps/chosen": -187.0402069091797, + "logps/rejected": -433.9814758300781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.91115951538086, + "rewards/margins": 15.20395278930664, + "rewards/rejected": -24.1151123046875, + "step": 18284 + }, + { + "epoch": 2.84, + "learning_rate": 7.363742932724727e-07, + "logits/chosen": -2.1340718269348145, + "logits/rejected": -2.6216440200805664, + "logps/chosen": -327.4653015136719, + "logps/rejected": -529.6435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.872079849243164, + "rewards/margins": 13.58495044708252, + "rewards/rejected": -23.45703125, + "step": 18285 + }, + { + "epoch": 2.84, + "learning_rate": 7.356408527413248e-07, + "logits/chosen": -2.8612630367279053, + "logits/rejected": -2.9805917739868164, + "logps/chosen": -148.61251831054688, + "logps/rejected": -410.1983642578125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.690420150756836, + "rewards/margins": 7.78870964050293, + "rewards/rejected": -18.479129791259766, + "step": 18286 + }, + { + "epoch": 2.84, + "learning_rate": 7.349074122101769e-07, + "logits/chosen": -2.5424039363861084, + "logits/rejected": -2.925593614578247, + "logps/chosen": -1089.369384765625, + "logps/rejected": -901.7616577148438, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.936334609985352, + "rewards/margins": 9.297551155090332, + "rewards/rejected": -23.23388671875, + "step": 18287 + }, + { + "epoch": 2.84, + "learning_rate": 7.34173971679029e-07, + "logits/chosen": -2.764694929122925, + "logits/rejected": -3.0024518966674805, + "logps/chosen": -1094.42822265625, + "logps/rejected": -913.4710693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.355981826782227, + "rewards/margins": 15.476116180419922, + "rewards/rejected": -24.83209800720215, + "step": 18288 + }, + { + "epoch": 2.84, + "learning_rate": 7.33440531147881e-07, + "logits/chosen": -2.094308376312256, + "logits/rejected": -2.8413186073303223, + "logps/chosen": -143.03347778320312, + "logps/rejected": -441.73779296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.769850730895996, + "rewards/margins": 12.002960205078125, + "rewards/rejected": -18.772809982299805, + "step": 18289 + }, + { + "epoch": 2.84, + "learning_rate": 7.327070906167332e-07, + "logits/chosen": -2.823625326156616, + "logits/rejected": -2.2336881160736084, + "logps/chosen": -310.193603515625, + "logps/rejected": -418.81817626953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.950010299682617, + "rewards/margins": 9.853633880615234, + "rewards/rejected": -21.80364418029785, + "step": 18290 + }, + { + "epoch": 2.84, + "learning_rate": 7.319736500855853e-07, + "logits/chosen": -2.6032559871673584, + "logits/rejected": -2.6356430053710938, + "logps/chosen": -468.65570068359375, + "logps/rejected": -444.276611328125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.26639175415039, + "rewards/margins": 7.376996040344238, + "rewards/rejected": -18.643386840820312, + "step": 18291 + }, + { + "epoch": 2.84, + "learning_rate": 7.312402095544375e-07, + "logits/chosen": -1.4172658920288086, + "logits/rejected": -2.5646557807922363, + "logps/chosen": -134.04315185546875, + "logps/rejected": -357.068115234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.678874969482422, + "rewards/margins": 9.187946319580078, + "rewards/rejected": -17.8668212890625, + "step": 18292 + }, + { + "epoch": 2.84, + "learning_rate": 7.305067690232895e-07, + "logits/chosen": -2.0271828174591064, + "logits/rejected": -2.8605926036834717, + "logps/chosen": -234.77774047851562, + "logps/rejected": -306.60211181640625, + "loss": 0.1169, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.286456108093262, + "rewards/margins": 8.690442085266113, + "rewards/rejected": -16.976898193359375, + "step": 18293 + }, + { + "epoch": 2.85, + "learning_rate": 7.297733284921417e-07, + "logits/chosen": -2.7772722244262695, + "logits/rejected": -2.972520589828491, + "logps/chosen": -400.59197998046875, + "logps/rejected": -449.19586181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.196704387664795, + "rewards/margins": 11.44305419921875, + "rewards/rejected": -18.639759063720703, + "step": 18294 + }, + { + "epoch": 2.85, + "learning_rate": 7.290398879609938e-07, + "logits/chosen": -1.4780428409576416, + "logits/rejected": -2.369225025177002, + "logps/chosen": -158.51837158203125, + "logps/rejected": -385.27630615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.587015151977539, + "rewards/margins": 13.90787124633789, + "rewards/rejected": -24.494888305664062, + "step": 18295 + }, + { + "epoch": 2.85, + "learning_rate": 7.28306447429846e-07, + "logits/chosen": -2.6964738368988037, + "logits/rejected": -2.445939779281616, + "logps/chosen": -387.0926513671875, + "logps/rejected": -391.7451477050781, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.453968048095703, + "rewards/margins": 5.648473262786865, + "rewards/rejected": -20.102441787719727, + "step": 18296 + }, + { + "epoch": 2.85, + "learning_rate": 7.275730068986981e-07, + "logits/chosen": -2.741210699081421, + "logits/rejected": -1.5731558799743652, + "logps/chosen": -347.8261413574219, + "logps/rejected": -452.3505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3394975662231445, + "rewards/margins": 15.734622955322266, + "rewards/rejected": -20.074119567871094, + "step": 18297 + }, + { + "epoch": 2.85, + "learning_rate": 7.268395663675502e-07, + "logits/chosen": -2.383517026901245, + "logits/rejected": -1.2003368139266968, + "logps/chosen": -320.2472229003906, + "logps/rejected": -268.82098388671875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.035417556762695, + "rewards/margins": 9.16550064086914, + "rewards/rejected": -14.200918197631836, + "step": 18298 + }, + { + "epoch": 2.85, + "learning_rate": 7.261061258364023e-07, + "logits/chosen": -2.6059839725494385, + "logits/rejected": -3.0093915462493896, + "logps/chosen": -170.6366424560547, + "logps/rejected": -344.254638671875, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.14916706085205, + "rewards/margins": 4.780034065246582, + "rewards/rejected": -15.929201126098633, + "step": 18299 + }, + { + "epoch": 2.85, + "learning_rate": 7.253726853052545e-07, + "logits/chosen": -2.439182996749878, + "logits/rejected": -2.670811176300049, + "logps/chosen": -349.28350830078125, + "logps/rejected": -433.83367919921875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.698058128356934, + "rewards/margins": 9.235418319702148, + "rewards/rejected": -18.933475494384766, + "step": 18300 + }, + { + "epoch": 2.85, + "learning_rate": 7.246392447741066e-07, + "logits/chosen": -2.9052810668945312, + "logits/rejected": -2.463564395904541, + "logps/chosen": -272.05474853515625, + "logps/rejected": -361.0093688964844, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.860977172851562, + "rewards/margins": 6.566564083099365, + "rewards/rejected": -17.427541732788086, + "step": 18301 + }, + { + "epoch": 2.85, + "learning_rate": 7.239058042429587e-07, + "logits/chosen": -2.657550096511841, + "logits/rejected": -2.905512571334839, + "logps/chosen": -555.174072265625, + "logps/rejected": -507.205322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.466827392578125, + "rewards/margins": 16.50048828125, + "rewards/rejected": -20.967315673828125, + "step": 18302 + }, + { + "epoch": 2.85, + "learning_rate": 7.231723637118107e-07, + "logits/chosen": -2.5329275131225586, + "logits/rejected": -2.2390079498291016, + "logps/chosen": -353.31451416015625, + "logps/rejected": -395.2514343261719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.700777053833008, + "rewards/margins": 11.340658187866211, + "rewards/rejected": -18.04143524169922, + "step": 18303 + }, + { + "epoch": 2.85, + "learning_rate": 7.224389231806629e-07, + "logits/chosen": -1.3344656229019165, + "logits/rejected": -2.5618667602539062, + "logps/chosen": -183.24810791015625, + "logps/rejected": -643.7970581054688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.266776084899902, + "rewards/margins": 15.060251235961914, + "rewards/rejected": -23.3270263671875, + "step": 18304 + }, + { + "epoch": 2.85, + "learning_rate": 7.21705482649515e-07, + "logits/chosen": -2.758617401123047, + "logits/rejected": -2.2736895084381104, + "logps/chosen": -1015.0365600585938, + "logps/rejected": -621.4719848632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.165356636047363, + "rewards/margins": 13.56558609008789, + "rewards/rejected": -20.730941772460938, + "step": 18305 + }, + { + "epoch": 2.85, + "learning_rate": 7.209720421183672e-07, + "logits/chosen": -1.4378753900527954, + "logits/rejected": -2.6844897270202637, + "logps/chosen": -199.2173309326172, + "logps/rejected": -374.4830322265625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.872797966003418, + "rewards/margins": 6.773247718811035, + "rewards/rejected": -19.646045684814453, + "step": 18306 + }, + { + "epoch": 2.85, + "learning_rate": 7.202386015872193e-07, + "logits/chosen": -2.440047025680542, + "logits/rejected": -2.9522955417633057, + "logps/chosen": -243.25067138671875, + "logps/rejected": -416.6766357421875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.755488395690918, + "rewards/margins": 11.507421493530273, + "rewards/rejected": -23.262908935546875, + "step": 18307 + }, + { + "epoch": 2.85, + "learning_rate": 7.195051610560714e-07, + "logits/chosen": -1.530803918838501, + "logits/rejected": -2.4043819904327393, + "logps/chosen": -232.39328002929688, + "logps/rejected": -380.2363586425781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.802140235900879, + "rewards/margins": 8.372661590576172, + "rewards/rejected": -18.174800872802734, + "step": 18308 + }, + { + "epoch": 2.85, + "learning_rate": 7.187717205249235e-07, + "logits/chosen": -2.8678150177001953, + "logits/rejected": -2.8169703483581543, + "logps/chosen": -321.8664855957031, + "logps/rejected": -356.180908203125, + "loss": 0.1429, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.064002990722656, + "rewards/margins": 4.90440559387207, + "rewards/rejected": -16.968408584594727, + "step": 18309 + }, + { + "epoch": 2.85, + "learning_rate": 7.180382799937756e-07, + "logits/chosen": -2.7413220405578613, + "logits/rejected": -2.2376160621643066, + "logps/chosen": -335.135009765625, + "logps/rejected": -506.80767822265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.682962417602539, + "rewards/margins": 14.533842086791992, + "rewards/rejected": -25.21680450439453, + "step": 18310 + }, + { + "epoch": 2.85, + "learning_rate": 7.173048394626278e-07, + "logits/chosen": -2.219841480255127, + "logits/rejected": -2.34438419342041, + "logps/chosen": -227.03904724121094, + "logps/rejected": -486.0916442871094, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.783722877502441, + "rewards/margins": 9.629715919494629, + "rewards/rejected": -20.41343879699707, + "step": 18311 + }, + { + "epoch": 2.85, + "learning_rate": 7.165713989314799e-07, + "logits/chosen": -0.9051604866981506, + "logits/rejected": -2.158132314682007, + "logps/chosen": -169.16114807128906, + "logps/rejected": -431.33514404296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.945701599121094, + "rewards/margins": 13.534805297851562, + "rewards/rejected": -24.480506896972656, + "step": 18312 + }, + { + "epoch": 2.85, + "learning_rate": 7.15837958400332e-07, + "logits/chosen": -2.4230880737304688, + "logits/rejected": -2.3590145111083984, + "logps/chosen": -241.7373046875, + "logps/rejected": -352.36773681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.408746719360352, + "rewards/margins": 10.66366958618164, + "rewards/rejected": -18.072416305541992, + "step": 18313 + }, + { + "epoch": 2.85, + "learning_rate": 7.15104517869184e-07, + "logits/chosen": -2.4352314472198486, + "logits/rejected": -2.516707181930542, + "logps/chosen": -269.0940246582031, + "logps/rejected": -443.13116455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.956232070922852, + "rewards/margins": 13.196863174438477, + "rewards/rejected": -19.153095245361328, + "step": 18314 + }, + { + "epoch": 2.85, + "learning_rate": 7.143710773380362e-07, + "logits/chosen": -2.1746175289154053, + "logits/rejected": -2.58286452293396, + "logps/chosen": -180.8214874267578, + "logps/rejected": -518.272216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.000884056091309, + "rewards/margins": 16.575363159179688, + "rewards/rejected": -23.576248168945312, + "step": 18315 + }, + { + "epoch": 2.85, + "learning_rate": 7.136376368068883e-07, + "logits/chosen": -2.8078153133392334, + "logits/rejected": -2.0986249446868896, + "logps/chosen": -464.78704833984375, + "logps/rejected": -545.8294067382812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.348983764648438, + "rewards/margins": 12.404196739196777, + "rewards/rejected": -23.75318145751953, + "step": 18316 + }, + { + "epoch": 2.85, + "learning_rate": 7.129041962757405e-07, + "logits/chosen": -2.4035279750823975, + "logits/rejected": -1.0620598793029785, + "logps/chosen": -371.4375305175781, + "logps/rejected": -247.56118774414062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.704161167144775, + "rewards/margins": 9.93909740447998, + "rewards/rejected": -14.643259048461914, + "step": 18317 + }, + { + "epoch": 2.85, + "learning_rate": 7.121707557445925e-07, + "logits/chosen": -2.55802321434021, + "logits/rejected": -2.5017595291137695, + "logps/chosen": -646.541748046875, + "logps/rejected": -686.4085083007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.690115928649902, + "rewards/margins": 11.472293853759766, + "rewards/rejected": -20.162410736083984, + "step": 18318 + }, + { + "epoch": 2.85, + "learning_rate": 7.114373152134447e-07, + "logits/chosen": -2.283892869949341, + "logits/rejected": -2.381129741668701, + "logps/chosen": -275.0921936035156, + "logps/rejected": -456.794189453125, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.965845108032227, + "rewards/margins": 8.22723388671875, + "rewards/rejected": -17.193078994750977, + "step": 18319 + }, + { + "epoch": 2.85, + "learning_rate": 7.107038746822968e-07, + "logits/chosen": -2.908193588256836, + "logits/rejected": -2.2780137062072754, + "logps/chosen": -210.86898803710938, + "logps/rejected": -267.9801940917969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.494132041931152, + "rewards/margins": 14.069238662719727, + "rewards/rejected": -19.563369750976562, + "step": 18320 + }, + { + "epoch": 2.85, + "learning_rate": 7.09970434151149e-07, + "logits/chosen": -2.9526076316833496, + "logits/rejected": -3.0056238174438477, + "logps/chosen": -204.79420471191406, + "logps/rejected": -344.1752014160156, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.059264183044434, + "rewards/margins": 8.034177780151367, + "rewards/rejected": -16.093441009521484, + "step": 18321 + }, + { + "epoch": 2.85, + "learning_rate": 7.092369936200011e-07, + "logits/chosen": -2.239837169647217, + "logits/rejected": -2.3775126934051514, + "logps/chosen": -388.65106201171875, + "logps/rejected": -509.45440673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.029914855957031, + "rewards/margins": 11.864702224731445, + "rewards/rejected": -22.894615173339844, + "step": 18322 + }, + { + "epoch": 2.85, + "learning_rate": 7.085035530888532e-07, + "logits/chosen": -2.7177767753601074, + "logits/rejected": -2.8272759914398193, + "logps/chosen": -107.89219665527344, + "logps/rejected": -222.05532836914062, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.24709701538086, + "rewards/margins": 6.353391647338867, + "rewards/rejected": -15.600488662719727, + "step": 18323 + }, + { + "epoch": 2.85, + "learning_rate": 7.077701125577053e-07, + "logits/chosen": -2.4507410526275635, + "logits/rejected": -2.716808319091797, + "logps/chosen": -267.85772705078125, + "logps/rejected": -335.67059326171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.13970947265625, + "rewards/margins": 9.58141040802002, + "rewards/rejected": -17.721118927001953, + "step": 18324 + }, + { + "epoch": 2.85, + "learning_rate": 7.070366720265575e-07, + "logits/chosen": -2.116960287094116, + "logits/rejected": -1.8335822820663452, + "logps/chosen": -249.76768493652344, + "logps/rejected": -402.671142578125, + "loss": 0.1189, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.768669128417969, + "rewards/margins": 7.889591693878174, + "rewards/rejected": -17.658260345458984, + "step": 18325 + }, + { + "epoch": 2.85, + "learning_rate": 7.063032314954095e-07, + "logits/chosen": -2.363224744796753, + "logits/rejected": -2.7941272258758545, + "logps/chosen": -120.48645782470703, + "logps/rejected": -348.19390869140625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.942061424255371, + "rewards/margins": 8.522860527038574, + "rewards/rejected": -19.464921951293945, + "step": 18326 + }, + { + "epoch": 2.85, + "learning_rate": 7.055697909642616e-07, + "logits/chosen": -1.7992684841156006, + "logits/rejected": -2.883908987045288, + "logps/chosen": -157.56419372558594, + "logps/rejected": -518.38037109375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.579980850219727, + "rewards/margins": 8.447150230407715, + "rewards/rejected": -17.027132034301758, + "step": 18327 + }, + { + "epoch": 2.85, + "learning_rate": 7.048363504331137e-07, + "logits/chosen": -1.042222023010254, + "logits/rejected": -2.181499719619751, + "logps/chosen": -307.684814453125, + "logps/rejected": -720.1630859375, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.841676712036133, + "rewards/margins": 11.274272918701172, + "rewards/rejected": -23.115947723388672, + "step": 18328 + }, + { + "epoch": 2.85, + "learning_rate": 7.041029099019658e-07, + "logits/chosen": -2.8598811626434326, + "logits/rejected": -2.0722503662109375, + "logps/chosen": -582.196533203125, + "logps/rejected": -844.5020751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.463930130004883, + "rewards/margins": 16.514184951782227, + "rewards/rejected": -21.97811508178711, + "step": 18329 + }, + { + "epoch": 2.85, + "learning_rate": 7.03369469370818e-07, + "logits/chosen": -2.007960796356201, + "logits/rejected": -2.6397390365600586, + "logps/chosen": -172.23544311523438, + "logps/rejected": -288.9876708984375, + "loss": 0.1106, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.314212799072266, + "rewards/margins": 6.55605936050415, + "rewards/rejected": -13.870271682739258, + "step": 18330 + }, + { + "epoch": 2.85, + "learning_rate": 7.026360288396701e-07, + "logits/chosen": -2.590625762939453, + "logits/rejected": -2.934858560562134, + "logps/chosen": -145.83944702148438, + "logps/rejected": -589.9312744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.623516082763672, + "rewards/margins": 10.846514701843262, + "rewards/rejected": -22.47003173828125, + "step": 18331 + }, + { + "epoch": 2.85, + "learning_rate": 7.019025883085222e-07, + "logits/chosen": -2.742964506149292, + "logits/rejected": -2.8509600162506104, + "logps/chosen": -368.998046875, + "logps/rejected": -737.505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.368688583374023, + "rewards/margins": 11.042901992797852, + "rewards/rejected": -20.411590576171875, + "step": 18332 + }, + { + "epoch": 2.85, + "learning_rate": 7.011691477773743e-07, + "logits/chosen": -2.239032506942749, + "logits/rejected": -2.8081841468811035, + "logps/chosen": -118.33770751953125, + "logps/rejected": -449.218994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.45813512802124, + "rewards/margins": 12.167617797851562, + "rewards/rejected": -19.62575340270996, + "step": 18333 + }, + { + "epoch": 2.85, + "learning_rate": 7.004357072462265e-07, + "logits/chosen": -2.010042428970337, + "logits/rejected": -2.2825872898101807, + "logps/chosen": -229.93948364257812, + "logps/rejected": -376.336181640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.956182479858398, + "rewards/margins": 9.517122268676758, + "rewards/rejected": -21.473304748535156, + "step": 18334 + }, + { + "epoch": 2.85, + "learning_rate": 6.997022667150786e-07, + "logits/chosen": -2.40769362449646, + "logits/rejected": -2.9310953617095947, + "logps/chosen": -150.09774780273438, + "logps/rejected": -340.11944580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.985746383666992, + "rewards/margins": 12.094971656799316, + "rewards/rejected": -20.080718994140625, + "step": 18335 + }, + { + "epoch": 2.85, + "learning_rate": 6.989688261839308e-07, + "logits/chosen": -2.5326755046844482, + "logits/rejected": -1.8296828269958496, + "logps/chosen": -515.5020751953125, + "logps/rejected": -422.4580383300781, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.773056030273438, + "rewards/margins": 10.09796142578125, + "rewards/rejected": -20.871017456054688, + "step": 18336 + }, + { + "epoch": 2.85, + "learning_rate": 6.982353856527827e-07, + "logits/chosen": -2.935750961303711, + "logits/rejected": -2.2202067375183105, + "logps/chosen": -450.5318298339844, + "logps/rejected": -527.1783447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.141500473022461, + "rewards/margins": 9.274173736572266, + "rewards/rejected": -18.415674209594727, + "step": 18337 + }, + { + "epoch": 2.85, + "learning_rate": 6.975019451216349e-07, + "logits/chosen": -1.2606781721115112, + "logits/rejected": -2.6272926330566406, + "logps/chosen": -375.23895263671875, + "logps/rejected": -582.6043701171875, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.36630630493164, + "rewards/margins": 6.377329349517822, + "rewards/rejected": -19.743635177612305, + "step": 18338 + }, + { + "epoch": 2.85, + "learning_rate": 6.96768504590487e-07, + "logits/chosen": -2.910428524017334, + "logits/rejected": -2.2043697834014893, + "logps/chosen": -332.47320556640625, + "logps/rejected": -295.6823425292969, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.334877967834473, + "rewards/margins": 8.810643196105957, + "rewards/rejected": -16.14552116394043, + "step": 18339 + }, + { + "epoch": 2.85, + "learning_rate": 6.960350640593392e-07, + "logits/chosen": -0.6960800290107727, + "logits/rejected": -2.477477550506592, + "logps/chosen": -139.7519989013672, + "logps/rejected": -553.478759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.67855453491211, + "rewards/margins": 15.83660888671875, + "rewards/rejected": -26.51516342163086, + "step": 18340 + }, + { + "epoch": 2.85, + "learning_rate": 6.953016235281913e-07, + "logits/chosen": -2.8557004928588867, + "logits/rejected": -1.7916722297668457, + "logps/chosen": -587.5418090820312, + "logps/rejected": -629.56005859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.425697326660156, + "rewards/margins": 8.82280158996582, + "rewards/rejected": -20.248498916625977, + "step": 18341 + }, + { + "epoch": 2.85, + "learning_rate": 6.945681829970434e-07, + "logits/chosen": -1.5931081771850586, + "logits/rejected": -1.8347222805023193, + "logps/chosen": -276.3471984863281, + "logps/rejected": -543.7055053710938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.383413791656494, + "rewards/margins": 16.60125160217285, + "rewards/rejected": -21.984664916992188, + "step": 18342 + }, + { + "epoch": 2.85, + "learning_rate": 6.938347424658955e-07, + "logits/chosen": -1.367130994796753, + "logits/rejected": -2.5283703804016113, + "logps/chosen": -141.97286987304688, + "logps/rejected": -337.60455322265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.882896423339844, + "rewards/margins": 9.941690444946289, + "rewards/rejected": -19.8245849609375, + "step": 18343 + }, + { + "epoch": 2.85, + "learning_rate": 6.931013019347477e-07, + "logits/chosen": -2.5104899406433105, + "logits/rejected": -2.6129207611083984, + "logps/chosen": -161.94424438476562, + "logps/rejected": -331.5262451171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9199395179748535, + "rewards/margins": 9.918769836425781, + "rewards/rejected": -15.838709831237793, + "step": 18344 + }, + { + "epoch": 2.85, + "learning_rate": 6.923678614035998e-07, + "logits/chosen": -1.185105562210083, + "logits/rejected": -2.178319215774536, + "logps/chosen": -158.39016723632812, + "logps/rejected": -338.6902770996094, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.0946626663208, + "rewards/margins": 8.302974700927734, + "rewards/rejected": -19.39763641357422, + "step": 18345 + }, + { + "epoch": 2.85, + "learning_rate": 6.91634420872452e-07, + "logits/chosen": -2.4890260696411133, + "logits/rejected": -2.369823932647705, + "logps/chosen": -197.20745849609375, + "logps/rejected": -465.6088562011719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.351667404174805, + "rewards/margins": 11.893287658691406, + "rewards/rejected": -21.24495506286621, + "step": 18346 + }, + { + "epoch": 2.85, + "learning_rate": 6.90900980341304e-07, + "logits/chosen": -2.584232807159424, + "logits/rejected": -2.7748281955718994, + "logps/chosen": -359.057861328125, + "logps/rejected": -414.4082946777344, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.229114532470703, + "rewards/margins": 9.891366958618164, + "rewards/rejected": -18.120481491088867, + "step": 18347 + }, + { + "epoch": 2.85, + "learning_rate": 6.901675398101562e-07, + "logits/chosen": -2.858768939971924, + "logits/rejected": -2.2546262741088867, + "logps/chosen": -534.8785400390625, + "logps/rejected": -544.5633544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.276643753051758, + "rewards/margins": 12.294958114624023, + "rewards/rejected": -22.57160186767578, + "step": 18348 + }, + { + "epoch": 2.85, + "learning_rate": 6.894340992790083e-07, + "logits/chosen": -2.4140195846557617, + "logits/rejected": -2.8691182136535645, + "logps/chosen": -161.69615173339844, + "logps/rejected": -380.54779052734375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.05873966217041, + "rewards/margins": 9.982508659362793, + "rewards/rejected": -19.041248321533203, + "step": 18349 + }, + { + "epoch": 2.85, + "learning_rate": 6.887006587478603e-07, + "logits/chosen": -2.852464437484741, + "logits/rejected": -2.8025825023651123, + "logps/chosen": -191.108154296875, + "logps/rejected": -489.9812316894531, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.999826431274414, + "rewards/margins": 4.902547836303711, + "rewards/rejected": -16.902374267578125, + "step": 18350 + }, + { + "epoch": 2.85, + "learning_rate": 6.879672182167125e-07, + "logits/chosen": -2.3112435340881348, + "logits/rejected": -2.6450188159942627, + "logps/chosen": -338.83697509765625, + "logps/rejected": -343.41656494140625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.680534362792969, + "rewards/margins": 7.319766998291016, + "rewards/rejected": -16.000301361083984, + "step": 18351 + }, + { + "epoch": 2.85, + "learning_rate": 6.872337776855645e-07, + "logits/chosen": -2.0746705532073975, + "logits/rejected": -2.843315362930298, + "logps/chosen": -125.40652465820312, + "logps/rejected": -447.84173583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.648805618286133, + "rewards/margins": 10.053566932678223, + "rewards/rejected": -18.702373504638672, + "step": 18352 + }, + { + "epoch": 2.85, + "learning_rate": 6.865003371544167e-07, + "logits/chosen": -2.97247052192688, + "logits/rejected": -2.94632887840271, + "logps/chosen": -124.01830291748047, + "logps/rejected": -226.32516479492188, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.132780075073242, + "rewards/margins": 8.442508697509766, + "rewards/rejected": -14.575288772583008, + "step": 18353 + }, + { + "epoch": 2.85, + "learning_rate": 6.857668966232688e-07, + "logits/chosen": -2.6697261333465576, + "logits/rejected": -1.9463425874710083, + "logps/chosen": -274.7613220214844, + "logps/rejected": -287.82763671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.092789649963379, + "rewards/margins": 9.335769653320312, + "rewards/rejected": -18.428560256958008, + "step": 18354 + }, + { + "epoch": 2.85, + "learning_rate": 6.85033456092121e-07, + "logits/chosen": -2.3817038536071777, + "logits/rejected": -2.8699188232421875, + "logps/chosen": -270.1099548339844, + "logps/rejected": -476.45733642578125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.669677734375, + "rewards/margins": 8.538729667663574, + "rewards/rejected": -20.20840835571289, + "step": 18355 + }, + { + "epoch": 2.85, + "learning_rate": 6.843000155609731e-07, + "logits/chosen": -1.4014889001846313, + "logits/rejected": -2.126566171646118, + "logps/chosen": -541.9237670898438, + "logps/rejected": -695.066650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.645172119140625, + "rewards/margins": 13.472301483154297, + "rewards/rejected": -29.117473602294922, + "step": 18356 + }, + { + "epoch": 2.85, + "learning_rate": 6.835665750298252e-07, + "logits/chosen": -2.7107138633728027, + "logits/rejected": -1.593585729598999, + "logps/chosen": -668.4042358398438, + "logps/rejected": -514.3497314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.147843360900879, + "rewards/margins": 13.019213676452637, + "rewards/rejected": -26.167057037353516, + "step": 18357 + }, + { + "epoch": 2.86, + "learning_rate": 6.828331344986773e-07, + "logits/chosen": -2.7488300800323486, + "logits/rejected": -2.0203890800476074, + "logps/chosen": -655.521728515625, + "logps/rejected": -679.890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.308582305908203, + "rewards/margins": 10.746914863586426, + "rewards/rejected": -19.055496215820312, + "step": 18358 + }, + { + "epoch": 2.86, + "learning_rate": 6.820996939675295e-07, + "logits/chosen": -2.467205762863159, + "logits/rejected": -2.729308843612671, + "logps/chosen": -184.5882568359375, + "logps/rejected": -437.85137939453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.659696578979492, + "rewards/margins": 9.372392654418945, + "rewards/rejected": -21.032089233398438, + "step": 18359 + }, + { + "epoch": 2.86, + "learning_rate": 6.813662534363816e-07, + "logits/chosen": -2.746676206588745, + "logits/rejected": -2.663780927658081, + "logps/chosen": -456.7274169921875, + "logps/rejected": -664.2403564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.04095458984375, + "rewards/margins": 17.377656936645508, + "rewards/rejected": -25.41861343383789, + "step": 18360 + }, + { + "epoch": 2.86, + "learning_rate": 6.806328129052338e-07, + "logits/chosen": -1.9558738470077515, + "logits/rejected": -2.651341676712036, + "logps/chosen": -315.56903076171875, + "logps/rejected": -614.7108764648438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.949710845947266, + "rewards/margins": 10.693706512451172, + "rewards/rejected": -22.643417358398438, + "step": 18361 + }, + { + "epoch": 2.86, + "learning_rate": 6.798993723740857e-07, + "logits/chosen": -2.6036107540130615, + "logits/rejected": -2.8385961055755615, + "logps/chosen": -247.25241088867188, + "logps/rejected": -447.7569580078125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.397756576538086, + "rewards/margins": 8.310172080993652, + "rewards/rejected": -19.707927703857422, + "step": 18362 + }, + { + "epoch": 2.86, + "learning_rate": 6.791659318429379e-07, + "logits/chosen": -1.0391371250152588, + "logits/rejected": -2.7024543285369873, + "logps/chosen": -158.85137939453125, + "logps/rejected": -402.2315979003906, + "loss": 0.1563, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.608773231506348, + "rewards/margins": 6.9251790046691895, + "rewards/rejected": -16.533952713012695, + "step": 18363 + }, + { + "epoch": 2.86, + "learning_rate": 6.7843249131179e-07, + "logits/chosen": -1.9520608186721802, + "logits/rejected": -2.829146146774292, + "logps/chosen": -290.3861083984375, + "logps/rejected": -461.45062255859375, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.420193672180176, + "rewards/margins": 6.152624130249023, + "rewards/rejected": -19.572818756103516, + "step": 18364 + }, + { + "epoch": 2.86, + "learning_rate": 6.776990507806422e-07, + "logits/chosen": -2.7380383014678955, + "logits/rejected": -2.0560965538024902, + "logps/chosen": -616.9778442382812, + "logps/rejected": -563.0989990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.609407424926758, + "rewards/margins": 10.916826248168945, + "rewards/rejected": -19.526233673095703, + "step": 18365 + }, + { + "epoch": 2.86, + "learning_rate": 6.769656102494943e-07, + "logits/chosen": -3.0760717391967773, + "logits/rejected": -2.535130262374878, + "logps/chosen": -430.4600830078125, + "logps/rejected": -221.2702178955078, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.698117256164551, + "rewards/margins": 12.369014739990234, + "rewards/rejected": -16.06713104248047, + "step": 18366 + }, + { + "epoch": 2.86, + "learning_rate": 6.762321697183464e-07, + "logits/chosen": -1.8508734703063965, + "logits/rejected": -2.3576672077178955, + "logps/chosen": -588.4739379882812, + "logps/rejected": -680.4461669921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.120553970336914, + "rewards/margins": 9.302207946777344, + "rewards/rejected": -20.422761917114258, + "step": 18367 + }, + { + "epoch": 2.86, + "learning_rate": 6.754987291871985e-07, + "logits/chosen": -2.6543731689453125, + "logits/rejected": -2.136115550994873, + "logps/chosen": -279.3899841308594, + "logps/rejected": -475.0791015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.270369529724121, + "rewards/margins": 13.139081001281738, + "rewards/rejected": -22.40945053100586, + "step": 18368 + }, + { + "epoch": 2.86, + "learning_rate": 6.747652886560506e-07, + "logits/chosen": -2.0645482540130615, + "logits/rejected": -2.6380293369293213, + "logps/chosen": -451.0419006347656, + "logps/rejected": -861.558349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.098739624023438, + "rewards/margins": 11.451749801635742, + "rewards/rejected": -23.55048942565918, + "step": 18369 + }, + { + "epoch": 2.86, + "learning_rate": 6.740318481249028e-07, + "logits/chosen": -1.462693452835083, + "logits/rejected": -2.6265316009521484, + "logps/chosen": -304.66162109375, + "logps/rejected": -474.392333984375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.224384307861328, + "rewards/margins": 6.231190204620361, + "rewards/rejected": -19.45557403564453, + "step": 18370 + }, + { + "epoch": 2.86, + "learning_rate": 6.732984075937549e-07, + "logits/chosen": -2.3694114685058594, + "logits/rejected": -2.0880532264709473, + "logps/chosen": -197.04876708984375, + "logps/rejected": -222.2250213623047, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.93241024017334, + "rewards/margins": 6.797181129455566, + "rewards/rejected": -15.729591369628906, + "step": 18371 + }, + { + "epoch": 2.86, + "learning_rate": 6.72564967062607e-07, + "logits/chosen": -2.6902389526367188, + "logits/rejected": -1.2100218534469604, + "logps/chosen": -394.9998779296875, + "logps/rejected": -265.1148681640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.396087646484375, + "rewards/margins": 9.642601013183594, + "rewards/rejected": -14.038688659667969, + "step": 18372 + }, + { + "epoch": 2.86, + "learning_rate": 6.71831526531459e-07, + "logits/chosen": -2.749988079071045, + "logits/rejected": -2.4668140411376953, + "logps/chosen": -252.98316955566406, + "logps/rejected": -343.9190979003906, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.959516525268555, + "rewards/margins": 7.570889472961426, + "rewards/rejected": -21.530406951904297, + "step": 18373 + }, + { + "epoch": 2.86, + "learning_rate": 6.710980860003112e-07, + "logits/chosen": -2.3110263347625732, + "logits/rejected": -2.814328193664551, + "logps/chosen": -151.72994995117188, + "logps/rejected": -311.6043701171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.201105117797852, + "rewards/margins": 8.50283432006836, + "rewards/rejected": -14.703939437866211, + "step": 18374 + }, + { + "epoch": 2.86, + "learning_rate": 6.703646454691633e-07, + "logits/chosen": -2.3745126724243164, + "logits/rejected": -2.5975654125213623, + "logps/chosen": -407.5623779296875, + "logps/rejected": -523.0482177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.52919864654541, + "rewards/margins": 15.135114669799805, + "rewards/rejected": -23.66431427001953, + "step": 18375 + }, + { + "epoch": 2.86, + "learning_rate": 6.696312049380155e-07, + "logits/chosen": -1.6666783094406128, + "logits/rejected": -2.7993292808532715, + "logps/chosen": -217.11012268066406, + "logps/rejected": -671.3316650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.397123336791992, + "rewards/margins": 13.585342407226562, + "rewards/rejected": -22.982465744018555, + "step": 18376 + }, + { + "epoch": 2.86, + "learning_rate": 6.688977644068675e-07, + "logits/chosen": -1.9137749671936035, + "logits/rejected": -2.858271598815918, + "logps/chosen": -436.12506103515625, + "logps/rejected": -653.62451171875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.18020248413086, + "rewards/margins": 7.1022539138793945, + "rewards/rejected": -18.282455444335938, + "step": 18377 + }, + { + "epoch": 2.86, + "learning_rate": 6.681643238757197e-07, + "logits/chosen": -2.3521478176116943, + "logits/rejected": -2.2508468627929688, + "logps/chosen": -193.09829711914062, + "logps/rejected": -364.8039245605469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.650028228759766, + "rewards/margins": 12.112844467163086, + "rewards/rejected": -20.762874603271484, + "step": 18378 + }, + { + "epoch": 2.86, + "learning_rate": 6.674308833445718e-07, + "logits/chosen": -2.877484083175659, + "logits/rejected": -2.244943141937256, + "logps/chosen": -394.911376953125, + "logps/rejected": -221.7504119873047, + "loss": 1.7461, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.76021671295166, + "rewards/margins": -0.21788692474365234, + "rewards/rejected": -13.542329788208008, + "step": 18379 + }, + { + "epoch": 2.86, + "learning_rate": 6.66697442813424e-07, + "logits/chosen": -2.7123405933380127, + "logits/rejected": -1.9254671335220337, + "logps/chosen": -214.13961791992188, + "logps/rejected": -270.4146728515625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.259045600891113, + "rewards/margins": 7.695100784301758, + "rewards/rejected": -17.954147338867188, + "step": 18380 + }, + { + "epoch": 2.86, + "learning_rate": 6.659640022822761e-07, + "logits/chosen": -2.2832767963409424, + "logits/rejected": -2.552687883377075, + "logps/chosen": -788.9179077148438, + "logps/rejected": -785.8756103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.308494567871094, + "rewards/margins": 13.718107223510742, + "rewards/rejected": -28.026601791381836, + "step": 18381 + }, + { + "epoch": 2.86, + "learning_rate": 6.652305617511282e-07, + "logits/chosen": -2.359720468521118, + "logits/rejected": -2.7598392963409424, + "logps/chosen": -279.4012451171875, + "logps/rejected": -364.30255126953125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.251594543457031, + "rewards/margins": 7.441181659698486, + "rewards/rejected": -18.69277572631836, + "step": 18382 + }, + { + "epoch": 2.86, + "learning_rate": 6.644971212199803e-07, + "logits/chosen": -2.753352642059326, + "logits/rejected": -1.7623690366744995, + "logps/chosen": -257.7044372558594, + "logps/rejected": -427.79705810546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.992609977722168, + "rewards/margins": 8.944477081298828, + "rewards/rejected": -15.93708610534668, + "step": 18383 + }, + { + "epoch": 2.86, + "learning_rate": 6.637636806888325e-07, + "logits/chosen": -1.4495935440063477, + "logits/rejected": -2.890428304672241, + "logps/chosen": -316.8632507324219, + "logps/rejected": -614.7554321289062, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.895166397094727, + "rewards/margins": 8.598180770874023, + "rewards/rejected": -21.49334716796875, + "step": 18384 + }, + { + "epoch": 2.86, + "learning_rate": 6.630302401576846e-07, + "logits/chosen": -2.7103631496429443, + "logits/rejected": -2.8318965435028076, + "logps/chosen": -308.99908447265625, + "logps/rejected": -552.8821411132812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.954447746276855, + "rewards/margins": 13.639626502990723, + "rewards/rejected": -24.594074249267578, + "step": 18385 + }, + { + "epoch": 2.86, + "learning_rate": 6.622967996265367e-07, + "logits/chosen": -2.801316022872925, + "logits/rejected": -2.69118332862854, + "logps/chosen": -162.54043579101562, + "logps/rejected": -251.54879760742188, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.446431159973145, + "rewards/margins": 8.125921249389648, + "rewards/rejected": -16.57235336303711, + "step": 18386 + }, + { + "epoch": 2.86, + "learning_rate": 6.615633590953887e-07, + "logits/chosen": -2.8192849159240723, + "logits/rejected": -2.0527992248535156, + "logps/chosen": -734.7639770507812, + "logps/rejected": -647.7081298828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.981925010681152, + "rewards/margins": 11.618609428405762, + "rewards/rejected": -20.600534439086914, + "step": 18387 + }, + { + "epoch": 2.86, + "learning_rate": 6.608299185642409e-07, + "logits/chosen": -2.755765199661255, + "logits/rejected": -1.9378840923309326, + "logps/chosen": -271.05389404296875, + "logps/rejected": -258.6595458984375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.981708526611328, + "rewards/margins": 7.272214412689209, + "rewards/rejected": -18.253921508789062, + "step": 18388 + }, + { + "epoch": 2.86, + "learning_rate": 6.60096478033093e-07, + "logits/chosen": -2.6088411808013916, + "logits/rejected": -1.9863439798355103, + "logps/chosen": -220.63148498535156, + "logps/rejected": -404.6173095703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.205110549926758, + "rewards/margins": 13.204174041748047, + "rewards/rejected": -22.409286499023438, + "step": 18389 + }, + { + "epoch": 2.86, + "learning_rate": 6.593630375019451e-07, + "logits/chosen": -2.7038426399230957, + "logits/rejected": -2.7241721153259277, + "logps/chosen": -135.73544311523438, + "logps/rejected": -324.3177795410156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.010144233703613, + "rewards/margins": 14.263701438903809, + "rewards/rejected": -22.273845672607422, + "step": 18390 + }, + { + "epoch": 2.86, + "learning_rate": 6.586295969707972e-07, + "logits/chosen": -2.2654433250427246, + "logits/rejected": -2.392185926437378, + "logps/chosen": -344.8050842285156, + "logps/rejected": -388.78985595703125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.045074462890625, + "rewards/margins": 7.047715663909912, + "rewards/rejected": -17.092790603637695, + "step": 18391 + }, + { + "epoch": 2.86, + "learning_rate": 6.578961564396493e-07, + "logits/chosen": -2.50028920173645, + "logits/rejected": -2.818859338760376, + "logps/chosen": -297.7430419921875, + "logps/rejected": -408.3304748535156, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.789125442504883, + "rewards/margins": 6.204707622528076, + "rewards/rejected": -19.993833541870117, + "step": 18392 + }, + { + "epoch": 2.86, + "learning_rate": 6.571627159085015e-07, + "logits/chosen": -1.8419818878173828, + "logits/rejected": -2.940955638885498, + "logps/chosen": -339.7083435058594, + "logps/rejected": -548.399658203125, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.04356575012207, + "rewards/margins": 7.028944969177246, + "rewards/rejected": -17.072509765625, + "step": 18393 + }, + { + "epoch": 2.86, + "learning_rate": 6.564292753773536e-07, + "logits/chosen": -2.2721920013427734, + "logits/rejected": -2.040360927581787, + "logps/chosen": -418.33721923828125, + "logps/rejected": -502.36334228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.324553489685059, + "rewards/margins": 14.379364967346191, + "rewards/rejected": -21.70391845703125, + "step": 18394 + }, + { + "epoch": 2.86, + "learning_rate": 6.556958348462058e-07, + "logits/chosen": -2.438948154449463, + "logits/rejected": -2.8516640663146973, + "logps/chosen": -553.201904296875, + "logps/rejected": -648.26806640625, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.999881744384766, + "rewards/margins": 10.371933937072754, + "rewards/rejected": -19.371814727783203, + "step": 18395 + }, + { + "epoch": 2.86, + "learning_rate": 6.549623943150578e-07, + "logits/chosen": -2.7689828872680664, + "logits/rejected": -2.6800384521484375, + "logps/chosen": -198.23464965820312, + "logps/rejected": -307.3609313964844, + "loss": 0.9074, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.660781860351562, + "rewards/margins": 8.118072509765625, + "rewards/rejected": -20.778854370117188, + "step": 18396 + }, + { + "epoch": 2.86, + "learning_rate": 6.5422895378391e-07, + "logits/chosen": -2.8897974491119385, + "logits/rejected": -1.8474332094192505, + "logps/chosen": -221.56756591796875, + "logps/rejected": -200.9041748046875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.076735019683838, + "rewards/margins": 7.155759811401367, + "rewards/rejected": -13.232494354248047, + "step": 18397 + }, + { + "epoch": 2.86, + "learning_rate": 6.53495513252762e-07, + "logits/chosen": -0.9394532442092896, + "logits/rejected": -1.5254985094070435, + "logps/chosen": -523.94580078125, + "logps/rejected": -671.0383911132812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.578248977661133, + "rewards/margins": 14.243873596191406, + "rewards/rejected": -23.82212257385254, + "step": 18398 + }, + { + "epoch": 2.86, + "learning_rate": 6.527620727216142e-07, + "logits/chosen": -2.7707934379577637, + "logits/rejected": -2.64487361907959, + "logps/chosen": -870.4163818359375, + "logps/rejected": -516.323486328125, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.83993911743164, + "rewards/margins": 6.439088821411133, + "rewards/rejected": -16.279027938842773, + "step": 18399 + }, + { + "epoch": 2.86, + "learning_rate": 6.520286321904663e-07, + "logits/chosen": -2.974917411804199, + "logits/rejected": -2.93353009223938, + "logps/chosen": -261.1600646972656, + "logps/rejected": -389.056640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.403949737548828, + "rewards/margins": 13.495677947998047, + "rewards/rejected": -23.899627685546875, + "step": 18400 + }, + { + "epoch": 2.86, + "learning_rate": 6.512951916593184e-07, + "logits/chosen": -1.1349622011184692, + "logits/rejected": -2.035017967224121, + "logps/chosen": -268.47406005859375, + "logps/rejected": -505.569580078125, + "loss": 0.0629, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.47807788848877, + "rewards/margins": 12.597269058227539, + "rewards/rejected": -24.075347900390625, + "step": 18401 + }, + { + "epoch": 2.86, + "learning_rate": 6.505617511281705e-07, + "logits/chosen": -1.9944921731948853, + "logits/rejected": -2.9852442741394043, + "logps/chosen": -406.0236511230469, + "logps/rejected": -632.364013671875, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.07078742980957, + "rewards/margins": 6.931394577026367, + "rewards/rejected": -17.002182006835938, + "step": 18402 + }, + { + "epoch": 2.86, + "learning_rate": 6.498283105970227e-07, + "logits/chosen": -1.3192776441574097, + "logits/rejected": -2.6265885829925537, + "logps/chosen": -161.8852081298828, + "logps/rejected": -630.8610229492188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.362781524658203, + "rewards/margins": 17.38834571838379, + "rewards/rejected": -27.751127243041992, + "step": 18403 + }, + { + "epoch": 2.86, + "learning_rate": 6.490948700658748e-07, + "logits/chosen": -1.794441819190979, + "logits/rejected": -2.4409191608428955, + "logps/chosen": -155.14390563964844, + "logps/rejected": -340.2861328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.965900421142578, + "rewards/margins": 7.938950061798096, + "rewards/rejected": -14.904850959777832, + "step": 18404 + }, + { + "epoch": 2.86, + "learning_rate": 6.48361429534727e-07, + "logits/chosen": -1.2633837461471558, + "logits/rejected": -2.445162296295166, + "logps/chosen": -154.5491943359375, + "logps/rejected": -607.650634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.272680282592773, + "rewards/margins": 17.000843048095703, + "rewards/rejected": -31.273521423339844, + "step": 18405 + }, + { + "epoch": 2.86, + "learning_rate": 6.47627989003579e-07, + "logits/chosen": -2.5189781188964844, + "logits/rejected": -1.8803651332855225, + "logps/chosen": -251.5221710205078, + "logps/rejected": -313.3509216308594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.158895492553711, + "rewards/margins": 11.294095993041992, + "rewards/rejected": -17.452991485595703, + "step": 18406 + }, + { + "epoch": 2.86, + "learning_rate": 6.468945484724312e-07, + "logits/chosen": -2.566584348678589, + "logits/rejected": -2.639234781265259, + "logps/chosen": -386.77587890625, + "logps/rejected": -461.95574951171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0321831703186035, + "rewards/margins": 9.104496955871582, + "rewards/rejected": -16.136680603027344, + "step": 18407 + }, + { + "epoch": 2.86, + "learning_rate": 6.461611079412833e-07, + "logits/chosen": -0.5957227349281311, + "logits/rejected": -2.596583843231201, + "logps/chosen": -143.6978759765625, + "logps/rejected": -668.880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.101662635803223, + "rewards/margins": 15.126690864562988, + "rewards/rejected": -26.22835350036621, + "step": 18408 + }, + { + "epoch": 2.86, + "learning_rate": 6.454276674101355e-07, + "logits/chosen": -2.8646090030670166, + "logits/rejected": -2.355466365814209, + "logps/chosen": -238.36741638183594, + "logps/rejected": -276.4992370605469, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.187841415405273, + "rewards/margins": 10.418054580688477, + "rewards/rejected": -17.60589599609375, + "step": 18409 + }, + { + "epoch": 2.86, + "learning_rate": 6.446942268789875e-07, + "logits/chosen": -2.608020305633545, + "logits/rejected": -1.615981936454773, + "logps/chosen": -369.95751953125, + "logps/rejected": -343.093017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.074972629547119, + "rewards/margins": 14.559267044067383, + "rewards/rejected": -21.634239196777344, + "step": 18410 + }, + { + "epoch": 2.86, + "learning_rate": 6.439607863478396e-07, + "logits/chosen": -1.3033138513565063, + "logits/rejected": -2.1291701793670654, + "logps/chosen": -327.5850524902344, + "logps/rejected": -528.0887451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.104630470275879, + "rewards/margins": 12.530542373657227, + "rewards/rejected": -24.635173797607422, + "step": 18411 + }, + { + "epoch": 2.86, + "learning_rate": 6.432273458166917e-07, + "logits/chosen": -2.497385263442993, + "logits/rejected": -1.8499153852462769, + "logps/chosen": -169.1215362548828, + "logps/rejected": -324.9278564453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.937236309051514, + "rewards/margins": 10.376705169677734, + "rewards/rejected": -17.313941955566406, + "step": 18412 + }, + { + "epoch": 2.86, + "learning_rate": 6.424939052855438e-07, + "logits/chosen": -2.3401401042938232, + "logits/rejected": -2.150944709777832, + "logps/chosen": -192.526611328125, + "logps/rejected": -362.707275390625, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.426837921142578, + "rewards/margins": 10.830467224121094, + "rewards/rejected": -21.257305145263672, + "step": 18413 + }, + { + "epoch": 2.86, + "learning_rate": 6.41760464754396e-07, + "logits/chosen": -3.007014036178589, + "logits/rejected": -2.8415377140045166, + "logps/chosen": -252.22860717773438, + "logps/rejected": -204.6795654296875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.157505989074707, + "rewards/margins": 7.5372700691223145, + "rewards/rejected": -14.69477653503418, + "step": 18414 + }, + { + "epoch": 2.86, + "learning_rate": 6.410270242232481e-07, + "logits/chosen": -2.151824951171875, + "logits/rejected": -2.7493526935577393, + "logps/chosen": -473.1949768066406, + "logps/rejected": -588.8287353515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.057010650634766, + "rewards/margins": 10.338021278381348, + "rewards/rejected": -18.395030975341797, + "step": 18415 + }, + { + "epoch": 2.86, + "learning_rate": 6.402935836921002e-07, + "logits/chosen": -2.1746137142181396, + "logits/rejected": -2.813926935195923, + "logps/chosen": -96.67155456542969, + "logps/rejected": -260.0479431152344, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.049801826477051, + "rewards/margins": 7.922370910644531, + "rewards/rejected": -13.972171783447266, + "step": 18416 + }, + { + "epoch": 2.86, + "learning_rate": 6.395601431609523e-07, + "logits/chosen": -2.753756284713745, + "logits/rejected": -2.7939255237579346, + "logps/chosen": -204.58859252929688, + "logps/rejected": -443.74700927734375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.076178550720215, + "rewards/margins": 6.432062149047852, + "rewards/rejected": -14.508240699768066, + "step": 18417 + }, + { + "epoch": 2.86, + "learning_rate": 6.388267026298045e-07, + "logits/chosen": -2.1738879680633545, + "logits/rejected": -2.8490071296691895, + "logps/chosen": -180.61158752441406, + "logps/rejected": -298.847412109375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.379762649536133, + "rewards/margins": 6.091863632202148, + "rewards/rejected": -17.47162628173828, + "step": 18418 + }, + { + "epoch": 2.86, + "learning_rate": 6.380932620986566e-07, + "logits/chosen": -2.2548537254333496, + "logits/rejected": -2.66909122467041, + "logps/chosen": -379.42572021484375, + "logps/rejected": -652.3954467773438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3453216552734375, + "rewards/margins": 12.178642272949219, + "rewards/rejected": -19.523963928222656, + "step": 18419 + }, + { + "epoch": 2.86, + "learning_rate": 6.373598215675088e-07, + "logits/chosen": -2.0517525672912598, + "logits/rejected": -2.572782516479492, + "logps/chosen": -264.85479736328125, + "logps/rejected": -520.6810302734375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.813753128051758, + "rewards/margins": 9.713016510009766, + "rewards/rejected": -20.526771545410156, + "step": 18420 + }, + { + "epoch": 2.86, + "learning_rate": 6.366263810363607e-07, + "logits/chosen": -2.3515267372131348, + "logits/rejected": -2.4143691062927246, + "logps/chosen": -223.4275360107422, + "logps/rejected": -362.07977294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.969412803649902, + "rewards/margins": 10.713033676147461, + "rewards/rejected": -18.68244743347168, + "step": 18421 + }, + { + "epoch": 2.87, + "learning_rate": 6.358929405052129e-07, + "logits/chosen": -2.6187055110931396, + "logits/rejected": -2.527712821960449, + "logps/chosen": -301.23663330078125, + "logps/rejected": -638.9718627929688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.812798500061035, + "rewards/margins": 12.033746719360352, + "rewards/rejected": -19.84654426574707, + "step": 18422 + }, + { + "epoch": 2.87, + "learning_rate": 6.35159499974065e-07, + "logits/chosen": -2.95511794090271, + "logits/rejected": -2.9900949001312256, + "logps/chosen": -85.37387084960938, + "logps/rejected": -255.6219482421875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7029128074646, + "rewards/margins": 10.258411407470703, + "rewards/rejected": -16.96132469177246, + "step": 18423 + }, + { + "epoch": 2.87, + "learning_rate": 6.344260594429172e-07, + "logits/chosen": -1.963083267211914, + "logits/rejected": -2.437164545059204, + "logps/chosen": -220.7281951904297, + "logps/rejected": -440.3211669921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.095040321350098, + "rewards/margins": 11.483383178710938, + "rewards/rejected": -20.57842254638672, + "step": 18424 + }, + { + "epoch": 2.87, + "learning_rate": 6.336926189117693e-07, + "logits/chosen": -2.940646171569824, + "logits/rejected": -2.5069189071655273, + "logps/chosen": -172.39735412597656, + "logps/rejected": -260.99334716796875, + "loss": 0.2379, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.988378524780273, + "rewards/margins": 2.572596788406372, + "rewards/rejected": -13.560976028442383, + "step": 18425 + }, + { + "epoch": 2.87, + "learning_rate": 6.329591783806214e-07, + "logits/chosen": -2.8168249130249023, + "logits/rejected": -2.4128499031066895, + "logps/chosen": -160.376220703125, + "logps/rejected": -268.4234619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.375960350036621, + "rewards/margins": 12.020870208740234, + "rewards/rejected": -18.396831512451172, + "step": 18426 + }, + { + "epoch": 2.87, + "learning_rate": 6.322257378494735e-07, + "logits/chosen": -2.532419204711914, + "logits/rejected": -3.0583977699279785, + "logps/chosen": -505.87872314453125, + "logps/rejected": -448.8150634765625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.266965866088867, + "rewards/margins": 9.09376335144043, + "rewards/rejected": -19.360729217529297, + "step": 18427 + }, + { + "epoch": 2.87, + "learning_rate": 6.314922973183257e-07, + "logits/chosen": -1.7250819206237793, + "logits/rejected": -2.416685104370117, + "logps/chosen": -282.1512145996094, + "logps/rejected": -448.8520202636719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.601472854614258, + "rewards/margins": 13.095990180969238, + "rewards/rejected": -20.697463989257812, + "step": 18428 + }, + { + "epoch": 2.87, + "learning_rate": 6.307588567871778e-07, + "logits/chosen": -2.5355212688446045, + "logits/rejected": -2.80492901802063, + "logps/chosen": -268.6195983886719, + "logps/rejected": -276.968994140625, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.545685768127441, + "rewards/margins": 6.144961357116699, + "rewards/rejected": -19.69064712524414, + "step": 18429 + }, + { + "epoch": 2.87, + "learning_rate": 6.300254162560299e-07, + "logits/chosen": -1.4249242544174194, + "logits/rejected": -2.2698490619659424, + "logps/chosen": -193.35086059570312, + "logps/rejected": -468.9566955566406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.306493759155273, + "rewards/margins": 15.558866500854492, + "rewards/rejected": -26.865360260009766, + "step": 18430 + }, + { + "epoch": 2.87, + "learning_rate": 6.29291975724882e-07, + "logits/chosen": -2.8834474086761475, + "logits/rejected": -2.4703667163848877, + "logps/chosen": -314.58013916015625, + "logps/rejected": -453.9053955078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.700315952301025, + "rewards/margins": 11.507268905639648, + "rewards/rejected": -18.207584381103516, + "step": 18431 + }, + { + "epoch": 2.87, + "learning_rate": 6.285585351937341e-07, + "logits/chosen": -2.4437663555145264, + "logits/rejected": -1.5154316425323486, + "logps/chosen": -244.5471954345703, + "logps/rejected": -182.59608459472656, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.431825637817383, + "rewards/margins": 7.872405052185059, + "rewards/rejected": -16.304231643676758, + "step": 18432 + }, + { + "epoch": 2.87, + "learning_rate": 6.278250946625863e-07, + "logits/chosen": -0.992327094078064, + "logits/rejected": -2.259024143218994, + "logps/chosen": -359.6650695800781, + "logps/rejected": -714.0362548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.129356384277344, + "rewards/margins": 13.615242004394531, + "rewards/rejected": -21.744598388671875, + "step": 18433 + }, + { + "epoch": 2.87, + "learning_rate": 6.270916541314383e-07, + "logits/chosen": -2.2749037742614746, + "logits/rejected": -2.8940651416778564, + "logps/chosen": -228.1350860595703, + "logps/rejected": -478.8321838378906, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.592708587646484, + "rewards/margins": 6.975828647613525, + "rewards/rejected": -15.568537712097168, + "step": 18434 + }, + { + "epoch": 2.87, + "learning_rate": 6.263582136002905e-07, + "logits/chosen": -3.0080041885375977, + "logits/rejected": -2.262678861618042, + "logps/chosen": -243.71470642089844, + "logps/rejected": -174.4756317138672, + "loss": 0.8272, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.499042510986328, + "rewards/margins": 3.6144561767578125, + "rewards/rejected": -14.11349868774414, + "step": 18435 + }, + { + "epoch": 2.87, + "learning_rate": 6.256247730691425e-07, + "logits/chosen": -2.6922996044158936, + "logits/rejected": -2.64101505279541, + "logps/chosen": -256.4950256347656, + "logps/rejected": -389.0998840332031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.32470989227295, + "rewards/margins": 10.131035804748535, + "rewards/rejected": -19.455745697021484, + "step": 18436 + }, + { + "epoch": 2.87, + "learning_rate": 6.248913325379947e-07, + "logits/chosen": -1.8487861156463623, + "logits/rejected": -2.935929536819458, + "logps/chosen": -182.26759338378906, + "logps/rejected": -505.7908935546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.942792892456055, + "rewards/margins": 10.187145233154297, + "rewards/rejected": -24.12993812561035, + "step": 18437 + }, + { + "epoch": 2.87, + "learning_rate": 6.241578920068468e-07, + "logits/chosen": -1.3619074821472168, + "logits/rejected": -1.4876192808151245, + "logps/chosen": -265.8268737792969, + "logps/rejected": -421.9756774902344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.157901287078857, + "rewards/margins": 13.090316772460938, + "rewards/rejected": -20.248218536376953, + "step": 18438 + }, + { + "epoch": 2.87, + "learning_rate": 6.23424451475699e-07, + "logits/chosen": -1.9902476072311401, + "logits/rejected": -2.4608957767486572, + "logps/chosen": -337.93572998046875, + "logps/rejected": -610.2175903320312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.228164672851562, + "rewards/margins": 14.993772506713867, + "rewards/rejected": -26.221935272216797, + "step": 18439 + }, + { + "epoch": 2.87, + "learning_rate": 6.226910109445511e-07, + "logits/chosen": -1.8776166439056396, + "logits/rejected": -2.793388843536377, + "logps/chosen": -474.2337646484375, + "logps/rejected": -578.8056640625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.48342514038086, + "rewards/margins": 8.980945587158203, + "rewards/rejected": -18.464370727539062, + "step": 18440 + }, + { + "epoch": 2.87, + "learning_rate": 6.219575704134032e-07, + "logits/chosen": -2.756349563598633, + "logits/rejected": -1.826124906539917, + "logps/chosen": -208.43467712402344, + "logps/rejected": -373.31439208984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8364410400390625, + "rewards/margins": 9.043716430664062, + "rewards/rejected": -13.880157470703125, + "step": 18441 + }, + { + "epoch": 2.87, + "learning_rate": 6.212241298822553e-07, + "logits/chosen": -2.6034634113311768, + "logits/rejected": -2.6480345726013184, + "logps/chosen": -303.10980224609375, + "logps/rejected": -390.918701171875, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.405213356018066, + "rewards/margins": 5.854211807250977, + "rewards/rejected": -15.259425163269043, + "step": 18442 + }, + { + "epoch": 2.87, + "learning_rate": 6.204906893511075e-07, + "logits/chosen": -2.7120518684387207, + "logits/rejected": -1.3244593143463135, + "logps/chosen": -241.3220977783203, + "logps/rejected": -317.7828674316406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.55142879486084, + "rewards/margins": 10.9168701171875, + "rewards/rejected": -18.468299865722656, + "step": 18443 + }, + { + "epoch": 2.87, + "learning_rate": 6.197572488199596e-07, + "logits/chosen": -2.0792596340179443, + "logits/rejected": -3.0145821571350098, + "logps/chosen": -96.79513549804688, + "logps/rejected": -352.2919616699219, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.376737594604492, + "rewards/margins": 8.083609580993652, + "rewards/rejected": -16.460346221923828, + "step": 18444 + }, + { + "epoch": 2.87, + "learning_rate": 6.190238082888118e-07, + "logits/chosen": -2.042489767074585, + "logits/rejected": -2.7336199283599854, + "logps/chosen": -242.483154296875, + "logps/rejected": -358.46893310546875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.973858833312988, + "rewards/margins": 8.140687942504883, + "rewards/rejected": -16.114547729492188, + "step": 18445 + }, + { + "epoch": 2.87, + "learning_rate": 6.182903677576637e-07, + "logits/chosen": -2.475285291671753, + "logits/rejected": -1.8449925184249878, + "logps/chosen": -686.3302001953125, + "logps/rejected": -494.26507568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.14143180847168, + "rewards/margins": 11.467323303222656, + "rewards/rejected": -23.60875701904297, + "step": 18446 + }, + { + "epoch": 2.87, + "learning_rate": 6.175569272265159e-07, + "logits/chosen": -2.274603843688965, + "logits/rejected": -2.750584363937378, + "logps/chosen": -658.2681884765625, + "logps/rejected": -780.6083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.742159843444824, + "rewards/margins": 13.336368560791016, + "rewards/rejected": -20.078529357910156, + "step": 18447 + }, + { + "epoch": 2.87, + "learning_rate": 6.16823486695368e-07, + "logits/chosen": -2.843796491622925, + "logits/rejected": -2.9744362831115723, + "logps/chosen": -929.1132202148438, + "logps/rejected": -653.948974609375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.302316665649414, + "rewards/margins": 7.692162990570068, + "rewards/rejected": -19.99448013305664, + "step": 18448 + }, + { + "epoch": 2.87, + "learning_rate": 6.160900461642202e-07, + "logits/chosen": -2.161519765853882, + "logits/rejected": -2.526445150375366, + "logps/chosen": -203.92576599121094, + "logps/rejected": -353.63629150390625, + "loss": 0.1362, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.646574020385742, + "rewards/margins": 3.7885003089904785, + "rewards/rejected": -13.435073852539062, + "step": 18449 + }, + { + "epoch": 2.87, + "learning_rate": 6.153566056330723e-07, + "logits/chosen": -2.7793080806732178, + "logits/rejected": -2.2259888648986816, + "logps/chosen": -396.1526184082031, + "logps/rejected": -342.1283264160156, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.52126693725586, + "rewards/margins": 10.483255386352539, + "rewards/rejected": -19.0045223236084, + "step": 18450 + }, + { + "epoch": 2.87, + "learning_rate": 6.146231651019244e-07, + "logits/chosen": -2.901235818862915, + "logits/rejected": -2.757094144821167, + "logps/chosen": -184.6278839111328, + "logps/rejected": -277.9980773925781, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.358880043029785, + "rewards/margins": 7.28352165222168, + "rewards/rejected": -14.642401695251465, + "step": 18451 + }, + { + "epoch": 2.87, + "learning_rate": 6.138897245707765e-07, + "logits/chosen": -2.25140643119812, + "logits/rejected": -2.6903724670410156, + "logps/chosen": -357.374267578125, + "logps/rejected": -333.18084716796875, + "loss": 0.6451, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.822183609008789, + "rewards/margins": 6.098574638366699, + "rewards/rejected": -16.920757293701172, + "step": 18452 + }, + { + "epoch": 2.87, + "learning_rate": 6.131562840396286e-07, + "logits/chosen": -1.3285380601882935, + "logits/rejected": -2.1437900066375732, + "logps/chosen": -240.83860778808594, + "logps/rejected": -436.87799072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.751396179199219, + "rewards/margins": 11.822067260742188, + "rewards/rejected": -21.573463439941406, + "step": 18453 + }, + { + "epoch": 2.87, + "learning_rate": 6.124228435084808e-07, + "logits/chosen": -1.4886503219604492, + "logits/rejected": -2.291015625, + "logps/chosen": -190.04306030273438, + "logps/rejected": -585.2911376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.11227798461914, + "rewards/margins": 14.015621185302734, + "rewards/rejected": -25.127899169921875, + "step": 18454 + }, + { + "epoch": 2.87, + "learning_rate": 6.116894029773328e-07, + "logits/chosen": -2.8919947147369385, + "logits/rejected": -1.7514007091522217, + "logps/chosen": -313.05975341796875, + "logps/rejected": -250.73239135742188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.643349647521973, + "rewards/margins": 11.704195022583008, + "rewards/rejected": -20.347545623779297, + "step": 18455 + }, + { + "epoch": 2.87, + "learning_rate": 6.10955962446185e-07, + "logits/chosen": -1.552232027053833, + "logits/rejected": -2.253807306289673, + "logps/chosen": -186.181884765625, + "logps/rejected": -411.9298400878906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.10649585723877, + "rewards/margins": 15.005240440368652, + "rewards/rejected": -24.111736297607422, + "step": 18456 + }, + { + "epoch": 2.87, + "learning_rate": 6.10222521915037e-07, + "logits/chosen": -2.5876576900482178, + "logits/rejected": -1.3261371850967407, + "logps/chosen": -318.0716552734375, + "logps/rejected": -241.39474487304688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.566791534423828, + "rewards/margins": 8.973573684692383, + "rewards/rejected": -18.54036521911621, + "step": 18457 + }, + { + "epoch": 2.87, + "learning_rate": 6.094890813838892e-07, + "logits/chosen": -2.520153045654297, + "logits/rejected": -1.447242259979248, + "logps/chosen": -204.98843383789062, + "logps/rejected": -254.82675170898438, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.734670639038086, + "rewards/margins": 6.515471935272217, + "rewards/rejected": -18.25014305114746, + "step": 18458 + }, + { + "epoch": 2.87, + "learning_rate": 6.087556408527413e-07, + "logits/chosen": -2.0260133743286133, + "logits/rejected": -2.4959139823913574, + "logps/chosen": -175.22225952148438, + "logps/rejected": -418.08502197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.916172504425049, + "rewards/margins": 12.10086441040039, + "rewards/rejected": -19.01703643798828, + "step": 18459 + }, + { + "epoch": 2.87, + "learning_rate": 6.080222003215934e-07, + "logits/chosen": -2.6450796127319336, + "logits/rejected": -2.1291260719299316, + "logps/chosen": -651.2586059570312, + "logps/rejected": -563.7257690429688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.388360023498535, + "rewards/margins": 11.166107177734375, + "rewards/rejected": -20.554466247558594, + "step": 18460 + }, + { + "epoch": 2.87, + "learning_rate": 6.072887597904455e-07, + "logits/chosen": -3.026881217956543, + "logits/rejected": -2.831576108932495, + "logps/chosen": -247.31756591796875, + "logps/rejected": -578.1480712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.533295631408691, + "rewards/margins": 12.726751327514648, + "rewards/rejected": -22.260047912597656, + "step": 18461 + }, + { + "epoch": 2.87, + "learning_rate": 6.065553192592977e-07, + "logits/chosen": -2.8078362941741943, + "logits/rejected": -2.7718682289123535, + "logps/chosen": -270.3330383300781, + "logps/rejected": -323.0131530761719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.658089637756348, + "rewards/margins": 9.912245750427246, + "rewards/rejected": -17.570335388183594, + "step": 18462 + }, + { + "epoch": 2.87, + "learning_rate": 6.058218787281498e-07, + "logits/chosen": -1.4403825998306274, + "logits/rejected": -2.248608350753784, + "logps/chosen": -189.9693603515625, + "logps/rejected": -277.2868347167969, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.959760665893555, + "rewards/margins": 4.063661575317383, + "rewards/rejected": -15.023422241210938, + "step": 18463 + }, + { + "epoch": 2.87, + "learning_rate": 6.05088438197002e-07, + "logits/chosen": -2.656752347946167, + "logits/rejected": -2.668976306915283, + "logps/chosen": -333.8023376464844, + "logps/rejected": -429.48492431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.315994262695312, + "rewards/margins": 11.00381851196289, + "rewards/rejected": -22.319812774658203, + "step": 18464 + }, + { + "epoch": 2.87, + "learning_rate": 6.04354997665854e-07, + "logits/chosen": -2.430230140686035, + "logits/rejected": -2.959376335144043, + "logps/chosen": -203.37432861328125, + "logps/rejected": -467.3391418457031, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.243269920349121, + "rewards/margins": 9.108208656311035, + "rewards/rejected": -19.351478576660156, + "step": 18465 + }, + { + "epoch": 2.87, + "learning_rate": 6.036215571347062e-07, + "logits/chosen": -2.646366596221924, + "logits/rejected": -2.7563891410827637, + "logps/chosen": -188.3896942138672, + "logps/rejected": -410.3802185058594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.724147796630859, + "rewards/margins": 12.860076904296875, + "rewards/rejected": -18.584224700927734, + "step": 18466 + }, + { + "epoch": 2.87, + "learning_rate": 6.028881166035583e-07, + "logits/chosen": -1.6122097969055176, + "logits/rejected": -2.6098544597625732, + "logps/chosen": -210.67974853515625, + "logps/rejected": -470.5111999511719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.851959228515625, + "rewards/margins": 11.011689186096191, + "rewards/rejected": -22.8636474609375, + "step": 18467 + }, + { + "epoch": 2.87, + "learning_rate": 6.021546760724105e-07, + "logits/chosen": -2.746764659881592, + "logits/rejected": -2.31868314743042, + "logps/chosen": -358.25750732421875, + "logps/rejected": -379.92584228515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.321367263793945, + "rewards/margins": 9.084930419921875, + "rewards/rejected": -20.40629768371582, + "step": 18468 + }, + { + "epoch": 2.87, + "learning_rate": 6.014212355412626e-07, + "logits/chosen": -2.648293972015381, + "logits/rejected": -2.733482599258423, + "logps/chosen": -265.9209289550781, + "logps/rejected": -283.7903747558594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.31617259979248, + "rewards/margins": 8.504244804382324, + "rewards/rejected": -21.820417404174805, + "step": 18469 + }, + { + "epoch": 2.87, + "learning_rate": 6.006877950101146e-07, + "logits/chosen": -1.9165563583374023, + "logits/rejected": -1.8943960666656494, + "logps/chosen": -109.47737121582031, + "logps/rejected": -282.9273681640625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.331767082214355, + "rewards/margins": 10.90679931640625, + "rewards/rejected": -20.238567352294922, + "step": 18470 + }, + { + "epoch": 2.87, + "learning_rate": 5.999543544789667e-07, + "logits/chosen": -1.5837477445602417, + "logits/rejected": -2.3081445693969727, + "logps/chosen": -142.01539611816406, + "logps/rejected": -336.96990966796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.010476112365723, + "rewards/margins": 9.296390533447266, + "rewards/rejected": -20.306865692138672, + "step": 18471 + }, + { + "epoch": 2.87, + "learning_rate": 5.992209139478188e-07, + "logits/chosen": -2.52510404586792, + "logits/rejected": -2.777700662612915, + "logps/chosen": -185.32437133789062, + "logps/rejected": -510.13861083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.76974868774414, + "rewards/margins": 12.41469669342041, + "rewards/rejected": -23.184444427490234, + "step": 18472 + }, + { + "epoch": 2.87, + "learning_rate": 5.98487473416671e-07, + "logits/chosen": -2.6662697792053223, + "logits/rejected": -1.6950749158859253, + "logps/chosen": -500.91204833984375, + "logps/rejected": -461.3367919921875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.299861907958984, + "rewards/margins": 12.234643936157227, + "rewards/rejected": -21.53450584411621, + "step": 18473 + }, + { + "epoch": 2.87, + "learning_rate": 5.977540328855231e-07, + "logits/chosen": -2.3019118309020996, + "logits/rejected": -2.8419370651245117, + "logps/chosen": -337.28045654296875, + "logps/rejected": -485.59063720703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.754781723022461, + "rewards/margins": 9.355135917663574, + "rewards/rejected": -22.10991668701172, + "step": 18474 + }, + { + "epoch": 2.87, + "learning_rate": 5.970205923543752e-07, + "logits/chosen": -2.8879361152648926, + "logits/rejected": -2.7926886081695557, + "logps/chosen": -183.51736450195312, + "logps/rejected": -379.16583251953125, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.195818901062012, + "rewards/margins": 10.258284568786621, + "rewards/rejected": -18.454103469848633, + "step": 18475 + }, + { + "epoch": 2.87, + "learning_rate": 5.962871518232273e-07, + "logits/chosen": -2.4558358192443848, + "logits/rejected": -1.7510162591934204, + "logps/chosen": -341.6236572265625, + "logps/rejected": -414.0908203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.224539756774902, + "rewards/margins": 9.734525680541992, + "rewards/rejected": -18.959064483642578, + "step": 18476 + }, + { + "epoch": 2.87, + "learning_rate": 5.955537112920795e-07, + "logits/chosen": -2.869961738586426, + "logits/rejected": -2.3964104652404785, + "logps/chosen": -302.96893310546875, + "logps/rejected": -382.94012451171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.732032775878906, + "rewards/margins": 9.224620819091797, + "rewards/rejected": -19.956653594970703, + "step": 18477 + }, + { + "epoch": 2.87, + "learning_rate": 5.948202707609316e-07, + "logits/chosen": -2.3105642795562744, + "logits/rejected": -2.6176369190216064, + "logps/chosen": -138.79344177246094, + "logps/rejected": -336.6026611328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.647015571594238, + "rewards/margins": 9.564008712768555, + "rewards/rejected": -19.21102523803711, + "step": 18478 + }, + { + "epoch": 2.87, + "learning_rate": 5.940868302297838e-07, + "logits/chosen": -1.0175602436065674, + "logits/rejected": -2.170600414276123, + "logps/chosen": -280.19171142578125, + "logps/rejected": -475.2928466796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.038019180297852, + "rewards/margins": 10.937057495117188, + "rewards/rejected": -23.975078582763672, + "step": 18479 + }, + { + "epoch": 2.87, + "learning_rate": 5.933533896986358e-07, + "logits/chosen": -2.373577833175659, + "logits/rejected": -2.7676823139190674, + "logps/chosen": -156.34811401367188, + "logps/rejected": -445.7339782714844, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.450773239135742, + "rewards/margins": 9.04832935333252, + "rewards/rejected": -16.499103546142578, + "step": 18480 + }, + { + "epoch": 2.87, + "learning_rate": 5.92619949167488e-07, + "logits/chosen": -2.533538818359375, + "logits/rejected": -1.6276781558990479, + "logps/chosen": -299.6061706542969, + "logps/rejected": -313.11114501953125, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.092418670654297, + "rewards/margins": 7.745189666748047, + "rewards/rejected": -17.837608337402344, + "step": 18481 + }, + { + "epoch": 2.87, + "learning_rate": 5.9188650863634e-07, + "logits/chosen": -2.838148355484009, + "logits/rejected": -1.9878548383712769, + "logps/chosen": -338.3784484863281, + "logps/rejected": -556.8629760742188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.231561660766602, + "rewards/margins": 11.208593368530273, + "rewards/rejected": -22.440155029296875, + "step": 18482 + }, + { + "epoch": 2.87, + "learning_rate": 5.911530681051922e-07, + "logits/chosen": -1.6603375673294067, + "logits/rejected": -2.4874863624572754, + "logps/chosen": -227.93255615234375, + "logps/rejected": -462.1576232910156, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.969545364379883, + "rewards/margins": 5.91449499130249, + "rewards/rejected": -19.88404083251953, + "step": 18483 + }, + { + "epoch": 2.87, + "learning_rate": 5.904196275740443e-07, + "logits/chosen": -2.187345027923584, + "logits/rejected": -2.820124387741089, + "logps/chosen": -471.621337890625, + "logps/rejected": -500.4991149902344, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.350595474243164, + "rewards/margins": 9.882308006286621, + "rewards/rejected": -19.23290252685547, + "step": 18484 + }, + { + "epoch": 2.87, + "learning_rate": 5.896861870428964e-07, + "logits/chosen": -2.1185925006866455, + "logits/rejected": -2.493544340133667, + "logps/chosen": -380.37640380859375, + "logps/rejected": -549.9306640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.595026969909668, + "rewards/margins": 10.772589683532715, + "rewards/rejected": -20.367616653442383, + "step": 18485 + }, + { + "epoch": 2.87, + "learning_rate": 5.889527465117485e-07, + "logits/chosen": -2.4226832389831543, + "logits/rejected": -2.0160231590270996, + "logps/chosen": -392.2434387207031, + "logps/rejected": -289.4066162109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.035041809082031, + "rewards/margins": 11.837528228759766, + "rewards/rejected": -17.872570037841797, + "step": 18486 + }, + { + "epoch": 2.88, + "learning_rate": 5.882193059806007e-07, + "logits/chosen": -1.8515108823776245, + "logits/rejected": -2.609450578689575, + "logps/chosen": -136.48709106445312, + "logps/rejected": -268.51678466796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.126184463500977, + "rewards/margins": 8.482505798339844, + "rewards/rejected": -18.60869026184082, + "step": 18487 + }, + { + "epoch": 2.88, + "learning_rate": 5.874858654494528e-07, + "logits/chosen": -0.763344407081604, + "logits/rejected": -2.5424370765686035, + "logps/chosen": -184.8313751220703, + "logps/rejected": -440.2672119140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.98588752746582, + "rewards/margins": 7.959445476531982, + "rewards/rejected": -20.945331573486328, + "step": 18488 + }, + { + "epoch": 2.88, + "learning_rate": 5.86752424918305e-07, + "logits/chosen": -2.803938865661621, + "logits/rejected": -2.3098104000091553, + "logps/chosen": -205.72410583496094, + "logps/rejected": -270.5997619628906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.646337509155273, + "rewards/margins": 8.682701110839844, + "rewards/rejected": -17.329038619995117, + "step": 18489 + }, + { + "epoch": 2.88, + "learning_rate": 5.86018984387157e-07, + "logits/chosen": -1.9382126331329346, + "logits/rejected": -2.5233969688415527, + "logps/chosen": -270.6324768066406, + "logps/rejected": -459.96728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.433138847351074, + "rewards/margins": 11.459014892578125, + "rewards/rejected": -20.892154693603516, + "step": 18490 + }, + { + "epoch": 2.88, + "learning_rate": 5.852855438560092e-07, + "logits/chosen": -2.876685380935669, + "logits/rejected": -2.554370164871216, + "logps/chosen": -1023.7473754882812, + "logps/rejected": -641.6032104492188, + "loss": 0.034, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.356497764587402, + "rewards/margins": 6.0543413162231445, + "rewards/rejected": -16.410839080810547, + "step": 18491 + }, + { + "epoch": 2.88, + "learning_rate": 5.845521033248613e-07, + "logits/chosen": -2.2312448024749756, + "logits/rejected": -2.766515016555786, + "logps/chosen": -155.5927276611328, + "logps/rejected": -346.0244140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8042402267456055, + "rewards/margins": 13.187626838684082, + "rewards/rejected": -19.991867065429688, + "step": 18492 + }, + { + "epoch": 2.88, + "learning_rate": 5.838186627937133e-07, + "logits/chosen": -2.7506747245788574, + "logits/rejected": -2.5211148262023926, + "logps/chosen": -184.2176513671875, + "logps/rejected": -395.87847900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.2222900390625, + "rewards/margins": 10.516107559204102, + "rewards/rejected": -18.7383975982666, + "step": 18493 + }, + { + "epoch": 2.88, + "learning_rate": 5.830852222625655e-07, + "logits/chosen": -2.070563316345215, + "logits/rejected": -2.5224645137786865, + "logps/chosen": -222.57577514648438, + "logps/rejected": -636.0176391601562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.182039260864258, + "rewards/margins": 15.011646270751953, + "rewards/rejected": -25.19368553161621, + "step": 18494 + }, + { + "epoch": 2.88, + "learning_rate": 5.823517817314175e-07, + "logits/chosen": -2.206005573272705, + "logits/rejected": -2.530719757080078, + "logps/chosen": -246.7258758544922, + "logps/rejected": -543.901123046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.154666900634766, + "rewards/margins": 10.320718765258789, + "rewards/rejected": -23.475385665893555, + "step": 18495 + }, + { + "epoch": 2.88, + "learning_rate": 5.816183412002697e-07, + "logits/chosen": -2.8193068504333496, + "logits/rejected": -2.353036880493164, + "logps/chosen": -319.27703857421875, + "logps/rejected": -306.7083740234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.767114639282227, + "rewards/margins": 9.524870872497559, + "rewards/rejected": -19.2919864654541, + "step": 18496 + }, + { + "epoch": 2.88, + "learning_rate": 5.808849006691218e-07, + "logits/chosen": -2.638842821121216, + "logits/rejected": -2.714071750640869, + "logps/chosen": -236.00033569335938, + "logps/rejected": -333.211669921875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.486785888671875, + "rewards/margins": 10.191108703613281, + "rewards/rejected": -16.677894592285156, + "step": 18497 + }, + { + "epoch": 2.88, + "learning_rate": 5.80151460137974e-07, + "logits/chosen": -2.804136276245117, + "logits/rejected": -2.837522506713867, + "logps/chosen": -95.28266143798828, + "logps/rejected": -360.045166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.597490310668945, + "rewards/margins": 12.339210510253906, + "rewards/rejected": -20.93670082092285, + "step": 18498 + }, + { + "epoch": 2.88, + "learning_rate": 5.794180196068261e-07, + "logits/chosen": -1.7132858037948608, + "logits/rejected": -2.579054594039917, + "logps/chosen": -163.91891479492188, + "logps/rejected": -322.74285888671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.733749866485596, + "rewards/margins": 10.49673843383789, + "rewards/rejected": -18.230487823486328, + "step": 18499 + }, + { + "epoch": 2.88, + "learning_rate": 5.786845790756782e-07, + "logits/chosen": -2.818878650665283, + "logits/rejected": -2.2453110218048096, + "logps/chosen": -626.3247680664062, + "logps/rejected": -663.2013549804688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.633916854858398, + "rewards/margins": 12.544788360595703, + "rewards/rejected": -24.178707122802734, + "step": 18500 + }, + { + "epoch": 2.88, + "learning_rate": 5.779511385445303e-07, + "logits/chosen": -1.8648169040679932, + "logits/rejected": -1.8512980937957764, + "logps/chosen": -242.18746948242188, + "logps/rejected": -430.0967712402344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.820818901062012, + "rewards/margins": 11.83617877960205, + "rewards/rejected": -20.656997680664062, + "step": 18501 + }, + { + "epoch": 2.88, + "learning_rate": 5.772176980133825e-07, + "logits/chosen": -2.323218584060669, + "logits/rejected": -2.804433822631836, + "logps/chosen": -159.52442932128906, + "logps/rejected": -286.0523986816406, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.904807090759277, + "rewards/margins": 7.514173984527588, + "rewards/rejected": -14.418981552124023, + "step": 18502 + }, + { + "epoch": 2.88, + "learning_rate": 5.764842574822346e-07, + "logits/chosen": -2.437986135482788, + "logits/rejected": -2.645494222640991, + "logps/chosen": -497.3199768066406, + "logps/rejected": -627.84375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1900835037231445, + "rewards/margins": 11.204904556274414, + "rewards/rejected": -18.394989013671875, + "step": 18503 + }, + { + "epoch": 2.88, + "learning_rate": 5.757508169510868e-07, + "logits/chosen": -3.043501615524292, + "logits/rejected": -2.514465570449829, + "logps/chosen": -719.17333984375, + "logps/rejected": -525.4110717773438, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.543591499328613, + "rewards/margins": 7.962897777557373, + "rewards/rejected": -14.506488800048828, + "step": 18504 + }, + { + "epoch": 2.88, + "learning_rate": 5.750173764199387e-07, + "logits/chosen": -2.110032081604004, + "logits/rejected": -2.6669297218322754, + "logps/chosen": -641.7127685546875, + "logps/rejected": -804.1248779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.059144020080566, + "rewards/margins": 16.444931030273438, + "rewards/rejected": -24.504074096679688, + "step": 18505 + }, + { + "epoch": 2.88, + "learning_rate": 5.742839358887909e-07, + "logits/chosen": -2.35504150390625, + "logits/rejected": -2.2596826553344727, + "logps/chosen": -314.52490234375, + "logps/rejected": -585.344970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.350734233856201, + "rewards/margins": 14.000903129577637, + "rewards/rejected": -21.351638793945312, + "step": 18506 + }, + { + "epoch": 2.88, + "learning_rate": 5.73550495357643e-07, + "logits/chosen": -2.695817708969116, + "logits/rejected": -2.815303325653076, + "logps/chosen": -701.5377197265625, + "logps/rejected": -1065.4718017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.280562400817871, + "rewards/margins": 10.970951080322266, + "rewards/rejected": -19.251514434814453, + "step": 18507 + }, + { + "epoch": 2.88, + "learning_rate": 5.728170548264952e-07, + "logits/chosen": -2.160083770751953, + "logits/rejected": -2.8269529342651367, + "logps/chosen": -267.36688232421875, + "logps/rejected": -534.9383544921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.925774574279785, + "rewards/margins": 11.331732749938965, + "rewards/rejected": -22.25750732421875, + "step": 18508 + }, + { + "epoch": 2.88, + "learning_rate": 5.720836142953473e-07, + "logits/chosen": -2.29587984085083, + "logits/rejected": -2.315906524658203, + "logps/chosen": -217.3656768798828, + "logps/rejected": -400.5203857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.354753494262695, + "rewards/margins": 12.939531326293945, + "rewards/rejected": -22.29428482055664, + "step": 18509 + }, + { + "epoch": 2.88, + "learning_rate": 5.713501737641994e-07, + "logits/chosen": -3.0560553073883057, + "logits/rejected": -3.058506965637207, + "logps/chosen": -276.07025146484375, + "logps/rejected": -459.3252868652344, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.992672920227051, + "rewards/margins": 9.49720573425293, + "rewards/rejected": -16.489879608154297, + "step": 18510 + }, + { + "epoch": 2.88, + "learning_rate": 5.706167332330515e-07, + "logits/chosen": -2.497143507003784, + "logits/rejected": -1.5830731391906738, + "logps/chosen": -222.31643676757812, + "logps/rejected": -215.347900390625, + "loss": 0.0447, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.464969635009766, + "rewards/margins": 3.745387077331543, + "rewards/rejected": -15.210356712341309, + "step": 18511 + }, + { + "epoch": 2.88, + "learning_rate": 5.698832927019036e-07, + "logits/chosen": -2.3881890773773193, + "logits/rejected": -2.807776689529419, + "logps/chosen": -813.7326049804688, + "logps/rejected": -692.607177734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.970373153686523, + "rewards/margins": 12.441341400146484, + "rewards/rejected": -24.411712646484375, + "step": 18512 + }, + { + "epoch": 2.88, + "learning_rate": 5.691498521707558e-07, + "logits/chosen": -2.8401455879211426, + "logits/rejected": -2.9678359031677246, + "logps/chosen": -120.45577239990234, + "logps/rejected": -215.42601013183594, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.223783493041992, + "rewards/margins": 8.169290542602539, + "rewards/rejected": -16.39307403564453, + "step": 18513 + }, + { + "epoch": 2.88, + "learning_rate": 5.684164116396079e-07, + "logits/chosen": -2.2550480365753174, + "logits/rejected": -2.4931182861328125, + "logps/chosen": -398.3150634765625, + "logps/rejected": -575.0256958007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.461926460266113, + "rewards/margins": 14.421243667602539, + "rewards/rejected": -26.88317108154297, + "step": 18514 + }, + { + "epoch": 2.88, + "learning_rate": 5.6768297110846e-07, + "logits/chosen": -2.5347800254821777, + "logits/rejected": -2.504852056503296, + "logps/chosen": -196.24176025390625, + "logps/rejected": -440.50457763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.374141693115234, + "rewards/margins": 14.841296195983887, + "rewards/rejected": -24.215438842773438, + "step": 18515 + }, + { + "epoch": 2.88, + "learning_rate": 5.669495305773121e-07, + "logits/chosen": -2.4300119876861572, + "logits/rejected": -2.4088313579559326, + "logps/chosen": -424.9854736328125, + "logps/rejected": -458.91107177734375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.13691520690918, + "rewards/margins": 6.708778381347656, + "rewards/rejected": -17.845693588256836, + "step": 18516 + }, + { + "epoch": 2.88, + "learning_rate": 5.662160900461643e-07, + "logits/chosen": -2.7630763053894043, + "logits/rejected": -2.690688371658325, + "logps/chosen": -220.89718627929688, + "logps/rejected": -376.7962341308594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.063669204711914, + "rewards/margins": 11.892179489135742, + "rewards/rejected": -20.955848693847656, + "step": 18517 + }, + { + "epoch": 2.88, + "learning_rate": 5.654826495150163e-07, + "logits/chosen": -2.8770337104797363, + "logits/rejected": -2.359975814819336, + "logps/chosen": -205.96682739257812, + "logps/rejected": -550.752197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.782692909240723, + "rewards/margins": 18.782459259033203, + "rewards/rejected": -24.56515121459961, + "step": 18518 + }, + { + "epoch": 2.88, + "learning_rate": 5.647492089838684e-07, + "logits/chosen": -2.394029378890991, + "logits/rejected": -2.7906439304351807, + "logps/chosen": -248.06686401367188, + "logps/rejected": -596.060791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.581311225891113, + "rewards/margins": 14.344552993774414, + "rewards/rejected": -23.925865173339844, + "step": 18519 + }, + { + "epoch": 2.88, + "learning_rate": 5.640157684527205e-07, + "logits/chosen": -2.91904616355896, + "logits/rejected": -2.76589035987854, + "logps/chosen": -267.2772216796875, + "logps/rejected": -322.3607482910156, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.995759963989258, + "rewards/margins": 6.701624870300293, + "rewards/rejected": -15.69738483428955, + "step": 18520 + }, + { + "epoch": 2.88, + "learning_rate": 5.632823279215727e-07, + "logits/chosen": -1.1726057529449463, + "logits/rejected": -2.5142822265625, + "logps/chosen": -269.7005615234375, + "logps/rejected": -625.11279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.709539413452148, + "rewards/margins": 14.849035263061523, + "rewards/rejected": -25.558574676513672, + "step": 18521 + }, + { + "epoch": 2.88, + "learning_rate": 5.625488873904248e-07, + "logits/chosen": -1.687997579574585, + "logits/rejected": -2.7210161685943604, + "logps/chosen": -205.15835571289062, + "logps/rejected": -608.676513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.713807106018066, + "rewards/margins": 18.169279098510742, + "rewards/rejected": -29.883087158203125, + "step": 18522 + }, + { + "epoch": 2.88, + "learning_rate": 5.61815446859277e-07, + "logits/chosen": -1.5668611526489258, + "logits/rejected": -2.5105698108673096, + "logps/chosen": -124.9817886352539, + "logps/rejected": -377.15802001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.297776222229004, + "rewards/margins": 12.478858947753906, + "rewards/rejected": -19.776634216308594, + "step": 18523 + }, + { + "epoch": 2.88, + "learning_rate": 5.61082006328129e-07, + "logits/chosen": -2.513723611831665, + "logits/rejected": -2.684837579727173, + "logps/chosen": -297.2926330566406, + "logps/rejected": -192.20362854003906, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.693475723266602, + "rewards/margins": 7.379932403564453, + "rewards/rejected": -15.073408126831055, + "step": 18524 + }, + { + "epoch": 2.88, + "learning_rate": 5.603485657969812e-07, + "logits/chosen": -2.2562363147735596, + "logits/rejected": -2.216846227645874, + "logps/chosen": -215.80804443359375, + "logps/rejected": -450.4964599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.231182098388672, + "rewards/margins": 11.85765552520752, + "rewards/rejected": -21.088836669921875, + "step": 18525 + }, + { + "epoch": 2.88, + "learning_rate": 5.596151252658333e-07, + "logits/chosen": -2.2068893909454346, + "logits/rejected": -2.7584359645843506, + "logps/chosen": -280.3302307128906, + "logps/rejected": -715.3233032226562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.72775650024414, + "rewards/margins": 21.162456512451172, + "rewards/rejected": -29.890213012695312, + "step": 18526 + }, + { + "epoch": 2.88, + "learning_rate": 5.588816847346855e-07, + "logits/chosen": -2.945802927017212, + "logits/rejected": -1.5152560472488403, + "logps/chosen": -483.5184020996094, + "logps/rejected": -261.421630859375, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.017230033874512, + "rewards/margins": 6.055602073669434, + "rewards/rejected": -17.072832107543945, + "step": 18527 + }, + { + "epoch": 2.88, + "learning_rate": 5.581482442035376e-07, + "logits/chosen": -1.7665700912475586, + "logits/rejected": -2.7888903617858887, + "logps/chosen": -735.52001953125, + "logps/rejected": -760.9576416015625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.441426277160645, + "rewards/margins": 9.170783996582031, + "rewards/rejected": -23.612211227416992, + "step": 18528 + }, + { + "epoch": 2.88, + "learning_rate": 5.574148036723896e-07, + "logits/chosen": -0.9794467687606812, + "logits/rejected": -2.764983654022217, + "logps/chosen": -112.65675354003906, + "logps/rejected": -460.002197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.940168380737305, + "rewards/margins": 11.38956069946289, + "rewards/rejected": -18.329729080200195, + "step": 18529 + }, + { + "epoch": 2.88, + "learning_rate": 5.566813631412417e-07, + "logits/chosen": -2.5878727436065674, + "logits/rejected": -1.7743088006973267, + "logps/chosen": -272.54052734375, + "logps/rejected": -423.4779052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.148481369018555, + "rewards/margins": 10.258766174316406, + "rewards/rejected": -21.40724754333496, + "step": 18530 + }, + { + "epoch": 2.88, + "learning_rate": 5.559479226100939e-07, + "logits/chosen": -3.0244321823120117, + "logits/rejected": -3.050097703933716, + "logps/chosen": -163.0944366455078, + "logps/rejected": -243.1233673095703, + "loss": 0.6245, + "rewards/accuracies": 0.5, + "rewards/chosen": -14.275108337402344, + "rewards/margins": 3.3555874824523926, + "rewards/rejected": -17.630695343017578, + "step": 18531 + }, + { + "epoch": 2.88, + "learning_rate": 5.55214482078946e-07, + "logits/chosen": -1.7432355880737305, + "logits/rejected": -2.618952512741089, + "logps/chosen": -325.84326171875, + "logps/rejected": -539.458740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.433860778808594, + "rewards/margins": 15.742618560791016, + "rewards/rejected": -24.17647933959961, + "step": 18532 + }, + { + "epoch": 2.88, + "learning_rate": 5.544810415477981e-07, + "logits/chosen": -2.72397780418396, + "logits/rejected": -1.9840229749679565, + "logps/chosen": -839.6957397460938, + "logps/rejected": -541.3929443359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.841840744018555, + "rewards/margins": 10.109878540039062, + "rewards/rejected": -18.95172119140625, + "step": 18533 + }, + { + "epoch": 2.88, + "learning_rate": 5.537476010166502e-07, + "logits/chosen": -1.5514260530471802, + "logits/rejected": -2.728546142578125, + "logps/chosen": -234.30726623535156, + "logps/rejected": -574.875732421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.532803535461426, + "rewards/margins": 14.385987281799316, + "rewards/rejected": -23.918790817260742, + "step": 18534 + }, + { + "epoch": 2.88, + "learning_rate": 5.530141604855023e-07, + "logits/chosen": -2.67954421043396, + "logits/rejected": -2.7745790481567383, + "logps/chosen": -199.78695678710938, + "logps/rejected": -257.59942626953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.80501937866211, + "rewards/margins": 9.072189331054688, + "rewards/rejected": -17.877208709716797, + "step": 18535 + }, + { + "epoch": 2.88, + "learning_rate": 5.522807199543545e-07, + "logits/chosen": -2.8113105297088623, + "logits/rejected": -1.9669615030288696, + "logps/chosen": -280.78021240234375, + "logps/rejected": -387.37060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.527253150939941, + "rewards/margins": 12.906658172607422, + "rewards/rejected": -19.433910369873047, + "step": 18536 + }, + { + "epoch": 2.88, + "learning_rate": 5.515472794232066e-07, + "logits/chosen": -2.5289535522460938, + "logits/rejected": -3.097296953201294, + "logps/chosen": -93.96055603027344, + "logps/rejected": -252.7930450439453, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.21117115020752, + "rewards/margins": 6.337113857269287, + "rewards/rejected": -14.548284530639648, + "step": 18537 + }, + { + "epoch": 2.88, + "learning_rate": 5.508138388920588e-07, + "logits/chosen": -2.177253246307373, + "logits/rejected": -2.6590960025787354, + "logps/chosen": -355.805908203125, + "logps/rejected": -562.5313720703125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.125395774841309, + "rewards/margins": 10.35229778289795, + "rewards/rejected": -21.477693557739258, + "step": 18538 + }, + { + "epoch": 2.88, + "learning_rate": 5.500803983609108e-07, + "logits/chosen": -2.183817148208618, + "logits/rejected": -2.602241277694702, + "logps/chosen": -241.48207092285156, + "logps/rejected": -442.7784423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9440388679504395, + "rewards/margins": 12.901321411132812, + "rewards/rejected": -19.845359802246094, + "step": 18539 + }, + { + "epoch": 2.88, + "learning_rate": 5.49346957829763e-07, + "logits/chosen": -1.839015245437622, + "logits/rejected": -2.7557175159454346, + "logps/chosen": -183.70985412597656, + "logps/rejected": -559.1961059570312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.754354476928711, + "rewards/margins": 17.180458068847656, + "rewards/rejected": -24.934810638427734, + "step": 18540 + }, + { + "epoch": 2.88, + "learning_rate": 5.48613517298615e-07, + "logits/chosen": -2.7346179485321045, + "logits/rejected": -1.7348195314407349, + "logps/chosen": -257.6879577636719, + "logps/rejected": -180.84072875976562, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.39006519317627, + "rewards/margins": 4.372064590454102, + "rewards/rejected": -15.762129783630371, + "step": 18541 + }, + { + "epoch": 2.88, + "learning_rate": 5.478800767674672e-07, + "logits/chosen": -2.964639186859131, + "logits/rejected": -2.491806745529175, + "logps/chosen": -280.1484375, + "logps/rejected": -305.76251220703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.939623832702637, + "rewards/margins": 8.40832805633545, + "rewards/rejected": -15.347951889038086, + "step": 18542 + }, + { + "epoch": 2.88, + "learning_rate": 5.471466362363193e-07, + "logits/chosen": -2.560256242752075, + "logits/rejected": -2.8170371055603027, + "logps/chosen": -141.950927734375, + "logps/rejected": -377.0727233886719, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.88024377822876, + "rewards/margins": 13.78762435913086, + "rewards/rejected": -20.66786766052246, + "step": 18543 + }, + { + "epoch": 2.88, + "learning_rate": 5.464131957051714e-07, + "logits/chosen": -1.5572322607040405, + "logits/rejected": -2.580904006958008, + "logps/chosen": -121.32844543457031, + "logps/rejected": -467.0072021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.724702835083008, + "rewards/margins": 14.7791748046875, + "rewards/rejected": -24.503875732421875, + "step": 18544 + }, + { + "epoch": 2.88, + "learning_rate": 5.456797551740235e-07, + "logits/chosen": -2.44986891746521, + "logits/rejected": -2.203303337097168, + "logps/chosen": -289.7626953125, + "logps/rejected": -330.68328857421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.984274387359619, + "rewards/margins": 12.412544250488281, + "rewards/rejected": -20.396818161010742, + "step": 18545 + }, + { + "epoch": 2.88, + "learning_rate": 5.449463146428757e-07, + "logits/chosen": -2.4995906352996826, + "logits/rejected": -2.9505019187927246, + "logps/chosen": -263.9017333984375, + "logps/rejected": -231.92845153808594, + "loss": 0.9239, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.552900314331055, + "rewards/margins": 2.356532096862793, + "rewards/rejected": -15.909433364868164, + "step": 18546 + }, + { + "epoch": 2.88, + "learning_rate": 5.442128741117278e-07, + "logits/chosen": -3.0203802585601807, + "logits/rejected": -2.3646843433380127, + "logps/chosen": -665.4503173828125, + "logps/rejected": -576.48681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.44139575958252, + "rewards/margins": 11.200753211975098, + "rewards/rejected": -19.642148971557617, + "step": 18547 + }, + { + "epoch": 2.88, + "learning_rate": 5.4347943358058e-07, + "logits/chosen": -2.9316225051879883, + "logits/rejected": -3.0893077850341797, + "logps/chosen": -344.87811279296875, + "logps/rejected": -273.83966064453125, + "loss": 0.7947, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.903738975524902, + "rewards/margins": 3.0849759578704834, + "rewards/rejected": -11.988714218139648, + "step": 18548 + }, + { + "epoch": 2.88, + "learning_rate": 5.42745993049432e-07, + "logits/chosen": -2.2517404556274414, + "logits/rejected": -2.804555892944336, + "logps/chosen": -366.4425354003906, + "logps/rejected": -648.5944213867188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.64720344543457, + "rewards/margins": 13.083230972290039, + "rewards/rejected": -22.73043441772461, + "step": 18549 + }, + { + "epoch": 2.88, + "learning_rate": 5.420125525182842e-07, + "logits/chosen": -2.5574865341186523, + "logits/rejected": -2.4370005130767822, + "logps/chosen": -251.96432495117188, + "logps/rejected": -376.39947509765625, + "loss": 0.0784, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.85502815246582, + "rewards/margins": 4.988687992095947, + "rewards/rejected": -19.84371566772461, + "step": 18550 + }, + { + "epoch": 2.89, + "learning_rate": 5.412791119871363e-07, + "logits/chosen": -0.6611503958702087, + "logits/rejected": -2.8186018466949463, + "logps/chosen": -266.7769470214844, + "logps/rejected": -800.137451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.899085998535156, + "rewards/margins": 13.079024314880371, + "rewards/rejected": -24.978111267089844, + "step": 18551 + }, + { + "epoch": 2.89, + "learning_rate": 5.405456714559885e-07, + "logits/chosen": -1.6926913261413574, + "logits/rejected": -2.7255618572235107, + "logps/chosen": -205.11138916015625, + "logps/rejected": -519.807373046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.161750316619873, + "rewards/margins": 8.991983413696289, + "rewards/rejected": -16.15373420715332, + "step": 18552 + }, + { + "epoch": 2.89, + "learning_rate": 5.398122309248406e-07, + "logits/chosen": -2.677980422973633, + "logits/rejected": -2.332697629928589, + "logps/chosen": -774.0174560546875, + "logps/rejected": -774.9007568359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.031728744506836, + "rewards/margins": 9.079774856567383, + "rewards/rejected": -21.11150360107422, + "step": 18553 + }, + { + "epoch": 2.89, + "learning_rate": 5.390787903936926e-07, + "logits/chosen": -2.6019668579101562, + "logits/rejected": -2.920907735824585, + "logps/chosen": -156.99575805664062, + "logps/rejected": -438.0947570800781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.451369285583496, + "rewards/margins": 12.432513236999512, + "rewards/rejected": -19.883882522583008, + "step": 18554 + }, + { + "epoch": 2.89, + "learning_rate": 5.383453498625447e-07, + "logits/chosen": -2.555739641189575, + "logits/rejected": -2.1025235652923584, + "logps/chosen": -225.15231323242188, + "logps/rejected": -342.51666259765625, + "loss": 0.3536, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.944665908813477, + "rewards/margins": 4.400246620178223, + "rewards/rejected": -13.344913482666016, + "step": 18555 + }, + { + "epoch": 2.89, + "learning_rate": 5.376119093313968e-07, + "logits/chosen": -2.6447107791900635, + "logits/rejected": -2.3003556728363037, + "logps/chosen": -360.370361328125, + "logps/rejected": -414.0887451171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.39181137084961, + "rewards/margins": 10.533098220825195, + "rewards/rejected": -19.924909591674805, + "step": 18556 + }, + { + "epoch": 2.89, + "learning_rate": 5.36878468800249e-07, + "logits/chosen": -2.5115768909454346, + "logits/rejected": -2.2267298698425293, + "logps/chosen": -417.94384765625, + "logps/rejected": -423.2289733886719, + "loss": 0.172, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.911046981811523, + "rewards/margins": 6.710385799407959, + "rewards/rejected": -20.62143325805664, + "step": 18557 + }, + { + "epoch": 2.89, + "learning_rate": 5.361450282691011e-07, + "logits/chosen": -1.3808704614639282, + "logits/rejected": -2.330562114715576, + "logps/chosen": -177.40933227539062, + "logps/rejected": -478.5784912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.289005279541016, + "rewards/margins": 10.272266387939453, + "rewards/rejected": -19.56127166748047, + "step": 18558 + }, + { + "epoch": 2.89, + "learning_rate": 5.354115877379532e-07, + "logits/chosen": -1.7751481533050537, + "logits/rejected": -2.4492597579956055, + "logps/chosen": -194.04190063476562, + "logps/rejected": -378.82415771484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.863615036010742, + "rewards/margins": 10.533514022827148, + "rewards/rejected": -22.39712905883789, + "step": 18559 + }, + { + "epoch": 2.89, + "learning_rate": 5.346781472068053e-07, + "logits/chosen": -2.933589220046997, + "logits/rejected": -2.8026483058929443, + "logps/chosen": -149.533203125, + "logps/rejected": -378.8690490722656, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.537961959838867, + "rewards/margins": 8.726422309875488, + "rewards/rejected": -20.26438331604004, + "step": 18560 + }, + { + "epoch": 2.89, + "learning_rate": 5.339447066756575e-07, + "logits/chosen": -2.2254419326782227, + "logits/rejected": -2.746326208114624, + "logps/chosen": -346.1673583984375, + "logps/rejected": -447.03802490234375, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.399860382080078, + "rewards/margins": 7.755902290344238, + "rewards/rejected": -18.15576171875, + "step": 18561 + }, + { + "epoch": 2.89, + "learning_rate": 5.332112661445096e-07, + "logits/chosen": -1.2682164907455444, + "logits/rejected": -2.282825231552124, + "logps/chosen": -255.12648010253906, + "logps/rejected": -687.4540405273438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.212517738342285, + "rewards/margins": 14.260804176330566, + "rewards/rejected": -28.47332191467285, + "step": 18562 + }, + { + "epoch": 2.89, + "learning_rate": 5.324778256133618e-07, + "logits/chosen": -2.6536507606506348, + "logits/rejected": -2.7686357498168945, + "logps/chosen": -255.15164184570312, + "logps/rejected": -362.72027587890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.262540817260742, + "rewards/margins": 11.20406723022461, + "rewards/rejected": -19.46660804748535, + "step": 18563 + }, + { + "epoch": 2.89, + "learning_rate": 5.317443850822138e-07, + "logits/chosen": -2.436070203781128, + "logits/rejected": -2.682955026626587, + "logps/chosen": -391.1011657714844, + "logps/rejected": -500.37060546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.120187759399414, + "rewards/margins": 10.589990615844727, + "rewards/rejected": -16.71017837524414, + "step": 18564 + }, + { + "epoch": 2.89, + "learning_rate": 5.31010944551066e-07, + "logits/chosen": -2.6304287910461426, + "logits/rejected": -2.872709274291992, + "logps/chosen": -149.5063934326172, + "logps/rejected": -342.26458740234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.785780906677246, + "rewards/margins": 10.53433609008789, + "rewards/rejected": -19.320117950439453, + "step": 18565 + }, + { + "epoch": 2.89, + "learning_rate": 5.30277504019918e-07, + "logits/chosen": -2.509439468383789, + "logits/rejected": -2.7029595375061035, + "logps/chosen": -324.64959716796875, + "logps/rejected": -438.4667663574219, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.92198371887207, + "rewards/margins": 7.469730854034424, + "rewards/rejected": -17.39171600341797, + "step": 18566 + }, + { + "epoch": 2.89, + "learning_rate": 5.295440634887702e-07, + "logits/chosen": -2.0705673694610596, + "logits/rejected": -2.8900279998779297, + "logps/chosen": -193.11514282226562, + "logps/rejected": -391.67626953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.658169746398926, + "rewards/margins": 10.591979026794434, + "rewards/rejected": -22.25014877319336, + "step": 18567 + }, + { + "epoch": 2.89, + "learning_rate": 5.288106229576223e-07, + "logits/chosen": -1.562955617904663, + "logits/rejected": -2.4286468029022217, + "logps/chosen": -456.6748046875, + "logps/rejected": -692.3845825195312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.644020080566406, + "rewards/margins": 13.96565055847168, + "rewards/rejected": -23.60967254638672, + "step": 18568 + }, + { + "epoch": 2.89, + "learning_rate": 5.280771824264744e-07, + "logits/chosen": -2.9781150817871094, + "logits/rejected": -1.7917046546936035, + "logps/chosen": -269.8638610839844, + "logps/rejected": -224.99862670898438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.378211975097656, + "rewards/margins": 9.930060386657715, + "rewards/rejected": -17.308273315429688, + "step": 18569 + }, + { + "epoch": 2.89, + "learning_rate": 5.273437418953265e-07, + "logits/chosen": -2.633319616317749, + "logits/rejected": -2.9996354579925537, + "logps/chosen": -164.651611328125, + "logps/rejected": -400.6099853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.35106086730957, + "rewards/margins": 10.807148933410645, + "rewards/rejected": -17.15821075439453, + "step": 18570 + }, + { + "epoch": 2.89, + "learning_rate": 5.266103013641787e-07, + "logits/chosen": -2.385962963104248, + "logits/rejected": -2.798725128173828, + "logps/chosen": -83.65621948242188, + "logps/rejected": -336.5087890625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3065385818481445, + "rewards/margins": 7.251443386077881, + "rewards/rejected": -14.557981491088867, + "step": 18571 + }, + { + "epoch": 2.89, + "learning_rate": 5.258768608330308e-07, + "logits/chosen": -2.233182191848755, + "logits/rejected": -2.8659768104553223, + "logps/chosen": -162.16815185546875, + "logps/rejected": -359.86273193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.810382843017578, + "rewards/margins": 10.912435531616211, + "rewards/rejected": -19.72281837463379, + "step": 18572 + }, + { + "epoch": 2.89, + "learning_rate": 5.251434203018829e-07, + "logits/chosen": -1.631539225578308, + "logits/rejected": -2.296272039413452, + "logps/chosen": -167.92701721191406, + "logps/rejected": -433.4193115234375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.235204696655273, + "rewards/margins": 12.803131103515625, + "rewards/rejected": -25.0383358001709, + "step": 18573 + }, + { + "epoch": 2.89, + "learning_rate": 5.24409979770735e-07, + "logits/chosen": -2.4986753463745117, + "logits/rejected": -2.3955702781677246, + "logps/chosen": -180.29249572753906, + "logps/rejected": -218.8408203125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.393836975097656, + "rewards/margins": 7.523077011108398, + "rewards/rejected": -16.916913986206055, + "step": 18574 + }, + { + "epoch": 2.89, + "learning_rate": 5.236765392395871e-07, + "logits/chosen": -2.142836332321167, + "logits/rejected": -2.7099380493164062, + "logps/chosen": -370.2848205566406, + "logps/rejected": -790.9246826171875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.463418006896973, + "rewards/margins": 8.23776626586914, + "rewards/rejected": -17.70118522644043, + "step": 18575 + }, + { + "epoch": 2.89, + "learning_rate": 5.229430987084393e-07, + "logits/chosen": -2.8461403846740723, + "logits/rejected": -2.9083759784698486, + "logps/chosen": -135.9443359375, + "logps/rejected": -378.84564208984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.365609169006348, + "rewards/margins": 14.151155471801758, + "rewards/rejected": -22.516765594482422, + "step": 18576 + }, + { + "epoch": 2.89, + "learning_rate": 5.222096581772913e-07, + "logits/chosen": -2.3071296215057373, + "logits/rejected": -2.727771759033203, + "logps/chosen": -313.39959716796875, + "logps/rejected": -295.748046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.160501480102539, + "rewards/margins": 8.690901756286621, + "rewards/rejected": -18.851402282714844, + "step": 18577 + }, + { + "epoch": 2.89, + "learning_rate": 5.214762176461434e-07, + "logits/chosen": -1.479853868484497, + "logits/rejected": -2.7700657844543457, + "logps/chosen": -189.14039611816406, + "logps/rejected": -406.01171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.653409004211426, + "rewards/margins": 8.414102554321289, + "rewards/rejected": -18.06751251220703, + "step": 18578 + }, + { + "epoch": 2.89, + "learning_rate": 5.207427771149955e-07, + "logits/chosen": -2.6994543075561523, + "logits/rejected": -2.6980643272399902, + "logps/chosen": -788.2172241210938, + "logps/rejected": -602.595458984375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.058685302734375, + "rewards/margins": 7.889421463012695, + "rewards/rejected": -17.94810676574707, + "step": 18579 + }, + { + "epoch": 2.89, + "learning_rate": 5.200093365838477e-07, + "logits/chosen": -1.5490944385528564, + "logits/rejected": -2.755930185317993, + "logps/chosen": -559.7354125976562, + "logps/rejected": -785.638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.092509269714355, + "rewards/margins": 13.565199851989746, + "rewards/rejected": -24.6577091217041, + "step": 18580 + }, + { + "epoch": 2.89, + "learning_rate": 5.192758960526998e-07, + "logits/chosen": -2.352569103240967, + "logits/rejected": -2.68774676322937, + "logps/chosen": -121.14363098144531, + "logps/rejected": -469.1776123046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5171308517456055, + "rewards/margins": 13.030029296875, + "rewards/rejected": -19.547161102294922, + "step": 18581 + }, + { + "epoch": 2.89, + "learning_rate": 5.18542455521552e-07, + "logits/chosen": -1.3057612180709839, + "logits/rejected": -2.44914174079895, + "logps/chosen": -119.05809020996094, + "logps/rejected": -423.415771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.051192283630371, + "rewards/margins": 12.173675537109375, + "rewards/rejected": -21.224868774414062, + "step": 18582 + }, + { + "epoch": 2.89, + "learning_rate": 5.17809014990404e-07, + "logits/chosen": -2.7429354190826416, + "logits/rejected": -2.759720802307129, + "logps/chosen": -232.79574584960938, + "logps/rejected": -330.81427001953125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.651963233947754, + "rewards/margins": 9.496843338012695, + "rewards/rejected": -18.148807525634766, + "step": 18583 + }, + { + "epoch": 2.89, + "learning_rate": 5.170755744592562e-07, + "logits/chosen": -2.4796907901763916, + "logits/rejected": -2.956176519393921, + "logps/chosen": -413.0936584472656, + "logps/rejected": -558.8343505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.530704021453857, + "rewards/margins": 11.824470520019531, + "rewards/rejected": -17.355175018310547, + "step": 18584 + }, + { + "epoch": 2.89, + "learning_rate": 5.163421339281083e-07, + "logits/chosen": -1.9736027717590332, + "logits/rejected": -2.4581186771392822, + "logps/chosen": -230.988525390625, + "logps/rejected": -489.52978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.893552780151367, + "rewards/margins": 14.208463668823242, + "rewards/rejected": -24.10201644897461, + "step": 18585 + }, + { + "epoch": 2.89, + "learning_rate": 5.156086933969605e-07, + "logits/chosen": -2.471116542816162, + "logits/rejected": -2.9310951232910156, + "logps/chosen": -154.64437866210938, + "logps/rejected": -334.37945556640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.632779121398926, + "rewards/margins": 8.272644996643066, + "rewards/rejected": -20.905424118041992, + "step": 18586 + }, + { + "epoch": 2.89, + "learning_rate": 5.148752528658126e-07, + "logits/chosen": -2.654968023300171, + "logits/rejected": -2.330504894256592, + "logps/chosen": -328.0594787597656, + "logps/rejected": -465.6082763671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.454200744628906, + "rewards/margins": 10.737896919250488, + "rewards/rejected": -21.192096710205078, + "step": 18587 + }, + { + "epoch": 2.89, + "learning_rate": 5.141418123346647e-07, + "logits/chosen": -2.0053820610046387, + "logits/rejected": -2.846729040145874, + "logps/chosen": -389.2528381347656, + "logps/rejected": -620.82373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.151493072509766, + "rewards/margins": 10.256952285766602, + "rewards/rejected": -22.408443450927734, + "step": 18588 + }, + { + "epoch": 2.89, + "learning_rate": 5.134083718035167e-07, + "logits/chosen": -2.5930233001708984, + "logits/rejected": -2.767881393432617, + "logps/chosen": -321.66229248046875, + "logps/rejected": -406.2222900390625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5693511962890625, + "rewards/margins": 6.121103286743164, + "rewards/rejected": -13.690454483032227, + "step": 18589 + }, + { + "epoch": 2.89, + "learning_rate": 5.126749312723689e-07, + "logits/chosen": -2.882899284362793, + "logits/rejected": -1.7939260005950928, + "logps/chosen": -301.7413330078125, + "logps/rejected": -254.84780883789062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.974626541137695, + "rewards/margins": 9.081939697265625, + "rewards/rejected": -18.05656623840332, + "step": 18590 + }, + { + "epoch": 2.89, + "learning_rate": 5.11941490741221e-07, + "logits/chosen": -2.4600656032562256, + "logits/rejected": -0.8318450450897217, + "logps/chosen": -242.0360107421875, + "logps/rejected": -134.47454833984375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.889824867248535, + "rewards/margins": 7.556477069854736, + "rewards/rejected": -12.44630241394043, + "step": 18591 + }, + { + "epoch": 2.89, + "learning_rate": 5.112080502100732e-07, + "logits/chosen": -2.3662683963775635, + "logits/rejected": -2.847609281539917, + "logps/chosen": -206.4038543701172, + "logps/rejected": -414.84405517578125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.39803695678711, + "rewards/margins": 9.855012893676758, + "rewards/rejected": -22.253049850463867, + "step": 18592 + }, + { + "epoch": 2.89, + "learning_rate": 5.104746096789252e-07, + "logits/chosen": -2.400480270385742, + "logits/rejected": -2.9503138065338135, + "logps/chosen": -137.8097686767578, + "logps/rejected": -229.51239013671875, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.805825233459473, + "rewards/margins": 7.1048150062561035, + "rewards/rejected": -13.910640716552734, + "step": 18593 + }, + { + "epoch": 2.89, + "learning_rate": 5.097411691477774e-07, + "logits/chosen": -0.7203732132911682, + "logits/rejected": -2.1855602264404297, + "logps/chosen": -227.4473876953125, + "logps/rejected": -649.0458984375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.579814910888672, + "rewards/margins": 9.648099899291992, + "rewards/rejected": -21.227914810180664, + "step": 18594 + }, + { + "epoch": 2.89, + "learning_rate": 5.090077286166295e-07, + "logits/chosen": -2.887887477874756, + "logits/rejected": -2.5481209754943848, + "logps/chosen": -318.20916748046875, + "logps/rejected": -290.1092834472656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.876184463500977, + "rewards/margins": 8.737564086914062, + "rewards/rejected": -13.613747596740723, + "step": 18595 + }, + { + "epoch": 2.89, + "learning_rate": 5.082742880854816e-07, + "logits/chosen": -2.6969685554504395, + "logits/rejected": -2.709055185317993, + "logps/chosen": -100.60951232910156, + "logps/rejected": -219.51138305664062, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.965559959411621, + "rewards/margins": 7.959760665893555, + "rewards/rejected": -15.925320625305176, + "step": 18596 + }, + { + "epoch": 2.89, + "learning_rate": 5.075408475543338e-07, + "logits/chosen": -2.8429696559906006, + "logits/rejected": -2.815492630004883, + "logps/chosen": -140.2498321533203, + "logps/rejected": -360.2403259277344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.64952278137207, + "rewards/margins": 12.809786796569824, + "rewards/rejected": -20.459308624267578, + "step": 18597 + }, + { + "epoch": 2.89, + "learning_rate": 5.068074070231858e-07, + "logits/chosen": -2.8445394039154053, + "logits/rejected": -2.535240411758423, + "logps/chosen": -183.63946533203125, + "logps/rejected": -340.06903076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.864557266235352, + "rewards/margins": 13.26942253112793, + "rewards/rejected": -22.13397979736328, + "step": 18598 + }, + { + "epoch": 2.89, + "learning_rate": 5.06073966492038e-07, + "logits/chosen": -2.213043212890625, + "logits/rejected": -2.501929759979248, + "logps/chosen": -184.95858764648438, + "logps/rejected": -555.3377685546875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.69870376586914, + "rewards/margins": 12.805227279663086, + "rewards/rejected": -23.503931045532227, + "step": 18599 + }, + { + "epoch": 2.89, + "learning_rate": 5.053405259608901e-07, + "logits/chosen": -2.3565940856933594, + "logits/rejected": -2.2910544872283936, + "logps/chosen": -250.8960723876953, + "logps/rejected": -357.5284423828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.180643081665039, + "rewards/margins": 9.427538871765137, + "rewards/rejected": -18.60818099975586, + "step": 18600 + }, + { + "epoch": 2.89, + "learning_rate": 5.046070854297422e-07, + "logits/chosen": -2.6555094718933105, + "logits/rejected": -2.8156192302703857, + "logps/chosen": -170.81431579589844, + "logps/rejected": -352.173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1442975997924805, + "rewards/margins": 16.623146057128906, + "rewards/rejected": -22.76744270324707, + "step": 18601 + }, + { + "epoch": 2.89, + "learning_rate": 5.038736448985943e-07, + "logits/chosen": -1.4565136432647705, + "logits/rejected": -2.7690138816833496, + "logps/chosen": -286.91986083984375, + "logps/rejected": -650.4004516601562, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.685066223144531, + "rewards/margins": 11.260196685791016, + "rewards/rejected": -19.945262908935547, + "step": 18602 + }, + { + "epoch": 2.89, + "learning_rate": 5.031402043674464e-07, + "logits/chosen": -1.110392451286316, + "logits/rejected": -2.6214866638183594, + "logps/chosen": -185.14784240722656, + "logps/rejected": -593.8692626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.789799690246582, + "rewards/margins": 13.414846420288086, + "rewards/rejected": -23.204647064208984, + "step": 18603 + }, + { + "epoch": 2.89, + "learning_rate": 5.024067638362985e-07, + "logits/chosen": -0.995242714881897, + "logits/rejected": -2.6043903827667236, + "logps/chosen": -188.87203979492188, + "logps/rejected": -514.750244140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.663528442382812, + "rewards/margins": 9.463253021240234, + "rewards/rejected": -21.126781463623047, + "step": 18604 + }, + { + "epoch": 2.89, + "learning_rate": 5.016733233051507e-07, + "logits/chosen": -2.7105815410614014, + "logits/rejected": -1.8970943689346313, + "logps/chosen": -461.98406982421875, + "logps/rejected": -357.73321533203125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.46751880645752, + "rewards/margins": 7.134336948394775, + "rewards/rejected": -16.601856231689453, + "step": 18605 + }, + { + "epoch": 2.89, + "learning_rate": 5.009398827740028e-07, + "logits/chosen": -2.8065927028656006, + "logits/rejected": -1.8140745162963867, + "logps/chosen": -667.9445190429688, + "logps/rejected": -564.2591552734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.2356538772583, + "rewards/margins": 8.930788040161133, + "rewards/rejected": -18.16644287109375, + "step": 18606 + }, + { + "epoch": 2.89, + "learning_rate": 5.00206442242855e-07, + "logits/chosen": -2.820911169052124, + "logits/rejected": -2.1495778560638428, + "logps/chosen": -184.60433959960938, + "logps/rejected": -414.0011901855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.994670867919922, + "rewards/margins": 12.403282165527344, + "rewards/rejected": -22.397953033447266, + "step": 18607 + }, + { + "epoch": 2.89, + "learning_rate": 4.99473001711707e-07, + "logits/chosen": -2.796243667602539, + "logits/rejected": -1.589814305305481, + "logps/chosen": -365.3184814453125, + "logps/rejected": -425.35345458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1876068115234375, + "rewards/margins": 11.690311431884766, + "rewards/rejected": -18.877918243408203, + "step": 18608 + }, + { + "epoch": 2.89, + "learning_rate": 4.987395611805592e-07, + "logits/chosen": -2.436199903488159, + "logits/rejected": -2.3001110553741455, + "logps/chosen": -277.6471252441406, + "logps/rejected": -462.68145751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.6492919921875, + "rewards/margins": 13.144853591918945, + "rewards/rejected": -23.794145584106445, + "step": 18609 + }, + { + "epoch": 2.89, + "learning_rate": 4.980061206494113e-07, + "logits/chosen": -2.385100841522217, + "logits/rejected": -2.4206838607788086, + "logps/chosen": -678.5824584960938, + "logps/rejected": -770.0424194335938, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.786312103271484, + "rewards/margins": 9.387807846069336, + "rewards/rejected": -21.174118041992188, + "step": 18610 + }, + { + "epoch": 2.89, + "learning_rate": 4.972726801182635e-07, + "logits/chosen": -2.768507480621338, + "logits/rejected": -2.4576282501220703, + "logps/chosen": -350.6346435546875, + "logps/rejected": -410.5665588378906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.694389343261719, + "rewards/margins": 8.714235305786133, + "rewards/rejected": -18.40862464904785, + "step": 18611 + }, + { + "epoch": 2.89, + "learning_rate": 4.965392395871156e-07, + "logits/chosen": -2.3479537963867188, + "logits/rejected": -1.6153641939163208, + "logps/chosen": -216.72418212890625, + "logps/rejected": -219.9976348876953, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.833090305328369, + "rewards/margins": 6.629354476928711, + "rewards/rejected": -12.462444305419922, + "step": 18612 + }, + { + "epoch": 2.89, + "learning_rate": 4.958057990559676e-07, + "logits/chosen": -2.0163087844848633, + "logits/rejected": -2.471132755279541, + "logps/chosen": -212.200927734375, + "logps/rejected": -353.42431640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.833730697631836, + "rewards/margins": 9.682666778564453, + "rewards/rejected": -14.516397476196289, + "step": 18613 + }, + { + "epoch": 2.89, + "learning_rate": 4.950723585248197e-07, + "logits/chosen": -2.2413580417633057, + "logits/rejected": -2.7327351570129395, + "logps/chosen": -251.87045288085938, + "logps/rejected": -538.0051879882812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.175530433654785, + "rewards/margins": 13.068628311157227, + "rewards/rejected": -21.244159698486328, + "step": 18614 + }, + { + "epoch": 2.9, + "learning_rate": 4.943389179936718e-07, + "logits/chosen": -2.3409500122070312, + "logits/rejected": -2.4716367721557617, + "logps/chosen": -299.0796203613281, + "logps/rejected": -412.06610107421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.033979415893555, + "rewards/margins": 8.62529182434082, + "rewards/rejected": -17.659271240234375, + "step": 18615 + }, + { + "epoch": 2.9, + "learning_rate": 4.93605477462524e-07, + "logits/chosen": -1.8018920421600342, + "logits/rejected": -2.3618345260620117, + "logps/chosen": -216.07037353515625, + "logps/rejected": -448.57281494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.645198822021484, + "rewards/margins": 11.896184921264648, + "rewards/rejected": -24.541383743286133, + "step": 18616 + }, + { + "epoch": 2.9, + "learning_rate": 4.928720369313761e-07, + "logits/chosen": -2.7055227756500244, + "logits/rejected": -2.7332651615142822, + "logps/chosen": -189.65081787109375, + "logps/rejected": -266.1312255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.05370044708252, + "rewards/margins": 9.6217041015625, + "rewards/rejected": -20.675403594970703, + "step": 18617 + }, + { + "epoch": 2.9, + "learning_rate": 4.921385964002282e-07, + "logits/chosen": -2.195594072341919, + "logits/rejected": -2.7450149059295654, + "logps/chosen": -409.4275817871094, + "logps/rejected": -668.4407348632812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.095190048217773, + "rewards/margins": 10.33646011352539, + "rewards/rejected": -22.431650161743164, + "step": 18618 + }, + { + "epoch": 2.9, + "learning_rate": 4.914051558690803e-07, + "logits/chosen": -2.073756217956543, + "logits/rejected": -2.64621639251709, + "logps/chosen": -270.4434814453125, + "logps/rejected": -534.82666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.708032608032227, + "rewards/margins": 12.532180786132812, + "rewards/rejected": -21.24021339416504, + "step": 18619 + }, + { + "epoch": 2.9, + "learning_rate": 4.906717153379325e-07, + "logits/chosen": -1.8147821426391602, + "logits/rejected": -2.6374638080596924, + "logps/chosen": -279.226806640625, + "logps/rejected": -396.7327880859375, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.698720932006836, + "rewards/margins": 5.823556423187256, + "rewards/rejected": -17.52227783203125, + "step": 18620 + }, + { + "epoch": 2.9, + "learning_rate": 4.899382748067846e-07, + "logits/chosen": -2.142198085784912, + "logits/rejected": -2.4651033878326416, + "logps/chosen": -389.26141357421875, + "logps/rejected": -473.64117431640625, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.762873649597168, + "rewards/margins": 18.152589797973633, + "rewards/rejected": -31.915464401245117, + "step": 18621 + }, + { + "epoch": 2.9, + "learning_rate": 4.892048342756368e-07, + "logits/chosen": -2.5905795097351074, + "logits/rejected": -2.81816029548645, + "logps/chosen": -121.45659637451172, + "logps/rejected": -261.2180480957031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.532366752624512, + "rewards/margins": 10.189882278442383, + "rewards/rejected": -19.722248077392578, + "step": 18622 + }, + { + "epoch": 2.9, + "learning_rate": 4.884713937444888e-07, + "logits/chosen": -1.8091710805892944, + "logits/rejected": -2.8625948429107666, + "logps/chosen": -179.10415649414062, + "logps/rejected": -472.94891357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.734634399414062, + "rewards/margins": 11.543571472167969, + "rewards/rejected": -21.27820587158203, + "step": 18623 + }, + { + "epoch": 2.9, + "learning_rate": 4.87737953213341e-07, + "logits/chosen": -2.203563928604126, + "logits/rejected": -2.828033685684204, + "logps/chosen": -332.2732849121094, + "logps/rejected": -290.32666015625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.524858474731445, + "rewards/margins": 9.043659210205078, + "rewards/rejected": -18.568517684936523, + "step": 18624 + }, + { + "epoch": 2.9, + "learning_rate": 4.87004512682193e-07, + "logits/chosen": -2.7048823833465576, + "logits/rejected": -2.131399631500244, + "logps/chosen": -313.151611328125, + "logps/rejected": -339.23919677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.486052513122559, + "rewards/margins": 15.718518257141113, + "rewards/rejected": -23.204570770263672, + "step": 18625 + }, + { + "epoch": 2.9, + "learning_rate": 4.862710721510452e-07, + "logits/chosen": -2.3247101306915283, + "logits/rejected": -2.375617027282715, + "logps/chosen": -160.64410400390625, + "logps/rejected": -321.3154296875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.119779586791992, + "rewards/margins": 8.645469665527344, + "rewards/rejected": -18.765249252319336, + "step": 18626 + }, + { + "epoch": 2.9, + "learning_rate": 4.855376316198973e-07, + "logits/chosen": -1.8494234085083008, + "logits/rejected": -2.785992383956909, + "logps/chosen": -269.88372802734375, + "logps/rejected": -505.56658935546875, + "loss": 0.0443, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.794099807739258, + "rewards/margins": 5.538172245025635, + "rewards/rejected": -17.332271575927734, + "step": 18627 + }, + { + "epoch": 2.9, + "learning_rate": 4.848041910887494e-07, + "logits/chosen": -2.845597743988037, + "logits/rejected": -2.9542782306671143, + "logps/chosen": -200.69544982910156, + "logps/rejected": -226.14422607421875, + "loss": 0.3364, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.96933650970459, + "rewards/margins": 5.5997772216796875, + "rewards/rejected": -14.569113731384277, + "step": 18628 + }, + { + "epoch": 2.9, + "learning_rate": 4.840707505576015e-07, + "logits/chosen": -1.6024922132492065, + "logits/rejected": -2.9773941040039062, + "logps/chosen": -165.52078247070312, + "logps/rejected": -546.7908935546875, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.877439498901367, + "rewards/margins": 12.077009201049805, + "rewards/rejected": -23.954448699951172, + "step": 18629 + }, + { + "epoch": 2.9, + "learning_rate": 4.833373100264537e-07, + "logits/chosen": -2.677126884460449, + "logits/rejected": -3.0030875205993652, + "logps/chosen": -90.2524185180664, + "logps/rejected": -200.80300903320312, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.804293632507324, + "rewards/margins": 6.386795997619629, + "rewards/rejected": -14.191089630126953, + "step": 18630 + }, + { + "epoch": 2.9, + "learning_rate": 4.826038694953058e-07, + "logits/chosen": -2.2255611419677734, + "logits/rejected": -2.641651153564453, + "logps/chosen": -228.8741912841797, + "logps/rejected": -490.687744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.086726188659668, + "rewards/margins": 14.366132736206055, + "rewards/rejected": -21.452857971191406, + "step": 18631 + }, + { + "epoch": 2.9, + "learning_rate": 4.81870428964158e-07, + "logits/chosen": -1.3640468120574951, + "logits/rejected": -2.630789041519165, + "logps/chosen": -210.9191436767578, + "logps/rejected": -482.2892150878906, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.265631675720215, + "rewards/margins": 9.267782211303711, + "rewards/rejected": -21.53341293334961, + "step": 18632 + }, + { + "epoch": 2.9, + "learning_rate": 4.8113698843301e-07, + "logits/chosen": -2.587402105331421, + "logits/rejected": -2.7043910026550293, + "logps/chosen": -797.9989013671875, + "logps/rejected": -914.24755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.781268119812012, + "rewards/margins": 15.212509155273438, + "rewards/rejected": -24.993778228759766, + "step": 18633 + }, + { + "epoch": 2.9, + "learning_rate": 4.804035479018622e-07, + "logits/chosen": -2.757598638534546, + "logits/rejected": -2.685581684112549, + "logps/chosen": -192.14193725585938, + "logps/rejected": -442.328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.900002956390381, + "rewards/margins": 12.178863525390625, + "rewards/rejected": -19.078866958618164, + "step": 18634 + }, + { + "epoch": 2.9, + "learning_rate": 4.796701073707143e-07, + "logits/chosen": -2.606571674346924, + "logits/rejected": -2.621122360229492, + "logps/chosen": -211.64849853515625, + "logps/rejected": -273.6519775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.311328887939453, + "rewards/margins": 11.291635513305664, + "rewards/rejected": -19.602964401245117, + "step": 18635 + }, + { + "epoch": 2.9, + "learning_rate": 4.789366668395664e-07, + "logits/chosen": -2.5086452960968018, + "logits/rejected": -2.4293601512908936, + "logps/chosen": -378.38665771484375, + "logps/rejected": -617.4400634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.376516819000244, + "rewards/margins": 15.167560577392578, + "rewards/rejected": -21.544076919555664, + "step": 18636 + }, + { + "epoch": 2.9, + "learning_rate": 4.782032263084185e-07, + "logits/chosen": -1.6949213743209839, + "logits/rejected": -2.4947407245635986, + "logps/chosen": -167.44747924804688, + "logps/rejected": -359.80029296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.160758972167969, + "rewards/margins": 11.907487869262695, + "rewards/rejected": -21.068246841430664, + "step": 18637 + }, + { + "epoch": 2.9, + "learning_rate": 4.774697857772705e-07, + "logits/chosen": -2.173522710800171, + "logits/rejected": -2.408344268798828, + "logps/chosen": -148.9201202392578, + "logps/rejected": -359.332763671875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.395832061767578, + "rewards/margins": 6.0670485496521, + "rewards/rejected": -15.462881088256836, + "step": 18638 + }, + { + "epoch": 2.9, + "learning_rate": 4.7673634524612273e-07, + "logits/chosen": -2.8783085346221924, + "logits/rejected": -2.27593731880188, + "logps/chosen": -305.4842834472656, + "logps/rejected": -364.7767028808594, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.550713539123535, + "rewards/margins": 11.294168472290039, + "rewards/rejected": -21.84488296508789, + "step": 18639 + }, + { + "epoch": 2.9, + "learning_rate": 4.7600290471497487e-07, + "logits/chosen": -1.0626858472824097, + "logits/rejected": -2.6094279289245605, + "logps/chosen": -189.13339233398438, + "logps/rejected": -435.74041748046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.90160846710205, + "rewards/margins": 8.372095108032227, + "rewards/rejected": -21.273704528808594, + "step": 18640 + }, + { + "epoch": 2.9, + "learning_rate": 4.75269464183827e-07, + "logits/chosen": -2.5670053958892822, + "logits/rejected": -2.673175573348999, + "logps/chosen": -157.9252166748047, + "logps/rejected": -345.9648742675781, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.421595573425293, + "rewards/margins": 7.792787551879883, + "rewards/rejected": -18.21438217163086, + "step": 18641 + }, + { + "epoch": 2.9, + "learning_rate": 4.7453602365267905e-07, + "logits/chosen": -3.0712387561798096, + "logits/rejected": -2.693326950073242, + "logps/chosen": -172.70712280273438, + "logps/rejected": -315.4666442871094, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.632767677307129, + "rewards/margins": 11.007553100585938, + "rewards/rejected": -19.640321731567383, + "step": 18642 + }, + { + "epoch": 2.9, + "learning_rate": 4.738025831215312e-07, + "logits/chosen": -2.1625239849090576, + "logits/rejected": -2.42753267288208, + "logps/chosen": -244.46673583984375, + "logps/rejected": -341.22344970703125, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.451525688171387, + "rewards/margins": 5.708530426025391, + "rewards/rejected": -15.160056114196777, + "step": 18643 + }, + { + "epoch": 2.9, + "learning_rate": 4.7306914259038334e-07, + "logits/chosen": -2.16363525390625, + "logits/rejected": -2.9105958938598633, + "logps/chosen": -110.9811019897461, + "logps/rejected": -601.6776123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0143723487854, + "rewards/margins": 17.24353790283203, + "rewards/rejected": -23.257911682128906, + "step": 18644 + }, + { + "epoch": 2.9, + "learning_rate": 4.7233570205923543e-07, + "logits/chosen": -1.1051985025405884, + "logits/rejected": -2.486710786819458, + "logps/chosen": -197.68203735351562, + "logps/rejected": -429.4043273925781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.716909408569336, + "rewards/margins": 10.757329940795898, + "rewards/rejected": -19.474239349365234, + "step": 18645 + }, + { + "epoch": 2.9, + "learning_rate": 4.7160226152808757e-07, + "logits/chosen": -2.427717685699463, + "logits/rejected": -2.494595766067505, + "logps/chosen": -324.4637451171875, + "logps/rejected": -410.6681213378906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.061832427978516, + "rewards/margins": 8.055387496948242, + "rewards/rejected": -22.11722183227539, + "step": 18646 + }, + { + "epoch": 2.9, + "learning_rate": 4.708688209969396e-07, + "logits/chosen": -2.017777681350708, + "logits/rejected": -2.7748329639434814, + "logps/chosen": -219.23748779296875, + "logps/rejected": -650.2437133789062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.148778915405273, + "rewards/margins": 12.875529289245605, + "rewards/rejected": -27.024307250976562, + "step": 18647 + }, + { + "epoch": 2.9, + "learning_rate": 4.7013538046579175e-07, + "logits/chosen": -2.5623621940612793, + "logits/rejected": -2.715761661529541, + "logps/chosen": -556.3710327148438, + "logps/rejected": -753.6339721679688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.890214920043945, + "rewards/margins": 13.536432266235352, + "rewards/rejected": -22.426647186279297, + "step": 18648 + }, + { + "epoch": 2.9, + "learning_rate": 4.694019399346439e-07, + "logits/chosen": -2.7728018760681152, + "logits/rejected": -2.7826755046844482, + "logps/chosen": -153.56480407714844, + "logps/rejected": -302.8641662597656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.052736282348633, + "rewards/margins": 9.674118041992188, + "rewards/rejected": -15.72685432434082, + "step": 18649 + }, + { + "epoch": 2.9, + "learning_rate": 4.6866849940349604e-07, + "logits/chosen": -1.9155926704406738, + "logits/rejected": -2.6743578910827637, + "logps/chosen": -411.27392578125, + "logps/rejected": -626.7555541992188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.252237319946289, + "rewards/margins": 12.672616958618164, + "rewards/rejected": -22.924854278564453, + "step": 18650 + }, + { + "epoch": 2.9, + "learning_rate": 4.679350588723482e-07, + "logits/chosen": -1.63301682472229, + "logits/rejected": -2.4433653354644775, + "logps/chosen": -222.7838134765625, + "logps/rejected": -585.164306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.374998092651367, + "rewards/margins": 11.425859451293945, + "rewards/rejected": -18.800857543945312, + "step": 18651 + }, + { + "epoch": 2.9, + "learning_rate": 4.672016183412002e-07, + "logits/chosen": -1.5699924230575562, + "logits/rejected": -2.655700445175171, + "logps/chosen": -132.3330078125, + "logps/rejected": -338.9153747558594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.104419708251953, + "rewards/margins": 9.308408737182617, + "rewards/rejected": -19.41282844543457, + "step": 18652 + }, + { + "epoch": 2.9, + "learning_rate": 4.6646817781005236e-07, + "logits/chosen": -2.660562753677368, + "logits/rejected": -1.6433755159378052, + "logps/chosen": -290.5108642578125, + "logps/rejected": -248.40538024902344, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.975223541259766, + "rewards/margins": 5.882136344909668, + "rewards/rejected": -15.85736083984375, + "step": 18653 + }, + { + "epoch": 2.9, + "learning_rate": 4.657347372789045e-07, + "logits/chosen": -1.7210779190063477, + "logits/rejected": -2.4623308181762695, + "logps/chosen": -264.8055114746094, + "logps/rejected": -556.5165405273438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.132195472717285, + "rewards/margins": 13.394729614257812, + "rewards/rejected": -23.52692413330078, + "step": 18654 + }, + { + "epoch": 2.9, + "learning_rate": 4.6500129674775665e-07, + "logits/chosen": -2.6522631645202637, + "logits/rejected": -2.9354465007781982, + "logps/chosen": -123.39006042480469, + "logps/rejected": -285.09259033203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.032881736755371, + "rewards/margins": 8.830862045288086, + "rewards/rejected": -17.86374282836914, + "step": 18655 + }, + { + "epoch": 2.9, + "learning_rate": 4.642678562166088e-07, + "logits/chosen": -2.843902587890625, + "logits/rejected": -2.960172653198242, + "logps/chosen": -257.6859436035156, + "logps/rejected": -410.705810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.925724983215332, + "rewards/margins": 13.122139930725098, + "rewards/rejected": -21.04786491394043, + "step": 18656 + }, + { + "epoch": 2.9, + "learning_rate": 4.6353441568546083e-07, + "logits/chosen": -2.8825759887695312, + "logits/rejected": -2.03328537940979, + "logps/chosen": -256.3555603027344, + "logps/rejected": -271.0933837890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.217118263244629, + "rewards/margins": 8.372306823730469, + "rewards/rejected": -16.58942413330078, + "step": 18657 + }, + { + "epoch": 2.9, + "learning_rate": 4.6280097515431297e-07, + "logits/chosen": -2.590036630630493, + "logits/rejected": -2.606785535812378, + "logps/chosen": -107.86534118652344, + "logps/rejected": -493.57733154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.373994827270508, + "rewards/margins": 15.939186096191406, + "rewards/rejected": -25.313180923461914, + "step": 18658 + }, + { + "epoch": 2.9, + "learning_rate": 4.620675346231651e-07, + "logits/chosen": -2.366870164871216, + "logits/rejected": -2.8870253562927246, + "logps/chosen": -155.9464111328125, + "logps/rejected": -598.2971801757812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.938634872436523, + "rewards/margins": 16.00278091430664, + "rewards/rejected": -26.941417694091797, + "step": 18659 + }, + { + "epoch": 2.9, + "learning_rate": 4.6133409409201726e-07, + "logits/chosen": -1.577594518661499, + "logits/rejected": -2.6459312438964844, + "logps/chosen": -213.31582641601562, + "logps/rejected": -329.7294921875, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.95695972442627, + "rewards/margins": 9.165407180786133, + "rewards/rejected": -20.12236785888672, + "step": 18660 + }, + { + "epoch": 2.9, + "learning_rate": 4.606006535608694e-07, + "logits/chosen": -1.9610166549682617, + "logits/rejected": -2.569692850112915, + "logps/chosen": -230.07305908203125, + "logps/rejected": -396.2786865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.007571220397949, + "rewards/margins": 14.116744995117188, + "rewards/rejected": -21.124317169189453, + "step": 18661 + }, + { + "epoch": 2.9, + "learning_rate": 4.5986721302972144e-07, + "logits/chosen": -2.893493890762329, + "logits/rejected": -2.678083658218384, + "logps/chosen": -657.03125, + "logps/rejected": -503.52288818359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.825037002563477, + "rewards/margins": 10.83619499206543, + "rewards/rejected": -17.661231994628906, + "step": 18662 + }, + { + "epoch": 2.9, + "learning_rate": 4.591337724985736e-07, + "logits/chosen": -1.9620375633239746, + "logits/rejected": -2.7162163257598877, + "logps/chosen": -395.0858154296875, + "logps/rejected": -737.4358520507812, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.982063293457031, + "rewards/margins": 7.138947010040283, + "rewards/rejected": -17.121009826660156, + "step": 18663 + }, + { + "epoch": 2.9, + "learning_rate": 4.584003319674257e-07, + "logits/chosen": -2.122401475906372, + "logits/rejected": -2.7449393272399902, + "logps/chosen": -507.2919616699219, + "logps/rejected": -684.9920654296875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.254632949829102, + "rewards/margins": 11.249832153320312, + "rewards/rejected": -21.504467010498047, + "step": 18664 + }, + { + "epoch": 2.9, + "learning_rate": 4.576668914362778e-07, + "logits/chosen": -2.8754849433898926, + "logits/rejected": -2.6733767986297607, + "logps/chosen": -207.63519287109375, + "logps/rejected": -361.5909423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.10688591003418, + "rewards/margins": 11.193620681762695, + "rewards/rejected": -22.300506591796875, + "step": 18665 + }, + { + "epoch": 2.9, + "learning_rate": 4.5693345090512996e-07, + "logits/chosen": -1.6314996480941772, + "logits/rejected": -2.566185235977173, + "logps/chosen": -320.3724670410156, + "logps/rejected": -585.8369140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.547107696533203, + "rewards/margins": 13.934503555297852, + "rewards/rejected": -23.481613159179688, + "step": 18666 + }, + { + "epoch": 2.9, + "learning_rate": 4.56200010373982e-07, + "logits/chosen": -1.7631093263626099, + "logits/rejected": -2.507143020629883, + "logps/chosen": -187.71694946289062, + "logps/rejected": -458.1260986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.658842086791992, + "rewards/margins": 12.32207202911377, + "rewards/rejected": -19.980915069580078, + "step": 18667 + }, + { + "epoch": 2.9, + "learning_rate": 4.5546656984283414e-07, + "logits/chosen": -2.6266677379608154, + "logits/rejected": -2.751626491546631, + "logps/chosen": -406.99810791015625, + "logps/rejected": -376.0878601074219, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.934646606445312, + "rewards/margins": 6.253638744354248, + "rewards/rejected": -16.18828582763672, + "step": 18668 + }, + { + "epoch": 2.9, + "learning_rate": 4.547331293116863e-07, + "logits/chosen": -2.047501564025879, + "logits/rejected": -2.6489357948303223, + "logps/chosen": -415.5284729003906, + "logps/rejected": -565.337890625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.316877365112305, + "rewards/margins": 14.311662673950195, + "rewards/rejected": -26.6285400390625, + "step": 18669 + }, + { + "epoch": 2.9, + "learning_rate": 4.539996887805384e-07, + "logits/chosen": -2.899604320526123, + "logits/rejected": -2.4609200954437256, + "logps/chosen": -425.63238525390625, + "logps/rejected": -357.8465881347656, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5983445644378662, + "rewards/margins": 10.484416007995605, + "rewards/rejected": -12.082759857177734, + "step": 18670 + }, + { + "epoch": 2.9, + "learning_rate": 4.5326624824939056e-07, + "logits/chosen": -2.8379571437835693, + "logits/rejected": -2.619514226913452, + "logps/chosen": -291.0430908203125, + "logps/rejected": -211.7900390625, + "loss": 0.9148, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.562365531921387, + "rewards/margins": 0.8704266548156738, + "rewards/rejected": -13.432792663574219, + "step": 18671 + }, + { + "epoch": 2.9, + "learning_rate": 4.525328077182426e-07, + "logits/chosen": -2.6064438819885254, + "logits/rejected": -1.9188549518585205, + "logps/chosen": -393.93804931640625, + "logps/rejected": -540.260986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.073439598083496, + "rewards/margins": 13.813817024230957, + "rewards/rejected": -26.887256622314453, + "step": 18672 + }, + { + "epoch": 2.9, + "learning_rate": 4.5179936718709474e-07, + "logits/chosen": -2.8094310760498047, + "logits/rejected": -1.41110098361969, + "logps/chosen": -503.47113037109375, + "logps/rejected": -492.68408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.956501007080078, + "rewards/margins": 16.953243255615234, + "rewards/rejected": -22.909746170043945, + "step": 18673 + }, + { + "epoch": 2.9, + "learning_rate": 4.510659266559469e-07, + "logits/chosen": -1.9974846839904785, + "logits/rejected": -2.2515244483947754, + "logps/chosen": -190.79103088378906, + "logps/rejected": -419.9468688964844, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.874117851257324, + "rewards/margins": 12.115954399108887, + "rewards/rejected": -20.99007225036621, + "step": 18674 + }, + { + "epoch": 2.9, + "learning_rate": 4.5033248612479903e-07, + "logits/chosen": -3.0091562271118164, + "logits/rejected": -3.0040392875671387, + "logps/chosen": -232.34490966796875, + "logps/rejected": -402.0277099609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.118728637695312, + "rewards/margins": 10.160634994506836, + "rewards/rejected": -18.27936363220215, + "step": 18675 + }, + { + "epoch": 2.9, + "learning_rate": 4.4959904559365117e-07, + "logits/chosen": -2.704599618911743, + "logits/rejected": -2.7507362365722656, + "logps/chosen": -383.8175964355469, + "logps/rejected": -394.5160217285156, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.111275672912598, + "rewards/margins": 8.121150970458984, + "rewards/rejected": -17.2324275970459, + "step": 18676 + }, + { + "epoch": 2.9, + "learning_rate": 4.488656050625032e-07, + "logits/chosen": -1.284828782081604, + "logits/rejected": -2.298272132873535, + "logps/chosen": -200.98892211914062, + "logps/rejected": -519.238525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.991666793823242, + "rewards/margins": 12.401787757873535, + "rewards/rejected": -22.393455505371094, + "step": 18677 + }, + { + "epoch": 2.9, + "learning_rate": 4.4813216453135535e-07, + "logits/chosen": -1.4288861751556396, + "logits/rejected": -2.331031560897827, + "logps/chosen": -296.8866271972656, + "logps/rejected": -424.6906433105469, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.163034439086914, + "rewards/margins": 8.507329940795898, + "rewards/rejected": -17.670364379882812, + "step": 18678 + }, + { + "epoch": 2.9, + "learning_rate": 4.473987240002075e-07, + "logits/chosen": -2.549485206604004, + "logits/rejected": -2.871199131011963, + "logps/chosen": -460.39007568359375, + "logps/rejected": -634.8031616210938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.330314636230469, + "rewards/margins": 13.645015716552734, + "rewards/rejected": -22.975330352783203, + "step": 18679 + }, + { + "epoch": 2.91, + "learning_rate": 4.4666528346905964e-07, + "logits/chosen": -1.7701932191848755, + "logits/rejected": -2.932835817337036, + "logps/chosen": -165.65420532226562, + "logps/rejected": -413.4446716308594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.205629825592041, + "rewards/margins": 7.8371429443359375, + "rewards/rejected": -15.04277229309082, + "step": 18680 + }, + { + "epoch": 2.91, + "learning_rate": 4.459318429379118e-07, + "logits/chosen": -2.658618211746216, + "logits/rejected": -1.865134596824646, + "logps/chosen": -176.3848114013672, + "logps/rejected": -235.77764892578125, + "loss": 0.1038, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.883749008178711, + "rewards/margins": 7.639944553375244, + "rewards/rejected": -19.523693084716797, + "step": 18681 + }, + { + "epoch": 2.91, + "learning_rate": 4.451984024067638e-07, + "logits/chosen": -2.127486228942871, + "logits/rejected": -2.3860936164855957, + "logps/chosen": -683.5716552734375, + "logps/rejected": -681.4547729492188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.816640853881836, + "rewards/margins": 12.53464126586914, + "rewards/rejected": -22.35128402709961, + "step": 18682 + }, + { + "epoch": 2.91, + "learning_rate": 4.4446496187561596e-07, + "logits/chosen": -2.8122057914733887, + "logits/rejected": -2.51401424407959, + "logps/chosen": -374.7095947265625, + "logps/rejected": -427.9417724609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.553592681884766, + "rewards/margins": 9.347262382507324, + "rewards/rejected": -19.900856018066406, + "step": 18683 + }, + { + "epoch": 2.91, + "learning_rate": 4.437315213444681e-07, + "logits/chosen": -1.2589668035507202, + "logits/rejected": -2.7919487953186035, + "logps/chosen": -267.30859375, + "logps/rejected": -387.26953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.599222183227539, + "rewards/margins": 9.016778945922852, + "rewards/rejected": -17.61600112915039, + "step": 18684 + }, + { + "epoch": 2.91, + "learning_rate": 4.4299808081332025e-07, + "logits/chosen": -3.0352604389190674, + "logits/rejected": -2.6679317951202393, + "logps/chosen": -344.67901611328125, + "logps/rejected": -366.2541809082031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.771414279937744, + "rewards/margins": 10.525262832641602, + "rewards/rejected": -16.296676635742188, + "step": 18685 + }, + { + "epoch": 2.91, + "learning_rate": 4.4226464028217234e-07, + "logits/chosen": -1.7072283029556274, + "logits/rejected": -2.343158721923828, + "logps/chosen": -262.3472595214844, + "logps/rejected": -379.5211181640625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.002887725830078, + "rewards/margins": 8.091438293457031, + "rewards/rejected": -17.09432601928711, + "step": 18686 + }, + { + "epoch": 2.91, + "learning_rate": 4.4153119975102443e-07, + "logits/chosen": -2.938370704650879, + "logits/rejected": -2.1089844703674316, + "logps/chosen": -752.6979370117188, + "logps/rejected": -509.3399353027344, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.236908912658691, + "rewards/margins": 9.582540512084961, + "rewards/rejected": -16.81945037841797, + "step": 18687 + }, + { + "epoch": 2.91, + "learning_rate": 4.407977592198765e-07, + "logits/chosen": -2.622060537338257, + "logits/rejected": -1.748356580734253, + "logps/chosen": -395.96600341796875, + "logps/rejected": -371.30657958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.091069221496582, + "rewards/margins": 9.846467971801758, + "rewards/rejected": -17.937536239624023, + "step": 18688 + }, + { + "epoch": 2.91, + "learning_rate": 4.4006431868872866e-07, + "logits/chosen": -2.8163115978240967, + "logits/rejected": -2.629971981048584, + "logps/chosen": -231.82997131347656, + "logps/rejected": -385.6340026855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.669302940368652, + "rewards/margins": 17.71860122680664, + "rewards/rejected": -24.387903213500977, + "step": 18689 + }, + { + "epoch": 2.91, + "learning_rate": 4.3933087815758075e-07, + "logits/chosen": -1.4080231189727783, + "logits/rejected": -2.4775750637054443, + "logps/chosen": -207.4167938232422, + "logps/rejected": -601.1450805664062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.553540229797363, + "rewards/margins": 20.760284423828125, + "rewards/rejected": -29.313823699951172, + "step": 18690 + }, + { + "epoch": 2.91, + "learning_rate": 4.385974376264329e-07, + "logits/chosen": -2.183814764022827, + "logits/rejected": -2.505403518676758, + "logps/chosen": -119.04859924316406, + "logps/rejected": -313.6219482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.293013572692871, + "rewards/margins": 11.613325119018555, + "rewards/rejected": -18.90633773803711, + "step": 18691 + }, + { + "epoch": 2.91, + "learning_rate": 4.3786399709528504e-07, + "logits/chosen": -2.222182512283325, + "logits/rejected": -2.3842148780822754, + "logps/chosen": -246.30258178710938, + "logps/rejected": -226.5872802734375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.589763641357422, + "rewards/margins": 6.857298374176025, + "rewards/rejected": -16.447063446044922, + "step": 18692 + }, + { + "epoch": 2.91, + "learning_rate": 4.3713055656413713e-07, + "logits/chosen": -2.246725559234619, + "logits/rejected": -2.242220878601074, + "logps/chosen": -389.1916809082031, + "logps/rejected": -425.167236328125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.397226333618164, + "rewards/margins": 9.102293014526367, + "rewards/rejected": -21.49951934814453, + "step": 18693 + }, + { + "epoch": 2.91, + "learning_rate": 4.3639711603298927e-07, + "logits/chosen": -2.142439365386963, + "logits/rejected": -2.448051691055298, + "logps/chosen": -200.76739501953125, + "logps/rejected": -416.5140686035156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.935002326965332, + "rewards/margins": 9.149175643920898, + "rewards/rejected": -17.084178924560547, + "step": 18694 + }, + { + "epoch": 2.91, + "learning_rate": 4.3566367550184136e-07, + "logits/chosen": -2.401512861251831, + "logits/rejected": -2.778095245361328, + "logps/chosen": -223.41622924804688, + "logps/rejected": -235.8743438720703, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.070110321044922, + "rewards/margins": 8.386754989624023, + "rewards/rejected": -17.456865310668945, + "step": 18695 + }, + { + "epoch": 2.91, + "learning_rate": 4.349302349706935e-07, + "logits/chosen": -2.428337574005127, + "logits/rejected": -2.7795565128326416, + "logps/chosen": -422.32537841796875, + "logps/rejected": -371.8457946777344, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.744781494140625, + "rewards/margins": 8.64224624633789, + "rewards/rejected": -16.387027740478516, + "step": 18696 + }, + { + "epoch": 2.91, + "learning_rate": 4.3419679443954565e-07, + "logits/chosen": -2.5781781673431396, + "logits/rejected": -2.4070305824279785, + "logps/chosen": -368.98480224609375, + "logps/rejected": -353.6748046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.181593894958496, + "rewards/margins": 10.067734718322754, + "rewards/rejected": -19.24932861328125, + "step": 18697 + }, + { + "epoch": 2.91, + "learning_rate": 4.3346335390839774e-07, + "logits/chosen": -2.7432069778442383, + "logits/rejected": -2.8010921478271484, + "logps/chosen": -135.8475799560547, + "logps/rejected": -220.78067016601562, + "loss": 0.0262, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.883576393127441, + "rewards/margins": 4.4501543045043945, + "rewards/rejected": -15.333730697631836, + "step": 18698 + }, + { + "epoch": 2.91, + "learning_rate": 4.327299133772499e-07, + "logits/chosen": -2.7224011421203613, + "logits/rejected": -2.3933918476104736, + "logps/chosen": -421.73931884765625, + "logps/rejected": -449.59326171875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0882415771484375, + "rewards/margins": 9.363212585449219, + "rewards/rejected": -15.451454162597656, + "step": 18699 + }, + { + "epoch": 2.91, + "learning_rate": 4.3199647284610197e-07, + "logits/chosen": -2.7783870697021484, + "logits/rejected": -2.160004138946533, + "logps/chosen": -612.6546020507812, + "logps/rejected": -588.171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.565229415893555, + "rewards/margins": 10.55508041381836, + "rewards/rejected": -24.120309829711914, + "step": 18700 + }, + { + "epoch": 2.91, + "learning_rate": 4.312630323149541e-07, + "logits/chosen": -2.7966434955596924, + "logits/rejected": -2.8793721199035645, + "logps/chosen": -238.20736694335938, + "logps/rejected": -353.60601806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.026435852050781, + "rewards/margins": 10.506111145019531, + "rewards/rejected": -20.532546997070312, + "step": 18701 + }, + { + "epoch": 2.91, + "learning_rate": 4.3052959178380626e-07, + "logits/chosen": -2.103327989578247, + "logits/rejected": -2.684659719467163, + "logps/chosen": -195.48663330078125, + "logps/rejected": -451.8924255371094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.394656181335449, + "rewards/margins": 17.132604598999023, + "rewards/rejected": -24.527259826660156, + "step": 18702 + }, + { + "epoch": 2.91, + "learning_rate": 4.2979615125265835e-07, + "logits/chosen": -2.752110004425049, + "logits/rejected": -2.94452166557312, + "logps/chosen": -118.39297485351562, + "logps/rejected": -241.12905883789062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.83909797668457, + "rewards/margins": 8.762413024902344, + "rewards/rejected": -17.601511001586914, + "step": 18703 + }, + { + "epoch": 2.91, + "learning_rate": 4.290627107215105e-07, + "logits/chosen": -2.6975159645080566, + "logits/rejected": -2.307163953781128, + "logps/chosen": -504.68572998046875, + "logps/rejected": -571.2607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.27253532409668, + "rewards/margins": 15.518056869506836, + "rewards/rejected": -25.790592193603516, + "step": 18704 + }, + { + "epoch": 2.91, + "learning_rate": 4.283292701903626e-07, + "logits/chosen": -1.945030689239502, + "logits/rejected": -2.909395694732666, + "logps/chosen": -127.10417175292969, + "logps/rejected": -384.0476989746094, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.065711975097656, + "rewards/margins": 8.484111785888672, + "rewards/rejected": -17.549823760986328, + "step": 18705 + }, + { + "epoch": 2.91, + "learning_rate": 4.275958296592147e-07, + "logits/chosen": -2.8946874141693115, + "logits/rejected": -2.9966282844543457, + "logps/chosen": -153.71466064453125, + "logps/rejected": -312.2548522949219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.38659954071045, + "rewards/margins": 11.586155891418457, + "rewards/rejected": -21.972755432128906, + "step": 18706 + }, + { + "epoch": 2.91, + "learning_rate": 4.268623891280668e-07, + "logits/chosen": -1.7329800128936768, + "logits/rejected": -2.589890956878662, + "logps/chosen": -184.23434448242188, + "logps/rejected": -512.9104614257812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.280921936035156, + "rewards/margins": 14.228195190429688, + "rewards/rejected": -24.509117126464844, + "step": 18707 + }, + { + "epoch": 2.91, + "learning_rate": 4.261289485969189e-07, + "logits/chosen": -2.0698516368865967, + "logits/rejected": -2.107081890106201, + "logps/chosen": -895.5172729492188, + "logps/rejected": -656.1290283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.323309421539307, + "rewards/margins": 13.449060440063477, + "rewards/rejected": -20.772369384765625, + "step": 18708 + }, + { + "epoch": 2.91, + "learning_rate": 4.2539550806577104e-07, + "logits/chosen": -2.58186411857605, + "logits/rejected": -2.416081428527832, + "logps/chosen": -232.08059692382812, + "logps/rejected": -375.19268798828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.073077201843262, + "rewards/margins": 9.61837387084961, + "rewards/rejected": -15.691452026367188, + "step": 18709 + }, + { + "epoch": 2.91, + "learning_rate": 4.2466206753462313e-07, + "logits/chosen": -2.612553596496582, + "logits/rejected": -1.835943341255188, + "logps/chosen": -534.2208251953125, + "logps/rejected": -492.4993591308594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.14629077911377, + "rewards/margins": 13.065698623657227, + "rewards/rejected": -24.211990356445312, + "step": 18710 + }, + { + "epoch": 2.91, + "learning_rate": 4.239286270034753e-07, + "logits/chosen": -2.21309757232666, + "logits/rejected": -1.9660148620605469, + "logps/chosen": -334.42498779296875, + "logps/rejected": -471.55712890625, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.792891502380371, + "rewards/margins": 11.299798965454102, + "rewards/rejected": -22.09269142150879, + "step": 18711 + }, + { + "epoch": 2.91, + "learning_rate": 4.231951864723274e-07, + "logits/chosen": -2.127746105194092, + "logits/rejected": -2.631836175918579, + "logps/chosen": -171.59217834472656, + "logps/rejected": -372.04913330078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.698477745056152, + "rewards/margins": 14.104719161987305, + "rewards/rejected": -20.80319595336914, + "step": 18712 + }, + { + "epoch": 2.91, + "learning_rate": 4.224617459411795e-07, + "logits/chosen": -2.5462098121643066, + "logits/rejected": -2.8113436698913574, + "logps/chosen": -128.9027862548828, + "logps/rejected": -396.5830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.962637901306152, + "rewards/margins": 13.385747909545898, + "rewards/rejected": -22.348384857177734, + "step": 18713 + }, + { + "epoch": 2.91, + "learning_rate": 4.2172830541003165e-07, + "logits/chosen": -1.8509608507156372, + "logits/rejected": -2.2561542987823486, + "logps/chosen": -274.8061828613281, + "logps/rejected": -382.4134521484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.851065635681152, + "rewards/margins": 9.521316528320312, + "rewards/rejected": -19.37238311767578, + "step": 18714 + }, + { + "epoch": 2.91, + "learning_rate": 4.2099486487888374e-07, + "logits/chosen": -2.7844998836517334, + "logits/rejected": -2.8144030570983887, + "logps/chosen": -201.53155517578125, + "logps/rejected": -360.81842041015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.041916370391846, + "rewards/margins": 15.552152633666992, + "rewards/rejected": -21.59406852722168, + "step": 18715 + }, + { + "epoch": 2.91, + "learning_rate": 4.202614243477359e-07, + "logits/chosen": -2.4382810592651367, + "logits/rejected": -2.298577070236206, + "logps/chosen": -261.11346435546875, + "logps/rejected": -378.71209716796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.317816734313965, + "rewards/margins": 9.162917137145996, + "rewards/rejected": -20.48073387145996, + "step": 18716 + }, + { + "epoch": 2.91, + "learning_rate": 4.19527983816588e-07, + "logits/chosen": -2.136929750442505, + "logits/rejected": -3.0002758502960205, + "logps/chosen": -224.96270751953125, + "logps/rejected": -354.2265930175781, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.620868682861328, + "rewards/margins": 8.381916046142578, + "rewards/rejected": -20.002784729003906, + "step": 18717 + }, + { + "epoch": 2.91, + "learning_rate": 4.187945432854401e-07, + "logits/chosen": -2.3090505599975586, + "logits/rejected": -2.410784959793091, + "logps/chosen": -299.85186767578125, + "logps/rejected": -394.8687744140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.068313598632812, + "rewards/margins": 10.124703407287598, + "rewards/rejected": -21.193017959594727, + "step": 18718 + }, + { + "epoch": 2.91, + "learning_rate": 4.1806110275429226e-07, + "logits/chosen": -3.0173428058624268, + "logits/rejected": -2.534024715423584, + "logps/chosen": -455.8839111328125, + "logps/rejected": -409.10748291015625, + "loss": 0.4146, + "rewards/accuracies": 0.5, + "rewards/chosen": -13.50830078125, + "rewards/margins": 5.9510674476623535, + "rewards/rejected": -19.459369659423828, + "step": 18719 + }, + { + "epoch": 2.91, + "learning_rate": 4.1732766222314435e-07, + "logits/chosen": -2.4515140056610107, + "logits/rejected": -2.574045181274414, + "logps/chosen": -356.54510498046875, + "logps/rejected": -420.264892578125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.344911575317383, + "rewards/margins": 9.928340911865234, + "rewards/rejected": -15.2732515335083, + "step": 18720 + }, + { + "epoch": 2.91, + "learning_rate": 4.165942216919965e-07, + "logits/chosen": -2.7902779579162598, + "logits/rejected": -2.192725658416748, + "logps/chosen": -283.34521484375, + "logps/rejected": -251.21505737304688, + "loss": 0.4594, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.94149112701416, + "rewards/margins": 6.900017261505127, + "rewards/rejected": -15.841507911682129, + "step": 18721 + }, + { + "epoch": 2.91, + "learning_rate": 4.158607811608486e-07, + "logits/chosen": -2.3650994300842285, + "logits/rejected": -1.5338953733444214, + "logps/chosen": -339.9505920410156, + "logps/rejected": -500.19110107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.771129608154297, + "rewards/margins": 11.55567741394043, + "rewards/rejected": -18.326807022094727, + "step": 18722 + }, + { + "epoch": 2.91, + "learning_rate": 4.1512734062970073e-07, + "logits/chosen": -2.8772342205047607, + "logits/rejected": -1.8788779973983765, + "logps/chosen": -366.905517578125, + "logps/rejected": -415.1398620605469, + "loss": 0.7234, + "rewards/accuracies": 0.5, + "rewards/chosen": -15.195317268371582, + "rewards/margins": 5.272215366363525, + "rewards/rejected": -20.467533111572266, + "step": 18723 + }, + { + "epoch": 2.91, + "learning_rate": 4.1439390009855287e-07, + "logits/chosen": -2.697796106338501, + "logits/rejected": -1.6903691291809082, + "logps/chosen": -269.6927185058594, + "logps/rejected": -179.37985229492188, + "loss": 0.0489, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.158024787902832, + "rewards/margins": 3.3841099739074707, + "rewards/rejected": -16.54213523864746, + "step": 18724 + }, + { + "epoch": 2.91, + "learning_rate": 4.1366045956740496e-07, + "logits/chosen": -2.4881751537323, + "logits/rejected": -2.035499095916748, + "logps/chosen": -400.07684326171875, + "logps/rejected": -367.37799072265625, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.343815803527832, + "rewards/margins": 11.270078659057617, + "rewards/rejected": -21.613895416259766, + "step": 18725 + }, + { + "epoch": 2.91, + "learning_rate": 4.129270190362571e-07, + "logits/chosen": -2.249148368835449, + "logits/rejected": -2.9134562015533447, + "logps/chosen": -234.51998901367188, + "logps/rejected": -698.2379150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.162857055664062, + "rewards/margins": 17.03390121459961, + "rewards/rejected": -27.196758270263672, + "step": 18726 + }, + { + "epoch": 2.91, + "learning_rate": 4.121935785051092e-07, + "logits/chosen": -3.0338504314422607, + "logits/rejected": -2.649308204650879, + "logps/chosen": -250.33221435546875, + "logps/rejected": -308.80072021484375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.832696914672852, + "rewards/margins": 9.241917610168457, + "rewards/rejected": -18.074613571166992, + "step": 18727 + }, + { + "epoch": 2.91, + "learning_rate": 4.114601379739613e-07, + "logits/chosen": -1.4803513288497925, + "logits/rejected": -2.3940558433532715, + "logps/chosen": -176.14114379882812, + "logps/rejected": -275.00933837890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.779961585998535, + "rewards/margins": 9.66620922088623, + "rewards/rejected": -18.446170806884766, + "step": 18728 + }, + { + "epoch": 2.91, + "learning_rate": 4.1072669744281343e-07, + "logits/chosen": -1.1980217695236206, + "logits/rejected": -2.6679110527038574, + "logps/chosen": -193.93215942382812, + "logps/rejected": -462.4249267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.645519256591797, + "rewards/margins": 12.022960662841797, + "rewards/rejected": -21.668479919433594, + "step": 18729 + }, + { + "epoch": 2.91, + "learning_rate": 4.099932569116655e-07, + "logits/chosen": -2.3955183029174805, + "logits/rejected": -2.8825254440307617, + "logps/chosen": -299.66748046875, + "logps/rejected": -379.94927978515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.165544509887695, + "rewards/margins": 8.881072998046875, + "rewards/rejected": -22.04661750793457, + "step": 18730 + }, + { + "epoch": 2.91, + "learning_rate": 4.0925981638051766e-07, + "logits/chosen": -2.751230239868164, + "logits/rejected": -2.8124935626983643, + "logps/chosen": -367.46783447265625, + "logps/rejected": -341.2813720703125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.485953330993652, + "rewards/margins": 7.577239990234375, + "rewards/rejected": -17.063194274902344, + "step": 18731 + }, + { + "epoch": 2.91, + "learning_rate": 4.0852637584936975e-07, + "logits/chosen": -2.8595635890960693, + "logits/rejected": -2.838425636291504, + "logps/chosen": -203.95567321777344, + "logps/rejected": -425.84375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.446131706237793, + "rewards/margins": 8.66760540008545, + "rewards/rejected": -17.113737106323242, + "step": 18732 + }, + { + "epoch": 2.91, + "learning_rate": 4.077929353182219e-07, + "logits/chosen": -2.2458715438842773, + "logits/rejected": -2.8939011096954346, + "logps/chosen": -116.59811401367188, + "logps/rejected": -321.57232666015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.194369316101074, + "rewards/margins": 10.737399101257324, + "rewards/rejected": -20.9317684173584, + "step": 18733 + }, + { + "epoch": 2.91, + "learning_rate": 4.0705949478707404e-07, + "logits/chosen": -2.4865047931671143, + "logits/rejected": -2.773442029953003, + "logps/chosen": -1033.5048828125, + "logps/rejected": -783.6668701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.623712539672852, + "rewards/margins": 11.37796401977539, + "rewards/rejected": -23.001676559448242, + "step": 18734 + }, + { + "epoch": 2.91, + "learning_rate": 4.0632605425592613e-07, + "logits/chosen": -2.3973052501678467, + "logits/rejected": -2.94368052482605, + "logps/chosen": -114.80797576904297, + "logps/rejected": -421.56341552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.883460998535156, + "rewards/margins": 14.470436096191406, + "rewards/rejected": -22.353897094726562, + "step": 18735 + }, + { + "epoch": 2.91, + "learning_rate": 4.0559261372477827e-07, + "logits/chosen": -2.515132188796997, + "logits/rejected": -1.6303337812423706, + "logps/chosen": -154.84959411621094, + "logps/rejected": -203.04293823242188, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.67574691772461, + "rewards/margins": 7.777128219604492, + "rewards/rejected": -17.4528751373291, + "step": 18736 + }, + { + "epoch": 2.91, + "learning_rate": 4.0485917319363036e-07, + "logits/chosen": -2.805817127227783, + "logits/rejected": -2.2546334266662598, + "logps/chosen": -772.2044677734375, + "logps/rejected": -619.6478881835938, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.542645454406738, + "rewards/margins": 9.451192855834961, + "rewards/rejected": -16.993839263916016, + "step": 18737 + }, + { + "epoch": 2.91, + "learning_rate": 4.041257326624825e-07, + "logits/chosen": -2.9115183353424072, + "logits/rejected": -2.5287981033325195, + "logps/chosen": -308.7791748046875, + "logps/rejected": -434.37158203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.810876846313477, + "rewards/margins": 9.567012786865234, + "rewards/rejected": -19.37788963317871, + "step": 18738 + }, + { + "epoch": 2.91, + "learning_rate": 4.0339229213133465e-07, + "logits/chosen": -2.2476205825805664, + "logits/rejected": -2.5588836669921875, + "logps/chosen": -128.18084716796875, + "logps/rejected": -245.62086486816406, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.45981502532959, + "rewards/margins": 7.692228317260742, + "rewards/rejected": -15.152043342590332, + "step": 18739 + }, + { + "epoch": 2.91, + "learning_rate": 4.0265885160018674e-07, + "logits/chosen": -2.8565492630004883, + "logits/rejected": -2.1488373279571533, + "logps/chosen": -274.5002746582031, + "logps/rejected": -269.849365234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.084712982177734, + "rewards/margins": 9.156488418579102, + "rewards/rejected": -22.241201400756836, + "step": 18740 + }, + { + "epoch": 2.91, + "learning_rate": 4.019254110690389e-07, + "logits/chosen": -2.5789082050323486, + "logits/rejected": -2.807250499725342, + "logps/chosen": -123.35513305664062, + "logps/rejected": -316.381591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.483311653137207, + "rewards/margins": 11.096473693847656, + "rewards/rejected": -20.579784393310547, + "step": 18741 + }, + { + "epoch": 2.91, + "learning_rate": 4.0119197053789097e-07, + "logits/chosen": -1.1939846277236938, + "logits/rejected": -2.52032732963562, + "logps/chosen": -257.8139343261719, + "logps/rejected": -587.7293701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.565147399902344, + "rewards/margins": 13.28206729888916, + "rewards/rejected": -27.847213745117188, + "step": 18742 + }, + { + "epoch": 2.91, + "learning_rate": 4.004585300067431e-07, + "logits/chosen": -2.7757537364959717, + "logits/rejected": -2.870117425918579, + "logps/chosen": -118.54518127441406, + "logps/rejected": -355.6681213378906, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.340267181396484, + "rewards/margins": 8.577679634094238, + "rewards/rejected": -14.917945861816406, + "step": 18743 + }, + { + "epoch": 2.92, + "learning_rate": 3.9972508947559525e-07, + "logits/chosen": -2.68567156791687, + "logits/rejected": -1.249969482421875, + "logps/chosen": -363.6833190917969, + "logps/rejected": -235.47784423828125, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.559267044067383, + "rewards/margins": 8.932767868041992, + "rewards/rejected": -14.492034912109375, + "step": 18744 + }, + { + "epoch": 2.92, + "learning_rate": 3.9899164894444735e-07, + "logits/chosen": -2.106793165206909, + "logits/rejected": -2.311267614364624, + "logps/chosen": -176.22686767578125, + "logps/rejected": -345.13787841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.698005676269531, + "rewards/margins": 14.054976463317871, + "rewards/rejected": -23.75298309326172, + "step": 18745 + }, + { + "epoch": 2.92, + "learning_rate": 3.982582084132995e-07, + "logits/chosen": -2.766587257385254, + "logits/rejected": -2.8311314582824707, + "logps/chosen": -118.2375259399414, + "logps/rejected": -359.9375305175781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.097670555114746, + "rewards/margins": 11.726733207702637, + "rewards/rejected": -18.824403762817383, + "step": 18746 + }, + { + "epoch": 2.92, + "learning_rate": 3.975247678821516e-07, + "logits/chosen": -2.3157055377960205, + "logits/rejected": -2.4907703399658203, + "logps/chosen": -145.21728515625, + "logps/rejected": -352.1018981933594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.723488807678223, + "rewards/margins": 14.469350814819336, + "rewards/rejected": -22.192840576171875, + "step": 18747 + }, + { + "epoch": 2.92, + "learning_rate": 3.9679132735100367e-07, + "logits/chosen": -1.679276704788208, + "logits/rejected": -2.536658525466919, + "logps/chosen": -353.05975341796875, + "logps/rejected": -623.173583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.812150955200195, + "rewards/margins": 15.292800903320312, + "rewards/rejected": -25.104951858520508, + "step": 18748 + }, + { + "epoch": 2.92, + "learning_rate": 3.9605788681985576e-07, + "logits/chosen": -2.3773396015167236, + "logits/rejected": -2.787729501724243, + "logps/chosen": -403.0221862792969, + "logps/rejected": -571.5183715820312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.668872356414795, + "rewards/margins": 15.806102752685547, + "rewards/rejected": -22.4749755859375, + "step": 18749 + }, + { + "epoch": 2.92, + "learning_rate": 3.953244462887079e-07, + "logits/chosen": -2.6587774753570557, + "logits/rejected": -2.90347957611084, + "logps/chosen": -146.9891815185547, + "logps/rejected": -379.283447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.78358268737793, + "rewards/margins": 12.523852348327637, + "rewards/rejected": -19.30743408203125, + "step": 18750 + }, + { + "epoch": 2.92, + "learning_rate": 3.9459100575756004e-07, + "logits/chosen": -2.760951519012451, + "logits/rejected": -2.6044015884399414, + "logps/chosen": -226.28463745117188, + "logps/rejected": -347.09246826171875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.446109771728516, + "rewards/margins": 8.266576766967773, + "rewards/rejected": -16.71268653869629, + "step": 18751 + }, + { + "epoch": 2.92, + "learning_rate": 3.9385756522641213e-07, + "logits/chosen": -2.5793986320495605, + "logits/rejected": -2.287794828414917, + "logps/chosen": -287.39227294921875, + "logps/rejected": -435.3460388183594, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.60499906539917, + "rewards/margins": 10.645198822021484, + "rewards/rejected": -18.250198364257812, + "step": 18752 + }, + { + "epoch": 2.92, + "learning_rate": 3.931241246952643e-07, + "logits/chosen": -2.327610492706299, + "logits/rejected": -2.654851198196411, + "logps/chosen": -157.7859344482422, + "logps/rejected": -337.36004638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.167047500610352, + "rewards/margins": 11.798378944396973, + "rewards/rejected": -19.96542739868164, + "step": 18753 + }, + { + "epoch": 2.92, + "learning_rate": 3.9239068416411637e-07, + "logits/chosen": -1.8943133354187012, + "logits/rejected": -2.6062238216400146, + "logps/chosen": -187.08575439453125, + "logps/rejected": -392.1182861328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.318906784057617, + "rewards/margins": 10.121063232421875, + "rewards/rejected": -19.439970016479492, + "step": 18754 + }, + { + "epoch": 2.92, + "learning_rate": 3.916572436329685e-07, + "logits/chosen": -2.6492745876312256, + "logits/rejected": -2.792207956314087, + "logps/chosen": -317.7749938964844, + "logps/rejected": -351.72308349609375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.160642623901367, + "rewards/margins": 6.428826808929443, + "rewards/rejected": -20.58946990966797, + "step": 18755 + }, + { + "epoch": 2.92, + "learning_rate": 3.9092380310182065e-07, + "logits/chosen": -2.2671051025390625, + "logits/rejected": -2.9436492919921875, + "logps/chosen": -184.1932830810547, + "logps/rejected": -383.5467529296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.032054424285889, + "rewards/margins": 8.504931449890137, + "rewards/rejected": -14.536985397338867, + "step": 18756 + }, + { + "epoch": 2.92, + "learning_rate": 3.9019036257067274e-07, + "logits/chosen": -2.689204692840576, + "logits/rejected": -2.909933090209961, + "logps/chosen": -107.28097534179688, + "logps/rejected": -263.7558288574219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.045127868652344, + "rewards/margins": 11.714539527893066, + "rewards/rejected": -19.759666442871094, + "step": 18757 + }, + { + "epoch": 2.92, + "learning_rate": 3.894569220395249e-07, + "logits/chosen": -2.3346664905548096, + "logits/rejected": -2.8216514587402344, + "logps/chosen": -290.7859191894531, + "logps/rejected": -334.04998779296875, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.574244499206543, + "rewards/margins": 7.7135772705078125, + "rewards/rejected": -19.287822723388672, + "step": 18758 + }, + { + "epoch": 2.92, + "learning_rate": 3.88723481508377e-07, + "logits/chosen": -0.6282176375389099, + "logits/rejected": -2.65419864654541, + "logps/chosen": -133.151611328125, + "logps/rejected": -540.3941650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.424283027648926, + "rewards/margins": 10.543502807617188, + "rewards/rejected": -20.967784881591797, + "step": 18759 + }, + { + "epoch": 2.92, + "learning_rate": 3.879900409772291e-07, + "logits/chosen": -1.8229846954345703, + "logits/rejected": -2.7416470050811768, + "logps/chosen": -195.40298461914062, + "logps/rejected": -485.384521484375, + "loss": 0.1231, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.386669158935547, + "rewards/margins": 6.383374214172363, + "rewards/rejected": -18.770042419433594, + "step": 18760 + }, + { + "epoch": 2.92, + "learning_rate": 3.8725660044608126e-07, + "logits/chosen": -1.5649569034576416, + "logits/rejected": -2.5674235820770264, + "logps/chosen": -192.41140747070312, + "logps/rejected": -572.7701416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.33885669708252, + "rewards/margins": 12.867608070373535, + "rewards/rejected": -24.206464767456055, + "step": 18761 + }, + { + "epoch": 2.92, + "learning_rate": 3.8652315991493335e-07, + "logits/chosen": -2.8274648189544678, + "logits/rejected": -2.3826889991760254, + "logps/chosen": -474.02532958984375, + "logps/rejected": -474.60302734375, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.048648834228516, + "rewards/margins": 9.898706436157227, + "rewards/rejected": -14.947355270385742, + "step": 18762 + }, + { + "epoch": 2.92, + "learning_rate": 3.857897193837855e-07, + "logits/chosen": -2.8921544551849365, + "logits/rejected": -2.880918025970459, + "logps/chosen": -260.56024169921875, + "logps/rejected": -442.0595397949219, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.427837371826172, + "rewards/margins": 6.6503586769104, + "rewards/rejected": -18.078195571899414, + "step": 18763 + }, + { + "epoch": 2.92, + "learning_rate": 3.850562788526376e-07, + "logits/chosen": -2.815321683883667, + "logits/rejected": -2.531743288040161, + "logps/chosen": -588.6356201171875, + "logps/rejected": -668.3867797851562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.306478500366211, + "rewards/margins": 13.305952072143555, + "rewards/rejected": -24.612430572509766, + "step": 18764 + }, + { + "epoch": 2.92, + "learning_rate": 3.8432283832148973e-07, + "logits/chosen": -1.2761845588684082, + "logits/rejected": -2.426441192626953, + "logps/chosen": -238.13629150390625, + "logps/rejected": -387.1348571777344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0638017654418945, + "rewards/margins": 13.354779243469238, + "rewards/rejected": -19.418581008911133, + "step": 18765 + }, + { + "epoch": 2.92, + "learning_rate": 3.8358939779034187e-07, + "logits/chosen": -1.71602463722229, + "logits/rejected": -2.4988372325897217, + "logps/chosen": -113.98066711425781, + "logps/rejected": -374.6371154785156, + "loss": 0.0818, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.65462875366211, + "rewards/margins": 10.948492050170898, + "rewards/rejected": -21.603120803833008, + "step": 18766 + }, + { + "epoch": 2.92, + "learning_rate": 3.8285595725919396e-07, + "logits/chosen": -2.712653875350952, + "logits/rejected": -2.3643898963928223, + "logps/chosen": -186.06314086914062, + "logps/rejected": -245.84786987304688, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2953643798828125, + "rewards/margins": 7.990462303161621, + "rewards/rejected": -14.285825729370117, + "step": 18767 + }, + { + "epoch": 2.92, + "learning_rate": 3.8212251672804605e-07, + "logits/chosen": -1.7494595050811768, + "logits/rejected": -2.8783867359161377, + "logps/chosen": -153.62599182128906, + "logps/rejected": -575.4979858398438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.074789047241211, + "rewards/margins": 10.47371768951416, + "rewards/rejected": -17.548507690429688, + "step": 18768 + }, + { + "epoch": 2.92, + "learning_rate": 3.8138907619689814e-07, + "logits/chosen": -2.9838359355926514, + "logits/rejected": -2.4093029499053955, + "logps/chosen": -239.7080078125, + "logps/rejected": -143.9492950439453, + "loss": 0.1637, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.287050247192383, + "rewards/margins": 3.1645827293395996, + "rewards/rejected": -13.45163345336914, + "step": 18769 + }, + { + "epoch": 2.92, + "learning_rate": 3.806556356657503e-07, + "logits/chosen": -1.9293144941329956, + "logits/rejected": -2.60886549949646, + "logps/chosen": -194.3402557373047, + "logps/rejected": -380.04547119140625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.24863052368164, + "rewards/margins": 6.669831275939941, + "rewards/rejected": -14.918461799621582, + "step": 18770 + }, + { + "epoch": 2.92, + "learning_rate": 3.7992219513460243e-07, + "logits/chosen": -2.6396336555480957, + "logits/rejected": -2.8701326847076416, + "logps/chosen": -473.7401123046875, + "logps/rejected": -505.14990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.007551193237305, + "rewards/margins": 10.517084121704102, + "rewards/rejected": -20.524635314941406, + "step": 18771 + }, + { + "epoch": 2.92, + "learning_rate": 3.791887546034545e-07, + "logits/chosen": -2.7045156955718994, + "logits/rejected": -1.4191033840179443, + "logps/chosen": -431.4571228027344, + "logps/rejected": -317.8291015625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.653099060058594, + "rewards/margins": 9.906632423400879, + "rewards/rejected": -16.559730529785156, + "step": 18772 + }, + { + "epoch": 2.92, + "learning_rate": 3.7845531407230666e-07, + "logits/chosen": -2.7460713386535645, + "logits/rejected": -2.9006524085998535, + "logps/chosen": -96.09748077392578, + "logps/rejected": -165.59698486328125, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.939969062805176, + "rewards/margins": 5.259710311889648, + "rewards/rejected": -13.199679374694824, + "step": 18773 + }, + { + "epoch": 2.92, + "learning_rate": 3.7772187354115875e-07, + "logits/chosen": -2.400158643722534, + "logits/rejected": -1.7750585079193115, + "logps/chosen": -236.40203857421875, + "logps/rejected": -436.66864013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.588245391845703, + "rewards/margins": 13.407548904418945, + "rewards/rejected": -22.99579429626465, + "step": 18774 + }, + { + "epoch": 2.92, + "learning_rate": 3.769884330100109e-07, + "logits/chosen": -1.9999257326126099, + "logits/rejected": -2.9508910179138184, + "logps/chosen": -326.9345397949219, + "logps/rejected": -760.8675537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.729204177856445, + "rewards/margins": 12.707015037536621, + "rewards/rejected": -20.43621826171875, + "step": 18775 + }, + { + "epoch": 2.92, + "learning_rate": 3.7625499247886304e-07, + "logits/chosen": -2.9547278881073, + "logits/rejected": -2.547454595565796, + "logps/chosen": -656.43994140625, + "logps/rejected": -427.11724853515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.52667236328125, + "rewards/margins": 8.11000919342041, + "rewards/rejected": -16.636682510375977, + "step": 18776 + }, + { + "epoch": 2.92, + "learning_rate": 3.755215519477151e-07, + "logits/chosen": -2.134061813354492, + "logits/rejected": -2.2919628620147705, + "logps/chosen": -228.40985107421875, + "logps/rejected": -543.85400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.865421295166016, + "rewards/margins": 13.366720199584961, + "rewards/rejected": -25.232141494750977, + "step": 18777 + }, + { + "epoch": 2.92, + "learning_rate": 3.7478811141656727e-07, + "logits/chosen": -2.5690624713897705, + "logits/rejected": -2.961672782897949, + "logps/chosen": -309.7691955566406, + "logps/rejected": -346.625244140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2917280197143555, + "rewards/margins": 9.44272232055664, + "rewards/rejected": -16.734451293945312, + "step": 18778 + }, + { + "epoch": 2.92, + "learning_rate": 3.7405467088541936e-07, + "logits/chosen": -2.463667392730713, + "logits/rejected": -2.9518253803253174, + "logps/chosen": -407.7667236328125, + "logps/rejected": -483.65972900390625, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.374354362487793, + "rewards/margins": 6.110347747802734, + "rewards/rejected": -20.484703063964844, + "step": 18779 + }, + { + "epoch": 2.92, + "learning_rate": 3.733212303542715e-07, + "logits/chosen": -2.426828384399414, + "logits/rejected": -2.790970802307129, + "logps/chosen": -99.62608337402344, + "logps/rejected": -487.8573303222656, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.3682861328125, + "rewards/margins": 13.982690811157227, + "rewards/rejected": -23.350976943969727, + "step": 18780 + }, + { + "epoch": 2.92, + "learning_rate": 3.725877898231236e-07, + "logits/chosen": -1.443320631980896, + "logits/rejected": -2.696338176727295, + "logps/chosen": -319.90802001953125, + "logps/rejected": -654.9189453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.770002365112305, + "rewards/margins": 8.506410598754883, + "rewards/rejected": -20.276412963867188, + "step": 18781 + }, + { + "epoch": 2.92, + "learning_rate": 3.7185434929197574e-07, + "logits/chosen": -1.6252964735031128, + "logits/rejected": -2.399625778198242, + "logps/chosen": -232.7771759033203, + "logps/rejected": -453.04010009765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.803037643432617, + "rewards/margins": 10.280450820922852, + "rewards/rejected": -21.08348846435547, + "step": 18782 + }, + { + "epoch": 2.92, + "learning_rate": 3.711209087608279e-07, + "logits/chosen": -1.169764518737793, + "logits/rejected": -2.6325128078460693, + "logps/chosen": -193.37184143066406, + "logps/rejected": -527.3395385742188, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.547638893127441, + "rewards/margins": 13.507471084594727, + "rewards/rejected": -28.055110931396484, + "step": 18783 + }, + { + "epoch": 2.92, + "learning_rate": 3.7038746822967997e-07, + "logits/chosen": -2.3850314617156982, + "logits/rejected": -2.6864023208618164, + "logps/chosen": -187.21652221679688, + "logps/rejected": -457.20526123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.011237144470215, + "rewards/margins": 12.60888385772705, + "rewards/rejected": -20.620121002197266, + "step": 18784 + }, + { + "epoch": 2.92, + "learning_rate": 3.696540276985321e-07, + "logits/chosen": -2.819059133529663, + "logits/rejected": -2.7962536811828613, + "logps/chosen": -592.466552734375, + "logps/rejected": -301.9939270019531, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2159271240234375, + "rewards/margins": 8.740059852600098, + "rewards/rejected": -13.955986022949219, + "step": 18785 + }, + { + "epoch": 2.92, + "learning_rate": 3.689205871673842e-07, + "logits/chosen": -2.1857616901397705, + "logits/rejected": -2.7875964641571045, + "logps/chosen": -737.15966796875, + "logps/rejected": -813.0446166992188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.8348970413208, + "rewards/margins": 11.098881721496582, + "rewards/rejected": -22.933778762817383, + "step": 18786 + }, + { + "epoch": 2.92, + "learning_rate": 3.6818714663623634e-07, + "logits/chosen": -2.7780447006225586, + "logits/rejected": -2.0716702938079834, + "logps/chosen": -285.7460632324219, + "logps/rejected": -254.4805145263672, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.985071659088135, + "rewards/margins": 9.580574035644531, + "rewards/rejected": -15.565646171569824, + "step": 18787 + }, + { + "epoch": 2.92, + "learning_rate": 3.6745370610508843e-07, + "logits/chosen": -1.8636821508407593, + "logits/rejected": -2.8735499382019043, + "logps/chosen": -451.9345397949219, + "logps/rejected": -727.3426513671875, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.999917984008789, + "rewards/margins": 8.897422790527344, + "rewards/rejected": -20.897340774536133, + "step": 18788 + }, + { + "epoch": 2.92, + "learning_rate": 3.667202655739405e-07, + "logits/chosen": -2.494994640350342, + "logits/rejected": -2.6348841190338135, + "logps/chosen": -223.4984130859375, + "logps/rejected": -414.246337890625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.20457935333252, + "rewards/margins": 10.820257186889648, + "rewards/rejected": -20.02483558654785, + "step": 18789 + }, + { + "epoch": 2.92, + "learning_rate": 3.6598682504279267e-07, + "logits/chosen": -2.660818099975586, + "logits/rejected": -1.897226095199585, + "logps/chosen": -609.6024169921875, + "logps/rejected": -458.87432861328125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.533619403839111, + "rewards/margins": 11.100967407226562, + "rewards/rejected": -18.634586334228516, + "step": 18790 + }, + { + "epoch": 2.92, + "learning_rate": 3.6525338451164476e-07, + "logits/chosen": -3.043341875076294, + "logits/rejected": -3.064790725708008, + "logps/chosen": -170.32745361328125, + "logps/rejected": -444.6004638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.304156303405762, + "rewards/margins": 17.416723251342773, + "rewards/rejected": -22.72088050842285, + "step": 18791 + }, + { + "epoch": 2.92, + "learning_rate": 3.645199439804969e-07, + "logits/chosen": -2.282504081726074, + "logits/rejected": -2.413203001022339, + "logps/chosen": -208.64309692382812, + "logps/rejected": -418.5321044921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.035605430603027, + "rewards/margins": 13.816905975341797, + "rewards/rejected": -25.85251235961914, + "step": 18792 + }, + { + "epoch": 2.92, + "learning_rate": 3.6378650344934904e-07, + "logits/chosen": -2.2748589515686035, + "logits/rejected": -2.7021353244781494, + "logps/chosen": -227.66900634765625, + "logps/rejected": -386.37152099609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.688015937805176, + "rewards/margins": 9.261987686157227, + "rewards/rejected": -17.950002670288086, + "step": 18793 + }, + { + "epoch": 2.92, + "learning_rate": 3.6305306291820113e-07, + "logits/chosen": -2.9229347705841064, + "logits/rejected": -2.9748616218566895, + "logps/chosen": -110.58332824707031, + "logps/rejected": -293.4544677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.314072608947754, + "rewards/margins": 11.381492614746094, + "rewards/rejected": -20.69556427001953, + "step": 18794 + }, + { + "epoch": 2.92, + "learning_rate": 3.623196223870533e-07, + "logits/chosen": -2.888469934463501, + "logits/rejected": -2.7810428142547607, + "logps/chosen": -552.3685913085938, + "logps/rejected": -407.3201599121094, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.92616081237793, + "rewards/margins": 12.636235237121582, + "rewards/rejected": -20.562397003173828, + "step": 18795 + }, + { + "epoch": 2.92, + "learning_rate": 3.6158618185590537e-07, + "logits/chosen": -2.5891072750091553, + "logits/rejected": -2.744990825653076, + "logps/chosen": -596.7381591796875, + "logps/rejected": -867.5335693359375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.096351623535156, + "rewards/margins": 8.123495101928711, + "rewards/rejected": -23.219846725463867, + "step": 18796 + }, + { + "epoch": 2.92, + "learning_rate": 3.608527413247575e-07, + "logits/chosen": -2.103078842163086, + "logits/rejected": -1.6182843446731567, + "logps/chosen": -186.40634155273438, + "logps/rejected": -121.59170532226562, + "loss": 2.5424, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.91584300994873, + "rewards/margins": -0.2036135196685791, + "rewards/rejected": -10.71222972869873, + "step": 18797 + }, + { + "epoch": 2.92, + "learning_rate": 3.6011930079360965e-07, + "logits/chosen": -2.801196813583374, + "logits/rejected": -2.5038843154907227, + "logps/chosen": -370.2429504394531, + "logps/rejected": -478.4920654296875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.499009132385254, + "rewards/margins": 7.515848159790039, + "rewards/rejected": -17.01485824584961, + "step": 18798 + }, + { + "epoch": 2.92, + "learning_rate": 3.5938586026246174e-07, + "logits/chosen": -2.105848789215088, + "logits/rejected": -2.590365409851074, + "logps/chosen": -273.8205871582031, + "logps/rejected": -298.0340881347656, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.613152503967285, + "rewards/margins": 7.853473663330078, + "rewards/rejected": -14.466626167297363, + "step": 18799 + }, + { + "epoch": 2.92, + "learning_rate": 3.586524197313139e-07, + "logits/chosen": -2.2806899547576904, + "logits/rejected": -2.6752657890319824, + "logps/chosen": -244.46401977539062, + "logps/rejected": -605.144287109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.866527557373047, + "rewards/margins": 13.421585083007812, + "rewards/rejected": -24.28811264038086, + "step": 18800 + }, + { + "epoch": 2.92, + "learning_rate": 3.57918979200166e-07, + "logits/chosen": -1.8249918222427368, + "logits/rejected": -2.43805193901062, + "logps/chosen": -184.6459503173828, + "logps/rejected": -391.89288330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.403591156005859, + "rewards/margins": 11.278047561645508, + "rewards/rejected": -17.681638717651367, + "step": 18801 + }, + { + "epoch": 2.92, + "learning_rate": 3.571855386690181e-07, + "logits/chosen": -1.7473793029785156, + "logits/rejected": -2.3972766399383545, + "logps/chosen": -137.4604949951172, + "logps/rejected": -391.216552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.421577453613281, + "rewards/margins": 13.507698059082031, + "rewards/rejected": -21.929275512695312, + "step": 18802 + }, + { + "epoch": 2.92, + "learning_rate": 3.5645209813787026e-07, + "logits/chosen": -2.5434141159057617, + "logits/rejected": -2.924182653427124, + "logps/chosen": -585.116455078125, + "logps/rejected": -600.1289672851562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.344461441040039, + "rewards/margins": 11.041419982910156, + "rewards/rejected": -19.385881423950195, + "step": 18803 + }, + { + "epoch": 2.92, + "learning_rate": 3.5571865760672235e-07, + "logits/chosen": -2.1172614097595215, + "logits/rejected": -2.346920967102051, + "logps/chosen": -408.3436279296875, + "logps/rejected": -385.87939453125, + "loss": 0.0448, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.420316696166992, + "rewards/margins": 6.315728664398193, + "rewards/rejected": -15.736045837402344, + "step": 18804 + }, + { + "epoch": 2.92, + "learning_rate": 3.549852170755745e-07, + "logits/chosen": -2.9328370094299316, + "logits/rejected": -2.7299985885620117, + "logps/chosen": -159.062744140625, + "logps/rejected": -226.3406982421875, + "loss": 1.2295, + "rewards/accuracies": 0.5, + "rewards/chosen": -12.400677680969238, + "rewards/margins": 3.7249550819396973, + "rewards/rejected": -16.125633239746094, + "step": 18805 + }, + { + "epoch": 2.92, + "learning_rate": 3.542517765444266e-07, + "logits/chosen": -2.8827602863311768, + "logits/rejected": -2.999049186706543, + "logps/chosen": -725.174560546875, + "logps/rejected": -883.0673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.415872097015381, + "rewards/margins": 17.29595184326172, + "rewards/rejected": -22.711824417114258, + "step": 18806 + }, + { + "epoch": 2.92, + "learning_rate": 3.5351833601327873e-07, + "logits/chosen": -2.5303423404693604, + "logits/rejected": -2.790839433670044, + "logps/chosen": -122.07194519042969, + "logps/rejected": -545.9706420898438, + "loss": 0.0336, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.722764015197754, + "rewards/margins": 13.999835968017578, + "rewards/rejected": -21.72260093688965, + "step": 18807 + }, + { + "epoch": 2.93, + "learning_rate": 3.527848954821308e-07, + "logits/chosen": -2.4229328632354736, + "logits/rejected": -1.5171200037002563, + "logps/chosen": -249.13018798828125, + "logps/rejected": -293.65087890625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.87518310546875, + "rewards/margins": 8.5494384765625, + "rewards/rejected": -17.42462158203125, + "step": 18808 + }, + { + "epoch": 2.93, + "learning_rate": 3.520514549509829e-07, + "logits/chosen": -1.4320319890975952, + "logits/rejected": -2.585484266281128, + "logps/chosen": -179.0889129638672, + "logps/rejected": -423.84991455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.332921981811523, + "rewards/margins": 13.33658218383789, + "rewards/rejected": -23.669506072998047, + "step": 18809 + }, + { + "epoch": 2.93, + "learning_rate": 3.5131801441983505e-07, + "logits/chosen": -2.4826102256774902, + "logits/rejected": -2.7654569149017334, + "logps/chosen": -230.64697265625, + "logps/rejected": -263.40771484375, + "loss": 0.2118, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.092320442199707, + "rewards/margins": 3.1394762992858887, + "rewards/rejected": -16.231796264648438, + "step": 18810 + }, + { + "epoch": 2.93, + "learning_rate": 3.5058457388868714e-07, + "logits/chosen": -2.0495970249176025, + "logits/rejected": -2.6965205669403076, + "logps/chosen": -330.9693908691406, + "logps/rejected": -444.34332275390625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.512463569641113, + "rewards/margins": 7.721059322357178, + "rewards/rejected": -18.233522415161133, + "step": 18811 + }, + { + "epoch": 2.93, + "learning_rate": 3.498511333575393e-07, + "logits/chosen": -1.009374976158142, + "logits/rejected": -2.522127628326416, + "logps/chosen": -125.60736083984375, + "logps/rejected": -433.8629455566406, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.289604187011719, + "rewards/margins": 11.332958221435547, + "rewards/rejected": -21.622562408447266, + "step": 18812 + }, + { + "epoch": 2.93, + "learning_rate": 3.491176928263914e-07, + "logits/chosen": -2.6642532348632812, + "logits/rejected": -1.8992377519607544, + "logps/chosen": -264.4351806640625, + "logps/rejected": -338.84124755859375, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.008602142333984, + "rewards/margins": 7.610708713531494, + "rewards/rejected": -18.619312286376953, + "step": 18813 + }, + { + "epoch": 2.93, + "learning_rate": 3.483842522952435e-07, + "logits/chosen": -1.6174556016921997, + "logits/rejected": -2.7098500728607178, + "logps/chosen": -280.7445983886719, + "logps/rejected": -488.6661376953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.644804954528809, + "rewards/margins": 12.118424415588379, + "rewards/rejected": -23.763229370117188, + "step": 18814 + }, + { + "epoch": 2.93, + "learning_rate": 3.4765081176409566e-07, + "logits/chosen": -1.3747845888137817, + "logits/rejected": -1.7327126264572144, + "logps/chosen": -421.69390869140625, + "logps/rejected": -479.2106628417969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.637234687805176, + "rewards/margins": 16.159969329833984, + "rewards/rejected": -21.797203063964844, + "step": 18815 + }, + { + "epoch": 2.93, + "learning_rate": 3.4691737123294775e-07, + "logits/chosen": -2.1395461559295654, + "logits/rejected": -2.046290397644043, + "logps/chosen": -239.96481323242188, + "logps/rejected": -441.56329345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.270702838897705, + "rewards/margins": 12.719690322875977, + "rewards/rejected": -17.990392684936523, + "step": 18816 + }, + { + "epoch": 2.93, + "learning_rate": 3.461839307017999e-07, + "logits/chosen": -2.269015312194824, + "logits/rejected": -2.706676959991455, + "logps/chosen": -410.3240661621094, + "logps/rejected": -653.2598266601562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.717199325561523, + "rewards/margins": 14.277865409851074, + "rewards/rejected": -24.99506378173828, + "step": 18817 + }, + { + "epoch": 2.93, + "learning_rate": 3.45450490170652e-07, + "logits/chosen": -2.4256889820098877, + "logits/rejected": -1.5431344509124756, + "logps/chosen": -284.7766418457031, + "logps/rejected": -258.59527587890625, + "loss": 0.0329, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.783967018127441, + "rewards/margins": 6.458956718444824, + "rewards/rejected": -18.242923736572266, + "step": 18818 + }, + { + "epoch": 2.93, + "learning_rate": 3.447170496395041e-07, + "logits/chosen": -2.7904601097106934, + "logits/rejected": -2.8318169116973877, + "logps/chosen": -316.25, + "logps/rejected": -485.9909362792969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.638849258422852, + "rewards/margins": 11.387590408325195, + "rewards/rejected": -20.026439666748047, + "step": 18819 + }, + { + "epoch": 2.93, + "learning_rate": 3.4398360910835627e-07, + "logits/chosen": -2.374885320663452, + "logits/rejected": -1.7132656574249268, + "logps/chosen": -425.79193115234375, + "logps/rejected": -220.66473388671875, + "loss": 1.4988, + "rewards/accuracies": 0.5, + "rewards/chosen": -9.750142097473145, + "rewards/margins": 2.418865442276001, + "rewards/rejected": -12.169007301330566, + "step": 18820 + }, + { + "epoch": 2.93, + "learning_rate": 3.4325016857720836e-07, + "logits/chosen": -2.7757298946380615, + "logits/rejected": -2.5633387565612793, + "logps/chosen": -167.66876220703125, + "logps/rejected": -338.94403076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.955848693847656, + "rewards/margins": 10.974187850952148, + "rewards/rejected": -16.930036544799805, + "step": 18821 + }, + { + "epoch": 2.93, + "learning_rate": 3.425167280460605e-07, + "logits/chosen": -2.1595044136047363, + "logits/rejected": -2.4958062171936035, + "logps/chosen": -264.1510009765625, + "logps/rejected": -420.79949951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.18649959564209, + "rewards/margins": 12.68446159362793, + "rewards/rejected": -21.870962142944336, + "step": 18822 + }, + { + "epoch": 2.93, + "learning_rate": 3.417832875149126e-07, + "logits/chosen": -2.347444772720337, + "logits/rejected": -2.8943357467651367, + "logps/chosen": -156.73681640625, + "logps/rejected": -393.467529296875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.842551231384277, + "rewards/margins": 7.460657596588135, + "rewards/rejected": -16.30320930480957, + "step": 18823 + }, + { + "epoch": 2.93, + "learning_rate": 3.4104984698376474e-07, + "logits/chosen": -1.57998788356781, + "logits/rejected": -2.534475088119507, + "logps/chosen": -163.37203979492188, + "logps/rejected": -360.3009033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.381176948547363, + "rewards/margins": 10.685379028320312, + "rewards/rejected": -16.066556930541992, + "step": 18824 + }, + { + "epoch": 2.93, + "learning_rate": 3.403164064526169e-07, + "logits/chosen": -1.3449667692184448, + "logits/rejected": -2.2619807720184326, + "logps/chosen": -222.307861328125, + "logps/rejected": -587.6199951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.763609886169434, + "rewards/margins": 13.500669479370117, + "rewards/rejected": -23.264278411865234, + "step": 18825 + }, + { + "epoch": 2.93, + "learning_rate": 3.3958296592146897e-07, + "logits/chosen": -1.944475531578064, + "logits/rejected": -2.782439947128296, + "logps/chosen": -394.0164489746094, + "logps/rejected": -791.365234375, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.707610130310059, + "rewards/margins": 7.355868339538574, + "rewards/rejected": -19.063478469848633, + "step": 18826 + }, + { + "epoch": 2.93, + "learning_rate": 3.388495253903211e-07, + "logits/chosen": -2.7646052837371826, + "logits/rejected": -0.9305791854858398, + "logps/chosen": -358.7995910644531, + "logps/rejected": -407.33648681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.685750961303711, + "rewards/margins": 10.157050132751465, + "rewards/rejected": -17.84280014038086, + "step": 18827 + }, + { + "epoch": 2.93, + "learning_rate": 3.381160848591732e-07, + "logits/chosen": -2.6219890117645264, + "logits/rejected": -1.6210492849349976, + "logps/chosen": -533.2499389648438, + "logps/rejected": -621.8836059570312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.793841361999512, + "rewards/margins": 12.19372272491455, + "rewards/rejected": -22.987564086914062, + "step": 18828 + }, + { + "epoch": 2.93, + "learning_rate": 3.373826443280253e-07, + "logits/chosen": -2.366670608520508, + "logits/rejected": -1.500991940498352, + "logps/chosen": -293.4386901855469, + "logps/rejected": -454.67401123046875, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.2763090133667, + "rewards/margins": 12.073465347290039, + "rewards/rejected": -21.349775314331055, + "step": 18829 + }, + { + "epoch": 2.93, + "learning_rate": 3.3664920379687743e-07, + "logits/chosen": -2.640646457672119, + "logits/rejected": -2.785308599472046, + "logps/chosen": -420.93804931640625, + "logps/rejected": -835.3717041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.130061149597168, + "rewards/margins": 17.95981216430664, + "rewards/rejected": -25.089872360229492, + "step": 18830 + }, + { + "epoch": 2.93, + "learning_rate": 3.359157632657295e-07, + "logits/chosen": -2.1624844074249268, + "logits/rejected": -2.729914665222168, + "logps/chosen": -240.48016357421875, + "logps/rejected": -419.9226379394531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.613906860351562, + "rewards/margins": 12.036018371582031, + "rewards/rejected": -21.649925231933594, + "step": 18831 + }, + { + "epoch": 2.93, + "learning_rate": 3.3518232273458167e-07, + "logits/chosen": -2.6859843730926514, + "logits/rejected": -2.997694969177246, + "logps/chosen": -251.93869018554688, + "logps/rejected": -435.0020446777344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.497489929199219, + "rewards/margins": 11.604629516601562, + "rewards/rejected": -22.10211944580078, + "step": 18832 + }, + { + "epoch": 2.93, + "learning_rate": 3.3444888220343376e-07, + "logits/chosen": -2.694927930831909, + "logits/rejected": -2.138965368270874, + "logps/chosen": -516.2147216796875, + "logps/rejected": -516.6846923828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.457250595092773, + "rewards/margins": 12.143375396728516, + "rewards/rejected": -19.60062599182129, + "step": 18833 + }, + { + "epoch": 2.93, + "learning_rate": 3.337154416722859e-07, + "logits/chosen": -1.468056082725525, + "logits/rejected": -3.009462833404541, + "logps/chosen": -98.73855590820312, + "logps/rejected": -379.09625244140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.553622245788574, + "rewards/margins": 8.819716453552246, + "rewards/rejected": -17.37333869934082, + "step": 18834 + }, + { + "epoch": 2.93, + "learning_rate": 3.3298200114113804e-07, + "logits/chosen": -1.2164260149002075, + "logits/rejected": -2.7555692195892334, + "logps/chosen": -226.88278198242188, + "logps/rejected": -543.7593994140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.578614234924316, + "rewards/margins": 11.718177795410156, + "rewards/rejected": -23.296791076660156, + "step": 18835 + }, + { + "epoch": 2.93, + "learning_rate": 3.3224856060999013e-07, + "logits/chosen": -3.013613700866699, + "logits/rejected": -2.796043872833252, + "logps/chosen": -343.4277648925781, + "logps/rejected": -538.2473754882812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.190098762512207, + "rewards/margins": 16.55539321899414, + "rewards/rejected": -26.74549102783203, + "step": 18836 + }, + { + "epoch": 2.93, + "learning_rate": 3.315151200788423e-07, + "logits/chosen": -0.8055952191352844, + "logits/rejected": -1.610093593597412, + "logps/chosen": -269.5870666503906, + "logps/rejected": -529.8255615234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.973350524902344, + "rewards/margins": 10.764415740966797, + "rewards/rejected": -18.73776626586914, + "step": 18837 + }, + { + "epoch": 2.93, + "learning_rate": 3.3078167954769437e-07, + "logits/chosen": -1.4387820959091187, + "logits/rejected": -2.4720568656921387, + "logps/chosen": -198.15695190429688, + "logps/rejected": -395.156494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.486734390258789, + "rewards/margins": 9.240968704223633, + "rewards/rejected": -20.727703094482422, + "step": 18838 + }, + { + "epoch": 2.93, + "learning_rate": 3.300482390165465e-07, + "logits/chosen": -2.0599310398101807, + "logits/rejected": -2.4549641609191895, + "logps/chosen": -313.50848388671875, + "logps/rejected": -324.7221374511719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.834151268005371, + "rewards/margins": 8.420616149902344, + "rewards/rejected": -16.25476837158203, + "step": 18839 + }, + { + "epoch": 2.93, + "learning_rate": 3.293147984853986e-07, + "logits/chosen": -2.127500534057617, + "logits/rejected": -2.638430595397949, + "logps/chosen": -254.45660400390625, + "logps/rejected": -421.9189147949219, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.2337646484375, + "rewards/margins": 8.503382682800293, + "rewards/rejected": -18.737146377563477, + "step": 18840 + }, + { + "epoch": 2.93, + "learning_rate": 3.2858135795425074e-07, + "logits/chosen": -1.4966726303100586, + "logits/rejected": -2.4062020778656006, + "logps/chosen": -177.88446044921875, + "logps/rejected": -496.4042663574219, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.293031692504883, + "rewards/margins": 12.322927474975586, + "rewards/rejected": -25.61595916748047, + "step": 18841 + }, + { + "epoch": 2.93, + "learning_rate": 3.278479174231029e-07, + "logits/chosen": -1.2513699531555176, + "logits/rejected": -2.551788568496704, + "logps/chosen": -116.98002624511719, + "logps/rejected": -428.5592956542969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.295866012573242, + "rewards/margins": 9.952537536621094, + "rewards/rejected": -19.248403549194336, + "step": 18842 + }, + { + "epoch": 2.93, + "learning_rate": 3.27114476891955e-07, + "logits/chosen": -2.356924057006836, + "logits/rejected": -2.773159980773926, + "logps/chosen": -293.1961364746094, + "logps/rejected": -672.7601318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.751352310180664, + "rewards/margins": 16.881277084350586, + "rewards/rejected": -26.63262939453125, + "step": 18843 + }, + { + "epoch": 2.93, + "learning_rate": 3.263810363608071e-07, + "logits/chosen": -2.6724419593811035, + "logits/rejected": -2.648043632507324, + "logps/chosen": -271.873046875, + "logps/rejected": -370.23980712890625, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.025964736938477, + "rewards/margins": 9.738722801208496, + "rewards/rejected": -20.76468849182129, + "step": 18844 + }, + { + "epoch": 2.93, + "learning_rate": 3.256475958296592e-07, + "logits/chosen": -2.6341652870178223, + "logits/rejected": -2.9837517738342285, + "logps/chosen": -290.9527282714844, + "logps/rejected": -513.9781494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.551464080810547, + "rewards/margins": 9.899347305297852, + "rewards/rejected": -18.4508113861084, + "step": 18845 + }, + { + "epoch": 2.93, + "learning_rate": 3.2491415529851135e-07, + "logits/chosen": -2.305274248123169, + "logits/rejected": -2.6226162910461426, + "logps/chosen": -200.810791015625, + "logps/rejected": -414.8809509277344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.19858455657959, + "rewards/margins": 11.319295883178711, + "rewards/rejected": -19.517879486083984, + "step": 18846 + }, + { + "epoch": 2.93, + "learning_rate": 3.241807147673635e-07, + "logits/chosen": -2.598093032836914, + "logits/rejected": -2.7679011821746826, + "logps/chosen": -261.3177490234375, + "logps/rejected": -311.18768310546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.38344955444336, + "rewards/margins": 9.49813461303711, + "rewards/rejected": -17.88158416748047, + "step": 18847 + }, + { + "epoch": 2.93, + "learning_rate": 3.234472742362156e-07, + "logits/chosen": -2.772038221359253, + "logits/rejected": -2.6441612243652344, + "logps/chosen": -330.34246826171875, + "logps/rejected": -244.24461364746094, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.206555366516113, + "rewards/margins": 6.995283126831055, + "rewards/rejected": -16.201839447021484, + "step": 18848 + }, + { + "epoch": 2.93, + "learning_rate": 3.2271383370506773e-07, + "logits/chosen": -2.6290667057037354, + "logits/rejected": -2.98088002204895, + "logps/chosen": -79.94432067871094, + "logps/rejected": -306.05023193359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.563477516174316, + "rewards/margins": 9.402847290039062, + "rewards/rejected": -14.966325759887695, + "step": 18849 + }, + { + "epoch": 2.93, + "learning_rate": 3.219803931739198e-07, + "logits/chosen": -2.6885554790496826, + "logits/rejected": -2.18170166015625, + "logps/chosen": -648.165283203125, + "logps/rejected": -406.3614807128906, + "loss": 0.0544, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.749432563781738, + "rewards/margins": 6.179409027099609, + "rewards/rejected": -14.928841590881348, + "step": 18850 + }, + { + "epoch": 2.93, + "learning_rate": 3.212469526427719e-07, + "logits/chosen": -2.5552539825439453, + "logits/rejected": -1.1633821725845337, + "logps/chosen": -368.63311767578125, + "logps/rejected": -313.54443359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.51041030883789, + "rewards/margins": 9.725618362426758, + "rewards/rejected": -18.23602867126465, + "step": 18851 + }, + { + "epoch": 2.93, + "learning_rate": 3.2051351211162405e-07, + "logits/chosen": -2.042290449142456, + "logits/rejected": -2.8961031436920166, + "logps/chosen": -201.92245483398438, + "logps/rejected": -539.5525512695312, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.521527290344238, + "rewards/margins": 9.036953926086426, + "rewards/rejected": -22.558481216430664, + "step": 18852 + }, + { + "epoch": 2.93, + "learning_rate": 3.1978007158047614e-07, + "logits/chosen": -2.0942490100860596, + "logits/rejected": -0.9208236336708069, + "logps/chosen": -307.7259521484375, + "logps/rejected": -218.70919799804688, + "loss": 0.0607, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.237970352172852, + "rewards/margins": 3.0584774017333984, + "rewards/rejected": -13.29644775390625, + "step": 18853 + }, + { + "epoch": 2.93, + "learning_rate": 3.190466310493283e-07, + "logits/chosen": -2.5135483741760254, + "logits/rejected": -2.647747278213501, + "logps/chosen": -245.15792846679688, + "logps/rejected": -356.61407470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.155279159545898, + "rewards/margins": 12.030618667602539, + "rewards/rejected": -21.185897827148438, + "step": 18854 + }, + { + "epoch": 2.93, + "learning_rate": 3.1831319051818037e-07, + "logits/chosen": -2.306838274002075, + "logits/rejected": -2.687302350997925, + "logps/chosen": -423.35601806640625, + "logps/rejected": -478.0572509765625, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.256486892700195, + "rewards/margins": 7.356252193450928, + "rewards/rejected": -21.61273956298828, + "step": 18855 + }, + { + "epoch": 2.93, + "learning_rate": 3.175797499870325e-07, + "logits/chosen": -1.954375147819519, + "logits/rejected": -2.7072229385375977, + "logps/chosen": -155.16600036621094, + "logps/rejected": -440.75860595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.250018119812012, + "rewards/margins": 18.257572174072266, + "rewards/rejected": -24.50758934020996, + "step": 18856 + }, + { + "epoch": 2.93, + "learning_rate": 3.1684630945588466e-07, + "logits/chosen": -1.9128180742263794, + "logits/rejected": -2.8396496772766113, + "logps/chosen": -271.974365234375, + "logps/rejected": -565.9724731445312, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.704782485961914, + "rewards/margins": 9.8342866897583, + "rewards/rejected": -18.53907012939453, + "step": 18857 + }, + { + "epoch": 2.93, + "learning_rate": 3.1611286892473675e-07, + "logits/chosen": -2.5570361614227295, + "logits/rejected": -1.11748468875885, + "logps/chosen": -443.1112060546875, + "logps/rejected": -337.33819580078125, + "loss": 0.5826, + "rewards/accuracies": 0.5, + "rewards/chosen": -15.186074256896973, + "rewards/margins": 6.612172603607178, + "rewards/rejected": -21.798246383666992, + "step": 18858 + }, + { + "epoch": 2.93, + "learning_rate": 3.153794283935889e-07, + "logits/chosen": -1.4571093320846558, + "logits/rejected": -2.46956729888916, + "logps/chosen": -271.3280944824219, + "logps/rejected": -637.54345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.578666687011719, + "rewards/margins": 13.419873237609863, + "rewards/rejected": -24.998538970947266, + "step": 18859 + }, + { + "epoch": 2.93, + "learning_rate": 3.14645987862441e-07, + "logits/chosen": -2.362001657485962, + "logits/rejected": -2.893643379211426, + "logps/chosen": -397.8131408691406, + "logps/rejected": -389.58062744140625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.951526641845703, + "rewards/margins": 6.555289268493652, + "rewards/rejected": -14.506816864013672, + "step": 18860 + }, + { + "epoch": 2.93, + "learning_rate": 3.139125473312931e-07, + "logits/chosen": -2.8071839809417725, + "logits/rejected": -2.8139288425445557, + "logps/chosen": -516.3900756835938, + "logps/rejected": -552.401611328125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.170331954956055, + "rewards/margins": 9.58604621887207, + "rewards/rejected": -18.756378173828125, + "step": 18861 + }, + { + "epoch": 2.93, + "learning_rate": 3.1317910680014527e-07, + "logits/chosen": -2.197659730911255, + "logits/rejected": -2.6602325439453125, + "logps/chosen": -321.1923828125, + "logps/rejected": -498.5019226074219, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.57415771484375, + "rewards/margins": 8.316869735717773, + "rewards/rejected": -16.891027450561523, + "step": 18862 + }, + { + "epoch": 2.93, + "learning_rate": 3.1244566626899736e-07, + "logits/chosen": -2.6266696453094482, + "logits/rejected": -2.9187614917755127, + "logps/chosen": -145.91690063476562, + "logps/rejected": -245.32310485839844, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.624531745910645, + "rewards/margins": 7.937771797180176, + "rewards/rejected": -18.56230354309082, + "step": 18863 + }, + { + "epoch": 2.93, + "learning_rate": 3.117122257378495e-07, + "logits/chosen": -2.8173439502716064, + "logits/rejected": -1.875340223312378, + "logps/chosen": -351.9935302734375, + "logps/rejected": -323.51055908203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.503759384155273, + "rewards/margins": 10.41457462310791, + "rewards/rejected": -19.9183349609375, + "step": 18864 + }, + { + "epoch": 2.93, + "learning_rate": 3.109787852067016e-07, + "logits/chosen": -1.28800630569458, + "logits/rejected": -2.496751546859741, + "logps/chosen": -234.7679901123047, + "logps/rejected": -444.9881591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.78934097290039, + "rewards/margins": 10.403688430786133, + "rewards/rejected": -22.193029403686523, + "step": 18865 + }, + { + "epoch": 2.93, + "learning_rate": 3.1024534467555373e-07, + "logits/chosen": -2.657742977142334, + "logits/rejected": -1.337258219718933, + "logps/chosen": -260.4808044433594, + "logps/rejected": -374.59539794921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.533068656921387, + "rewards/margins": 8.998348236083984, + "rewards/rejected": -15.531415939331055, + "step": 18866 + }, + { + "epoch": 2.93, + "learning_rate": 3.095119041444059e-07, + "logits/chosen": -1.8641464710235596, + "logits/rejected": -2.853468179702759, + "logps/chosen": -196.89300537109375, + "logps/rejected": -380.1838684082031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.973593711853027, + "rewards/margins": 11.137566566467285, + "rewards/rejected": -20.111160278320312, + "step": 18867 + }, + { + "epoch": 2.93, + "learning_rate": 3.0877846361325797e-07, + "logits/chosen": -2.573030471801758, + "logits/rejected": -1.8728358745574951, + "logps/chosen": -753.9976806640625, + "logps/rejected": -581.732177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.636282444000244, + "rewards/margins": 11.19095230102539, + "rewards/rejected": -16.827234268188477, + "step": 18868 + }, + { + "epoch": 2.93, + "learning_rate": 3.080450230821101e-07, + "logits/chosen": -1.2786166667938232, + "logits/rejected": -2.7573230266571045, + "logps/chosen": -168.32144165039062, + "logps/rejected": -418.359130859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.630535125732422, + "rewards/margins": 11.12112808227539, + "rewards/rejected": -19.751663208007812, + "step": 18869 + }, + { + "epoch": 2.93, + "learning_rate": 3.073115825509622e-07, + "logits/chosen": -1.889992356300354, + "logits/rejected": -2.423659086227417, + "logps/chosen": -164.0231170654297, + "logps/rejected": -411.31988525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.659366607666016, + "rewards/margins": 17.272958755493164, + "rewards/rejected": -26.93232536315918, + "step": 18870 + }, + { + "epoch": 2.93, + "learning_rate": 3.065781420198143e-07, + "logits/chosen": -2.3680202960968018, + "logits/rejected": -2.7345809936523438, + "logps/chosen": -254.4703826904297, + "logps/rejected": -346.30767822265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.825425624847412, + "rewards/margins": 10.784723281860352, + "rewards/rejected": -17.610149383544922, + "step": 18871 + }, + { + "epoch": 2.93, + "learning_rate": 3.058447014886664e-07, + "logits/chosen": -2.6495816707611084, + "logits/rejected": -2.3638315200805664, + "logps/chosen": -150.18496704101562, + "logps/rejected": -272.14031982421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.689474105834961, + "rewards/margins": 9.411775588989258, + "rewards/rejected": -17.10124969482422, + "step": 18872 + }, + { + "epoch": 2.94, + "learning_rate": 3.051112609575185e-07, + "logits/chosen": -1.436585783958435, + "logits/rejected": -2.594531297683716, + "logps/chosen": -188.41314697265625, + "logps/rejected": -557.136474609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7951250076293945, + "rewards/margins": 11.458110809326172, + "rewards/rejected": -18.253236770629883, + "step": 18873 + }, + { + "epoch": 2.94, + "learning_rate": 3.0437782042637067e-07, + "logits/chosen": -2.7510344982147217, + "logits/rejected": -2.398824691772461, + "logps/chosen": -400.12445068359375, + "logps/rejected": -546.41455078125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.815461158752441, + "rewards/margins": 10.3643798828125, + "rewards/rejected": -18.179840087890625, + "step": 18874 + }, + { + "epoch": 2.94, + "learning_rate": 3.0364437989522276e-07, + "logits/chosen": -3.0574727058410645, + "logits/rejected": -2.9606919288635254, + "logps/chosen": -399.5874328613281, + "logps/rejected": -441.3775634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.626949310302734, + "rewards/margins": 15.227193832397461, + "rewards/rejected": -24.854143142700195, + "step": 18875 + }, + { + "epoch": 2.94, + "learning_rate": 3.029109393640749e-07, + "logits/chosen": -2.116438627243042, + "logits/rejected": -2.8504245281219482, + "logps/chosen": -866.0399780273438, + "logps/rejected": -966.1669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.864471435546875, + "rewards/margins": 13.657788276672363, + "rewards/rejected": -20.522258758544922, + "step": 18876 + }, + { + "epoch": 2.94, + "learning_rate": 3.02177498832927e-07, + "logits/chosen": -2.7261438369750977, + "logits/rejected": -2.886766195297241, + "logps/chosen": -202.5955352783203, + "logps/rejected": -210.22451782226562, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.12044906616211, + "rewards/margins": 7.824558734893799, + "rewards/rejected": -15.94500732421875, + "step": 18877 + }, + { + "epoch": 2.94, + "learning_rate": 3.0144405830177913e-07, + "logits/chosen": -2.2187721729278564, + "logits/rejected": -2.1882004737854004, + "logps/chosen": -316.1212158203125, + "logps/rejected": -312.3025207519531, + "loss": 0.4372, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.115446090698242, + "rewards/margins": 3.2386868000030518, + "rewards/rejected": -13.354132652282715, + "step": 18878 + }, + { + "epoch": 2.94, + "learning_rate": 3.007106177706313e-07, + "logits/chosen": -2.2659480571746826, + "logits/rejected": -2.844367504119873, + "logps/chosen": -337.8258361816406, + "logps/rejected": -403.4932556152344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.469761848449707, + "rewards/margins": 10.35539436340332, + "rewards/rejected": -17.82515525817871, + "step": 18879 + }, + { + "epoch": 2.94, + "learning_rate": 2.9997717723948337e-07, + "logits/chosen": -1.8481979370117188, + "logits/rejected": -2.7427728176116943, + "logps/chosen": -224.97535705566406, + "logps/rejected": -475.1754455566406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.764345645904541, + "rewards/margins": 13.408414840698242, + "rewards/rejected": -21.172760009765625, + "step": 18880 + }, + { + "epoch": 2.94, + "learning_rate": 2.992437367083355e-07, + "logits/chosen": -2.7506983280181885, + "logits/rejected": -2.412219762802124, + "logps/chosen": -385.69287109375, + "logps/rejected": -330.93731689453125, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.713395118713379, + "rewards/margins": 7.522458076477051, + "rewards/rejected": -19.23585319519043, + "step": 18881 + }, + { + "epoch": 2.94, + "learning_rate": 2.985102961771876e-07, + "logits/chosen": -2.485793113708496, + "logits/rejected": -2.556011199951172, + "logps/chosen": -259.5277099609375, + "logps/rejected": -361.04974365234375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.641216278076172, + "rewards/margins": 8.273004531860352, + "rewards/rejected": -19.91421890258789, + "step": 18882 + }, + { + "epoch": 2.94, + "learning_rate": 2.9777685564603974e-07, + "logits/chosen": -2.600627899169922, + "logits/rejected": -2.631988048553467, + "logps/chosen": -138.04629516601562, + "logps/rejected": -322.1122131347656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.598974227905273, + "rewards/margins": 10.992324829101562, + "rewards/rejected": -17.591299057006836, + "step": 18883 + }, + { + "epoch": 2.94, + "learning_rate": 2.970434151148919e-07, + "logits/chosen": -2.033864974975586, + "logits/rejected": -2.497375249862671, + "logps/chosen": -236.55081176757812, + "logps/rejected": -470.61846923828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.01028823852539, + "rewards/margins": 12.676712036132812, + "rewards/rejected": -23.687000274658203, + "step": 18884 + }, + { + "epoch": 2.94, + "learning_rate": 2.96309974583744e-07, + "logits/chosen": -1.9750275611877441, + "logits/rejected": -2.7333364486694336, + "logps/chosen": -527.7642822265625, + "logps/rejected": -587.0318603515625, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.41663932800293, + "rewards/margins": 5.839849948883057, + "rewards/rejected": -18.256488800048828, + "step": 18885 + }, + { + "epoch": 2.94, + "learning_rate": 2.955765340525961e-07, + "logits/chosen": -2.343719482421875, + "logits/rejected": -2.7105188369750977, + "logps/chosen": -154.2288818359375, + "logps/rejected": -312.1772766113281, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.286591529846191, + "rewards/margins": 8.733232498168945, + "rewards/rejected": -18.019824981689453, + "step": 18886 + }, + { + "epoch": 2.94, + "learning_rate": 2.948430935214482e-07, + "logits/chosen": -1.8412604331970215, + "logits/rejected": -2.630948543548584, + "logps/chosen": -204.34036254882812, + "logps/rejected": -469.53253173828125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.270046710968018, + "rewards/margins": 9.291037559509277, + "rewards/rejected": -16.561084747314453, + "step": 18887 + }, + { + "epoch": 2.94, + "learning_rate": 2.9410965299030035e-07, + "logits/chosen": -1.8548755645751953, + "logits/rejected": -2.6319007873535156, + "logps/chosen": -208.0867156982422, + "logps/rejected": -403.8038635253906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.769522666931152, + "rewards/margins": 13.55324649810791, + "rewards/rejected": -20.322769165039062, + "step": 18888 + }, + { + "epoch": 2.94, + "learning_rate": 2.933762124591525e-07, + "logits/chosen": -3.0216259956359863, + "logits/rejected": -2.7916204929351807, + "logps/chosen": -286.90582275390625, + "logps/rejected": -414.569091796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.427186965942383, + "rewards/margins": 8.696089744567871, + "rewards/rejected": -16.123275756835938, + "step": 18889 + }, + { + "epoch": 2.94, + "learning_rate": 2.926427719280046e-07, + "logits/chosen": -2.664973020553589, + "logits/rejected": -2.0115177631378174, + "logps/chosen": -317.51434326171875, + "logps/rejected": -469.7779846191406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.579241752624512, + "rewards/margins": 17.779882431030273, + "rewards/rejected": -27.3591251373291, + "step": 18890 + }, + { + "epoch": 2.94, + "learning_rate": 2.919093313968567e-07, + "logits/chosen": -2.1919450759887695, + "logits/rejected": -2.5611631870269775, + "logps/chosen": -121.78353118896484, + "logps/rejected": -342.2395935058594, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.448462009429932, + "rewards/margins": 7.547227382659912, + "rewards/rejected": -14.995689392089844, + "step": 18891 + }, + { + "epoch": 2.94, + "learning_rate": 2.9117589086570876e-07, + "logits/chosen": -2.319100856781006, + "logits/rejected": -2.5768823623657227, + "logps/chosen": -191.21939086914062, + "logps/rejected": -373.5601501464844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.0673189163208, + "rewards/margins": 11.244425773620605, + "rewards/rejected": -19.311744689941406, + "step": 18892 + }, + { + "epoch": 2.94, + "learning_rate": 2.904424503345609e-07, + "logits/chosen": -2.761749744415283, + "logits/rejected": -2.620480537414551, + "logps/chosen": -358.9451904296875, + "logps/rejected": -437.7936096191406, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.098709106445312, + "rewards/margins": 9.87397575378418, + "rewards/rejected": -18.97268295288086, + "step": 18893 + }, + { + "epoch": 2.94, + "learning_rate": 2.8970900980341305e-07, + "logits/chosen": -1.9047082662582397, + "logits/rejected": -2.809415340423584, + "logps/chosen": -339.21087646484375, + "logps/rejected": -743.24365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.955018043518066, + "rewards/margins": 11.770363807678223, + "rewards/rejected": -21.72538185119629, + "step": 18894 + }, + { + "epoch": 2.94, + "learning_rate": 2.8897556927226514e-07, + "logits/chosen": -2.869924545288086, + "logits/rejected": -2.649019956588745, + "logps/chosen": -453.986083984375, + "logps/rejected": -535.03125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.021759033203125, + "rewards/margins": 11.309788703918457, + "rewards/rejected": -20.331546783447266, + "step": 18895 + }, + { + "epoch": 2.94, + "learning_rate": 2.882421287411173e-07, + "logits/chosen": -2.637362003326416, + "logits/rejected": -2.508328914642334, + "logps/chosen": -713.7636108398438, + "logps/rejected": -649.043212890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.332158088684082, + "rewards/margins": 9.008009910583496, + "rewards/rejected": -18.340167999267578, + "step": 18896 + }, + { + "epoch": 2.94, + "learning_rate": 2.8750868820996937e-07, + "logits/chosen": -2.5629806518554688, + "logits/rejected": -2.3228442668914795, + "logps/chosen": -353.37518310546875, + "logps/rejected": -367.7127990722656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.772534370422363, + "rewards/margins": 9.218358993530273, + "rewards/rejected": -16.990894317626953, + "step": 18897 + }, + { + "epoch": 2.94, + "learning_rate": 2.867752476788215e-07, + "logits/chosen": -1.8662822246551514, + "logits/rejected": -2.495819330215454, + "logps/chosen": -460.97308349609375, + "logps/rejected": -740.0979614257812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.770378112792969, + "rewards/margins": 18.051748275756836, + "rewards/rejected": -29.822128295898438, + "step": 18898 + }, + { + "epoch": 2.94, + "learning_rate": 2.8604180714767366e-07, + "logits/chosen": -2.5678958892822266, + "logits/rejected": -2.4609463214874268, + "logps/chosen": -167.89190673828125, + "logps/rejected": -180.32357788085938, + "loss": 0.187, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.179323196411133, + "rewards/margins": 3.168745279312134, + "rewards/rejected": -13.348068237304688, + "step": 18899 + }, + { + "epoch": 2.94, + "learning_rate": 2.8530836661652575e-07, + "logits/chosen": -0.7197434902191162, + "logits/rejected": -1.5412638187408447, + "logps/chosen": -257.2131042480469, + "logps/rejected": -711.011474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.631156921386719, + "rewards/margins": 21.487140655517578, + "rewards/rejected": -31.118297576904297, + "step": 18900 + }, + { + "epoch": 2.94, + "learning_rate": 2.845749260853779e-07, + "logits/chosen": -1.5078234672546387, + "logits/rejected": -2.506535768508911, + "logps/chosen": -175.74636840820312, + "logps/rejected": -426.771240234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.272186279296875, + "rewards/margins": 9.018327713012695, + "rewards/rejected": -21.29051399230957, + "step": 18901 + }, + { + "epoch": 2.94, + "learning_rate": 2.8384148555423e-07, + "logits/chosen": -2.6133062839508057, + "logits/rejected": -2.9845242500305176, + "logps/chosen": -406.3408508300781, + "logps/rejected": -584.056640625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.860517501831055, + "rewards/margins": 8.768835067749023, + "rewards/rejected": -19.629352569580078, + "step": 18902 + }, + { + "epoch": 2.94, + "learning_rate": 2.831080450230821e-07, + "logits/chosen": -1.6944324970245361, + "logits/rejected": -2.7985024452209473, + "logps/chosen": -549.953125, + "logps/rejected": -724.3431396484375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.885876655578613, + "rewards/margins": 9.654274940490723, + "rewards/rejected": -22.540151596069336, + "step": 18903 + }, + { + "epoch": 2.94, + "learning_rate": 2.823746044919342e-07, + "logits/chosen": -1.9253884553909302, + "logits/rejected": -2.0553395748138428, + "logps/chosen": -195.648681640625, + "logps/rejected": -261.13897705078125, + "loss": 0.0368, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.869685173034668, + "rewards/margins": 4.055995464324951, + "rewards/rejected": -17.92568016052246, + "step": 18904 + }, + { + "epoch": 2.94, + "learning_rate": 2.8164116396078636e-07, + "logits/chosen": -2.813498020172119, + "logits/rejected": -1.8536376953125, + "logps/chosen": -289.7319641113281, + "logps/rejected": -547.0919799804688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8705010414123535, + "rewards/margins": 15.968050956726074, + "rewards/rejected": -20.838550567626953, + "step": 18905 + }, + { + "epoch": 2.94, + "learning_rate": 2.809077234296385e-07, + "logits/chosen": -2.6251754760742188, + "logits/rejected": -2.558685541152954, + "logps/chosen": -242.27304077148438, + "logps/rejected": -357.6222229003906, + "loss": 0.7443, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.726093292236328, + "rewards/margins": 3.659700393676758, + "rewards/rejected": -15.385793685913086, + "step": 18906 + }, + { + "epoch": 2.94, + "learning_rate": 2.801742828984906e-07, + "logits/chosen": -1.7354564666748047, + "logits/rejected": -2.8330020904541016, + "logps/chosen": -289.44586181640625, + "logps/rejected": -541.1658935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.4064302444458, + "rewards/margins": 10.666814804077148, + "rewards/rejected": -19.073246002197266, + "step": 18907 + }, + { + "epoch": 2.94, + "learning_rate": 2.7944084236734273e-07, + "logits/chosen": -1.8062937259674072, + "logits/rejected": -2.455838680267334, + "logps/chosen": -187.33502197265625, + "logps/rejected": -397.55841064453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.472708702087402, + "rewards/margins": 14.173173904418945, + "rewards/rejected": -22.64588165283203, + "step": 18908 + }, + { + "epoch": 2.94, + "learning_rate": 2.787074018361948e-07, + "logits/chosen": -2.614997625350952, + "logits/rejected": -2.2833504676818848, + "logps/chosen": -188.1979522705078, + "logps/rejected": -376.3818054199219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.973796844482422, + "rewards/margins": 9.973969459533691, + "rewards/rejected": -18.947765350341797, + "step": 18909 + }, + { + "epoch": 2.94, + "learning_rate": 2.7797396130504697e-07, + "logits/chosen": -2.429992198944092, + "logits/rejected": -2.84024977684021, + "logps/chosen": -122.82060241699219, + "logps/rejected": -307.31268310546875, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.637807846069336, + "rewards/margins": 6.647756576538086, + "rewards/rejected": -15.285564422607422, + "step": 18910 + }, + { + "epoch": 2.94, + "learning_rate": 2.7724052077389906e-07, + "logits/chosen": -2.037081718444824, + "logits/rejected": -2.5338659286499023, + "logps/chosen": -208.75985717773438, + "logps/rejected": -391.3213806152344, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.785636901855469, + "rewards/margins": 7.502349376678467, + "rewards/rejected": -17.287986755371094, + "step": 18911 + }, + { + "epoch": 2.94, + "learning_rate": 2.7650708024275115e-07, + "logits/chosen": -2.747347831726074, + "logits/rejected": -1.9205952882766724, + "logps/chosen": -302.8338317871094, + "logps/rejected": -262.76580810546875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.139101505279541, + "rewards/margins": 8.575607299804688, + "rewards/rejected": -12.714709281921387, + "step": 18912 + }, + { + "epoch": 2.94, + "learning_rate": 2.757736397116033e-07, + "logits/chosen": -2.3600456714630127, + "logits/rejected": -2.779754400253296, + "logps/chosen": -338.7249450683594, + "logps/rejected": -535.109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.981802940368652, + "rewards/margins": 10.977180480957031, + "rewards/rejected": -17.958984375, + "step": 18913 + }, + { + "epoch": 2.94, + "learning_rate": 2.750401991804554e-07, + "logits/chosen": -2.591935634613037, + "logits/rejected": -2.6432509422302246, + "logps/chosen": -243.92178344726562, + "logps/rejected": -427.9976806640625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.42786979675293, + "rewards/margins": 13.088605880737305, + "rewards/rejected": -26.516475677490234, + "step": 18914 + }, + { + "epoch": 2.94, + "learning_rate": 2.743067586493075e-07, + "logits/chosen": -0.7547745704650879, + "logits/rejected": -2.742912769317627, + "logps/chosen": -214.50848388671875, + "logps/rejected": -566.2271728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.946273803710938, + "rewards/margins": 16.687145233154297, + "rewards/rejected": -29.633419036865234, + "step": 18915 + }, + { + "epoch": 2.94, + "learning_rate": 2.7357331811815967e-07, + "logits/chosen": -1.5005046129226685, + "logits/rejected": -2.9057648181915283, + "logps/chosen": -151.12513732910156, + "logps/rejected": -829.653564453125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.428321838378906, + "rewards/margins": 9.246522903442383, + "rewards/rejected": -20.67484474182129, + "step": 18916 + }, + { + "epoch": 2.94, + "learning_rate": 2.7283987758701176e-07, + "logits/chosen": -1.1975500583648682, + "logits/rejected": -2.179720163345337, + "logps/chosen": -305.28363037109375, + "logps/rejected": -598.303955078125, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.405104637145996, + "rewards/margins": 12.197084426879883, + "rewards/rejected": -22.602188110351562, + "step": 18917 + }, + { + "epoch": 2.94, + "learning_rate": 2.721064370558639e-07, + "logits/chosen": -0.8966082334518433, + "logits/rejected": -2.623445749282837, + "logps/chosen": -150.84817504882812, + "logps/rejected": -379.83294677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.49389362335205, + "rewards/margins": 11.110980033874512, + "rewards/rejected": -22.604873657226562, + "step": 18918 + }, + { + "epoch": 2.94, + "learning_rate": 2.71372996524716e-07, + "logits/chosen": -2.4249589443206787, + "logits/rejected": -2.823539972305298, + "logps/chosen": -128.5308837890625, + "logps/rejected": -331.0200500488281, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.715530395507812, + "rewards/margins": 11.158683776855469, + "rewards/rejected": -19.87421226501465, + "step": 18919 + }, + { + "epoch": 2.94, + "learning_rate": 2.7063955599356813e-07, + "logits/chosen": -1.4850893020629883, + "logits/rejected": -2.566009044647217, + "logps/chosen": -219.94989013671875, + "logps/rejected": -535.15869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.129245281219482, + "rewards/margins": 15.212331771850586, + "rewards/rejected": -22.341577529907227, + "step": 18920 + }, + { + "epoch": 2.94, + "learning_rate": 2.699061154624203e-07, + "logits/chosen": -2.5659260749816895, + "logits/rejected": -2.7772440910339355, + "logps/chosen": -177.3022918701172, + "logps/rejected": -385.614501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.571885108947754, + "rewards/margins": 11.829320907592773, + "rewards/rejected": -19.401206970214844, + "step": 18921 + }, + { + "epoch": 2.94, + "learning_rate": 2.6917267493127237e-07, + "logits/chosen": -1.8909060955047607, + "logits/rejected": -2.8702800273895264, + "logps/chosen": -152.38949584960938, + "logps/rejected": -389.26922607421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.255218505859375, + "rewards/margins": 8.795818328857422, + "rewards/rejected": -19.051036834716797, + "step": 18922 + }, + { + "epoch": 2.94, + "learning_rate": 2.684392344001245e-07, + "logits/chosen": -2.4362945556640625, + "logits/rejected": -2.6241118907928467, + "logps/chosen": -149.85598754882812, + "logps/rejected": -436.9349365234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.401588439941406, + "rewards/margins": 9.210321426391602, + "rewards/rejected": -19.611907958984375, + "step": 18923 + }, + { + "epoch": 2.94, + "learning_rate": 2.677057938689766e-07, + "logits/chosen": -2.454277515411377, + "logits/rejected": -1.9483052492141724, + "logps/chosen": -602.0533447265625, + "logps/rejected": -790.2939453125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.225715637207031, + "rewards/margins": 9.660941123962402, + "rewards/rejected": -22.88665771484375, + "step": 18924 + }, + { + "epoch": 2.94, + "learning_rate": 2.6697235333782874e-07, + "logits/chosen": -2.482781171798706, + "logits/rejected": -2.7440831661224365, + "logps/chosen": -165.27456665039062, + "logps/rejected": -217.4517364501953, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.548135280609131, + "rewards/margins": 7.166369438171387, + "rewards/rejected": -14.71450424194336, + "step": 18925 + }, + { + "epoch": 2.94, + "learning_rate": 2.662389128066809e-07, + "logits/chosen": -2.766292095184326, + "logits/rejected": -2.81927490234375, + "logps/chosen": -138.4593505859375, + "logps/rejected": -213.0735321044922, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.09982681274414, + "rewards/margins": 5.210046291351318, + "rewards/rejected": -14.309873580932617, + "step": 18926 + }, + { + "epoch": 2.94, + "learning_rate": 2.65505472275533e-07, + "logits/chosen": -2.8931119441986084, + "logits/rejected": -1.9619718790054321, + "logps/chosen": -304.66339111328125, + "logps/rejected": -338.1647033691406, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.706850528717041, + "rewards/margins": 11.255369186401367, + "rewards/rejected": -17.96221923828125, + "step": 18927 + }, + { + "epoch": 2.94, + "learning_rate": 2.647720317443851e-07, + "logits/chosen": -2.1827945709228516, + "logits/rejected": -2.681016445159912, + "logps/chosen": -455.0303955078125, + "logps/rejected": -672.85595703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.18271541595459, + "rewards/margins": 11.126373291015625, + "rewards/rejected": -24.30908966064453, + "step": 18928 + }, + { + "epoch": 2.94, + "learning_rate": 2.640385912132372e-07, + "logits/chosen": -2.786288022994995, + "logits/rejected": -2.623307228088379, + "logps/chosen": -615.5504760742188, + "logps/rejected": -648.680419921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.68285846710205, + "rewards/margins": 11.019950866699219, + "rewards/rejected": -21.702808380126953, + "step": 18929 + }, + { + "epoch": 2.94, + "learning_rate": 2.6330515068208935e-07, + "logits/chosen": -2.4284510612487793, + "logits/rejected": -2.8634748458862305, + "logps/chosen": -151.57867431640625, + "logps/rejected": -296.8038024902344, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.359668731689453, + "rewards/margins": 9.786471366882324, + "rewards/rejected": -16.146141052246094, + "step": 18930 + }, + { + "epoch": 2.94, + "learning_rate": 2.6257171015094144e-07, + "logits/chosen": -2.6238656044006348, + "logits/rejected": -3.047858715057373, + "logps/chosen": -289.623291015625, + "logps/rejected": -470.6213073730469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.787468910217285, + "rewards/margins": 11.480917930603027, + "rewards/rejected": -22.268386840820312, + "step": 18931 + }, + { + "epoch": 2.94, + "learning_rate": 2.6183826961979353e-07, + "logits/chosen": -2.993067502975464, + "logits/rejected": -2.962834358215332, + "logps/chosen": -415.98138427734375, + "logps/rejected": -620.1142578125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.246392250061035, + "rewards/margins": 10.141871452331543, + "rewards/rejected": -22.388263702392578, + "step": 18932 + }, + { + "epoch": 2.94, + "learning_rate": 2.6110482908864567e-07, + "logits/chosen": -1.6644327640533447, + "logits/rejected": -2.5168278217315674, + "logps/chosen": -217.0455780029297, + "logps/rejected": -497.91845703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.945396423339844, + "rewards/margins": 9.271712303161621, + "rewards/rejected": -21.21710968017578, + "step": 18933 + }, + { + "epoch": 2.94, + "learning_rate": 2.6037138855749776e-07, + "logits/chosen": -2.4352338314056396, + "logits/rejected": -1.71213960647583, + "logps/chosen": -240.4530487060547, + "logps/rejected": -392.533447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.663703918457031, + "rewards/margins": 13.12918472290039, + "rewards/rejected": -20.792888641357422, + "step": 18934 + }, + { + "epoch": 2.94, + "learning_rate": 2.596379480263499e-07, + "logits/chosen": -2.7070930004119873, + "logits/rejected": -2.577383041381836, + "logps/chosen": -263.44317626953125, + "logps/rejected": -393.53466796875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.165319442749023, + "rewards/margins": 8.228799819946289, + "rewards/rejected": -22.394119262695312, + "step": 18935 + }, + { + "epoch": 2.94, + "learning_rate": 2.58904507495202e-07, + "logits/chosen": -2.32452130317688, + "logits/rejected": -2.749032735824585, + "logps/chosen": -117.57647705078125, + "logps/rejected": -329.6968688964844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.22850513458252, + "rewards/margins": 10.56859302520752, + "rewards/rejected": -19.79709815979004, + "step": 18936 + }, + { + "epoch": 2.95, + "learning_rate": 2.5817106696405414e-07, + "logits/chosen": -1.5785315036773682, + "logits/rejected": -2.8621578216552734, + "logps/chosen": -261.6235656738281, + "logps/rejected": -604.0303344726562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.995218276977539, + "rewards/margins": 12.79195785522461, + "rewards/rejected": -22.78717613220215, + "step": 18937 + }, + { + "epoch": 2.95, + "learning_rate": 2.574376264329063e-07, + "logits/chosen": -0.9587469100952148, + "logits/rejected": -2.612370491027832, + "logps/chosen": -272.6190490722656, + "logps/rejected": -536.6455078125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.249403953552246, + "rewards/margins": 7.934016227722168, + "rewards/rejected": -18.183420181274414, + "step": 18938 + }, + { + "epoch": 2.95, + "learning_rate": 2.5670418590175837e-07, + "logits/chosen": -1.7959680557250977, + "logits/rejected": -2.116314649581909, + "logps/chosen": -544.203369140625, + "logps/rejected": -512.4609375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.353326797485352, + "rewards/margins": 8.046185493469238, + "rewards/rejected": -21.399513244628906, + "step": 18939 + }, + { + "epoch": 2.95, + "learning_rate": 2.559707453706105e-07, + "logits/chosen": -2.8103389739990234, + "logits/rejected": -2.0577919483184814, + "logps/chosen": -325.3892517089844, + "logps/rejected": -292.408203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.388477325439453, + "rewards/margins": 9.071649551391602, + "rewards/rejected": -18.460126876831055, + "step": 18940 + }, + { + "epoch": 2.95, + "learning_rate": 2.552373048394626e-07, + "logits/chosen": -2.5797860622406006, + "logits/rejected": -2.641387462615967, + "logps/chosen": -189.2558135986328, + "logps/rejected": -380.5741271972656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.845026969909668, + "rewards/margins": 9.320077896118164, + "rewards/rejected": -17.16510581970215, + "step": 18941 + }, + { + "epoch": 2.95, + "learning_rate": 2.5450386430831475e-07, + "logits/chosen": -0.6245595812797546, + "logits/rejected": -2.441849708557129, + "logps/chosen": -145.6758270263672, + "logps/rejected": -590.218994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.834623336791992, + "rewards/margins": 12.249523162841797, + "rewards/rejected": -24.084148406982422, + "step": 18942 + }, + { + "epoch": 2.95, + "learning_rate": 2.537704237771669e-07, + "logits/chosen": -2.8933753967285156, + "logits/rejected": -2.69142484664917, + "logps/chosen": -146.55087280273438, + "logps/rejected": -254.5218048095703, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.279375076293945, + "rewards/margins": 8.33355712890625, + "rewards/rejected": -17.612932205200195, + "step": 18943 + }, + { + "epoch": 2.95, + "learning_rate": 2.53036983246019e-07, + "logits/chosen": -1.086208462715149, + "logits/rejected": -2.4644343852996826, + "logps/chosen": -197.97703552246094, + "logps/rejected": -496.99237060546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.45292854309082, + "rewards/margins": 12.461750030517578, + "rewards/rejected": -22.9146785736084, + "step": 18944 + }, + { + "epoch": 2.95, + "learning_rate": 2.523035427148711e-07, + "logits/chosen": -2.743743896484375, + "logits/rejected": -2.864435911178589, + "logps/chosen": -138.0961151123047, + "logps/rejected": -323.6246337890625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.305960655212402, + "rewards/margins": 10.066184043884277, + "rewards/rejected": -20.37214469909668, + "step": 18945 + }, + { + "epoch": 2.95, + "learning_rate": 2.515701021837232e-07, + "logits/chosen": -2.6320247650146484, + "logits/rejected": -2.50592303276062, + "logps/chosen": -390.7819519042969, + "logps/rejected": -497.72589111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.36898422241211, + "rewards/margins": 12.15351676940918, + "rewards/rejected": -20.52250099182129, + "step": 18946 + }, + { + "epoch": 2.95, + "learning_rate": 2.5083666165257536e-07, + "logits/chosen": -1.8731156587600708, + "logits/rejected": -2.5620410442352295, + "logps/chosen": -181.30169677734375, + "logps/rejected": -469.33306884765625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.281316757202148, + "rewards/margins": 10.52956771850586, + "rewards/rejected": -21.810884475708008, + "step": 18947 + }, + { + "epoch": 2.95, + "learning_rate": 2.501032211214275e-07, + "logits/chosen": -2.021153688430786, + "logits/rejected": -2.315702199935913, + "logps/chosen": -201.3275604248047, + "logps/rejected": -458.63507080078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.356369018554688, + "rewards/margins": 15.607450485229492, + "rewards/rejected": -26.96381950378418, + "step": 18948 + }, + { + "epoch": 2.95, + "learning_rate": 2.493697805902796e-07, + "logits/chosen": -1.7078932523727417, + "logits/rejected": -2.694974422454834, + "logps/chosen": -168.76425170898438, + "logps/rejected": -400.8076171875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.795393943786621, + "rewards/margins": 10.265226364135742, + "rewards/rejected": -18.06062126159668, + "step": 18949 + }, + { + "epoch": 2.95, + "learning_rate": 2.4863634005913173e-07, + "logits/chosen": -1.5398021936416626, + "logits/rejected": -1.7191901206970215, + "logps/chosen": -397.4400939941406, + "logps/rejected": -373.64068603515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.738334655761719, + "rewards/margins": 8.42473030090332, + "rewards/rejected": -18.16306495666504, + "step": 18950 + }, + { + "epoch": 2.95, + "learning_rate": 2.479028995279838e-07, + "logits/chosen": -2.477830410003662, + "logits/rejected": -2.553537130355835, + "logps/chosen": -270.4991455078125, + "logps/rejected": -388.3831787109375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.879369735717773, + "rewards/margins": 8.253005981445312, + "rewards/rejected": -20.132375717163086, + "step": 18951 + }, + { + "epoch": 2.95, + "learning_rate": 2.471694589968359e-07, + "logits/chosen": -2.762702226638794, + "logits/rejected": -2.535062551498413, + "logps/chosen": -873.619873046875, + "logps/rejected": -748.1348876953125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.067346572875977, + "rewards/margins": 7.491455554962158, + "rewards/rejected": -20.558801651000977, + "step": 18952 + }, + { + "epoch": 2.95, + "learning_rate": 2.4643601846568806e-07, + "logits/chosen": -1.6447371244430542, + "logits/rejected": -2.574054718017578, + "logps/chosen": -175.0282440185547, + "logps/rejected": -413.38311767578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.304647445678711, + "rewards/margins": 9.491341590881348, + "rewards/rejected": -19.795989990234375, + "step": 18953 + }, + { + "epoch": 2.95, + "learning_rate": 2.4570257793454015e-07, + "logits/chosen": -1.4490060806274414, + "logits/rejected": -1.919581413269043, + "logps/chosen": -223.223388671875, + "logps/rejected": -634.1023559570312, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.922155380249023, + "rewards/margins": 8.13720703125, + "rewards/rejected": -19.059362411499023, + "step": 18954 + }, + { + "epoch": 2.95, + "learning_rate": 2.449691374033923e-07, + "logits/chosen": -1.6086971759796143, + "logits/rejected": -2.441222906112671, + "logps/chosen": -236.84031677246094, + "logps/rejected": -428.6446533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.672178745269775, + "rewards/margins": 12.14276123046875, + "rewards/rejected": -18.814939498901367, + "step": 18955 + }, + { + "epoch": 2.95, + "learning_rate": 2.442356968722444e-07, + "logits/chosen": -2.223022222518921, + "logits/rejected": -2.7236626148223877, + "logps/chosen": -392.79913330078125, + "logps/rejected": -457.85870361328125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.288365364074707, + "rewards/margins": 10.462743759155273, + "rewards/rejected": -20.751110076904297, + "step": 18956 + }, + { + "epoch": 2.95, + "learning_rate": 2.435022563410965e-07, + "logits/chosen": -1.9272221326828003, + "logits/rejected": -2.8796274662017822, + "logps/chosen": -249.00326538085938, + "logps/rejected": -392.44866943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.392145156860352, + "rewards/margins": 11.36213493347168, + "rewards/rejected": -16.75428009033203, + "step": 18957 + }, + { + "epoch": 2.95, + "learning_rate": 2.4276881580994867e-07, + "logits/chosen": -1.6583170890808105, + "logits/rejected": -2.8330135345458984, + "logps/chosen": -217.1848907470703, + "logps/rejected": -587.8308715820312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.527523040771484, + "rewards/margins": 9.58558177947998, + "rewards/rejected": -18.11310577392578, + "step": 18958 + }, + { + "epoch": 2.95, + "learning_rate": 2.4203537527880076e-07, + "logits/chosen": -1.7333412170410156, + "logits/rejected": -2.8601951599121094, + "logps/chosen": -125.59013366699219, + "logps/rejected": -385.6389465332031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2305803298950195, + "rewards/margins": 9.015392303466797, + "rewards/rejected": -16.245973587036133, + "step": 18959 + }, + { + "epoch": 2.95, + "learning_rate": 2.413019347476529e-07, + "logits/chosen": -2.3594167232513428, + "logits/rejected": -2.7918612957000732, + "logps/chosen": -524.2376708984375, + "logps/rejected": -499.4009704589844, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.093162536621094, + "rewards/margins": 4.537961959838867, + "rewards/rejected": -16.63112449645996, + "step": 18960 + }, + { + "epoch": 2.95, + "learning_rate": 2.40568494216505e-07, + "logits/chosen": -2.812572479248047, + "logits/rejected": -2.862074136734009, + "logps/chosen": -106.34452819824219, + "logps/rejected": -323.3211364746094, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.83328104019165, + "rewards/margins": 9.46411418914795, + "rewards/rejected": -16.297395706176758, + "step": 18961 + }, + { + "epoch": 2.95, + "learning_rate": 2.3983505368535713e-07, + "logits/chosen": -1.5614904165267944, + "logits/rejected": -2.2245612144470215, + "logps/chosen": -146.93911743164062, + "logps/rejected": -319.7657470703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.588235855102539, + "rewards/margins": 8.821807861328125, + "rewards/rejected": -20.410045623779297, + "step": 18962 + }, + { + "epoch": 2.95, + "learning_rate": 2.391016131542093e-07, + "logits/chosen": -2.862318515777588, + "logits/rejected": -2.7293896675109863, + "logps/chosen": -389.32720947265625, + "logps/rejected": -414.0838623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.210082054138184, + "rewards/margins": 11.101606369018555, + "rewards/rejected": -20.311687469482422, + "step": 18963 + }, + { + "epoch": 2.95, + "learning_rate": 2.3836817262306136e-07, + "logits/chosen": -2.342590808868408, + "logits/rejected": -2.768507719039917, + "logps/chosen": -100.07783508300781, + "logps/rejected": -410.8804931640625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0970354080200195, + "rewards/margins": 13.233380317687988, + "rewards/rejected": -19.330415725708008, + "step": 18964 + }, + { + "epoch": 2.95, + "learning_rate": 2.376347320919135e-07, + "logits/chosen": -2.0358028411865234, + "logits/rejected": -2.6670095920562744, + "logps/chosen": -458.6156311035156, + "logps/rejected": -594.599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.170196533203125, + "rewards/margins": 12.562893867492676, + "rewards/rejected": -20.733089447021484, + "step": 18965 + }, + { + "epoch": 2.95, + "learning_rate": 2.369012915607656e-07, + "logits/chosen": -1.3731178045272827, + "logits/rejected": -2.593048095703125, + "logps/chosen": -473.0919494628906, + "logps/rejected": -897.8858642578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.19057846069336, + "rewards/margins": 9.125526428222656, + "rewards/rejected": -23.316104888916016, + "step": 18966 + }, + { + "epoch": 2.95, + "learning_rate": 2.3616785102961771e-07, + "logits/chosen": -1.9726216793060303, + "logits/rejected": -2.747406005859375, + "logps/chosen": -194.36508178710938, + "logps/rejected": -361.937255859375, + "loss": 1.6988, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.963346481323242, + "rewards/margins": 6.545888423919678, + "rewards/rejected": -17.509235382080078, + "step": 18967 + }, + { + "epoch": 2.95, + "learning_rate": 2.354344104984698e-07, + "logits/chosen": -1.8229999542236328, + "logits/rejected": -2.610931158065796, + "logps/chosen": -186.34393310546875, + "logps/rejected": -317.435791015625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.331256866455078, + "rewards/margins": 7.048697471618652, + "rewards/rejected": -17.379955291748047, + "step": 18968 + }, + { + "epoch": 2.95, + "learning_rate": 2.3470096996732195e-07, + "logits/chosen": -2.744636297225952, + "logits/rejected": -2.082840919494629, + "logps/chosen": -598.1347045898438, + "logps/rejected": -609.6965942382812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.133578777313232, + "rewards/margins": 16.88184928894043, + "rewards/rejected": -24.015426635742188, + "step": 18969 + }, + { + "epoch": 2.95, + "learning_rate": 2.339675294361741e-07, + "logits/chosen": -2.9743778705596924, + "logits/rejected": -2.859367847442627, + "logps/chosen": -129.93551635742188, + "logps/rejected": -241.7827911376953, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.436813354492188, + "rewards/margins": 8.132308959960938, + "rewards/rejected": -17.569122314453125, + "step": 18970 + }, + { + "epoch": 2.95, + "learning_rate": 2.3323408890502618e-07, + "logits/chosen": -2.5607879161834717, + "logits/rejected": -2.4223406314849854, + "logps/chosen": -418.4527587890625, + "logps/rejected": -547.9884643554688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.109785079956055, + "rewards/margins": 9.644552230834961, + "rewards/rejected": -21.754337310791016, + "step": 18971 + }, + { + "epoch": 2.95, + "learning_rate": 2.3250064837387832e-07, + "logits/chosen": -2.897587299346924, + "logits/rejected": -2.928276538848877, + "logps/chosen": -172.70016479492188, + "logps/rejected": -229.4310302734375, + "loss": 0.2698, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.118846893310547, + "rewards/margins": 4.1976237297058105, + "rewards/rejected": -15.316471099853516, + "step": 18972 + }, + { + "epoch": 2.95, + "learning_rate": 2.3176720784273041e-07, + "logits/chosen": -2.0094118118286133, + "logits/rejected": -2.602490186691284, + "logps/chosen": -314.2969665527344, + "logps/rejected": -513.730224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.971396446228027, + "rewards/margins": 15.922147750854492, + "rewards/rejected": -21.893545150756836, + "step": 18973 + }, + { + "epoch": 2.95, + "learning_rate": 2.3103376731158256e-07, + "logits/chosen": -2.8413712978363037, + "logits/rejected": -2.8039042949676514, + "logps/chosen": -278.1662902832031, + "logps/rejected": -329.56182861328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.273704528808594, + "rewards/margins": 9.963385581970215, + "rewards/rejected": -18.237091064453125, + "step": 18974 + }, + { + "epoch": 2.95, + "learning_rate": 2.303003267804347e-07, + "logits/chosen": -2.096278190612793, + "logits/rejected": -2.4747586250305176, + "logps/chosen": -185.86904907226562, + "logps/rejected": -416.8028869628906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.603754043579102, + "rewards/margins": 15.688161849975586, + "rewards/rejected": -24.291915893554688, + "step": 18975 + }, + { + "epoch": 2.95, + "learning_rate": 2.295668862492868e-07, + "logits/chosen": -1.8263407945632935, + "logits/rejected": -2.5538570880889893, + "logps/chosen": -206.87522888183594, + "logps/rejected": -480.70355224609375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.216086387634277, + "rewards/margins": 10.577237129211426, + "rewards/rejected": -21.793323516845703, + "step": 18976 + }, + { + "epoch": 2.95, + "learning_rate": 2.288334457181389e-07, + "logits/chosen": -2.6325995922088623, + "logits/rejected": -2.946275234222412, + "logps/chosen": -539.888671875, + "logps/rejected": -429.7088317871094, + "loss": 0.0462, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5754075050354, + "rewards/margins": 7.218016624450684, + "rewards/rejected": -14.793424606323242, + "step": 18977 + }, + { + "epoch": 2.95, + "learning_rate": 2.28100005186991e-07, + "logits/chosen": -2.02531361579895, + "logits/rejected": -2.7763609886169434, + "logps/chosen": -319.16668701171875, + "logps/rejected": -518.60546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.21567440032959, + "rewards/margins": 12.728426933288574, + "rewards/rejected": -22.944101333618164, + "step": 18978 + }, + { + "epoch": 2.95, + "learning_rate": 2.2736656465584314e-07, + "logits/chosen": -3.0197064876556396, + "logits/rejected": -2.9474282264709473, + "logps/chosen": -100.68232727050781, + "logps/rejected": -359.36248779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.65812873840332, + "rewards/margins": 13.781352996826172, + "rewards/rejected": -20.439481735229492, + "step": 18979 + }, + { + "epoch": 2.95, + "learning_rate": 2.2663312412469528e-07, + "logits/chosen": -2.8604490756988525, + "logits/rejected": -2.7347824573516846, + "logps/chosen": -648.8466796875, + "logps/rejected": -588.6369018554688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.187549591064453, + "rewards/margins": 13.218610763549805, + "rewards/rejected": -20.406160354614258, + "step": 18980 + }, + { + "epoch": 2.95, + "learning_rate": 2.2589968359354737e-07, + "logits/chosen": -1.7718403339385986, + "logits/rejected": -2.3605751991271973, + "logps/chosen": -254.02413940429688, + "logps/rejected": -375.86737060546875, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.876455307006836, + "rewards/margins": 6.641017436981201, + "rewards/rejected": -20.517471313476562, + "step": 18981 + }, + { + "epoch": 2.95, + "learning_rate": 2.2516624306239952e-07, + "logits/chosen": -2.121216058731079, + "logits/rejected": -2.8459854125976562, + "logps/chosen": -180.35174560546875, + "logps/rejected": -444.4955749511719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.009026527404785, + "rewards/margins": 10.085512161254883, + "rewards/rejected": -19.094539642333984, + "step": 18982 + }, + { + "epoch": 2.95, + "learning_rate": 2.244328025312516e-07, + "logits/chosen": -2.579111099243164, + "logits/rejected": -1.5956212282180786, + "logps/chosen": -369.364013671875, + "logps/rejected": -333.9721984863281, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.196301460266113, + "rewards/margins": 8.486268997192383, + "rewards/rejected": -21.68256950378418, + "step": 18983 + }, + { + "epoch": 2.95, + "learning_rate": 2.2369936200010375e-07, + "logits/chosen": -2.2946128845214844, + "logits/rejected": -2.879845380783081, + "logps/chosen": -147.24700927734375, + "logps/rejected": -461.3204650878906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.037595748901367, + "rewards/margins": 12.681018829345703, + "rewards/rejected": -22.71861457824707, + "step": 18984 + }, + { + "epoch": 2.95, + "learning_rate": 2.229659214689559e-07, + "logits/chosen": -2.608808755874634, + "logits/rejected": -2.2046892642974854, + "logps/chosen": -487.3646240234375, + "logps/rejected": -552.832275390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.758577346801758, + "rewards/margins": 8.509378433227539, + "rewards/rejected": -21.267955780029297, + "step": 18985 + }, + { + "epoch": 2.95, + "learning_rate": 2.2223248093780798e-07, + "logits/chosen": -2.8544259071350098, + "logits/rejected": -2.6329092979431152, + "logps/chosen": -240.67791748046875, + "logps/rejected": -232.5821075439453, + "loss": 0.9383, + "rewards/accuracies": 0.5, + "rewards/chosen": -11.826787948608398, + "rewards/margins": 5.2275800704956055, + "rewards/rejected": -17.054367065429688, + "step": 18986 + }, + { + "epoch": 2.95, + "learning_rate": 2.2149904040666012e-07, + "logits/chosen": -2.645371913909912, + "logits/rejected": -2.9323647022247314, + "logps/chosen": -244.84115600585938, + "logps/rejected": -482.6045837402344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.903890609741211, + "rewards/margins": 14.940808296203613, + "rewards/rejected": -24.84469985961914, + "step": 18987 + }, + { + "epoch": 2.95, + "learning_rate": 2.2076559987551221e-07, + "logits/chosen": -2.3006203174591064, + "logits/rejected": -2.1854124069213867, + "logps/chosen": -290.83349609375, + "logps/rejected": -430.19757080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.207889556884766, + "rewards/margins": 13.709779739379883, + "rewards/rejected": -23.91766929626465, + "step": 18988 + }, + { + "epoch": 2.95, + "learning_rate": 2.2003215934436433e-07, + "logits/chosen": -2.8807504177093506, + "logits/rejected": -2.5184149742126465, + "logps/chosen": -375.9634704589844, + "logps/rejected": -640.2242431640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.396651268005371, + "rewards/margins": 9.256248474121094, + "rewards/rejected": -17.65290069580078, + "step": 18989 + }, + { + "epoch": 2.95, + "learning_rate": 2.1929871881321645e-07, + "logits/chosen": -2.841034173965454, + "logits/rejected": -2.911423921585083, + "logps/chosen": -174.56529235839844, + "logps/rejected": -206.35308837890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.81309700012207, + "rewards/margins": 9.045510292053223, + "rewards/rejected": -15.858606338500977, + "step": 18990 + }, + { + "epoch": 2.95, + "learning_rate": 2.1856527828206856e-07, + "logits/chosen": -1.6944690942764282, + "logits/rejected": -2.395203113555908, + "logps/chosen": -144.66549682617188, + "logps/rejected": -301.300537109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.607783317565918, + "rewards/margins": 10.209453582763672, + "rewards/rejected": -18.817237854003906, + "step": 18991 + }, + { + "epoch": 2.95, + "learning_rate": 2.1783183775092068e-07, + "logits/chosen": -2.1649727821350098, + "logits/rejected": -2.8518388271331787, + "logps/chosen": -182.74432373046875, + "logps/rejected": -575.1500244140625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.154412269592285, + "rewards/margins": 9.389558792114258, + "rewards/rejected": -19.54397201538086, + "step": 18992 + }, + { + "epoch": 2.95, + "learning_rate": 2.1709839721977282e-07, + "logits/chosen": -2.0030109882354736, + "logits/rejected": -2.6668715476989746, + "logps/chosen": -246.71969604492188, + "logps/rejected": -412.05975341796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.169939041137695, + "rewards/margins": 9.002664566040039, + "rewards/rejected": -15.172603607177734, + "step": 18993 + }, + { + "epoch": 2.95, + "learning_rate": 2.1636495668862494e-07, + "logits/chosen": -2.8772528171539307, + "logits/rejected": -2.7102348804473877, + "logps/chosen": -266.5904846191406, + "logps/rejected": -191.02838134765625, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.260261535644531, + "rewards/margins": 8.012724876403809, + "rewards/rejected": -15.272985458374023, + "step": 18994 + }, + { + "epoch": 2.95, + "learning_rate": 2.1563151615747706e-07, + "logits/chosen": -1.9969521760940552, + "logits/rejected": -2.572751045227051, + "logps/chosen": -154.0223846435547, + "logps/rejected": -317.52728271484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.169493675231934, + "rewards/margins": 11.213150024414062, + "rewards/rejected": -19.382644653320312, + "step": 18995 + }, + { + "epoch": 2.95, + "learning_rate": 2.1489807562632917e-07, + "logits/chosen": -2.4575235843658447, + "logits/rejected": -2.5549697875976562, + "logps/chosen": -467.3065185546875, + "logps/rejected": -588.6790771484375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.586725234985352, + "rewards/margins": 9.006429672241211, + "rewards/rejected": -21.593154907226562, + "step": 18996 + }, + { + "epoch": 2.95, + "learning_rate": 2.141646350951813e-07, + "logits/chosen": -2.5554895401000977, + "logits/rejected": -1.959607481956482, + "logps/chosen": -378.26336669921875, + "logps/rejected": -590.1649169921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.065997123718262, + "rewards/margins": 10.78177547454834, + "rewards/rejected": -20.8477725982666, + "step": 18997 + }, + { + "epoch": 2.95, + "learning_rate": 2.134311945640334e-07, + "logits/chosen": -2.7638063430786133, + "logits/rejected": -2.6260950565338135, + "logps/chosen": -256.3166809082031, + "logps/rejected": -431.4837951660156, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.968633651733398, + "rewards/margins": 6.851409912109375, + "rewards/rejected": -19.820043563842773, + "step": 18998 + }, + { + "epoch": 2.95, + "learning_rate": 2.1269775403288552e-07, + "logits/chosen": -2.62919545173645, + "logits/rejected": -2.073770523071289, + "logps/chosen": -338.9208068847656, + "logps/rejected": -300.5970458984375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.465619087219238, + "rewards/margins": 9.211909294128418, + "rewards/rejected": -16.677528381347656, + "step": 18999 + }, + { + "epoch": 2.95, + "learning_rate": 2.1196431350173764e-07, + "logits/chosen": -1.4398850202560425, + "logits/rejected": -2.2030880451202393, + "logps/chosen": -259.42169189453125, + "logps/rejected": -521.7786254882812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.318327903747559, + "rewards/margins": 13.929193496704102, + "rewards/rejected": -25.247522354125977, + "step": 19000 + }, + { + "epoch": 2.96, + "learning_rate": 2.1123087297058976e-07, + "logits/chosen": -2.2779221534729004, + "logits/rejected": -2.717113494873047, + "logps/chosen": -323.0811767578125, + "logps/rejected": -672.2117919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.907038688659668, + "rewards/margins": 16.720956802368164, + "rewards/rejected": -26.627994537353516, + "step": 19001 + }, + { + "epoch": 2.96, + "learning_rate": 2.1049743243944187e-07, + "logits/chosen": -2.554046154022217, + "logits/rejected": -0.948754072189331, + "logps/chosen": -327.9735107421875, + "logps/rejected": -374.90423583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.08066177368164, + "rewards/margins": 12.807596206665039, + "rewards/rejected": -22.88825798034668, + "step": 19002 + }, + { + "epoch": 2.96, + "learning_rate": 2.09763991908294e-07, + "logits/chosen": -2.6934192180633545, + "logits/rejected": -1.988906979560852, + "logps/chosen": -492.5622253417969, + "logps/rejected": -518.08544921875, + "loss": 0.0277, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.787033081054688, + "rewards/margins": 7.972047328948975, + "rewards/rejected": -19.75908088684082, + "step": 19003 + }, + { + "epoch": 2.96, + "learning_rate": 2.0903055137714613e-07, + "logits/chosen": -2.722536563873291, + "logits/rejected": -2.2827227115631104, + "logps/chosen": -952.0801391601562, + "logps/rejected": -681.3919677734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.241156578063965, + "rewards/margins": 7.831286430358887, + "rewards/rejected": -21.07244300842285, + "step": 19004 + }, + { + "epoch": 2.96, + "learning_rate": 2.0829711084599825e-07, + "logits/chosen": -2.71077299118042, + "logits/rejected": -2.0496623516082764, + "logps/chosen": -170.06422424316406, + "logps/rejected": -405.0473937988281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.582266807556152, + "rewards/margins": 13.117020606994629, + "rewards/rejected": -19.69928741455078, + "step": 19005 + }, + { + "epoch": 2.96, + "learning_rate": 2.0756367031485036e-07, + "logits/chosen": -1.809296727180481, + "logits/rejected": -2.7437903881073, + "logps/chosen": -498.8583679199219, + "logps/rejected": -961.452880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.958806991577148, + "rewards/margins": 16.72886085510254, + "rewards/rejected": -30.687667846679688, + "step": 19006 + }, + { + "epoch": 2.96, + "learning_rate": 2.0683022978370248e-07, + "logits/chosen": -2.3641834259033203, + "logits/rejected": -2.581045389175415, + "logps/chosen": -155.92718505859375, + "logps/rejected": -249.33456420898438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.953559875488281, + "rewards/margins": 10.730993270874023, + "rewards/rejected": -19.684553146362305, + "step": 19007 + }, + { + "epoch": 2.96, + "learning_rate": 2.060967892525546e-07, + "logits/chosen": -1.6895803213119507, + "logits/rejected": -2.280944347381592, + "logps/chosen": -253.6566162109375, + "logps/rejected": -399.46624755859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.042481422424316, + "rewards/margins": 8.354355812072754, + "rewards/rejected": -17.39683723449707, + "step": 19008 + }, + { + "epoch": 2.96, + "learning_rate": 2.0536334872140671e-07, + "logits/chosen": -2.081712484359741, + "logits/rejected": -2.4976770877838135, + "logps/chosen": -205.96241760253906, + "logps/rejected": -410.5141906738281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.997226715087891, + "rewards/margins": 11.036280632019043, + "rewards/rejected": -19.03350830078125, + "step": 19009 + }, + { + "epoch": 2.96, + "learning_rate": 2.0462990819025883e-07, + "logits/chosen": -2.0760653018951416, + "logits/rejected": -2.129262924194336, + "logps/chosen": -214.13455200195312, + "logps/rejected": -423.4967041015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.883528709411621, + "rewards/margins": 9.716495513916016, + "rewards/rejected": -19.600025177001953, + "step": 19010 + }, + { + "epoch": 2.96, + "learning_rate": 2.0389646765911095e-07, + "logits/chosen": -2.3037898540496826, + "logits/rejected": -2.6370973587036133, + "logps/chosen": -396.38336181640625, + "logps/rejected": -488.3988037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.600438594818115, + "rewards/margins": 10.876165390014648, + "rewards/rejected": -16.476604461669922, + "step": 19011 + }, + { + "epoch": 2.96, + "learning_rate": 2.0316302712796306e-07, + "logits/chosen": -2.8359992504119873, + "logits/rejected": -2.7517266273498535, + "logps/chosen": -200.8878173828125, + "logps/rejected": -456.782958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.696496963500977, + "rewards/margins": 14.384153366088867, + "rewards/rejected": -25.080650329589844, + "step": 19012 + }, + { + "epoch": 2.96, + "learning_rate": 2.0242958659681518e-07, + "logits/chosen": -2.7306039333343506, + "logits/rejected": -2.8680238723754883, + "logps/chosen": -147.01734924316406, + "logps/rejected": -202.65399169921875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.975921630859375, + "rewards/margins": 8.09192943572998, + "rewards/rejected": -17.06785011291504, + "step": 19013 + }, + { + "epoch": 2.96, + "learning_rate": 2.0169614606566732e-07, + "logits/chosen": -2.1703875064849854, + "logits/rejected": -2.6762514114379883, + "logps/chosen": -308.59564208984375, + "logps/rejected": -465.0931396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.648981094360352, + "rewards/margins": 11.331336975097656, + "rewards/rejected": -22.980318069458008, + "step": 19014 + }, + { + "epoch": 2.96, + "learning_rate": 2.0096270553451944e-07, + "logits/chosen": -2.5043647289276123, + "logits/rejected": -2.568267822265625, + "logps/chosen": -266.26031494140625, + "logps/rejected": -418.54132080078125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.291862487792969, + "rewards/margins": 14.329669952392578, + "rewards/rejected": -19.621532440185547, + "step": 19015 + }, + { + "epoch": 2.96, + "learning_rate": 2.0022926500337156e-07, + "logits/chosen": -2.7444570064544678, + "logits/rejected": -2.54659366607666, + "logps/chosen": -709.7499389648438, + "logps/rejected": -888.4242553710938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.937118530273438, + "rewards/margins": 13.533256530761719, + "rewards/rejected": -22.470375061035156, + "step": 19016 + }, + { + "epoch": 2.96, + "learning_rate": 1.9949582447222367e-07, + "logits/chosen": -1.0546047687530518, + "logits/rejected": -2.5030531883239746, + "logps/chosen": -168.94227600097656, + "logps/rejected": -617.453857421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6878533363342285, + "rewards/margins": 14.064803123474121, + "rewards/rejected": -21.752656936645508, + "step": 19017 + }, + { + "epoch": 2.96, + "learning_rate": 1.987623839410758e-07, + "logits/chosen": -2.5633702278137207, + "logits/rejected": -2.4623889923095703, + "logps/chosen": -218.2099609375, + "logps/rejected": -371.360107421875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.581418991088867, + "rewards/margins": 7.050267219543457, + "rewards/rejected": -16.63168716430664, + "step": 19018 + }, + { + "epoch": 2.96, + "learning_rate": 1.9802894340992788e-07, + "logits/chosen": -1.312247633934021, + "logits/rejected": -1.9423284530639648, + "logps/chosen": -211.06886291503906, + "logps/rejected": -633.1035766601562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.052967071533203, + "rewards/margins": 14.22150707244873, + "rewards/rejected": -24.27447509765625, + "step": 19019 + }, + { + "epoch": 2.96, + "learning_rate": 1.9729550287878002e-07, + "logits/chosen": -3.0881428718566895, + "logits/rejected": -2.6841213703155518, + "logps/chosen": -180.90447998046875, + "logps/rejected": -327.10394287109375, + "loss": 0.3306, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.430911540985107, + "rewards/margins": 7.679473876953125, + "rewards/rejected": -15.11038589477539, + "step": 19020 + }, + { + "epoch": 2.96, + "learning_rate": 1.9656206234763214e-07, + "logits/chosen": -1.874884009361267, + "logits/rejected": -2.614535331726074, + "logps/chosen": -433.90673828125, + "logps/rejected": -597.1796875, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.041303634643555, + "rewards/margins": 11.341593742370605, + "rewards/rejected": -22.382898330688477, + "step": 19021 + }, + { + "epoch": 2.96, + "learning_rate": 1.9582862181648426e-07, + "logits/chosen": -2.348942279815674, + "logits/rejected": -2.713423013687134, + "logps/chosen": -402.6327819824219, + "logps/rejected": -667.4825439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.585968017578125, + "rewards/margins": 13.812524795532227, + "rewards/rejected": -20.39849281311035, + "step": 19022 + }, + { + "epoch": 2.96, + "learning_rate": 1.9509518128533637e-07, + "logits/chosen": -1.0965971946716309, + "logits/rejected": -2.59360408782959, + "logps/chosen": -173.0583038330078, + "logps/rejected": -425.36627197265625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.336345672607422, + "rewards/margins": 11.010004043579102, + "rewards/rejected": -21.346349716186523, + "step": 19023 + }, + { + "epoch": 2.96, + "learning_rate": 1.943617407541885e-07, + "logits/chosen": -2.2497847080230713, + "logits/rejected": -2.425020456314087, + "logps/chosen": -606.1979370117188, + "logps/rejected": -392.0338439941406, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.783780097961426, + "rewards/margins": 9.119256973266602, + "rewards/rejected": -16.903038024902344, + "step": 19024 + }, + { + "epoch": 2.96, + "learning_rate": 1.9362830022304063e-07, + "logits/chosen": -1.5352067947387695, + "logits/rejected": -2.715810537338257, + "logps/chosen": -211.34262084960938, + "logps/rejected": -584.5281372070312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.850156784057617, + "rewards/margins": 13.565319061279297, + "rewards/rejected": -22.415475845336914, + "step": 19025 + }, + { + "epoch": 2.96, + "learning_rate": 1.9289485969189275e-07, + "logits/chosen": -2.8610007762908936, + "logits/rejected": -2.2037956714630127, + "logps/chosen": -553.301025390625, + "logps/rejected": -581.5738525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.002579689025879, + "rewards/margins": 12.371135711669922, + "rewards/rejected": -23.373714447021484, + "step": 19026 + }, + { + "epoch": 2.96, + "learning_rate": 1.9216141916074486e-07, + "logits/chosen": -2.0051660537719727, + "logits/rejected": -2.8734347820281982, + "logps/chosen": -148.6634521484375, + "logps/rejected": -616.6372680664062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.887412071228027, + "rewards/margins": 14.153240203857422, + "rewards/rejected": -23.040653228759766, + "step": 19027 + }, + { + "epoch": 2.96, + "learning_rate": 1.9142797862959698e-07, + "logits/chosen": -2.7623465061187744, + "logits/rejected": -2.879340648651123, + "logps/chosen": -164.03419494628906, + "logps/rejected": -291.5543212890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7735700607299805, + "rewards/margins": 9.359062194824219, + "rewards/rejected": -17.132633209228516, + "step": 19028 + }, + { + "epoch": 2.96, + "learning_rate": 1.9069453809844907e-07, + "logits/chosen": -2.7929861545562744, + "logits/rejected": -2.613542318344116, + "logps/chosen": -152.0133056640625, + "logps/rejected": -395.9777526855469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.86429500579834, + "rewards/margins": 10.449895858764648, + "rewards/rejected": -20.314189910888672, + "step": 19029 + }, + { + "epoch": 2.96, + "learning_rate": 1.8996109756730121e-07, + "logits/chosen": -2.6528968811035156, + "logits/rejected": -1.86306631565094, + "logps/chosen": -327.4613342285156, + "logps/rejected": -312.63555908203125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.425735473632812, + "rewards/margins": 7.186413288116455, + "rewards/rejected": -16.61214828491211, + "step": 19030 + }, + { + "epoch": 2.96, + "learning_rate": 1.8922765703615333e-07, + "logits/chosen": -2.8963656425476074, + "logits/rejected": -0.8341960310935974, + "logps/chosen": -813.0498046875, + "logps/rejected": -348.49346923828125, + "loss": 0.0323, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.552989959716797, + "rewards/margins": 4.269059658050537, + "rewards/rejected": -18.822050094604492, + "step": 19031 + }, + { + "epoch": 2.96, + "learning_rate": 1.8849421650500545e-07, + "logits/chosen": -2.5936989784240723, + "logits/rejected": -2.7029407024383545, + "logps/chosen": -118.979248046875, + "logps/rejected": -203.22589111328125, + "loss": 0.098, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.693485260009766, + "rewards/margins": 3.701072931289673, + "rewards/rejected": -12.39455795288086, + "step": 19032 + }, + { + "epoch": 2.96, + "learning_rate": 1.8776077597385756e-07, + "logits/chosen": -1.6289323568344116, + "logits/rejected": -2.9020888805389404, + "logps/chosen": -223.96463012695312, + "logps/rejected": -595.0661010742188, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.654228210449219, + "rewards/margins": 11.973791122436523, + "rewards/rejected": -20.628019332885742, + "step": 19033 + }, + { + "epoch": 2.96, + "learning_rate": 1.8702733544270968e-07, + "logits/chosen": -1.871665596961975, + "logits/rejected": -2.592571973800659, + "logps/chosen": -169.5196075439453, + "logps/rejected": -557.6871337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.632530212402344, + "rewards/margins": 20.772064208984375, + "rewards/rejected": -28.40459442138672, + "step": 19034 + }, + { + "epoch": 2.96, + "learning_rate": 1.862938949115618e-07, + "logits/chosen": -2.595616102218628, + "logits/rejected": -1.4490426778793335, + "logps/chosen": -275.12384033203125, + "logps/rejected": -429.1570739746094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.998672485351562, + "rewards/margins": 11.110563278198242, + "rewards/rejected": -22.109235763549805, + "step": 19035 + }, + { + "epoch": 2.96, + "learning_rate": 1.8556045438041394e-07, + "logits/chosen": -1.590714454650879, + "logits/rejected": -2.627899408340454, + "logps/chosen": -333.8360595703125, + "logps/rejected": -416.79132080078125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.064469337463379, + "rewards/margins": 8.329069137573242, + "rewards/rejected": -18.393539428710938, + "step": 19036 + }, + { + "epoch": 2.96, + "learning_rate": 1.8482701384926606e-07, + "logits/chosen": -2.2504806518554688, + "logits/rejected": -2.7062466144561768, + "logps/chosen": -634.00537109375, + "logps/rejected": -636.4370727539062, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.799966812133789, + "rewards/margins": 10.368717193603516, + "rewards/rejected": -21.168684005737305, + "step": 19037 + }, + { + "epoch": 2.96, + "learning_rate": 1.8409357331811817e-07, + "logits/chosen": -1.7927868366241455, + "logits/rejected": -2.5678608417510986, + "logps/chosen": -190.72018432617188, + "logps/rejected": -375.1488037109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3170671463012695, + "rewards/margins": 9.432291030883789, + "rewards/rejected": -15.749359130859375, + "step": 19038 + }, + { + "epoch": 2.96, + "learning_rate": 1.8336013278697026e-07, + "logits/chosen": -1.74729585647583, + "logits/rejected": -2.779184579849243, + "logps/chosen": -171.8812255859375, + "logps/rejected": -314.5086364746094, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.074435234069824, + "rewards/margins": 5.247627258300781, + "rewards/rejected": -15.322062492370605, + "step": 19039 + }, + { + "epoch": 2.96, + "learning_rate": 1.8262669225582238e-07, + "logits/chosen": -1.4347013235092163, + "logits/rejected": -2.4712061882019043, + "logps/chosen": -162.220947265625, + "logps/rejected": -482.945068359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.347103118896484, + "rewards/margins": 9.781261444091797, + "rewards/rejected": -19.12836456298828, + "step": 19040 + }, + { + "epoch": 2.96, + "learning_rate": 1.8189325172467452e-07, + "logits/chosen": -2.6919732093811035, + "logits/rejected": -2.3045296669006348, + "logps/chosen": -213.5662384033203, + "logps/rejected": -305.4118347167969, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.401935577392578, + "rewards/margins": 6.012892246246338, + "rewards/rejected": -17.414827346801758, + "step": 19041 + }, + { + "epoch": 2.96, + "learning_rate": 1.8115981119352664e-07, + "logits/chosen": -1.6725798845291138, + "logits/rejected": -2.5252106189727783, + "logps/chosen": -231.99343872070312, + "logps/rejected": -650.8876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.057903289794922, + "rewards/margins": 17.236480712890625, + "rewards/rejected": -25.294384002685547, + "step": 19042 + }, + { + "epoch": 2.96, + "learning_rate": 1.8042637066237875e-07, + "logits/chosen": -2.2959725856781006, + "logits/rejected": -2.1384196281433105, + "logps/chosen": -178.7203369140625, + "logps/rejected": -320.80889892578125, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.394556045532227, + "rewards/margins": 6.458395957946777, + "rewards/rejected": -19.852951049804688, + "step": 19043 + }, + { + "epoch": 2.96, + "learning_rate": 1.7969293013123087e-07, + "logits/chosen": -2.654193878173828, + "logits/rejected": -2.594028949737549, + "logps/chosen": -236.07925415039062, + "logps/rejected": -429.9128112792969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.507726669311523, + "rewards/margins": 12.540929794311523, + "rewards/rejected": -22.048656463623047, + "step": 19044 + }, + { + "epoch": 2.96, + "learning_rate": 1.78959489600083e-07, + "logits/chosen": -2.558928966522217, + "logits/rejected": -2.211707353591919, + "logps/chosen": -245.04733276367188, + "logps/rejected": -562.5859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.048078536987305, + "rewards/margins": 15.65826416015625, + "rewards/rejected": -27.706344604492188, + "step": 19045 + }, + { + "epoch": 2.96, + "learning_rate": 1.7822604906893513e-07, + "logits/chosen": -2.73248291015625, + "logits/rejected": -1.7218711376190186, + "logps/chosen": -202.638671875, + "logps/rejected": -252.14955139160156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.270524024963379, + "rewards/margins": 8.0557279586792, + "rewards/rejected": -17.326251983642578, + "step": 19046 + }, + { + "epoch": 2.96, + "learning_rate": 1.7749260853778725e-07, + "logits/chosen": -1.0151190757751465, + "logits/rejected": -2.5602383613586426, + "logps/chosen": -159.95521545410156, + "logps/rejected": -618.251953125, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.7354154586792, + "rewards/margins": 6.484383583068848, + "rewards/rejected": -17.219799041748047, + "step": 19047 + }, + { + "epoch": 2.96, + "learning_rate": 1.7675916800663936e-07, + "logits/chosen": -2.1912708282470703, + "logits/rejected": -2.6496148109436035, + "logps/chosen": -393.9544677734375, + "logps/rejected": -505.37066650390625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.790674209594727, + "rewards/margins": 11.256288528442383, + "rewards/rejected": -21.04696273803711, + "step": 19048 + }, + { + "epoch": 2.96, + "learning_rate": 1.7602572747549145e-07, + "logits/chosen": -2.6349575519561768, + "logits/rejected": -2.5203425884246826, + "logps/chosen": -342.48590087890625, + "logps/rejected": -390.9803466796875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.878449440002441, + "rewards/margins": 6.938575744628906, + "rewards/rejected": -17.81702423095703, + "step": 19049 + }, + { + "epoch": 2.96, + "learning_rate": 1.7529228694434357e-07, + "logits/chosen": -2.4406683444976807, + "logits/rejected": -2.8519885540008545, + "logps/chosen": -151.36611938476562, + "logps/rejected": -367.0008239746094, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.46146297454834, + "rewards/margins": 6.387533187866211, + "rewards/rejected": -17.848995208740234, + "step": 19050 + }, + { + "epoch": 2.96, + "learning_rate": 1.745588464131957e-07, + "logits/chosen": -0.582764208316803, + "logits/rejected": -2.746457815170288, + "logps/chosen": -186.89315795898438, + "logps/rejected": -494.3514404296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.59410285949707, + "rewards/margins": 14.857954025268555, + "rewards/rejected": -28.452056884765625, + "step": 19051 + }, + { + "epoch": 2.96, + "learning_rate": 1.7382540588204783e-07, + "logits/chosen": -2.2591421604156494, + "logits/rejected": -1.7399235963821411, + "logps/chosen": -194.5017852783203, + "logps/rejected": -362.32452392578125, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.40362548828125, + "rewards/margins": 6.418766498565674, + "rewards/rejected": -15.822391510009766, + "step": 19052 + }, + { + "epoch": 2.96, + "learning_rate": 1.7309196535089995e-07, + "logits/chosen": -2.663426160812378, + "logits/rejected": -2.186088800430298, + "logps/chosen": -215.79901123046875, + "logps/rejected": -375.2482604980469, + "loss": 0.243, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.149666786193848, + "rewards/margins": 4.835132122039795, + "rewards/rejected": -17.984798431396484, + "step": 19053 + }, + { + "epoch": 2.96, + "learning_rate": 1.7235852481975206e-07, + "logits/chosen": -2.9740731716156006, + "logits/rejected": -2.851969003677368, + "logps/chosen": -293.7713623046875, + "logps/rejected": -276.21417236328125, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.688407897949219, + "rewards/margins": 5.336637496948242, + "rewards/rejected": -15.025045394897461, + "step": 19054 + }, + { + "epoch": 2.96, + "learning_rate": 1.7162508428860418e-07, + "logits/chosen": -2.496016502380371, + "logits/rejected": -2.0406980514526367, + "logps/chosen": -181.836181640625, + "logps/rejected": -249.03683471679688, + "loss": 0.0487, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.07397747039795, + "rewards/margins": 5.525816917419434, + "rewards/rejected": -19.599794387817383, + "step": 19055 + }, + { + "epoch": 2.96, + "learning_rate": 1.708916437574563e-07, + "logits/chosen": -1.4886937141418457, + "logits/rejected": -2.930793285369873, + "logps/chosen": -106.44010925292969, + "logps/rejected": -277.4622802734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.608470916748047, + "rewards/margins": 8.82424259185791, + "rewards/rejected": -17.43271255493164, + "step": 19056 + }, + { + "epoch": 2.96, + "learning_rate": 1.7015820322630844e-07, + "logits/chosen": -1.4475586414337158, + "logits/rejected": -2.4367377758026123, + "logps/chosen": -219.62713623046875, + "logps/rejected": -386.86236572265625, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.880739212036133, + "rewards/margins": 7.491400241851807, + "rewards/rejected": -17.37213897705078, + "step": 19057 + }, + { + "epoch": 2.96, + "learning_rate": 1.6942476269516056e-07, + "logits/chosen": -2.26472806930542, + "logits/rejected": -2.4232277870178223, + "logps/chosen": -286.5162048339844, + "logps/rejected": -399.17913818359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.214725494384766, + "rewards/margins": 10.481731414794922, + "rewards/rejected": -18.696456909179688, + "step": 19058 + }, + { + "epoch": 2.96, + "learning_rate": 1.6869132216401265e-07, + "logits/chosen": -2.7984707355499268, + "logits/rejected": -2.5505027770996094, + "logps/chosen": -504.9315185546875, + "logps/rejected": -557.4617919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2931199073791504, + "rewards/margins": 14.977435111999512, + "rewards/rejected": -18.270553588867188, + "step": 19059 + }, + { + "epoch": 2.96, + "learning_rate": 1.6795788163286476e-07, + "logits/chosen": -2.5273733139038086, + "logits/rejected": -1.7904725074768066, + "logps/chosen": -556.501708984375, + "logps/rejected": -446.54400634765625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.182074546813965, + "rewards/margins": 8.006933212280273, + "rewards/rejected": -19.189006805419922, + "step": 19060 + }, + { + "epoch": 2.96, + "learning_rate": 1.6722444110171688e-07, + "logits/chosen": -3.0096070766448975, + "logits/rejected": -2.9042770862579346, + "logps/chosen": -198.10690307617188, + "logps/rejected": -254.31422424316406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.326232433319092, + "rewards/margins": 10.843602180480957, + "rewards/rejected": -16.16983413696289, + "step": 19061 + }, + { + "epoch": 2.96, + "learning_rate": 1.6649100057056902e-07, + "logits/chosen": -1.85910964012146, + "logits/rejected": -2.3271055221557617, + "logps/chosen": -321.311279296875, + "logps/rejected": -439.7462158203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.763160705566406, + "rewards/margins": 9.31417465209961, + "rewards/rejected": -24.077335357666016, + "step": 19062 + }, + { + "epoch": 2.96, + "learning_rate": 1.6575756003942114e-07, + "logits/chosen": -2.678382396697998, + "logits/rejected": -2.314570665359497, + "logps/chosen": -259.4586486816406, + "logps/rejected": -473.500732421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.149356842041016, + "rewards/margins": 9.958517074584961, + "rewards/rejected": -20.107873916625977, + "step": 19063 + }, + { + "epoch": 2.96, + "learning_rate": 1.6502411950827325e-07, + "logits/chosen": -2.5729787349700928, + "logits/rejected": -1.502497673034668, + "logps/chosen": -352.8092346191406, + "logps/rejected": -293.5575256347656, + "loss": 0.639, + "rewards/accuracies": 0.5, + "rewards/chosen": -10.03528881072998, + "rewards/margins": 4.822933197021484, + "rewards/rejected": -14.858221054077148, + "step": 19064 + }, + { + "epoch": 2.97, + "learning_rate": 1.6429067897712537e-07, + "logits/chosen": -1.7343131303787231, + "logits/rejected": -2.5449764728546143, + "logps/chosen": -279.4132385253906, + "logps/rejected": -648.491455078125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.735185623168945, + "rewards/margins": 11.392982482910156, + "rewards/rejected": -27.1281681060791, + "step": 19065 + }, + { + "epoch": 2.97, + "learning_rate": 1.635572384459775e-07, + "logits/chosen": -2.542569398880005, + "logits/rejected": -1.949344277381897, + "logps/chosen": -408.1123352050781, + "logps/rejected": -388.09283447265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.162145614624023, + "rewards/margins": 8.795549392700195, + "rewards/rejected": -18.95769500732422, + "step": 19066 + }, + { + "epoch": 2.97, + "learning_rate": 1.628237979148296e-07, + "logits/chosen": -2.407585382461548, + "logits/rejected": -2.818187952041626, + "logps/chosen": -265.3102111816406, + "logps/rejected": -427.0125732421875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.429899215698242, + "rewards/margins": 8.626134872436523, + "rewards/rejected": -20.056034088134766, + "step": 19067 + }, + { + "epoch": 2.97, + "learning_rate": 1.6209035738368175e-07, + "logits/chosen": -1.0937470197677612, + "logits/rejected": -2.39089298248291, + "logps/chosen": -150.2547607421875, + "logps/rejected": -431.1657409667969, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.901354789733887, + "rewards/margins": 9.147188186645508, + "rewards/rejected": -20.048542022705078, + "step": 19068 + }, + { + "epoch": 2.97, + "learning_rate": 1.6135691685253386e-07, + "logits/chosen": -2.816441059112549, + "logits/rejected": -2.5451819896698, + "logps/chosen": -663.9370727539062, + "logps/rejected": -628.72412109375, + "loss": 0.04, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.687657356262207, + "rewards/margins": 6.870382785797119, + "rewards/rejected": -18.558040618896484, + "step": 19069 + }, + { + "epoch": 2.97, + "learning_rate": 1.6062347632138595e-07, + "logits/chosen": -0.9157353639602661, + "logits/rejected": -2.45974063873291, + "logps/chosen": -168.80538940429688, + "logps/rejected": -540.589599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.026849746704102, + "rewards/margins": 13.866997718811035, + "rewards/rejected": -24.893848419189453, + "step": 19070 + }, + { + "epoch": 2.97, + "learning_rate": 1.5989003579023807e-07, + "logits/chosen": -2.1177706718444824, + "logits/rejected": -0.7057890295982361, + "logps/chosen": -485.70587158203125, + "logps/rejected": -285.59002685546875, + "loss": 0.0537, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.592611312866211, + "rewards/margins": 7.710665702819824, + "rewards/rejected": -17.30327796936035, + "step": 19071 + }, + { + "epoch": 2.97, + "learning_rate": 1.5915659525909019e-07, + "logits/chosen": -2.9360544681549072, + "logits/rejected": -2.9305107593536377, + "logps/chosen": -207.95489501953125, + "logps/rejected": -530.1541748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.547733306884766, + "rewards/margins": 12.424418449401855, + "rewards/rejected": -20.972152709960938, + "step": 19072 + }, + { + "epoch": 2.97, + "learning_rate": 1.5842315472794233e-07, + "logits/chosen": -2.2185821533203125, + "logits/rejected": -1.8675411939620972, + "logps/chosen": -496.6864013671875, + "logps/rejected": -467.3983154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.337798118591309, + "rewards/margins": 12.316240310668945, + "rewards/rejected": -23.65403938293457, + "step": 19073 + }, + { + "epoch": 2.97, + "learning_rate": 1.5768971419679445e-07, + "logits/chosen": -2.2797892093658447, + "logits/rejected": -2.7819833755493164, + "logps/chosen": -151.61041259765625, + "logps/rejected": -202.89341735839844, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.673254013061523, + "rewards/margins": 7.120828628540039, + "rewards/rejected": -13.794082641601562, + "step": 19074 + }, + { + "epoch": 2.97, + "learning_rate": 1.5695627366564656e-07, + "logits/chosen": -2.7468793392181396, + "logits/rejected": -1.8158535957336426, + "logps/chosen": -327.900146484375, + "logps/rejected": -234.7117919921875, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.928297996520996, + "rewards/margins": 4.73117733001709, + "rewards/rejected": -13.659475326538086, + "step": 19075 + }, + { + "epoch": 2.97, + "learning_rate": 1.5622283313449868e-07, + "logits/chosen": -1.6107927560806274, + "logits/rejected": -2.4052069187164307, + "logps/chosen": -287.7669677734375, + "logps/rejected": -432.2126159667969, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.007532119750977, + "rewards/margins": 12.606182098388672, + "rewards/rejected": -23.61371421813965, + "step": 19076 + }, + { + "epoch": 2.97, + "learning_rate": 1.554893926033508e-07, + "logits/chosen": -2.9764716625213623, + "logits/rejected": -2.3862051963806152, + "logps/chosen": -199.7125701904297, + "logps/rejected": -261.7716979980469, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.526442527770996, + "rewards/margins": 7.816965103149414, + "rewards/rejected": -17.343406677246094, + "step": 19077 + }, + { + "epoch": 2.97, + "learning_rate": 1.5475595207220294e-07, + "logits/chosen": -1.6707849502563477, + "logits/rejected": -2.078514814376831, + "logps/chosen": -416.39788818359375, + "logps/rejected": -623.5338745117188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.182680130004883, + "rewards/margins": 14.610929489135742, + "rewards/rejected": -23.793609619140625, + "step": 19078 + }, + { + "epoch": 2.97, + "learning_rate": 1.5402251154105506e-07, + "logits/chosen": -2.8820858001708984, + "logits/rejected": -2.70158314704895, + "logps/chosen": -666.8573608398438, + "logps/rejected": -728.8594970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.509601593017578, + "rewards/margins": 13.733528137207031, + "rewards/rejected": -22.24312973022461, + "step": 19079 + }, + { + "epoch": 2.97, + "learning_rate": 1.5328907100990715e-07, + "logits/chosen": -2.327024221420288, + "logits/rejected": -2.7918806076049805, + "logps/chosen": -301.15740966796875, + "logps/rejected": -509.6151428222656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.919107437133789, + "rewards/margins": 12.225337028503418, + "rewards/rejected": -22.14444351196289, + "step": 19080 + }, + { + "epoch": 2.97, + "learning_rate": 1.5255563047875926e-07, + "logits/chosen": -2.7572760581970215, + "logits/rejected": -3.0177841186523438, + "logps/chosen": -117.41332244873047, + "logps/rejected": -177.57891845703125, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.829977512359619, + "rewards/margins": 5.610561370849609, + "rewards/rejected": -12.44053840637207, + "step": 19081 + }, + { + "epoch": 2.97, + "learning_rate": 1.5182218994761138e-07, + "logits/chosen": -2.730924606323242, + "logits/rejected": -2.8811559677124023, + "logps/chosen": -304.0496826171875, + "logps/rejected": -297.92596435546875, + "loss": 0.1197, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.995434761047363, + "rewards/margins": 4.702814102172852, + "rewards/rejected": -15.698248863220215, + "step": 19082 + }, + { + "epoch": 2.97, + "learning_rate": 1.510887494164635e-07, + "logits/chosen": -2.9948809146881104, + "logits/rejected": -3.071657657623291, + "logps/chosen": -176.17007446289062, + "logps/rejected": -365.3170166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.38772201538086, + "rewards/margins": 12.445780754089355, + "rewards/rejected": -23.83350372314453, + "step": 19083 + }, + { + "epoch": 2.97, + "learning_rate": 1.5035530888531564e-07, + "logits/chosen": -2.6204044818878174, + "logits/rejected": -2.89747953414917, + "logps/chosen": -205.89572143554688, + "logps/rejected": -384.8004150390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.188613891601562, + "rewards/margins": 10.813859939575195, + "rewards/rejected": -19.002471923828125, + "step": 19084 + }, + { + "epoch": 2.97, + "learning_rate": 1.4962186835416775e-07, + "logits/chosen": -1.393415093421936, + "logits/rejected": -2.274815320968628, + "logps/chosen": -333.9801940917969, + "logps/rejected": -642.0294189453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.581184387207031, + "rewards/margins": 11.514347076416016, + "rewards/rejected": -23.095531463623047, + "step": 19085 + }, + { + "epoch": 2.97, + "learning_rate": 1.4888842782301987e-07, + "logits/chosen": -2.6091854572296143, + "logits/rejected": -2.6411545276641846, + "logps/chosen": -220.24822998046875, + "logps/rejected": -392.2950439453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.47700023651123, + "rewards/margins": 13.911785125732422, + "rewards/rejected": -24.38878631591797, + "step": 19086 + }, + { + "epoch": 2.97, + "learning_rate": 1.48154987291872e-07, + "logits/chosen": -1.5721439123153687, + "logits/rejected": -2.625114917755127, + "logps/chosen": -278.0501403808594, + "logps/rejected": -289.3793640136719, + "loss": 0.029, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.37181568145752, + "rewards/margins": 4.961699485778809, + "rewards/rejected": -16.333515167236328, + "step": 19087 + }, + { + "epoch": 2.97, + "learning_rate": 1.474215467607241e-07, + "logits/chosen": -2.8724894523620605, + "logits/rejected": -2.6232218742370605, + "logps/chosen": -389.58984375, + "logps/rejected": -414.64556884765625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.744755744934082, + "rewards/margins": 8.055562973022461, + "rewards/rejected": -16.80031967163086, + "step": 19088 + }, + { + "epoch": 2.97, + "learning_rate": 1.4668810622957625e-07, + "logits/chosen": -2.0046658515930176, + "logits/rejected": -2.7160069942474365, + "logps/chosen": -209.56195068359375, + "logps/rejected": -398.8016357421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.26522445678711, + "rewards/margins": 9.361299514770508, + "rewards/rejected": -17.626523971557617, + "step": 19089 + }, + { + "epoch": 2.97, + "learning_rate": 1.4595466569842834e-07, + "logits/chosen": -2.77644681930542, + "logits/rejected": -2.6285059452056885, + "logps/chosen": -567.2207641601562, + "logps/rejected": -792.3776245117188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.428253173828125, + "rewards/margins": 12.92879867553711, + "rewards/rejected": -20.357051849365234, + "step": 19090 + }, + { + "epoch": 2.97, + "learning_rate": 1.4522122516728045e-07, + "logits/chosen": -1.914406418800354, + "logits/rejected": -2.9023633003234863, + "logps/chosen": -161.78285217285156, + "logps/rejected": -492.001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.24915599822998, + "rewards/margins": 12.30502986907959, + "rewards/rejected": -21.55418586730957, + "step": 19091 + }, + { + "epoch": 2.97, + "learning_rate": 1.4448778463613257e-07, + "logits/chosen": -1.5692172050476074, + "logits/rejected": -2.714900493621826, + "logps/chosen": -179.0550537109375, + "logps/rejected": -519.5087280273438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.726576805114746, + "rewards/margins": 14.78200626373291, + "rewards/rejected": -21.508583068847656, + "step": 19092 + }, + { + "epoch": 2.97, + "learning_rate": 1.4375434410498469e-07, + "logits/chosen": -2.8788881301879883, + "logits/rejected": -2.7653424739837646, + "logps/chosen": -712.553955078125, + "logps/rejected": -857.4172973632812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.568170547485352, + "rewards/margins": 14.302152633666992, + "rewards/rejected": -19.870323181152344, + "step": 19093 + }, + { + "epoch": 2.97, + "learning_rate": 1.4302090357383683e-07, + "logits/chosen": -2.6675615310668945, + "logits/rejected": -1.739189863204956, + "logps/chosen": -451.85614013671875, + "logps/rejected": -371.6976318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.710877418518066, + "rewards/margins": 10.8471040725708, + "rewards/rejected": -21.557981491088867, + "step": 19094 + }, + { + "epoch": 2.97, + "learning_rate": 1.4228746304268895e-07, + "logits/chosen": -2.373015880584717, + "logits/rejected": -1.6065189838409424, + "logps/chosen": -185.13714599609375, + "logps/rejected": -469.41021728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.904512405395508, + "rewards/margins": 13.71462345123291, + "rewards/rejected": -21.619136810302734, + "step": 19095 + }, + { + "epoch": 2.97, + "learning_rate": 1.4155402251154106e-07, + "logits/chosen": -1.8339420557022095, + "logits/rejected": -2.789830446243286, + "logps/chosen": -237.34552001953125, + "logps/rejected": -523.2850341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.592899322509766, + "rewards/margins": 15.599906921386719, + "rewards/rejected": -28.192806243896484, + "step": 19096 + }, + { + "epoch": 2.97, + "learning_rate": 1.4082058198039318e-07, + "logits/chosen": -2.480790138244629, + "logits/rejected": -2.791489839553833, + "logps/chosen": -892.6826171875, + "logps/rejected": -1183.4954833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.736050605773926, + "rewards/margins": 26.515899658203125, + "rewards/rejected": -36.251953125, + "step": 19097 + }, + { + "epoch": 2.97, + "learning_rate": 1.400871414492453e-07, + "logits/chosen": -2.06752872467041, + "logits/rejected": -3.0516645908355713, + "logps/chosen": -183.6534423828125, + "logps/rejected": -481.07958984375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.255781173706055, + "rewards/margins": 14.60623550415039, + "rewards/rejected": -22.862016677856445, + "step": 19098 + }, + { + "epoch": 2.97, + "learning_rate": 1.393537009180974e-07, + "logits/chosen": -1.8228665590286255, + "logits/rejected": -2.788508176803589, + "logps/chosen": -108.99105834960938, + "logps/rejected": -331.7728576660156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.114487648010254, + "rewards/margins": 10.112711906433105, + "rewards/rejected": -19.22719955444336, + "step": 19099 + }, + { + "epoch": 2.97, + "learning_rate": 1.3862026038694953e-07, + "logits/chosen": -2.1774284839630127, + "logits/rejected": -2.915769338607788, + "logps/chosen": -114.73368835449219, + "logps/rejected": -365.21282958984375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.595717430114746, + "rewards/margins": 9.967179298400879, + "rewards/rejected": -16.562896728515625, + "step": 19100 + }, + { + "epoch": 2.97, + "learning_rate": 1.3788681985580165e-07, + "logits/chosen": -1.8790982961654663, + "logits/rejected": -2.6167337894439697, + "logps/chosen": -460.3890686035156, + "logps/rejected": -675.2622680664062, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.117422103881836, + "rewards/margins": 7.505460739135742, + "rewards/rejected": -17.622882843017578, + "step": 19101 + }, + { + "epoch": 2.97, + "learning_rate": 1.3715337932465376e-07, + "logits/chosen": -2.2628748416900635, + "logits/rejected": -2.761542797088623, + "logps/chosen": -193.5445556640625, + "logps/rejected": -372.76373291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7666826248168945, + "rewards/margins": 11.377528190612793, + "rewards/rejected": -17.144210815429688, + "step": 19102 + }, + { + "epoch": 2.97, + "learning_rate": 1.3641993879350588e-07, + "logits/chosen": -2.2397475242614746, + "logits/rejected": -2.78582501411438, + "logps/chosen": -224.7003631591797, + "logps/rejected": -430.1546630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.536421775817871, + "rewards/margins": 14.395120620727539, + "rewards/rejected": -21.931541442871094, + "step": 19103 + }, + { + "epoch": 2.97, + "learning_rate": 1.35686498262358e-07, + "logits/chosen": -2.779191732406616, + "logits/rejected": -2.545762300491333, + "logps/chosen": -323.67303466796875, + "logps/rejected": -410.4977722167969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.739020824432373, + "rewards/margins": 8.742606163024902, + "rewards/rejected": -16.481626510620117, + "step": 19104 + }, + { + "epoch": 2.97, + "learning_rate": 1.3495305773121014e-07, + "logits/chosen": -2.8305673599243164, + "logits/rejected": -1.9954279661178589, + "logps/chosen": -745.8911743164062, + "logps/rejected": -629.9679565429688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.008879661560059, + "rewards/margins": 10.093778610229492, + "rewards/rejected": -18.102657318115234, + "step": 19105 + }, + { + "epoch": 2.97, + "learning_rate": 1.3421961720006225e-07, + "logits/chosen": -0.42749685049057007, + "logits/rejected": -2.1991055011749268, + "logps/chosen": -163.4315185546875, + "logps/rejected": -686.8956909179688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.600723266601562, + "rewards/margins": 12.729137420654297, + "rewards/rejected": -25.32986068725586, + "step": 19106 + }, + { + "epoch": 2.97, + "learning_rate": 1.3348617666891437e-07, + "logits/chosen": -1.2711693048477173, + "logits/rejected": -2.7284061908721924, + "logps/chosen": -323.32025146484375, + "logps/rejected": -699.5301513671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.698972702026367, + "rewards/margins": 9.842388153076172, + "rewards/rejected": -22.541362762451172, + "step": 19107 + }, + { + "epoch": 2.97, + "learning_rate": 1.327527361377665e-07, + "logits/chosen": -2.267028570175171, + "logits/rejected": -2.6665780544281006, + "logps/chosen": -236.98919677734375, + "logps/rejected": -377.59930419921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.923851013183594, + "rewards/margins": 8.976560592651367, + "rewards/rejected": -18.90041160583496, + "step": 19108 + }, + { + "epoch": 2.97, + "learning_rate": 1.320192956066186e-07, + "logits/chosen": -2.8908121585845947, + "logits/rejected": -1.9827474355697632, + "logps/chosen": -307.0072021484375, + "logps/rejected": -323.05242919921875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.420211791992188, + "rewards/margins": 10.144959449768066, + "rewards/rejected": -19.565170288085938, + "step": 19109 + }, + { + "epoch": 2.97, + "learning_rate": 1.3128585507547072e-07, + "logits/chosen": -3.028208017349243, + "logits/rejected": -2.8302433490753174, + "logps/chosen": -223.73448181152344, + "logps/rejected": -277.7880859375, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.03695011138916, + "rewards/margins": 5.518265724182129, + "rewards/rejected": -16.55521583557129, + "step": 19110 + }, + { + "epoch": 2.97, + "learning_rate": 1.3055241454432284e-07, + "logits/chosen": -2.3899943828582764, + "logits/rejected": -2.572031021118164, + "logps/chosen": -116.04556274414062, + "logps/rejected": -286.47930908203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.439332008361816, + "rewards/margins": 11.517297744750977, + "rewards/rejected": -19.95663070678711, + "step": 19111 + }, + { + "epoch": 2.97, + "learning_rate": 1.2981897401317495e-07, + "logits/chosen": -1.3570072650909424, + "logits/rejected": -2.591618537902832, + "logps/chosen": -225.4737548828125, + "logps/rejected": -524.260498046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.721077919006348, + "rewards/margins": 7.668933868408203, + "rewards/rejected": -19.390010833740234, + "step": 19112 + }, + { + "epoch": 2.97, + "learning_rate": 1.2908553348202707e-07, + "logits/chosen": -2.4360313415527344, + "logits/rejected": -2.818263292312622, + "logps/chosen": -258.7918701171875, + "logps/rejected": -362.91339111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.158045768737793, + "rewards/margins": 12.425004959106445, + "rewards/rejected": -20.583049774169922, + "step": 19113 + }, + { + "epoch": 2.97, + "learning_rate": 1.2835209295087919e-07, + "logits/chosen": -2.6538522243499756, + "logits/rejected": -2.45637845993042, + "logps/chosen": -775.0679321289062, + "logps/rejected": -594.16162109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.618762016296387, + "rewards/margins": 8.790290832519531, + "rewards/rejected": -14.409051895141602, + "step": 19114 + }, + { + "epoch": 2.97, + "learning_rate": 1.276186524197313e-07, + "logits/chosen": -2.4016435146331787, + "logits/rejected": -2.6392860412597656, + "logps/chosen": -562.8341064453125, + "logps/rejected": -412.6886291503906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.0148344039917, + "rewards/margins": 8.039013862609863, + "rewards/rejected": -19.053848266601562, + "step": 19115 + }, + { + "epoch": 2.97, + "learning_rate": 1.2688521188858345e-07, + "logits/chosen": -1.6923110485076904, + "logits/rejected": -2.5943446159362793, + "logps/chosen": -307.4402160644531, + "logps/rejected": -431.83514404296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.85618782043457, + "rewards/margins": 10.297487258911133, + "rewards/rejected": -19.153675079345703, + "step": 19116 + }, + { + "epoch": 2.97, + "learning_rate": 1.2615177135743556e-07, + "logits/chosen": -2.6789815425872803, + "logits/rejected": -1.5020208358764648, + "logps/chosen": -404.2049865722656, + "logps/rejected": -412.87200927734375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.516796112060547, + "rewards/margins": 8.663902282714844, + "rewards/rejected": -20.18069839477539, + "step": 19117 + }, + { + "epoch": 2.97, + "learning_rate": 1.2541833082628768e-07, + "logits/chosen": -2.4285333156585693, + "logits/rejected": -2.7395706176757812, + "logps/chosen": -136.2532958984375, + "logps/rejected": -337.88568115234375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9829936027526855, + "rewards/margins": 9.897619247436523, + "rewards/rejected": -16.880611419677734, + "step": 19118 + }, + { + "epoch": 2.97, + "learning_rate": 1.246848902951398e-07, + "logits/chosen": -2.361356496810913, + "logits/rejected": -2.90507173538208, + "logps/chosen": -416.16204833984375, + "logps/rejected": -578.9590454101562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.426217079162598, + "rewards/margins": 10.506548881530762, + "rewards/rejected": -22.93276596069336, + "step": 19119 + }, + { + "epoch": 2.97, + "learning_rate": 1.239514497639919e-07, + "logits/chosen": -2.0808773040771484, + "logits/rejected": -2.990523099899292, + "logps/chosen": -410.2067565917969, + "logps/rejected": -540.620849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.770843505859375, + "rewards/margins": 11.491065979003906, + "rewards/rejected": -20.26190948486328, + "step": 19120 + }, + { + "epoch": 2.97, + "learning_rate": 1.2321800923284403e-07, + "logits/chosen": -2.5228254795074463, + "logits/rejected": -2.800467014312744, + "logps/chosen": -175.82313537597656, + "logps/rejected": -446.8352355957031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.105432510375977, + "rewards/margins": 10.789871215820312, + "rewards/rejected": -18.89530372619629, + "step": 19121 + }, + { + "epoch": 2.97, + "learning_rate": 1.2248456870169614e-07, + "logits/chosen": -2.622277021408081, + "logits/rejected": -2.9264676570892334, + "logps/chosen": -223.6639404296875, + "logps/rejected": -583.645751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.220244407653809, + "rewards/margins": 12.668965339660645, + "rewards/rejected": -20.889209747314453, + "step": 19122 + }, + { + "epoch": 2.97, + "learning_rate": 1.2175112817054826e-07, + "logits/chosen": -1.827857494354248, + "logits/rejected": -2.608673334121704, + "logps/chosen": -193.37442016601562, + "logps/rejected": -478.4400329589844, + "loss": 0.1446, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.73575210571289, + "rewards/margins": 8.420040130615234, + "rewards/rejected": -21.155792236328125, + "step": 19123 + }, + { + "epoch": 2.97, + "learning_rate": 1.2101768763940038e-07, + "logits/chosen": -2.920811176300049, + "logits/rejected": -2.9031877517700195, + "logps/chosen": -134.18798828125, + "logps/rejected": -238.27206420898438, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.42780590057373, + "rewards/margins": 7.290406227111816, + "rewards/rejected": -18.718212127685547, + "step": 19124 + }, + { + "epoch": 2.97, + "learning_rate": 1.202842471082525e-07, + "logits/chosen": -1.9385361671447754, + "logits/rejected": -2.5216782093048096, + "logps/chosen": -177.0924072265625, + "logps/rejected": -383.650146484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6169962882995605, + "rewards/margins": 11.430885314941406, + "rewards/rejected": -17.047882080078125, + "step": 19125 + }, + { + "epoch": 2.97, + "learning_rate": 1.1955080657710464e-07, + "logits/chosen": -1.1956660747528076, + "logits/rejected": -2.732255220413208, + "logps/chosen": -132.2200164794922, + "logps/rejected": -509.04296875, + "loss": 0.0777, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.70797348022461, + "rewards/margins": 4.887881755828857, + "rewards/rejected": -16.595855712890625, + "step": 19126 + }, + { + "epoch": 2.97, + "learning_rate": 1.1881736604595675e-07, + "logits/chosen": -2.5379483699798584, + "logits/rejected": -1.9988863468170166, + "logps/chosen": -562.60791015625, + "logps/rejected": -530.9359130859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.7262601852417, + "rewards/margins": 14.515583992004395, + "rewards/rejected": -23.241844177246094, + "step": 19127 + }, + { + "epoch": 2.97, + "learning_rate": 1.1808392551480886e-07, + "logits/chosen": -1.3407752513885498, + "logits/rejected": -2.1982061862945557, + "logps/chosen": -439.871826171875, + "logps/rejected": -511.8341979980469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.19831657409668, + "rewards/margins": 13.411947250366211, + "rewards/rejected": -24.61026382446289, + "step": 19128 + }, + { + "epoch": 2.97, + "learning_rate": 1.1735048498366097e-07, + "logits/chosen": -2.710045576095581, + "logits/rejected": -2.607025623321533, + "logps/chosen": -191.91497802734375, + "logps/rejected": -321.504150390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.242209434509277, + "rewards/margins": 10.17538833618164, + "rewards/rejected": -16.4175968170166, + "step": 19129 + }, + { + "epoch": 2.98, + "learning_rate": 1.1661704445251309e-07, + "logits/chosen": -2.0627472400665283, + "logits/rejected": -2.3720719814300537, + "logps/chosen": -356.3330383300781, + "logps/rejected": -587.7278442382812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.965242385864258, + "rewards/margins": 12.708744049072266, + "rewards/rejected": -21.673986434936523, + "step": 19130 + }, + { + "epoch": 2.98, + "learning_rate": 1.1588360392136521e-07, + "logits/chosen": -2.2437875270843506, + "logits/rejected": -2.896761894226074, + "logps/chosen": -283.06829833984375, + "logps/rejected": -511.50885009765625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.245798110961914, + "rewards/margins": 8.42003059387207, + "rewards/rejected": -16.665828704833984, + "step": 19131 + }, + { + "epoch": 2.98, + "learning_rate": 1.1515016339021735e-07, + "logits/chosen": -2.2140040397644043, + "logits/rejected": -2.8579845428466797, + "logps/chosen": -302.25677490234375, + "logps/rejected": -471.69677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.94211483001709, + "rewards/margins": 13.466695785522461, + "rewards/rejected": -21.408809661865234, + "step": 19132 + }, + { + "epoch": 2.98, + "learning_rate": 1.1441672285906945e-07, + "logits/chosen": -2.955517292022705, + "logits/rejected": -2.998415946960449, + "logps/chosen": -107.44366455078125, + "logps/rejected": -497.32452392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8806352615356445, + "rewards/margins": 12.551203727722168, + "rewards/rejected": -18.431838989257812, + "step": 19133 + }, + { + "epoch": 2.98, + "learning_rate": 1.1368328232792157e-07, + "logits/chosen": -2.8794593811035156, + "logits/rejected": -2.6749937534332275, + "logps/chosen": -306.56683349609375, + "logps/rejected": -656.7460327148438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.541237831115723, + "rewards/margins": 13.83398723602295, + "rewards/rejected": -22.375225067138672, + "step": 19134 + }, + { + "epoch": 2.98, + "learning_rate": 1.1294984179677369e-07, + "logits/chosen": -2.7623727321624756, + "logits/rejected": -2.9577863216400146, + "logps/chosen": -167.42910766601562, + "logps/rejected": -255.34080505371094, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.653522491455078, + "rewards/margins": 7.334464073181152, + "rewards/rejected": -16.987987518310547, + "step": 19135 + }, + { + "epoch": 2.98, + "learning_rate": 1.122164012656258e-07, + "logits/chosen": -2.410012722015381, + "logits/rejected": -2.4011709690093994, + "logps/chosen": -336.9117431640625, + "logps/rejected": -493.003662109375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.548114776611328, + "rewards/margins": 10.116120338439941, + "rewards/rejected": -19.664234161376953, + "step": 19136 + }, + { + "epoch": 2.98, + "learning_rate": 1.1148296073447795e-07, + "logits/chosen": -1.2909600734710693, + "logits/rejected": -2.752699136734009, + "logps/chosen": -188.0487823486328, + "logps/rejected": -635.1117553710938, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.303360939025879, + "rewards/margins": 6.816149711608887, + "rewards/rejected": -18.119510650634766, + "step": 19137 + }, + { + "epoch": 2.98, + "learning_rate": 1.1074952020333006e-07, + "logits/chosen": -2.576702117919922, + "logits/rejected": -1.987963318824768, + "logps/chosen": -284.5608825683594, + "logps/rejected": -366.402587890625, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.609746932983398, + "rewards/margins": 6.576240062713623, + "rewards/rejected": -17.185985565185547, + "step": 19138 + }, + { + "epoch": 2.98, + "learning_rate": 1.1001607967218217e-07, + "logits/chosen": -2.716323137283325, + "logits/rejected": -1.7435129880905151, + "logps/chosen": -529.7391967773438, + "logps/rejected": -315.69293212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.541109085083008, + "rewards/margins": 14.939336776733398, + "rewards/rejected": -21.480445861816406, + "step": 19139 + }, + { + "epoch": 2.98, + "learning_rate": 1.0928263914103428e-07, + "logits/chosen": -2.7401180267333984, + "logits/rejected": -1.6926672458648682, + "logps/chosen": -447.4640808105469, + "logps/rejected": -558.1973266601562, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.578128814697266, + "rewards/margins": 8.77760124206543, + "rewards/rejected": -18.355728149414062, + "step": 19140 + }, + { + "epoch": 2.98, + "learning_rate": 1.0854919860988641e-07, + "logits/chosen": -2.96571946144104, + "logits/rejected": -2.4672112464904785, + "logps/chosen": -975.937255859375, + "logps/rejected": -666.711181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.686339378356934, + "rewards/margins": 11.952449798583984, + "rewards/rejected": -17.638790130615234, + "step": 19141 + }, + { + "epoch": 2.98, + "learning_rate": 1.0781575807873853e-07, + "logits/chosen": -2.3896851539611816, + "logits/rejected": -2.7354984283447266, + "logps/chosen": -574.2765502929688, + "logps/rejected": -725.5598754882812, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.902149200439453, + "rewards/margins": 9.323204040527344, + "rewards/rejected": -17.225353240966797, + "step": 19142 + }, + { + "epoch": 2.98, + "learning_rate": 1.0708231754759064e-07, + "logits/chosen": -1.6437429189682007, + "logits/rejected": -2.936650276184082, + "logps/chosen": -579.3407592773438, + "logps/rejected": -671.160400390625, + "loss": 0.036, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.339105606079102, + "rewards/margins": 12.088552474975586, + "rewards/rejected": -19.427658081054688, + "step": 19143 + }, + { + "epoch": 2.98, + "learning_rate": 1.0634887701644276e-07, + "logits/chosen": -2.5884063243865967, + "logits/rejected": -2.1210832595825195, + "logps/chosen": -476.7791442871094, + "logps/rejected": -588.2476806640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.711356163024902, + "rewards/margins": 9.394119262695312, + "rewards/rejected": -21.10547637939453, + "step": 19144 + }, + { + "epoch": 2.98, + "learning_rate": 1.0561543648529488e-07, + "logits/chosen": -1.6534236669540405, + "logits/rejected": -2.4203646183013916, + "logps/chosen": -188.02638244628906, + "logps/rejected": -415.5995178222656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.326976776123047, + "rewards/margins": 13.089008331298828, + "rewards/rejected": -22.415985107421875, + "step": 19145 + }, + { + "epoch": 2.98, + "learning_rate": 1.04881995954147e-07, + "logits/chosen": -2.6432485580444336, + "logits/rejected": -2.3218209743499756, + "logps/chosen": -372.677978515625, + "logps/rejected": -418.663818359375, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.36324691772461, + "rewards/margins": 7.9245829582214355, + "rewards/rejected": -22.287830352783203, + "step": 19146 + }, + { + "epoch": 2.98, + "learning_rate": 1.0414855542299912e-07, + "logits/chosen": -2.658064126968384, + "logits/rejected": -2.7379374504089355, + "logps/chosen": -525.6357421875, + "logps/rejected": -825.7991943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.294469833374023, + "rewards/margins": 14.370636940002441, + "rewards/rejected": -26.66510772705078, + "step": 19147 + }, + { + "epoch": 2.98, + "learning_rate": 1.0341511489185124e-07, + "logits/chosen": -2.886079788208008, + "logits/rejected": -2.3155627250671387, + "logps/chosen": -243.48374938964844, + "logps/rejected": -375.6751403808594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.252102851867676, + "rewards/margins": 12.370929718017578, + "rewards/rejected": -20.623031616210938, + "step": 19148 + }, + { + "epoch": 2.98, + "learning_rate": 1.0268167436070336e-07, + "logits/chosen": -2.8101320266723633, + "logits/rejected": -2.3821208477020264, + "logps/chosen": -280.5772399902344, + "logps/rejected": -260.61700439453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.764330863952637, + "rewards/margins": 9.553448677062988, + "rewards/rejected": -15.317779541015625, + "step": 19149 + }, + { + "epoch": 2.98, + "learning_rate": 1.0194823382955547e-07, + "logits/chosen": -1.427350640296936, + "logits/rejected": -2.4656906127929688, + "logps/chosen": -212.09353637695312, + "logps/rejected": -880.525634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.099942207336426, + "rewards/margins": 19.0224666595459, + "rewards/rejected": -29.12240982055664, + "step": 19150 + }, + { + "epoch": 2.98, + "learning_rate": 1.0121479329840759e-07, + "logits/chosen": -2.933448553085327, + "logits/rejected": -0.7008412480354309, + "logps/chosen": -1064.203125, + "logps/rejected": -373.901123046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.89431381225586, + "rewards/margins": 11.541645050048828, + "rewards/rejected": -20.435958862304688, + "step": 19151 + }, + { + "epoch": 2.98, + "learning_rate": 1.0048135276725972e-07, + "logits/chosen": -2.329716444015503, + "logits/rejected": -2.528974771499634, + "logps/chosen": -197.4138946533203, + "logps/rejected": -365.99517822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.522491455078125, + "rewards/margins": 9.677760124206543, + "rewards/rejected": -19.200252532958984, + "step": 19152 + }, + { + "epoch": 2.98, + "learning_rate": 9.974791223611184e-08, + "logits/chosen": -2.372999429702759, + "logits/rejected": -2.7600789070129395, + "logps/chosen": -181.10711669921875, + "logps/rejected": -225.79306030273438, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.793608665466309, + "rewards/margins": 7.176913261413574, + "rewards/rejected": -15.970521926879883, + "step": 19153 + }, + { + "epoch": 2.98, + "learning_rate": 9.901447170496394e-08, + "logits/chosen": -1.3929483890533447, + "logits/rejected": -1.977504014968872, + "logps/chosen": -535.0396118164062, + "logps/rejected": -846.5072021484375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.749991416931152, + "rewards/margins": 17.723718643188477, + "rewards/rejected": -30.473709106445312, + "step": 19154 + }, + { + "epoch": 2.98, + "learning_rate": 9.828103117381607e-08, + "logits/chosen": -2.763789653778076, + "logits/rejected": -2.8634514808654785, + "logps/chosen": -588.3980712890625, + "logps/rejected": -750.989501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.022710800170898, + "rewards/margins": 13.573116302490234, + "rewards/rejected": -19.595827102661133, + "step": 19155 + }, + { + "epoch": 2.98, + "learning_rate": 9.754759064266819e-08, + "logits/chosen": -0.6281614303588867, + "logits/rejected": -1.340733528137207, + "logps/chosen": -359.0935363769531, + "logps/rejected": -497.89605712890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.97062873840332, + "rewards/margins": 11.921710968017578, + "rewards/rejected": -20.89234161376953, + "step": 19156 + }, + { + "epoch": 2.98, + "learning_rate": 9.681415011152032e-08, + "logits/chosen": -2.4283602237701416, + "logits/rejected": -2.694380521774292, + "logps/chosen": -477.09100341796875, + "logps/rejected": -514.1270751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.672581672668457, + "rewards/margins": 13.418577194213867, + "rewards/rejected": -22.09115982055664, + "step": 19157 + }, + { + "epoch": 2.98, + "learning_rate": 9.608070958037243e-08, + "logits/chosen": -1.1587034463882446, + "logits/rejected": -2.797295093536377, + "logps/chosen": -188.69537353515625, + "logps/rejected": -504.09600830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.463221549987793, + "rewards/margins": 9.605864524841309, + "rewards/rejected": -19.0690860748291, + "step": 19158 + }, + { + "epoch": 2.98, + "learning_rate": 9.534726904922454e-08, + "logits/chosen": -2.2357254028320312, + "logits/rejected": -2.2654130458831787, + "logps/chosen": -205.30288696289062, + "logps/rejected": -500.39434814453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.747373580932617, + "rewards/margins": 9.499984741210938, + "rewards/rejected": -18.247358322143555, + "step": 19159 + }, + { + "epoch": 2.98, + "learning_rate": 9.461382851807667e-08, + "logits/chosen": -2.101764440536499, + "logits/rejected": -2.404348611831665, + "logps/chosen": -229.043212890625, + "logps/rejected": -303.0050048828125, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.698657989501953, + "rewards/margins": 6.245082378387451, + "rewards/rejected": -16.943740844726562, + "step": 19160 + }, + { + "epoch": 2.98, + "learning_rate": 9.388038798692878e-08, + "logits/chosen": -2.132868528366089, + "logits/rejected": -2.4670844078063965, + "logps/chosen": -214.32403564453125, + "logps/rejected": -518.176513671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.401973724365234, + "rewards/margins": 12.701415061950684, + "rewards/rejected": -23.1033878326416, + "step": 19161 + }, + { + "epoch": 2.98, + "learning_rate": 9.31469474557809e-08, + "logits/chosen": -2.617145538330078, + "logits/rejected": -2.1193182468414307, + "logps/chosen": -377.54559326171875, + "logps/rejected": -423.93634033203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.413692474365234, + "rewards/margins": 8.50222396850586, + "rewards/rejected": -19.915916442871094, + "step": 19162 + }, + { + "epoch": 2.98, + "learning_rate": 9.241350692463303e-08, + "logits/chosen": -2.7429418563842773, + "logits/rejected": -3.0083510875701904, + "logps/chosen": -117.62489318847656, + "logps/rejected": -311.6793212890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.906576156616211, + "rewards/margins": 12.056173324584961, + "rewards/rejected": -21.962749481201172, + "step": 19163 + }, + { + "epoch": 2.98, + "learning_rate": 9.168006639348513e-08, + "logits/chosen": -1.8409315347671509, + "logits/rejected": -2.72866153717041, + "logps/chosen": -267.7762451171875, + "logps/rejected": -731.6898803710938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.942261695861816, + "rewards/margins": 18.12331199645996, + "rewards/rejected": -30.065574645996094, + "step": 19164 + }, + { + "epoch": 2.98, + "learning_rate": 9.094662586233726e-08, + "logits/chosen": -2.461361885070801, + "logits/rejected": -3.032522201538086, + "logps/chosen": -135.31246948242188, + "logps/rejected": -308.0784606933594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.748188972473145, + "rewards/margins": 9.848323822021484, + "rewards/rejected": -18.596511840820312, + "step": 19165 + }, + { + "epoch": 2.98, + "learning_rate": 9.021318533118938e-08, + "logits/chosen": -0.7622181177139282, + "logits/rejected": -1.3784493207931519, + "logps/chosen": -119.35029602050781, + "logps/rejected": -725.6796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.527170658111572, + "rewards/margins": 49.742366790771484, + "rewards/rejected": -55.26953887939453, + "step": 19166 + }, + { + "epoch": 2.98, + "learning_rate": 8.94797448000415e-08, + "logits/chosen": -2.1994736194610596, + "logits/rejected": -2.8581418991088867, + "logps/chosen": -137.6761474609375, + "logps/rejected": -372.2494812011719, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.146751403808594, + "rewards/margins": 7.5715155601501465, + "rewards/rejected": -15.718267440795898, + "step": 19167 + }, + { + "epoch": 2.98, + "learning_rate": 8.874630426889362e-08, + "logits/chosen": -2.7321109771728516, + "logits/rejected": -1.5910507440567017, + "logps/chosen": -747.5558471679688, + "logps/rejected": -559.0501708984375, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.141908645629883, + "rewards/margins": 8.312856674194336, + "rewards/rejected": -18.45476531982422, + "step": 19168 + }, + { + "epoch": 2.98, + "learning_rate": 8.801286373774573e-08, + "logits/chosen": -1.8071445226669312, + "logits/rejected": -2.5255727767944336, + "logps/chosen": -204.24517822265625, + "logps/rejected": -373.60498046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.421430587768555, + "rewards/margins": 12.93783950805664, + "rewards/rejected": -20.359270095825195, + "step": 19169 + }, + { + "epoch": 2.98, + "learning_rate": 8.727942320659784e-08, + "logits/chosen": -1.0946364402770996, + "logits/rejected": -2.4011850357055664, + "logps/chosen": -134.61154174804688, + "logps/rejected": -358.0871276855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.950940132141113, + "rewards/margins": 13.098640441894531, + "rewards/rejected": -22.049579620361328, + "step": 19170 + } + ], + "logging_steps": 1, + "max_steps": 19289, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 45, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}